/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
54 #include "tm-constrs.h"
58 #include "sched-int.h"
62 #include "diagnostic.h"
64 #include "tree-pass.h"
66 #include "pass_manager.h"
68 static rtx
legitimize_dllimport_symbol (rtx
, bool);
69 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
70 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
/* Default stack-probe limit when the target headers do not provide one;
   -1 means "no limit".  NOTE(review): the terminating #endif was dropped
   from the visible text; it is required to close this conditional.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below each carry five entries (QI, HI, SI, DI, other), so any
   mode other than the four listed maps to index 4.  NOTE(review): the
   final ": 4)" alternative was truncated from the visible text and is
   restored here to close the conditional chain.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy entry: unconditionally fall back to the
   library call.  Used for the table slot a CPU tuning does not define.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
90 static stringop_algs ix86_size_memcpy
[2] = {
91 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
92 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
93 static stringop_algs ix86_size_memset
[2] = {
94 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
95 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
98 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
99 COSTS_N_BYTES (2), /* cost of an add instruction */
100 COSTS_N_BYTES (3), /* cost of a lea instruction */
101 COSTS_N_BYTES (2), /* variable shift costs */
102 COSTS_N_BYTES (3), /* constant shift costs */
103 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
104 COSTS_N_BYTES (3), /* HI */
105 COSTS_N_BYTES (3), /* SI */
106 COSTS_N_BYTES (3), /* DI */
107 COSTS_N_BYTES (5)}, /* other */
108 0, /* cost of multiply per each bit set */
109 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
110 COSTS_N_BYTES (3), /* HI */
111 COSTS_N_BYTES (3), /* SI */
112 COSTS_N_BYTES (3), /* DI */
113 COSTS_N_BYTES (5)}, /* other */
114 COSTS_N_BYTES (3), /* cost of movsx */
115 COSTS_N_BYTES (3), /* cost of movzx */
116 0, /* "large" insn */
118 2, /* cost for loading QImode using movzbl */
119 {2, 2, 2}, /* cost of loading integer registers
120 in QImode, HImode and SImode.
121 Relative to reg-reg move (2). */
122 {2, 2, 2}, /* cost of storing integer registers */
123 2, /* cost of reg,reg fld/fst */
124 {2, 2, 2}, /* cost of loading fp registers
125 in SFmode, DFmode and XFmode */
126 {2, 2, 2}, /* cost of storing fp registers
127 in SFmode, DFmode and XFmode */
128 3, /* cost of moving MMX register */
129 {3, 3}, /* cost of loading MMX registers
130 in SImode and DImode */
131 {3, 3}, /* cost of storing MMX registers
132 in SImode and DImode */
133 3, /* cost of moving SSE register */
134 {3, 3, 3}, /* cost of loading SSE registers
135 in SImode, DImode and TImode */
136 {3, 3, 3}, /* cost of storing SSE registers
137 in SImode, DImode and TImode */
138 3, /* MMX or SSE register to integer */
139 0, /* size of l1 cache */
140 0, /* size of l2 cache */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
145 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
146 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
147 COSTS_N_BYTES (2), /* cost of FABS instruction. */
148 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
149 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
152 1, /* scalar_stmt_cost. */
153 1, /* scalar load_cost. */
154 1, /* scalar_store_cost. */
155 1, /* vec_stmt_cost. */
156 1, /* vec_to_scalar_cost. */
157 1, /* scalar_to_vec_cost. */
158 1, /* vec_align_load_cost. */
159 1, /* vec_unalign_load_cost. */
160 1, /* vec_store_cost. */
161 1, /* cond_taken_branch_cost. */
162 1, /* cond_not_taken_branch_cost. */
/* Processor costs (relative to an add) */
166 static stringop_algs i386_memcpy
[2] = {
167 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
168 DUMMY_STRINGOP_ALGS
};
169 static stringop_algs i386_memset
[2] = {
170 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
171 DUMMY_STRINGOP_ALGS
};
174 struct processor_costs i386_cost
= { /* 386 specific costs */
175 COSTS_N_INSNS (1), /* cost of an add instruction */
176 COSTS_N_INSNS (1), /* cost of a lea instruction */
177 COSTS_N_INSNS (3), /* variable shift costs */
178 COSTS_N_INSNS (2), /* constant shift costs */
179 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
180 COSTS_N_INSNS (6), /* HI */
181 COSTS_N_INSNS (6), /* SI */
182 COSTS_N_INSNS (6), /* DI */
183 COSTS_N_INSNS (6)}, /* other */
184 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
185 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
186 COSTS_N_INSNS (23), /* HI */
187 COSTS_N_INSNS (23), /* SI */
188 COSTS_N_INSNS (23), /* DI */
189 COSTS_N_INSNS (23)}, /* other */
190 COSTS_N_INSNS (3), /* cost of movsx */
191 COSTS_N_INSNS (2), /* cost of movzx */
192 15, /* "large" insn */
194 4, /* cost for loading QImode using movzbl */
195 {2, 4, 2}, /* cost of loading integer registers
196 in QImode, HImode and SImode.
197 Relative to reg-reg move (2). */
198 {2, 4, 2}, /* cost of storing integer registers */
199 2, /* cost of reg,reg fld/fst */
200 {8, 8, 8}, /* cost of loading fp registers
201 in SFmode, DFmode and XFmode */
202 {8, 8, 8}, /* cost of storing fp registers
203 in SFmode, DFmode and XFmode */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of l1 cache */
216 0, /* size of l2 cache */
217 0, /* size of prefetch block */
218 0, /* number of parallel prefetches */
220 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
221 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
222 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
223 COSTS_N_INSNS (22), /* cost of FABS instruction. */
224 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
225 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
228 1, /* scalar_stmt_cost. */
229 1, /* scalar load_cost. */
230 1, /* scalar_store_cost. */
231 1, /* vec_stmt_cost. */
232 1, /* vec_to_scalar_cost. */
233 1, /* scalar_to_vec_cost. */
234 1, /* vec_align_load_cost. */
235 2, /* vec_unalign_load_cost. */
236 1, /* vec_store_cost. */
237 3, /* cond_taken_branch_cost. */
238 1, /* cond_not_taken_branch_cost. */
241 static stringop_algs i486_memcpy
[2] = {
242 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
243 DUMMY_STRINGOP_ALGS
};
244 static stringop_algs i486_memset
[2] = {
245 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
246 DUMMY_STRINGOP_ALGS
};
249 struct processor_costs i486_cost
= { /* 486 specific costs */
250 COSTS_N_INSNS (1), /* cost of an add instruction */
251 COSTS_N_INSNS (1), /* cost of a lea instruction */
252 COSTS_N_INSNS (3), /* variable shift costs */
253 COSTS_N_INSNS (2), /* constant shift costs */
254 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
255 COSTS_N_INSNS (12), /* HI */
256 COSTS_N_INSNS (12), /* SI */
257 COSTS_N_INSNS (12), /* DI */
258 COSTS_N_INSNS (12)}, /* other */
259 1, /* cost of multiply per each bit set */
260 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
261 COSTS_N_INSNS (40), /* HI */
262 COSTS_N_INSNS (40), /* SI */
263 COSTS_N_INSNS (40), /* DI */
264 COSTS_N_INSNS (40)}, /* other */
265 COSTS_N_INSNS (3), /* cost of movsx */
266 COSTS_N_INSNS (2), /* cost of movzx */
267 15, /* "large" insn */
269 4, /* cost for loading QImode using movzbl */
270 {2, 4, 2}, /* cost of loading integer registers
271 in QImode, HImode and SImode.
272 Relative to reg-reg move (2). */
273 {2, 4, 2}, /* cost of storing integer registers */
274 2, /* cost of reg,reg fld/fst */
275 {8, 8, 8}, /* cost of loading fp registers
276 in SFmode, DFmode and XFmode */
277 {8, 8, 8}, /* cost of storing fp registers
278 in SFmode, DFmode and XFmode */
279 2, /* cost of moving MMX register */
280 {4, 8}, /* cost of loading MMX registers
281 in SImode and DImode */
282 {4, 8}, /* cost of storing MMX registers
283 in SImode and DImode */
284 2, /* cost of moving SSE register */
285 {4, 8, 16}, /* cost of loading SSE registers
286 in SImode, DImode and TImode */
287 {4, 8, 16}, /* cost of storing SSE registers
288 in SImode, DImode and TImode */
289 3, /* MMX or SSE register to integer */
290 4, /* size of l1 cache. 486 has 8kB cache
291 shared for code and data, so 4kB is
292 not really precise. */
293 4, /* size of l2 cache */
294 0, /* size of prefetch block */
295 0, /* number of parallel prefetches */
297 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
298 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
299 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
300 COSTS_N_INSNS (3), /* cost of FABS instruction. */
301 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
302 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
305 1, /* scalar_stmt_cost. */
306 1, /* scalar load_cost. */
307 1, /* scalar_store_cost. */
308 1, /* vec_stmt_cost. */
309 1, /* vec_to_scalar_cost. */
310 1, /* scalar_to_vec_cost. */
311 1, /* vec_align_load_cost. */
312 2, /* vec_unalign_load_cost. */
313 1, /* vec_store_cost. */
314 3, /* cond_taken_branch_cost. */
315 1, /* cond_not_taken_branch_cost. */
318 static stringop_algs pentium_memcpy
[2] = {
319 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
320 DUMMY_STRINGOP_ALGS
};
321 static stringop_algs pentium_memset
[2] = {
322 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
323 DUMMY_STRINGOP_ALGS
};
326 struct processor_costs pentium_cost
= {
327 COSTS_N_INSNS (1), /* cost of an add instruction */
328 COSTS_N_INSNS (1), /* cost of a lea instruction */
329 COSTS_N_INSNS (4), /* variable shift costs */
330 COSTS_N_INSNS (1), /* constant shift costs */
331 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
332 COSTS_N_INSNS (11), /* HI */
333 COSTS_N_INSNS (11), /* SI */
334 COSTS_N_INSNS (11), /* DI */
335 COSTS_N_INSNS (11)}, /* other */
336 0, /* cost of multiply per each bit set */
337 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
338 COSTS_N_INSNS (25), /* HI */
339 COSTS_N_INSNS (25), /* SI */
340 COSTS_N_INSNS (25), /* DI */
341 COSTS_N_INSNS (25)}, /* other */
342 COSTS_N_INSNS (3), /* cost of movsx */
343 COSTS_N_INSNS (2), /* cost of movzx */
344 8, /* "large" insn */
346 6, /* cost for loading QImode using movzbl */
347 {2, 4, 2}, /* cost of loading integer registers
348 in QImode, HImode and SImode.
349 Relative to reg-reg move (2). */
350 {2, 4, 2}, /* cost of storing integer registers */
351 2, /* cost of reg,reg fld/fst */
352 {2, 2, 6}, /* cost of loading fp registers
353 in SFmode, DFmode and XFmode */
354 {4, 4, 6}, /* cost of storing fp registers
355 in SFmode, DFmode and XFmode */
356 8, /* cost of moving MMX register */
357 {8, 8}, /* cost of loading MMX registers
358 in SImode and DImode */
359 {8, 8}, /* cost of storing MMX registers
360 in SImode and DImode */
361 2, /* cost of moving SSE register */
362 {4, 8, 16}, /* cost of loading SSE registers
363 in SImode, DImode and TImode */
364 {4, 8, 16}, /* cost of storing SSE registers
365 in SImode, DImode and TImode */
366 3, /* MMX or SSE register to integer */
367 8, /* size of l1 cache. */
368 8, /* size of l2 cache */
369 0, /* size of prefetch block */
370 0, /* number of parallel prefetches */
372 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
373 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
374 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
375 COSTS_N_INSNS (1), /* cost of FABS instruction. */
376 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
377 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
380 1, /* scalar_stmt_cost. */
381 1, /* scalar load_cost. */
382 1, /* scalar_store_cost. */
383 1, /* vec_stmt_cost. */
384 1, /* vec_to_scalar_cost. */
385 1, /* scalar_to_vec_cost. */
386 1, /* vec_align_load_cost. */
387 2, /* vec_unalign_load_cost. */
388 1, /* vec_store_cost. */
389 3, /* cond_taken_branch_cost. */
390 1, /* cond_not_taken_branch_cost. */
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks inline loop is still a
   noticeable win, for bigger blocks either rep movsl or rep movsb is
   way to go.  Rep movsb has apparently more expensive startup time in CPU,
   but after 4K the difference is down in the noise.  */
398 static stringop_algs pentiumpro_memcpy
[2] = {
399 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
400 {8192, rep_prefix_4_byte
, false},
401 {-1, rep_prefix_1_byte
, false}}},
402 DUMMY_STRINGOP_ALGS
};
403 static stringop_algs pentiumpro_memset
[2] = {
404 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
405 {8192, rep_prefix_4_byte
, false},
406 {-1, libcall
, false}}},
407 DUMMY_STRINGOP_ALGS
};
409 struct processor_costs pentiumpro_cost
= {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1), /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (4), /* HI */
416 COSTS_N_INSNS (4), /* SI */
417 COSTS_N_INSNS (4), /* DI */
418 COSTS_N_INSNS (4)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (17), /* HI */
422 COSTS_N_INSNS (17), /* SI */
423 COSTS_N_INSNS (17), /* DI */
424 COSTS_N_INSNS (17)}, /* other */
425 COSTS_N_INSNS (1), /* cost of movsx */
426 COSTS_N_INSNS (1), /* cost of movzx */
427 8, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 4, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 2, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 2, /* cost of moving MMX register */
440 {2, 2}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {2, 2}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {2, 2, 8}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {2, 2, 8}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 256, /* size of l2 cache */
452 32, /* size of prefetch block */
453 6, /* number of parallel prefetches */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (2), /* cost of FABS instruction. */
459 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
476 static stringop_algs geode_memcpy
[2] = {
477 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
478 DUMMY_STRINGOP_ALGS
};
479 static stringop_algs geode_memset
[2] = {
480 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
481 DUMMY_STRINGOP_ALGS
};
483 struct processor_costs geode_cost
= {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (1), /* cost of a lea instruction */
486 COSTS_N_INSNS (2), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (4), /* HI */
490 COSTS_N_INSNS (7), /* SI */
491 COSTS_N_INSNS (7), /* DI */
492 COSTS_N_INSNS (7)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (23), /* HI */
496 COSTS_N_INSNS (39), /* SI */
497 COSTS_N_INSNS (39), /* DI */
498 COSTS_N_INSNS (39)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
503 1, /* cost for loading QImode using movzbl */
504 {1, 1, 1}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {1, 1, 1}, /* cost of storing integer registers */
508 1, /* cost of reg,reg fld/fst */
509 {1, 1, 1}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {4, 6, 6}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
514 1, /* cost of moving MMX register */
515 {1, 1}, /* cost of loading MMX registers
516 in SImode and DImode */
517 {1, 1}, /* cost of storing MMX registers
518 in SImode and DImode */
519 1, /* cost of moving SSE register */
520 {1, 1, 1}, /* cost of loading SSE registers
521 in SImode, DImode and TImode */
522 {1, 1, 1}, /* cost of storing SSE registers
523 in SImode, DImode and TImode */
524 1, /* MMX or SSE register to integer */
525 64, /* size of l1 cache. */
526 128, /* size of l2 cache. */
527 32, /* size of prefetch block */
528 1, /* number of parallel prefetches */
530 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (1), /* cost of FABS instruction. */
534 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
538 1, /* scalar_stmt_cost. */
539 1, /* scalar load_cost. */
540 1, /* scalar_store_cost. */
541 1, /* vec_stmt_cost. */
542 1, /* vec_to_scalar_cost. */
543 1, /* scalar_to_vec_cost. */
544 1, /* vec_align_load_cost. */
545 2, /* vec_unalign_load_cost. */
546 1, /* vec_store_cost. */
547 3, /* cond_taken_branch_cost. */
548 1, /* cond_not_taken_branch_cost. */
551 static stringop_algs k6_memcpy
[2] = {
552 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
553 DUMMY_STRINGOP_ALGS
};
554 static stringop_algs k6_memset
[2] = {
555 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
556 DUMMY_STRINGOP_ALGS
};
558 struct processor_costs k6_cost
= {
559 COSTS_N_INSNS (1), /* cost of an add instruction */
560 COSTS_N_INSNS (2), /* cost of a lea instruction */
561 COSTS_N_INSNS (1), /* variable shift costs */
562 COSTS_N_INSNS (1), /* constant shift costs */
563 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
564 COSTS_N_INSNS (3), /* HI */
565 COSTS_N_INSNS (3), /* SI */
566 COSTS_N_INSNS (3), /* DI */
567 COSTS_N_INSNS (3)}, /* other */
568 0, /* cost of multiply per each bit set */
569 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
570 COSTS_N_INSNS (18), /* HI */
571 COSTS_N_INSNS (18), /* SI */
572 COSTS_N_INSNS (18), /* DI */
573 COSTS_N_INSNS (18)}, /* other */
574 COSTS_N_INSNS (2), /* cost of movsx */
575 COSTS_N_INSNS (2), /* cost of movzx */
576 8, /* "large" insn */
578 3, /* cost for loading QImode using movzbl */
579 {4, 5, 4}, /* cost of loading integer registers
580 in QImode, HImode and SImode.
581 Relative to reg-reg move (2). */
582 {2, 3, 2}, /* cost of storing integer registers */
583 4, /* cost of reg,reg fld/fst */
584 {6, 6, 6}, /* cost of loading fp registers
585 in SFmode, DFmode and XFmode */
586 {4, 4, 4}, /* cost of storing fp registers
587 in SFmode, DFmode and XFmode */
588 2, /* cost of moving MMX register */
589 {2, 2}, /* cost of loading MMX registers
590 in SImode and DImode */
591 {2, 2}, /* cost of storing MMX registers
592 in SImode and DImode */
593 2, /* cost of moving SSE register */
594 {2, 2, 8}, /* cost of loading SSE registers
595 in SImode, DImode and TImode */
596 {2, 2, 8}, /* cost of storing SSE registers
597 in SImode, DImode and TImode */
598 6, /* MMX or SSE register to integer */
599 32, /* size of l1 cache. */
600 32, /* size of l2 cache. Some models
601 have integrated l2 cache, but
602 optimizing for k6 is not important
603 enough to worry about that. */
604 32, /* size of prefetch block */
605 1, /* number of parallel prefetches */
607 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
608 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
609 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
610 COSTS_N_INSNS (2), /* cost of FABS instruction. */
611 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
612 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
615 1, /* scalar_stmt_cost. */
616 1, /* scalar load_cost. */
617 1, /* scalar_store_cost. */
618 1, /* vec_stmt_cost. */
619 1, /* vec_to_scalar_cost. */
620 1, /* scalar_to_vec_cost. */
621 1, /* vec_align_load_cost. */
622 2, /* vec_unalign_load_cost. */
623 1, /* vec_store_cost. */
624 3, /* cond_taken_branch_cost. */
625 1, /* cond_not_taken_branch_cost. */
/* For some reason, Athlon deals better with REP prefix (relative to loops)
   compared to K8. Alignment becomes important after 8 bytes for memcpy and
   128 bytes for memset.  */
631 static stringop_algs athlon_memcpy
[2] = {
632 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
633 DUMMY_STRINGOP_ALGS
};
634 static stringop_algs athlon_memset
[2] = {
635 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
636 DUMMY_STRINGOP_ALGS
};
638 struct processor_costs athlon_cost
= {
639 COSTS_N_INSNS (1), /* cost of an add instruction */
640 COSTS_N_INSNS (2), /* cost of a lea instruction */
641 COSTS_N_INSNS (1), /* variable shift costs */
642 COSTS_N_INSNS (1), /* constant shift costs */
643 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
644 COSTS_N_INSNS (5), /* HI */
645 COSTS_N_INSNS (5), /* SI */
646 COSTS_N_INSNS (5), /* DI */
647 COSTS_N_INSNS (5)}, /* other */
648 0, /* cost of multiply per each bit set */
649 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
650 COSTS_N_INSNS (26), /* HI */
651 COSTS_N_INSNS (42), /* SI */
652 COSTS_N_INSNS (74), /* DI */
653 COSTS_N_INSNS (74)}, /* other */
654 COSTS_N_INSNS (1), /* cost of movsx */
655 COSTS_N_INSNS (1), /* cost of movzx */
656 8, /* "large" insn */
658 4, /* cost for loading QImode using movzbl */
659 {3, 4, 3}, /* cost of loading integer registers
660 in QImode, HImode and SImode.
661 Relative to reg-reg move (2). */
662 {3, 4, 3}, /* cost of storing integer registers */
663 4, /* cost of reg,reg fld/fst */
664 {4, 4, 12}, /* cost of loading fp registers
665 in SFmode, DFmode and XFmode */
666 {6, 6, 8}, /* cost of storing fp registers
667 in SFmode, DFmode and XFmode */
668 2, /* cost of moving MMX register */
669 {4, 4}, /* cost of loading MMX registers
670 in SImode and DImode */
671 {4, 4}, /* cost of storing MMX registers
672 in SImode and DImode */
673 2, /* cost of moving SSE register */
674 {4, 4, 6}, /* cost of loading SSE registers
675 in SImode, DImode and TImode */
676 {4, 4, 5}, /* cost of storing SSE registers
677 in SImode, DImode and TImode */
678 5, /* MMX or SSE register to integer */
679 64, /* size of l1 cache. */
680 256, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 6, /* number of parallel prefetches */
684 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
685 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
686 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
687 COSTS_N_INSNS (2), /* cost of FABS instruction. */
688 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
689 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
692 1, /* scalar_stmt_cost. */
693 1, /* scalar load_cost. */
694 1, /* scalar_store_cost. */
695 1, /* vec_stmt_cost. */
696 1, /* vec_to_scalar_cost. */
697 1, /* scalar_to_vec_cost. */
698 1, /* vec_align_load_cost. */
699 2, /* vec_unalign_load_cost. */
700 1, /* vec_store_cost. */
701 3, /* cond_taken_branch_cost. */
702 1, /* cond_not_taken_branch_cost. */
/* K8 has optimized REP instruction for medium sized blocks, but for very
   small blocks it is better to use loop. For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
708 static stringop_algs k8_memcpy
[2] = {
709 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
710 {-1, rep_prefix_4_byte
, false}}},
711 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
712 {-1, libcall
, false}}}};
713 static stringop_algs k8_memset
[2] = {
714 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
715 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
716 {libcall
, {{48, unrolled_loop
, false},
717 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
719 struct processor_costs k8_cost
= {
720 COSTS_N_INSNS (1), /* cost of an add instruction */
721 COSTS_N_INSNS (2), /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (5)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
739 4, /* cost for loading QImode using movzbl */
740 {3, 4, 3}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {3, 4, 3}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {4, 4, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {3, 3}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {4, 4}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {4, 3, 6}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {4, 4, 5}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of l1 cache. */
761 512, /* size of l2 cache. */
762 64, /* size of prefetch block */
763 /* New AMD processors never drop prefetches; if they cannot be performed
764 immediately, they are queued. We set number of simultaneous prefetches
765 to a large constant to reflect this (it probably is not a good idea not
766 to limit number of prefetches at all, as their execution also takes some
768 100, /* number of parallel prefetches */
770 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
771 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
772 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
773 COSTS_N_INSNS (2), /* cost of FABS instruction. */
774 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
775 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
779 4, /* scalar_stmt_cost. */
780 2, /* scalar load_cost. */
781 2, /* scalar_store_cost. */
782 5, /* vec_stmt_cost. */
783 0, /* vec_to_scalar_cost. */
784 2, /* scalar_to_vec_cost. */
785 2, /* vec_align_load_cost. */
786 3, /* vec_unalign_load_cost. */
787 3, /* vec_store_cost. */
788 3, /* cond_taken_branch_cost. */
789 2, /* cond_not_taken_branch_cost. */
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
   very small blocks it is better to use loop. For large blocks, libcall can
   do nontemporary accesses and beat inline considerably.  */
795 static stringop_algs amdfam10_memcpy
[2] = {
796 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
797 {-1, rep_prefix_4_byte
, false}}},
798 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
799 {-1, libcall
, false}}}};
800 static stringop_algs amdfam10_memset
[2] = {
801 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
802 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
803 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
804 {-1, libcall
, false}}}};
805 struct processor_costs amdfam10_cost
= {
806 COSTS_N_INSNS (1), /* cost of an add instruction */
807 COSTS_N_INSNS (2), /* cost of a lea instruction */
808 COSTS_N_INSNS (1), /* variable shift costs */
809 COSTS_N_INSNS (1), /* constant shift costs */
810 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
811 COSTS_N_INSNS (4), /* HI */
812 COSTS_N_INSNS (3), /* SI */
813 COSTS_N_INSNS (4), /* DI */
814 COSTS_N_INSNS (5)}, /* other */
815 0, /* cost of multiply per each bit set */
816 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
817 COSTS_N_INSNS (35), /* HI */
818 COSTS_N_INSNS (51), /* SI */
819 COSTS_N_INSNS (83), /* DI */
820 COSTS_N_INSNS (83)}, /* other */
821 COSTS_N_INSNS (1), /* cost of movsx */
822 COSTS_N_INSNS (1), /* cost of movzx */
823 8, /* "large" insn */
825 4, /* cost for loading QImode using movzbl */
826 {3, 4, 3}, /* cost of loading integer registers
827 in QImode, HImode and SImode.
828 Relative to reg-reg move (2). */
829 {3, 4, 3}, /* cost of storing integer registers */
830 4, /* cost of reg,reg fld/fst */
831 {4, 4, 12}, /* cost of loading fp registers
832 in SFmode, DFmode and XFmode */
833 {6, 6, 8}, /* cost of storing fp registers
834 in SFmode, DFmode and XFmode */
835 2, /* cost of moving MMX register */
836 {3, 3}, /* cost of loading MMX registers
837 in SImode and DImode */
838 {4, 4}, /* cost of storing MMX registers
839 in SImode and DImode */
840 2, /* cost of moving SSE register */
841 {4, 4, 3}, /* cost of loading SSE registers
842 in SImode, DImode and TImode */
843 {4, 4, 5}, /* cost of storing SSE registers
844 in SImode, DImode and TImode */
845 3, /* MMX or SSE register to integer */
847 MOVD reg64, xmmreg Double FSTORE 4
848 MOVD reg32, xmmreg Double FSTORE 4
850 MOVD reg64, xmmreg Double FADD 3
852 MOVD reg32, xmmreg Double FADD 3
854 64, /* size of l1 cache. */
855 512, /* size of l2 cache. */
856 64, /* size of prefetch block */
857 /* New AMD processors never drop prefetches; if they cannot be performed
858 immediately, they are queued. We set number of simultaneous prefetches
859 to a large constant to reflect this (it probably is not a good idea not
860 to limit number of prefetches at all, as their execution also takes some
862 100, /* number of parallel prefetches */
864 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
865 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
866 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
867 COSTS_N_INSNS (2), /* cost of FABS instruction. */
868 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
869 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
873 4, /* scalar_stmt_cost. */
874 2, /* scalar load_cost. */
875 2, /* scalar_store_cost. */
876 6, /* vec_stmt_cost. */
877 0, /* vec_to_scalar_cost. */
878 2, /* scalar_to_vec_cost. */
879 2, /* vec_align_load_cost. */
880 2, /* vec_unalign_load_cost. */
881 2, /* vec_store_cost. */
882 2, /* cond_taken_branch_cost. */
883 1, /* cond_not_taken_branch_cost. */
886 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
887 very small blocks it is better to use loop. For large blocks, libcall
888 can do nontemporary accesses and beat inline considerably. */
889 static stringop_algs bdver1_memcpy
[2] = {
890 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
891 {-1, rep_prefix_4_byte
, false}}},
892 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
893 {-1, libcall
, false}}}};
894 static stringop_algs bdver1_memset
[2] = {
895 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
896 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
897 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
898 {-1, libcall
, false}}}};
900 const struct processor_costs bdver1_cost
= {
901 COSTS_N_INSNS (1), /* cost of an add instruction */
902 COSTS_N_INSNS (1), /* cost of a lea instruction */
903 COSTS_N_INSNS (1), /* variable shift costs */
904 COSTS_N_INSNS (1), /* constant shift costs */
905 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
906 COSTS_N_INSNS (4), /* HI */
907 COSTS_N_INSNS (4), /* SI */
908 COSTS_N_INSNS (6), /* DI */
909 COSTS_N_INSNS (6)}, /* other */
910 0, /* cost of multiply per each bit set */
911 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
912 COSTS_N_INSNS (35), /* HI */
913 COSTS_N_INSNS (51), /* SI */
914 COSTS_N_INSNS (83), /* DI */
915 COSTS_N_INSNS (83)}, /* other */
916 COSTS_N_INSNS (1), /* cost of movsx */
917 COSTS_N_INSNS (1), /* cost of movzx */
918 8, /* "large" insn */
920 4, /* cost for loading QImode using movzbl */
921 {5, 5, 4}, /* cost of loading integer registers
922 in QImode, HImode and SImode.
923 Relative to reg-reg move (2). */
924 {4, 4, 4}, /* cost of storing integer registers */
925 2, /* cost of reg,reg fld/fst */
926 {5, 5, 12}, /* cost of loading fp registers
927 in SFmode, DFmode and XFmode */
928 {4, 4, 8}, /* cost of storing fp registers
929 in SFmode, DFmode and XFmode */
930 2, /* cost of moving MMX register */
931 {4, 4}, /* cost of loading MMX registers
932 in SImode and DImode */
933 {4, 4}, /* cost of storing MMX registers
934 in SImode and DImode */
935 2, /* cost of moving SSE register */
936 {4, 4, 4}, /* cost of loading SSE registers
937 in SImode, DImode and TImode */
938 {4, 4, 4}, /* cost of storing SSE registers
939 in SImode, DImode and TImode */
940 2, /* MMX or SSE register to integer */
942 MOVD reg64, xmmreg Double FSTORE 4
943 MOVD reg32, xmmreg Double FSTORE 4
945 MOVD reg64, xmmreg Double FADD 3
947 MOVD reg32, xmmreg Double FADD 3
949 16, /* size of l1 cache. */
950 2048, /* size of l2 cache. */
951 64, /* size of prefetch block */
952 /* New AMD processors never drop prefetches; if they cannot be performed
953 immediately, they are queued. We set number of simultaneous prefetches
954 to a large constant to reflect this (it probably is not a good idea not
955 to limit number of prefetches at all, as their execution also takes some
957 100, /* number of parallel prefetches */
959 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
960 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
961 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
962 COSTS_N_INSNS (2), /* cost of FABS instruction. */
963 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
964 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
968 6, /* scalar_stmt_cost. */
969 4, /* scalar load_cost. */
970 4, /* scalar_store_cost. */
971 6, /* vec_stmt_cost. */
972 0, /* vec_to_scalar_cost. */
973 2, /* scalar_to_vec_cost. */
974 4, /* vec_align_load_cost. */
975 4, /* vec_unalign_load_cost. */
976 4, /* vec_store_cost. */
977 2, /* cond_taken_branch_cost. */
978 1, /* cond_not_taken_branch_cost. */
981 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
982 very small blocks it is better to use loop. For large blocks, libcall
983 can do nontemporary accesses and beat inline considerably. */
985 static stringop_algs bdver2_memcpy
[2] = {
986 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
987 {-1, rep_prefix_4_byte
, false}}},
988 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
989 {-1, libcall
, false}}}};
990 static stringop_algs bdver2_memset
[2] = {
991 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
992 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
993 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
994 {-1, libcall
, false}}}};
996 const struct processor_costs bdver2_cost
= {
997 COSTS_N_INSNS (1), /* cost of an add instruction */
998 COSTS_N_INSNS (1), /* cost of a lea instruction */
999 COSTS_N_INSNS (1), /* variable shift costs */
1000 COSTS_N_INSNS (1), /* constant shift costs */
1001 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1002 COSTS_N_INSNS (4), /* HI */
1003 COSTS_N_INSNS (4), /* SI */
1004 COSTS_N_INSNS (6), /* DI */
1005 COSTS_N_INSNS (6)}, /* other */
1006 0, /* cost of multiply per each bit set */
1007 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1008 COSTS_N_INSNS (35), /* HI */
1009 COSTS_N_INSNS (51), /* SI */
1010 COSTS_N_INSNS (83), /* DI */
1011 COSTS_N_INSNS (83)}, /* other */
1012 COSTS_N_INSNS (1), /* cost of movsx */
1013 COSTS_N_INSNS (1), /* cost of movzx */
1014 8, /* "large" insn */
1016 4, /* cost for loading QImode using movzbl */
1017 {5, 5, 4}, /* cost of loading integer registers
1018 in QImode, HImode and SImode.
1019 Relative to reg-reg move (2). */
1020 {4, 4, 4}, /* cost of storing integer registers */
1021 2, /* cost of reg,reg fld/fst */
1022 {5, 5, 12}, /* cost of loading fp registers
1023 in SFmode, DFmode and XFmode */
1024 {4, 4, 8}, /* cost of storing fp registers
1025 in SFmode, DFmode and XFmode */
1026 2, /* cost of moving MMX register */
1027 {4, 4}, /* cost of loading MMX registers
1028 in SImode and DImode */
1029 {4, 4}, /* cost of storing MMX registers
1030 in SImode and DImode */
1031 2, /* cost of moving SSE register */
1032 {4, 4, 4}, /* cost of loading SSE registers
1033 in SImode, DImode and TImode */
1034 {4, 4, 4}, /* cost of storing SSE registers
1035 in SImode, DImode and TImode */
1036 2, /* MMX or SSE register to integer */
1038 MOVD reg64, xmmreg Double FSTORE 4
1039 MOVD reg32, xmmreg Double FSTORE 4
1041 MOVD reg64, xmmreg Double FADD 3
1043 MOVD reg32, xmmreg Double FADD 3
1045 16, /* size of l1 cache. */
1046 2048, /* size of l2 cache. */
1047 64, /* size of prefetch block */
1048 /* New AMD processors never drop prefetches; if they cannot be performed
1049 immediately, they are queued. We set number of simultaneous prefetches
1050 to a large constant to reflect this (it probably is not a good idea not
1051 to limit number of prefetches at all, as their execution also takes some
1053 100, /* number of parallel prefetches */
1054 2, /* Branch cost */
1055 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1056 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1057 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1058 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1059 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1060 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1064 6, /* scalar_stmt_cost. */
1065 4, /* scalar load_cost. */
1066 4, /* scalar_store_cost. */
1067 6, /* vec_stmt_cost. */
1068 0, /* vec_to_scalar_cost. */
1069 2, /* scalar_to_vec_cost. */
1070 4, /* vec_align_load_cost. */
1071 4, /* vec_unalign_load_cost. */
1072 4, /* vec_store_cost. */
1073 2, /* cond_taken_branch_cost. */
1074 1, /* cond_not_taken_branch_cost. */
1078 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1079 very small blocks it is better to use loop. For large blocks, libcall
1080 can do nontemporary accesses and beat inline considerably. */
1081 static stringop_algs bdver3_memcpy
[2] = {
1082 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1083 {-1, rep_prefix_4_byte
, false}}},
1084 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}};
1086 static stringop_algs bdver3_memset
[2] = {
1087 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1088 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1089 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1090 {-1, libcall
, false}}}};
1091 struct processor_costs bdver3_cost
= {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (1), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (4), /* SI */
1099 COSTS_N_INSNS (6), /* DI */
1100 COSTS_N_INSNS (6)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (35), /* HI */
1104 COSTS_N_INSNS (51), /* SI */
1105 COSTS_N_INSNS (83), /* DI */
1106 COSTS_N_INSNS (83)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {5, 5, 4}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {4, 4, 4}, /* cost of storing integer registers */
1116 2, /* cost of reg,reg fld/fst */
1117 {5, 5, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {4, 4, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {4, 4}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 4, 4}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 4}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 2, /* MMX or SSE register to integer */
1132 16, /* size of l1 cache. */
1133 2048, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 2, /* Branch cost */
1142 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1151 6, /* scalar_stmt_cost. */
1152 4, /* scalar load_cost. */
1153 4, /* scalar_store_cost. */
1154 6, /* vec_stmt_cost. */
1155 0, /* vec_to_scalar_cost. */
1156 2, /* scalar_to_vec_cost. */
1157 4, /* vec_align_load_cost. */
1158 4, /* vec_unalign_load_cost. */
1159 4, /* vec_store_cost. */
1160 2, /* cond_taken_branch_cost. */
1161 1, /* cond_not_taken_branch_cost. */
1164 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1165 very small blocks it is better to use loop. For large blocks, libcall
1166 can do nontemporary accesses and beat inline considerably. */
1167 static stringop_algs bdver4_memcpy
[2] = {
1168 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1169 {-1, rep_prefix_4_byte
, false}}},
1170 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1171 {-1, libcall
, false}}}};
1172 static stringop_algs bdver4_memset
[2] = {
1173 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1174 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1175 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1176 {-1, libcall
, false}}}};
1177 struct processor_costs bdver4_cost
= {
1178 COSTS_N_INSNS (1), /* cost of an add instruction */
1179 COSTS_N_INSNS (1), /* cost of a lea instruction */
1180 COSTS_N_INSNS (1), /* variable shift costs */
1181 COSTS_N_INSNS (1), /* constant shift costs */
1182 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1183 COSTS_N_INSNS (4), /* HI */
1184 COSTS_N_INSNS (4), /* SI */
1185 COSTS_N_INSNS (6), /* DI */
1186 COSTS_N_INSNS (6)}, /* other */
1187 0, /* cost of multiply per each bit set */
1188 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1189 COSTS_N_INSNS (35), /* HI */
1190 COSTS_N_INSNS (51), /* SI */
1191 COSTS_N_INSNS (83), /* DI */
1192 COSTS_N_INSNS (83)}, /* other */
1193 COSTS_N_INSNS (1), /* cost of movsx */
1194 COSTS_N_INSNS (1), /* cost of movzx */
1195 8, /* "large" insn */
1197 4, /* cost for loading QImode using movzbl */
1198 {5, 5, 4}, /* cost of loading integer registers
1199 in QImode, HImode and SImode.
1200 Relative to reg-reg move (2). */
1201 {4, 4, 4}, /* cost of storing integer registers */
1202 2, /* cost of reg,reg fld/fst */
1203 {5, 5, 12}, /* cost of loading fp registers
1204 in SFmode, DFmode and XFmode */
1205 {4, 4, 8}, /* cost of storing fp registers
1206 in SFmode, DFmode and XFmode */
1207 2, /* cost of moving MMX register */
1208 {4, 4}, /* cost of loading MMX registers
1209 in SImode and DImode */
1210 {4, 4}, /* cost of storing MMX registers
1211 in SImode and DImode */
1212 2, /* cost of moving SSE register */
1213 {4, 4, 4}, /* cost of loading SSE registers
1214 in SImode, DImode and TImode */
1215 {4, 4, 4}, /* cost of storing SSE registers
1216 in SImode, DImode and TImode */
1217 2, /* MMX or SSE register to integer */
1218 16, /* size of l1 cache. */
1219 2048, /* size of l2 cache. */
1220 64, /* size of prefetch block */
1221 /* New AMD processors never drop prefetches; if they cannot be performed
1222 immediately, they are queued. We set number of simultaneous prefetches
1223 to a large constant to reflect this (it probably is not a good idea not
1224 to limit number of prefetches at all, as their execution also takes some
1226 100, /* number of parallel prefetches */
1227 2, /* Branch cost */
1228 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1229 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1230 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1231 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1232 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1233 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1237 6, /* scalar_stmt_cost. */
1238 4, /* scalar load_cost. */
1239 4, /* scalar_store_cost. */
1240 6, /* vec_stmt_cost. */
1241 0, /* vec_to_scalar_cost. */
1242 2, /* scalar_to_vec_cost. */
1243 4, /* vec_align_load_cost. */
1244 4, /* vec_unalign_load_cost. */
1245 4, /* vec_store_cost. */
1246 2, /* cond_taken_branch_cost. */
1247 1, /* cond_not_taken_branch_cost. */
1250 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1251 very small blocks it is better to use loop. For large blocks, libcall can
1252 do nontemporary accesses and beat inline considerably. */
1253 static stringop_algs btver1_memcpy
[2] = {
1254 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1255 {-1, rep_prefix_4_byte
, false}}},
1256 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1257 {-1, libcall
, false}}}};
1258 static stringop_algs btver1_memset
[2] = {
1259 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1260 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1261 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1262 {-1, libcall
, false}}}};
1263 const struct processor_costs btver1_cost
= {
1264 COSTS_N_INSNS (1), /* cost of an add instruction */
1265 COSTS_N_INSNS (2), /* cost of a lea instruction */
1266 COSTS_N_INSNS (1), /* variable shift costs */
1267 COSTS_N_INSNS (1), /* constant shift costs */
1268 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1269 COSTS_N_INSNS (4), /* HI */
1270 COSTS_N_INSNS (3), /* SI */
1271 COSTS_N_INSNS (4), /* DI */
1272 COSTS_N_INSNS (5)}, /* other */
1273 0, /* cost of multiply per each bit set */
1274 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1275 COSTS_N_INSNS (35), /* HI */
1276 COSTS_N_INSNS (51), /* SI */
1277 COSTS_N_INSNS (83), /* DI */
1278 COSTS_N_INSNS (83)}, /* other */
1279 COSTS_N_INSNS (1), /* cost of movsx */
1280 COSTS_N_INSNS (1), /* cost of movzx */
1281 8, /* "large" insn */
1283 4, /* cost for loading QImode using movzbl */
1284 {3, 4, 3}, /* cost of loading integer registers
1285 in QImode, HImode and SImode.
1286 Relative to reg-reg move (2). */
1287 {3, 4, 3}, /* cost of storing integer registers */
1288 4, /* cost of reg,reg fld/fst */
1289 {4, 4, 12}, /* cost of loading fp registers
1290 in SFmode, DFmode and XFmode */
1291 {6, 6, 8}, /* cost of storing fp registers
1292 in SFmode, DFmode and XFmode */
1293 2, /* cost of moving MMX register */
1294 {3, 3}, /* cost of loading MMX registers
1295 in SImode and DImode */
1296 {4, 4}, /* cost of storing MMX registers
1297 in SImode and DImode */
1298 2, /* cost of moving SSE register */
1299 {4, 4, 3}, /* cost of loading SSE registers
1300 in SImode, DImode and TImode */
1301 {4, 4, 5}, /* cost of storing SSE registers
1302 in SImode, DImode and TImode */
1303 3, /* MMX or SSE register to integer */
1305 MOVD reg64, xmmreg Double FSTORE 4
1306 MOVD reg32, xmmreg Double FSTORE 4
1308 MOVD reg64, xmmreg Double FADD 3
1310 MOVD reg32, xmmreg Double FADD 3
1312 32, /* size of l1 cache. */
1313 512, /* size of l2 cache. */
1314 64, /* size of prefetch block */
1315 100, /* number of parallel prefetches */
1316 2, /* Branch cost */
1317 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1318 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1319 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1320 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1321 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1322 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1326 4, /* scalar_stmt_cost. */
1327 2, /* scalar load_cost. */
1328 2, /* scalar_store_cost. */
1329 6, /* vec_stmt_cost. */
1330 0, /* vec_to_scalar_cost. */
1331 2, /* scalar_to_vec_cost. */
1332 2, /* vec_align_load_cost. */
1333 2, /* vec_unalign_load_cost. */
1334 2, /* vec_store_cost. */
1335 2, /* cond_taken_branch_cost. */
1336 1, /* cond_not_taken_branch_cost. */
1339 static stringop_algs btver2_memcpy
[2] = {
1340 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1341 {-1, rep_prefix_4_byte
, false}}},
1342 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1343 {-1, libcall
, false}}}};
1344 static stringop_algs btver2_memset
[2] = {
1345 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1346 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1347 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1348 {-1, libcall
, false}}}};
1349 const struct processor_costs btver2_cost
= {
1350 COSTS_N_INSNS (1), /* cost of an add instruction */
1351 COSTS_N_INSNS (2), /* cost of a lea instruction */
1352 COSTS_N_INSNS (1), /* variable shift costs */
1353 COSTS_N_INSNS (1), /* constant shift costs */
1354 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1355 COSTS_N_INSNS (4), /* HI */
1356 COSTS_N_INSNS (3), /* SI */
1357 COSTS_N_INSNS (4), /* DI */
1358 COSTS_N_INSNS (5)}, /* other */
1359 0, /* cost of multiply per each bit set */
1360 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1361 COSTS_N_INSNS (35), /* HI */
1362 COSTS_N_INSNS (51), /* SI */
1363 COSTS_N_INSNS (83), /* DI */
1364 COSTS_N_INSNS (83)}, /* other */
1365 COSTS_N_INSNS (1), /* cost of movsx */
1366 COSTS_N_INSNS (1), /* cost of movzx */
1367 8, /* "large" insn */
1369 4, /* cost for loading QImode using movzbl */
1370 {3, 4, 3}, /* cost of loading integer registers
1371 in QImode, HImode and SImode.
1372 Relative to reg-reg move (2). */
1373 {3, 4, 3}, /* cost of storing integer registers */
1374 4, /* cost of reg,reg fld/fst */
1375 {4, 4, 12}, /* cost of loading fp registers
1376 in SFmode, DFmode and XFmode */
1377 {6, 6, 8}, /* cost of storing fp registers
1378 in SFmode, DFmode and XFmode */
1379 2, /* cost of moving MMX register */
1380 {3, 3}, /* cost of loading MMX registers
1381 in SImode and DImode */
1382 {4, 4}, /* cost of storing MMX registers
1383 in SImode and DImode */
1384 2, /* cost of moving SSE register */
1385 {4, 4, 3}, /* cost of loading SSE registers
1386 in SImode, DImode and TImode */
1387 {4, 4, 5}, /* cost of storing SSE registers
1388 in SImode, DImode and TImode */
1389 3, /* MMX or SSE register to integer */
1391 MOVD reg64, xmmreg Double FSTORE 4
1392 MOVD reg32, xmmreg Double FSTORE 4
1394 MOVD reg64, xmmreg Double FADD 3
1396 MOVD reg32, xmmreg Double FADD 3
1398 32, /* size of l1 cache. */
1399 2048, /* size of l2 cache. */
1400 64, /* size of prefetch block */
1401 100, /* number of parallel prefetches */
1402 2, /* Branch cost */
1403 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1404 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1405 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1406 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1407 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1408 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1411 4, /* scalar_stmt_cost. */
1412 2, /* scalar load_cost. */
1413 2, /* scalar_store_cost. */
1414 6, /* vec_stmt_cost. */
1415 0, /* vec_to_scalar_cost. */
1416 2, /* scalar_to_vec_cost. */
1417 2, /* vec_align_load_cost. */
1418 2, /* vec_unalign_load_cost. */
1419 2, /* vec_store_cost. */
1420 2, /* cond_taken_branch_cost. */
1421 1, /* cond_not_taken_branch_cost. */
1424 static stringop_algs pentium4_memcpy
[2] = {
1425 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1426 DUMMY_STRINGOP_ALGS
};
1427 static stringop_algs pentium4_memset
[2] = {
1428 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1429 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1430 DUMMY_STRINGOP_ALGS
};
1433 struct processor_costs pentium4_cost
= {
1434 COSTS_N_INSNS (1), /* cost of an add instruction */
1435 COSTS_N_INSNS (3), /* cost of a lea instruction */
1436 COSTS_N_INSNS (4), /* variable shift costs */
1437 COSTS_N_INSNS (4), /* constant shift costs */
1438 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1439 COSTS_N_INSNS (15), /* HI */
1440 COSTS_N_INSNS (15), /* SI */
1441 COSTS_N_INSNS (15), /* DI */
1442 COSTS_N_INSNS (15)}, /* other */
1443 0, /* cost of multiply per each bit set */
1444 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1445 COSTS_N_INSNS (56), /* HI */
1446 COSTS_N_INSNS (56), /* SI */
1447 COSTS_N_INSNS (56), /* DI */
1448 COSTS_N_INSNS (56)}, /* other */
1449 COSTS_N_INSNS (1), /* cost of movsx */
1450 COSTS_N_INSNS (1), /* cost of movzx */
1451 16, /* "large" insn */
1453 2, /* cost for loading QImode using movzbl */
1454 {4, 5, 4}, /* cost of loading integer registers
1455 in QImode, HImode and SImode.
1456 Relative to reg-reg move (2). */
1457 {2, 3, 2}, /* cost of storing integer registers */
1458 2, /* cost of reg,reg fld/fst */
1459 {2, 2, 6}, /* cost of loading fp registers
1460 in SFmode, DFmode and XFmode */
1461 {4, 4, 6}, /* cost of storing fp registers
1462 in SFmode, DFmode and XFmode */
1463 2, /* cost of moving MMX register */
1464 {2, 2}, /* cost of loading MMX registers
1465 in SImode and DImode */
1466 {2, 2}, /* cost of storing MMX registers
1467 in SImode and DImode */
1468 12, /* cost of moving SSE register */
1469 {12, 12, 12}, /* cost of loading SSE registers
1470 in SImode, DImode and TImode */
1471 {2, 2, 8}, /* cost of storing SSE registers
1472 in SImode, DImode and TImode */
1473 10, /* MMX or SSE register to integer */
1474 8, /* size of l1 cache. */
1475 256, /* size of l2 cache. */
1476 64, /* size of prefetch block */
1477 6, /* number of parallel prefetches */
1478 2, /* Branch cost */
1479 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1480 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1481 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1482 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1483 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1484 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1487 1, /* scalar_stmt_cost. */
1488 1, /* scalar load_cost. */
1489 1, /* scalar_store_cost. */
1490 1, /* vec_stmt_cost. */
1491 1, /* vec_to_scalar_cost. */
1492 1, /* scalar_to_vec_cost. */
1493 1, /* vec_align_load_cost. */
1494 2, /* vec_unalign_load_cost. */
1495 1, /* vec_store_cost. */
1496 3, /* cond_taken_branch_cost. */
1497 1, /* cond_not_taken_branch_cost. */
1500 static stringop_algs nocona_memcpy
[2] = {
1501 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1502 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1503 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1505 static stringop_algs nocona_memset
[2] = {
1506 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1507 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1508 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1509 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1512 struct processor_costs nocona_cost
= {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (1), /* cost of a lea instruction */
1515 COSTS_N_INSNS (1), /* variable shift costs */
1516 COSTS_N_INSNS (1), /* constant shift costs */
1517 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (10), /* HI */
1519 COSTS_N_INSNS (10), /* SI */
1520 COSTS_N_INSNS (10), /* DI */
1521 COSTS_N_INSNS (10)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (66), /* HI */
1525 COSTS_N_INSNS (66), /* SI */
1526 COSTS_N_INSNS (66), /* DI */
1527 COSTS_N_INSNS (66)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 16, /* "large" insn */
1531 17, /* MOVE_RATIO */
1532 4, /* cost for loading QImode using movzbl */
1533 {4, 4, 4}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {4, 4, 4}, /* cost of storing integer registers */
1537 3, /* cost of reg,reg fld/fst */
1538 {12, 12, 12}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {4, 4, 4}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 6, /* cost of moving MMX register */
1543 {12, 12}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {12, 12}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 6, /* cost of moving SSE register */
1548 {12, 12, 12}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {12, 12, 12}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 8, /* MMX or SSE register to integer */
1553 8, /* size of l1 cache. */
1554 1024, /* size of l2 cache. */
1555 128, /* size of prefetch block */
1556 8, /* number of parallel prefetches */
1557 1, /* Branch cost */
1558 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1559 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1560 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1561 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1562 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1563 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1566 1, /* scalar_stmt_cost. */
1567 1, /* scalar load_cost. */
1568 1, /* scalar_store_cost. */
1569 1, /* vec_stmt_cost. */
1570 1, /* vec_to_scalar_cost. */
1571 1, /* scalar_to_vec_cost. */
1572 1, /* vec_align_load_cost. */
1573 2, /* vec_unalign_load_cost. */
1574 1, /* vec_store_cost. */
1575 3, /* cond_taken_branch_cost. */
1576 1, /* cond_not_taken_branch_cost. */
1579 static stringop_algs atom_memcpy
[2] = {
1580 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1581 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1582 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1583 static stringop_algs atom_memset
[2] = {
1584 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1585 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1586 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1587 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1589 struct processor_costs atom_cost
= {
1590 COSTS_N_INSNS (1), /* cost of an add instruction */
1591 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1592 COSTS_N_INSNS (1), /* variable shift costs */
1593 COSTS_N_INSNS (1), /* constant shift costs */
1594 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1595 COSTS_N_INSNS (4), /* HI */
1596 COSTS_N_INSNS (3), /* SI */
1597 COSTS_N_INSNS (4), /* DI */
1598 COSTS_N_INSNS (2)}, /* other */
1599 0, /* cost of multiply per each bit set */
1600 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1601 COSTS_N_INSNS (26), /* HI */
1602 COSTS_N_INSNS (42), /* SI */
1603 COSTS_N_INSNS (74), /* DI */
1604 COSTS_N_INSNS (74)}, /* other */
1605 COSTS_N_INSNS (1), /* cost of movsx */
1606 COSTS_N_INSNS (1), /* cost of movzx */
1607 8, /* "large" insn */
1608 17, /* MOVE_RATIO */
1609 4, /* cost for loading QImode using movzbl */
1610 {4, 4, 4}, /* cost of loading integer registers
1611 in QImode, HImode and SImode.
1612 Relative to reg-reg move (2). */
1613 {4, 4, 4}, /* cost of storing integer registers */
1614 4, /* cost of reg,reg fld/fst */
1615 {12, 12, 12}, /* cost of loading fp registers
1616 in SFmode, DFmode and XFmode */
1617 {6, 6, 8}, /* cost of storing fp registers
1618 in SFmode, DFmode and XFmode */
1619 2, /* cost of moving MMX register */
1620 {8, 8}, /* cost of loading MMX registers
1621 in SImode and DImode */
1622 {8, 8}, /* cost of storing MMX registers
1623 in SImode and DImode */
1624 2, /* cost of moving SSE register */
1625 {8, 8, 8}, /* cost of loading SSE registers
1626 in SImode, DImode and TImode */
1627 {8, 8, 8}, /* cost of storing SSE registers
1628 in SImode, DImode and TImode */
1629 5, /* MMX or SSE register to integer */
1630 32, /* size of l1 cache. */
1631 256, /* size of l2 cache. */
1632 64, /* size of prefetch block */
1633 6, /* number of parallel prefetches */
1634 3, /* Branch cost */
1635 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1636 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1637 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1638 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1639 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1640 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1643 1, /* scalar_stmt_cost. */
1644 1, /* scalar load_cost. */
1645 1, /* scalar_store_cost. */
1646 1, /* vec_stmt_cost. */
1647 1, /* vec_to_scalar_cost. */
1648 1, /* scalar_to_vec_cost. */
1649 1, /* vec_align_load_cost. */
1650 2, /* vec_unalign_load_cost. */
1651 1, /* vec_store_cost. */
1652 3, /* cond_taken_branch_cost. */
1653 1, /* cond_not_taken_branch_cost. */
1656 static stringop_algs slm_memcpy
[2] = {
1657 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1658 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1659 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1660 static stringop_algs slm_memset
[2] = {
1661 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1662 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1663 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1664 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
/* Cost model for Intel Silvermont (SLM).  All insn costs are relative to
   an add (COSTS_N_INSNS).
   NOTE(review): extraction dropped original lines 1718-1719 (presumably
   the slm_memcpy/slm_memset table pointers) and the closing "};" --
   restore from upstream before compiling.  */
1666 struct processor_costs slm_cost
= {
1667 COSTS_N_INSNS (1), /* cost of an add instruction */
1668 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1669 COSTS_N_INSNS (1), /* variable shift costs */
1670 COSTS_N_INSNS (1), /* constant shift costs */
1671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1672 COSTS_N_INSNS (4), /* HI */
1673 COSTS_N_INSNS (3), /* SI */
1674 COSTS_N_INSNS (4), /* DI */
1675 COSTS_N_INSNS (2)}, /* other */
1676 0, /* cost of multiply per each bit set */
1677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1678 COSTS_N_INSNS (26), /* HI */
1679 COSTS_N_INSNS (42), /* SI */
1680 COSTS_N_INSNS (74), /* DI */
1681 COSTS_N_INSNS (74)}, /* other */
1682 COSTS_N_INSNS (1), /* cost of movsx */
1683 COSTS_N_INSNS (1), /* cost of movzx */
1684 8, /* "large" insn */
1685 17, /* MOVE_RATIO */
1686 4, /* cost for loading QImode using movzbl */
1687 {4, 4, 4}, /* cost of loading integer registers
1688 in QImode, HImode and SImode.
1689 Relative to reg-reg move (2). */
1690 {4, 4, 4}, /* cost of storing integer registers */
1691 4, /* cost of reg,reg fld/fst */
1692 {12, 12, 12}, /* cost of loading fp registers
1693 in SFmode, DFmode and XFmode */
1694 {6, 6, 8}, /* cost of storing fp registers
1695 in SFmode, DFmode and XFmode */
1696 2, /* cost of moving MMX register */
1697 {8, 8}, /* cost of loading MMX registers
1698 in SImode and DImode */
1699 {8, 8}, /* cost of storing MMX registers
1700 in SImode and DImode */
1701 2, /* cost of moving SSE register */
1702 {8, 8, 8}, /* cost of loading SSE registers
1703 in SImode, DImode and TImode */
1704 {8, 8, 8}, /* cost of storing SSE registers
1705 in SImode, DImode and TImode */
1706 5, /* MMX or SSE register to integer */
1707 32, /* size of l1 cache. */
1708 256, /* size of l2 cache. */
1709 64, /* size of prefetch block */
1710 6, /* number of parallel prefetches */
1711 3, /* Branch cost */
1712 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1713 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1714 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1715 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1716 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1717 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost parameters follow.  */
1720 1, /* scalar_stmt_cost. */
1721 1, /* scalar load_cost. */
1722 1, /* scalar_store_cost. */
1723 1, /* vec_stmt_cost. */
1724 1, /* vec_to_scalar_cost. */
1725 1, /* scalar_to_vec_cost. */
1726 1, /* vec_align_load_cost. */
1727 2, /* vec_unalign_load_cost. */
1728 1, /* vec_store_cost. */
1729 3, /* cond_taken_branch_cost. */
1730 1, /* cond_not_taken_branch_cost. */
1733 /* Generic should produce code tuned for Core-i7 (and newer chips)
1734 and btver1 (and newer chips). */
1736 static stringop_algs generic_memcpy
[2] = {
1737 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1738 {-1, libcall
, false}}},
1739 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1740 {-1, libcall
, false}}}};
1741 static stringop_algs generic_memset
[2] = {
1742 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1743 {-1, libcall
, false}}},
1744 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1745 {-1, libcall
, false}}}};
/* Cost model used for -mtune=generic (Core i7 and newer, btver1 and
   newer).  NOTE(review): extraction dropped original lines 1805-1806
   (presumably the generic_memcpy/generic_memset pointers) and the
   closing "};" -- restore from upstream before compiling.  */
1747 struct processor_costs generic_cost
= {
1748 COSTS_N_INSNS (1), /* cost of an add instruction */
1749 /* On all chips taken into consideration lea is 2 cycles and more. With
1750 this cost however our current implementation of synth_mult results in
1751 use of unnecessary temporary registers causing regression on several
1752 SPECfp benchmarks. */
1753 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1754 COSTS_N_INSNS (1), /* variable shift costs */
1755 COSTS_N_INSNS (1), /* constant shift costs */
1756 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1757 COSTS_N_INSNS (4), /* HI */
1758 COSTS_N_INSNS (3), /* SI */
1759 COSTS_N_INSNS (4), /* DI */
1760 COSTS_N_INSNS (2)}, /* other */
1761 0, /* cost of multiply per each bit set */
1762 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1763 COSTS_N_INSNS (26), /* HI */
1764 COSTS_N_INSNS (42), /* SI */
1765 COSTS_N_INSNS (74), /* DI */
1766 COSTS_N_INSNS (74)}, /* other */
1767 COSTS_N_INSNS (1), /* cost of movsx */
1768 COSTS_N_INSNS (1), /* cost of movzx */
1769 8, /* "large" insn */
1770 17, /* MOVE_RATIO */
1771 4, /* cost for loading QImode using movzbl */
1772 {4, 4, 4}, /* cost of loading integer registers
1773 in QImode, HImode and SImode.
1774 Relative to reg-reg move (2). */
1775 {4, 4, 4}, /* cost of storing integer registers */
1776 4, /* cost of reg,reg fld/fst */
1777 {12, 12, 12}, /* cost of loading fp registers
1778 in SFmode, DFmode and XFmode */
1779 {6, 6, 8}, /* cost of storing fp registers
1780 in SFmode, DFmode and XFmode */
1781 2, /* cost of moving MMX register */
1782 {8, 8}, /* cost of loading MMX registers
1783 in SImode and DImode */
1784 {8, 8}, /* cost of storing MMX registers
1785 in SImode and DImode */
1786 2, /* cost of moving SSE register */
1787 {8, 8, 8}, /* cost of loading SSE registers
1788 in SImode, DImode and TImode */
1789 {8, 8, 8}, /* cost of storing SSE registers
1790 in SImode, DImode and TImode */
1791 5, /* MMX or SSE register to integer */
1792 32, /* size of l1 cache. */
1793 512, /* size of l2 cache. */
1794 64, /* size of prefetch block */
1795 6, /* number of parallel prefetches */
1796 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1797 value is increased to perhaps more appropriate value of 5. */
1798 3, /* Branch cost */
1799 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1800 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1801 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1802 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1803 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1804 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost parameters follow.  */
1807 1, /* scalar_stmt_cost. */
1808 1, /* scalar load_cost. */
1809 1, /* scalar_store_cost. */
1810 1, /* vec_stmt_cost. */
1811 1, /* vec_to_scalar_cost. */
1812 1, /* scalar_to_vec_cost. */
1813 1, /* vec_align_load_cost. */
1814 2, /* vec_unalign_load_cost. */
1815 1, /* vec_store_cost. */
1816 3, /* cond_taken_branch_cost. */
1817 1, /* cond_not_taken_branch_cost. */
1820 /* core_cost should produce code tuned for Core familly of CPUs. */
1821 static stringop_algs core_memcpy
[2] = {
1822 {libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1823 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1824 {-1, libcall
, false}}}};
/* memset expansion strategies for the Core family tuning.
   NOTE(review): original line 1827 (one threshold entry of the first
   table) was lost in extraction -- restore from upstream before
   compiling.  */
1825 static stringop_algs core_memset
[2] = {
1826 {libcall
, {{6, loop_1_byte
, true},
1828 {8192, rep_prefix_4_byte
, true},
1829 {-1, libcall
, false}}},
1830 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1831 {-1, libcall
, false}}}};
/* Cost model for the Core family of CPUs (-mtune=core*).
   NOTE(review): extraction dropped original lines 1891-1892 (presumably
   the core_memcpy/core_memset pointers) and the closing "};" -- restore
   from upstream before compiling.  */
1834 struct processor_costs core_cost
= {
1835 COSTS_N_INSNS (1), /* cost of an add instruction */
1836 /* On all chips taken into consideration lea is 2 cycles and more. With
1837 this cost however our current implementation of synth_mult results in
1838 use of unnecessary temporary registers causing regression on several
1839 SPECfp benchmarks. */
1840 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1841 COSTS_N_INSNS (1), /* variable shift costs */
1842 COSTS_N_INSNS (1), /* constant shift costs */
1843 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1844 COSTS_N_INSNS (4), /* HI */
1845 COSTS_N_INSNS (3), /* SI */
1846 COSTS_N_INSNS (4), /* DI */
1847 COSTS_N_INSNS (2)}, /* other */
1848 0, /* cost of multiply per each bit set */
1849 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1850 COSTS_N_INSNS (26), /* HI */
1851 COSTS_N_INSNS (42), /* SI */
1852 COSTS_N_INSNS (74), /* DI */
1853 COSTS_N_INSNS (74)}, /* other */
1854 COSTS_N_INSNS (1), /* cost of movsx */
1855 COSTS_N_INSNS (1), /* cost of movzx */
1856 8, /* "large" insn */
1857 17, /* MOVE_RATIO */
1858 4, /* cost for loading QImode using movzbl */
1859 {4, 4, 4}, /* cost of loading integer registers
1860 in QImode, HImode and SImode.
1861 Relative to reg-reg move (2). */
1862 {4, 4, 4}, /* cost of storing integer registers */
1863 4, /* cost of reg,reg fld/fst */
1864 {12, 12, 12}, /* cost of loading fp registers
1865 in SFmode, DFmode and XFmode */
1866 {6, 6, 8}, /* cost of storing fp registers
1867 in SFmode, DFmode and XFmode */
1868 2, /* cost of moving MMX register */
1869 {8, 8}, /* cost of loading MMX registers
1870 in SImode and DImode */
1871 {8, 8}, /* cost of storing MMX registers
1872 in SImode and DImode */
1873 2, /* cost of moving SSE register */
1874 {8, 8, 8}, /* cost of loading SSE registers
1875 in SImode, DImode and TImode */
1876 {8, 8, 8}, /* cost of storing SSE registers
1877 in SImode, DImode and TImode */
1878 5, /* MMX or SSE register to integer */
1879 64, /* size of l1 cache. */
1880 512, /* size of l2 cache. */
1881 64, /* size of prefetch block */
1882 6, /* number of parallel prefetches */
1883 /* FIXME perhaps more appropriate value is 5. */
1884 3, /* Branch cost */
1885 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1886 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1887 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1888 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1889 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1890 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Vectorizer cost parameters follow.  */
1893 1, /* scalar_stmt_cost. */
1894 1, /* scalar load_cost. */
1895 1, /* scalar_store_cost. */
1896 1, /* vec_stmt_cost. */
1897 1, /* vec_to_scalar_cost. */
1898 1, /* scalar_to_vec_cost. */
1899 1, /* vec_align_load_cost. */
1900 2, /* vec_unalign_load_cost. */
1901 1, /* vec_store_cost. */
1902 3, /* cond_taken_branch_cost. */
1903 1, /* cond_not_taken_branch_cost. */
1907 /* Set by -mtune. */
1908 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1910 /* Set by -mtune or -Os. */
1911 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1913 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the composite m_* masks below are
   OR-ed combinations used by the tuning tables to say which CPUs a
   feature applies to.  */
1914 #define m_386 (1<<PROCESSOR_I386)
1915 #define m_486 (1<<PROCESSOR_I486)
1916 #define m_PENT (1<<PROCESSOR_PENTIUM)
1917 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1918 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1919 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1920 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1921 #define m_CORE2 (1<<PROCESSOR_CORE2)
1922 #define m_COREI7 (1<<PROCESSOR_COREI7)
1923 #define m_COREI7_AVX (1<<PROCESSOR_COREI7_AVX)
1924 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1925 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_COREI7_AVX | m_HASWELL)
1926 #define m_ATOM (1<<PROCESSOR_ATOM)
1927 #define m_SLM (1<<PROCESSOR_SLM)
/* AMD processor masks.  */
1929 #define m_GEODE (1<<PROCESSOR_GEODE)
1930 #define m_K6 (1<<PROCESSOR_K6)
1931 #define m_K6_GEODE (m_K6 | m_GEODE)
1932 #define m_K8 (1<<PROCESSOR_K8)
1933 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1934 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1935 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1936 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1937 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1938 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1939 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
1940 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1941 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1942 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
1943 #define m_BTVER (m_BTVER1 | m_BTVER2)
1944 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1946 #define m_GENERIC (1<<PROCESSOR_GENERIC)
1948 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1950 #define DEF_TUNE(tune, name, selector) name,
1951 #include "x86-tune.def"
1955 /* Feature tests against the various tunings. */
1956 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1958 /* Feature tests against the various tunings used to create ix86_tune_features
1959 based on the processor mask. */
1960 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1962 #define DEF_TUNE(tune, name, selector) selector,
1963 #include "x86-tune.def"
1967 /* Feature tests against the various architecture variations. */
1968 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1970 /* Feature tests against the various architecture variations, used to create
1971 ix86_arch_features based on the processor mask. */
1972 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1973 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1974 ~(m_386
| m_486
| m_PENT
| m_K6
),
1976 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1979 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1982 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1985 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1989 /* In case the average insn count for single function invocation is
1990 lower than this constant, emit fast (but longer) prologue and
1992 #define FAST_PROLOGUE_INSN_COUNT 20
1994 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1995 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1996 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1997 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1999 /* Array of the smallest class containing reg number REGNO, indexed by
2000 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2002 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2004 /* ax, dx, cx, bx */
2005 AREG
, DREG
, CREG
, BREG
,
2006 /* si, di, bp, sp */
2007 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2009 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2010 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2013 /* flags, fpsr, fpcr, frame */
2014 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2016 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2019 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2022 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2023 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2024 /* SSE REX registers */
2025 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2027 /* AVX-512 SSE registers */
2028 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2029 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2030 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2031 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2032 /* Mask registers. */
2033 MASK_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2034 MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2035 /* MPX bound registers */
2036 BND_REGS
, BND_REGS
, BND_REGS
, BND_REGS
,
2039 /* The "default" register map used in 32bit mode. */
/* Maps GCC register numbers to debug-info register numbers; -1 means
   "no debug register".  NOTE(review): the opening "{" (original line
   2042) and closing "};" (2054) were lost in extraction.  */
2041 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2043 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2044 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2045 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2046 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2047 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2048 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2049 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2050 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2051 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2052 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2053 101, 102, 103, 104, /* bound registers */
2056 /* The "default" register map used in 64bit mode. */
/* 64-bit analogue of dbx_register_map.  NOTE(review): opening "{" and
   closing "};" lines were lost in extraction.  */
2058 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2060 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2061 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2062 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2063 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2064 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2065 8,9,10,11,12,13,14,15, /* extended integer registers */
2066 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2067 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2068 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2069 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2070 126, 127, 128, 129, /* bound registers */
2073 /* Define the register numbers to be used in Dwarf debugging information.
2074 The SVR4 reference port C compiler uses the following register numbers
2075 in its Dwarf output code:
2076 0 for %eax (gcc regno = 0)
2077 1 for %ecx (gcc regno = 2)
2078 2 for %edx (gcc regno = 1)
2079 3 for %ebx (gcc regno = 3)
2080 4 for %esp (gcc regno = 7)
2081 5 for %ebp (gcc regno = 6)
2082 6 for %esi (gcc regno = 4)
2083 7 for %edi (gcc regno = 5)
2084 The following three DWARF register numbers are never generated by
2085 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2086 believes these numbers have these meanings.
2087 8 for %eip (no gcc equivalent)
2088 9 for %eflags (gcc regno = 17)
2089 10 for %trapno (no gcc equivalent)
2090 It is not at all clear how we should number the FP stack registers
2091 for the x86 architecture. If the version of SDB on x86/svr4 were
2092 a bit less brain dead with respect to floating-point then we would
2093 have a precedent to follow with respect to DWARF register numbers
2094 for x86 FP registers, but the SDB on x86/svr4 is so completely
2095 broken with respect to FP registers that it is hardly worth thinking
2096 of it as something to strive for compatibility with.
2097 The version of x86/svr4 SDB I have at the moment does (partially)
2098 seem to believe that DWARF register number 11 is associated with
2099 the x86 register %st(0), but that's about all. Higher DWARF
2100 register numbers don't seem to be associated with anything in
2101 particular, and even for DWARF regno 11, SDB only seems to under-
2102 stand that it should say that a variable lives in %st(0) (when
2103 asked via an `=' command) if we said it was in DWARF regno 11,
2104 but SDB still prints garbage when asked for the value of the
2105 variable in question (via a `/' command).
2106 (Also note that the labels SDB prints for various FP stack regs
2107 when doing an `x' command are all wrong.)
2108 Note that these problems generally don't affect the native SVR4
2109 C compiler because it doesn't allow the use of -O with -g and
2110 because when it is *not* optimizing, it allocates a memory
2111 location for each floating-point variable, and the memory
2112 location is what gets described in the DWARF AT_location
2113 attribute for the variable in question.
2114 Regardless of the severe mental illness of the x86/svr4 SDB, we
2115 do something sensible here and we use the following DWARF
2116 register numbers. Note that these are all stack-top-relative
2118 11 for %st(0) (gcc regno = 8)
2119 12 for %st(1) (gcc regno = 9)
2120 13 for %st(2) (gcc regno = 10)
2121 14 for %st(3) (gcc regno = 11)
2122 15 for %st(4) (gcc regno = 12)
2123 16 for %st(5) (gcc regno = 13)
2124 17 for %st(6) (gcc regno = 14)
2125 18 for %st(7) (gcc regno = 15)
/* SVR4/DWARF register numbering (see the long comment above for the
   mapping rationale).  NOTE(review): opening "{" and closing "};"
   lines were lost in extraction.  */
2127 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2129 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2130 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2131 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2132 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2133 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2134 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2135 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2136 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2137 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2138 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2139 -1, -1, -1, -1, /* bound registers */
2142 /* Define parameter passing and return registers. */
/* Integer argument registers for the SysV AMD64 ABI, in passing order.
   NOTE(review): the "{" / "};" delimiter lines of each array were lost
   in extraction.  */
2144 static int const x86_64_int_parameter_registers
[6] =
2146 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
/* Integer argument registers for the Microsoft x64 ABI.  */
2149 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2151 CX_REG
, DX_REG
, R8_REG
, R9_REG
/* Integer value-return registers.  */
2154 static int const x86_64_int_return_registers
[4] =
2156 AX_REG
, DX_REG
, DI_REG
, SI_REG
2159 /* Additional registers that are clobbered by SYSV calls. */
/* NOTE(review): the array is declared with 12 entries but only the 8
   XMM entries survived extraction -- original lines 2162-2164 (the
   integer-register entries and the opening "{") and the closing "};"
   are missing; restore from upstream.  */
2161 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2165 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2166 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2169 /* Define the structure for the machine field in struct function. */
2171 struct GTY(()) stack_local_entry
{
2172 unsigned short mode
;
2175 struct stack_local_entry
*next
;
2178 /* Structure describing stack frame layout.
2179 Stack grows downward:
2185 saved static chain if ix86_static_chain_on_stack
2187 saved frame pointer if frame_pointer_needed
2188 <- HARD_FRAME_POINTER
2194 <- sse_regs_save_offset
2197 [va_arg registers] |
2201 [padding2] | = to_allocate
2210 int outgoing_arguments_size
;
2212 /* The offsets relative to ARG_POINTER. */
2213 HOST_WIDE_INT frame_pointer_offset
;
2214 HOST_WIDE_INT hard_frame_pointer_offset
;
2215 HOST_WIDE_INT stack_pointer_offset
;
2216 HOST_WIDE_INT hfp_save_offset
;
2217 HOST_WIDE_INT reg_save_offset
;
2218 HOST_WIDE_INT sse_reg_save_offset
;
2220 /* When save_regs_using_mov is set, emit prologue using
2221 move instead of push instructions. */
2222 bool save_regs_using_mov
;
2225 /* Which cpu are we scheduling for. */
2226 enum attr_cpu ix86_schedule
;
2228 /* Which cpu are we optimizing for. */
2229 enum processor_type ix86_tune
;
2231 /* Which instruction set architecture to use. */
2232 enum processor_type ix86_arch
;
2234 /* True if processor has SSE prefetch instruction. */
2235 unsigned char x86_prefetch_sse
;
/* Attribute name associated with the -mstackrealign option.  */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2241 static rtx (*ix86_gen_leave
) (void);
2242 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2243 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2244 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2245 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2246 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2247 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2248 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2249 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2250 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2251 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2252 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2254 /* Preferred alignment for stack boundary in bits. */
2255 unsigned int ix86_preferred_stack_boundary
;
2257 /* Alignment for incoming stack boundary in bits specified at
2259 static unsigned int ix86_user_incoming_stack_boundary
;
2261 /* Default alignment for incoming stack boundary in bits. */
2262 static unsigned int ix86_default_incoming_stack_boundary
;
2264 /* Alignment for incoming stack boundary in bits. */
2265 unsigned int ix86_incoming_stack_boundary
;
2267 /* Calling abi specific va_list type nodes. */
2268 static GTY(()) tree sysv_va_list_type_node
;
2269 static GTY(()) tree ms_va_list_type_node
;
2271 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2272 char internal_label_prefix
[16];
2273 int internal_label_prefix_len
;
2275 /* Fence to use after loop using movnt. */
2278 /* Register class used for passing given 64bit part of the argument.
2279 These represent classes as documented by the PS ABI, with the exception
2280 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2281 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2283 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2284 whenever possible (upper half does contain padding). */
2285 enum x86_64_reg_class
2288 X86_64_INTEGER_CLASS
,
2289 X86_64_INTEGERSI_CLASS
,
2296 X86_64_COMPLEX_X87_CLASS
,
2300 #define MAX_CLASSES 4
2302 /* Table of constants used by fldpi, fldln2, etc.... */
2303 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2304 static bool ext_80387_constants_init
= 0;
2307 static struct machine_function
* ix86_init_machine_status (void);
2308 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2309 static bool ix86_function_value_regno_p (const unsigned int);
2310 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2312 static rtx
ix86_static_chain (const_tree
, bool);
2313 static int ix86_function_regparm (const_tree
, const_tree
);
2314 static void ix86_compute_frame_layout (struct ix86_frame
*);
2315 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2317 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2318 static tree
ix86_canonical_va_list_type (tree
);
2319 static void predict_jump (int);
2320 static unsigned int split_stack_prologue_scratch_regno (void);
2321 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2323 enum ix86_function_specific_strings
2325 IX86_FUNCTION_SPECIFIC_ARCH
,
2326 IX86_FUNCTION_SPECIFIC_TUNE
,
2327 IX86_FUNCTION_SPECIFIC_MAX
2330 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2331 const char *, enum fpmath_unit
, bool);
2332 static void ix86_function_specific_save (struct cl_target_option
*,
2333 struct gcc_options
*opts
);
2334 static void ix86_function_specific_restore (struct gcc_options
*opts
,
2335 struct cl_target_option
*);
2336 static void ix86_function_specific_print (FILE *, int,
2337 struct cl_target_option
*);
2338 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2339 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2340 struct gcc_options
*,
2341 struct gcc_options
*,
2342 struct gcc_options
*);
2343 static bool ix86_can_inline_p (tree
, tree
);
2344 static void ix86_set_current_function (tree
);
2345 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2347 static enum calling_abi
ix86_function_abi (const_tree
);
2350 #ifndef SUBTARGET32_DEFAULT_CPU
2351 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune= or -march= were specified on the command line.  */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2358 /* Vectorization library interface and handlers. */
2359 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2361 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2362 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2364 /* Processor target table, indexed by processor number */
2367 const struct processor_costs
*cost
; /* Processor costs */
2368 const int align_loop
; /* Default alignments. */
2369 const int align_loop_max_skip
;
2370 const int align_jump
;
2371 const int align_jump_max_skip
;
2372 const int align_func
;
2375 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2377 {&i386_cost
, 4, 3, 4, 3, 4},
2378 {&i486_cost
, 16, 15, 16, 15, 16},
2379 {&pentium_cost
, 16, 7, 16, 7, 16},
2380 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2381 {&geode_cost
, 0, 0, 0, 0, 0},
2382 {&k6_cost
, 32, 7, 32, 7, 32},
2383 {&athlon_cost
, 16, 7, 16, 7, 16},
2384 {&pentium4_cost
, 0, 0, 0, 0, 0},
2385 {&k8_cost
, 16, 7, 16, 7, 16},
2386 {&nocona_cost
, 0, 0, 0, 0, 0},
2388 {&core_cost
, 16, 10, 16, 10, 16},
2390 {&core_cost
, 16, 10, 16, 10, 16},
2392 {&core_cost
, 16, 10, 16, 10, 16},
2394 {&core_cost
, 16, 10, 16, 10, 16},
2395 {&generic_cost
, 16, 10, 16, 10, 16},
2396 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2397 {&bdver1_cost
, 16, 10, 16, 7, 11},
2398 {&bdver2_cost
, 16, 10, 16, 7, 11},
2399 {&bdver3_cost
, 16, 10, 16, 7, 11},
2400 {&bdver4_cost
, 16, 10, 16, 7, 11},
2401 {&btver1_cost
, 16, 10, 16, 7, 11},
2402 {&btver2_cost
, 16, 10, 16, 7, 11},
2403 {&atom_cost
, 16, 15, 16, 7, 16},
2404 {&slm_cost
, 16, 15, 16, 7, 16}
2407 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
/* Gate for the vzeroupper insertion pass: run only when generating AVX
   (but not AVX-512F) code with vzeroupper emission enabled.
   NOTE(review): the return-type line and the function braces (original
   lines 2443/2445/2447) were lost in extraction.  */
2444 gate_insert_vzeroupper (void)
2446 return TARGET_AVX
&& !TARGET_AVX512F
&& TARGET_VZEROUPPER
;
2450 rest_of_handle_insert_vzeroupper (void)
2454 /* vzeroupper instructions are inserted immediately after reload to
2455 account for possible spills from 256bit registers. The pass
2456 reuses mode switching infrastructure by re-running mode insertion
2457 pass, so disable entities that have already been processed. */
2458 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2459 ix86_optimize_mode_switching
[i
] = 0;
2461 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2463 /* Call optimize_mode_switching. */
2464 g
->get_passes ()->execute_pass_mode_switching ();
2470 const pass_data pass_data_insert_vzeroupper
=
2472 RTL_PASS
, /* type */
2473 "vzeroupper", /* name */
2474 OPTGROUP_NONE
, /* optinfo_flags */
2475 true, /* has_gate */
2476 true, /* has_execute */
2477 TV_NONE
, /* tv_id */
2478 0, /* properties_required */
2479 0, /* properties_provided */
2480 0, /* properties_destroyed */
2481 0, /* todo_flags_start */
2482 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2485 class pass_insert_vzeroupper
: public rtl_opt_pass
2488 pass_insert_vzeroupper(gcc::context
*ctxt
)
2489 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2492 /* opt_pass methods: */
2493 bool gate () { return gate_insert_vzeroupper (); }
2494 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2496 }; // class pass_insert_vzeroupper
2501 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2503 return new pass_insert_vzeroupper (ctxt
);
2506 /* Return true if a red-zone is in use. */
/* The red zone exists on 64-bit SysV targets but not under the
   Microsoft x64 ABI.  NOTE(review): the return-type line and function
   braces were lost in extraction.  */
2509 ix86_using_red_zone (void)
2511 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2514 /* Return a string that documents the current -m options. The caller is
2515 responsible for freeing the string. */
2518 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2519 const char *tune
, enum fpmath_unit fpmath
,
2522 struct ix86_target_opts
2524 const char *option
; /* option string */
2525 HOST_WIDE_INT mask
; /* isa mask options */
/* This table is ordered so that options like -msse4.2 that imply
   preceding options are matched first; each matched mask bit is
   cleared, so an implied option is only printed when set on its own.  */
static struct ix86_target_opts isa_opts[] =
{
  { "-mfma4",		OPTION_MASK_ISA_FMA4 },
  { "-mfma",		OPTION_MASK_ISA_FMA },
  { "-mxop",		OPTION_MASK_ISA_XOP },
  { "-mlwp",		OPTION_MASK_ISA_LWP },
  { "-mavx512f",	OPTION_MASK_ISA_AVX512F },
  { "-mavx512er",	OPTION_MASK_ISA_AVX512ER },
  { "-mavx512cd",	OPTION_MASK_ISA_AVX512CD },
  { "-mavx512pf",	OPTION_MASK_ISA_AVX512PF },
  { "-msse4a",		OPTION_MASK_ISA_SSE4A },
  { "-msse4.2",		OPTION_MASK_ISA_SSE4_2 },
  { "-msse4.1",		OPTION_MASK_ISA_SSE4_1 },
  { "-mssse3",		OPTION_MASK_ISA_SSSE3 },
  { "-msse3",		OPTION_MASK_ISA_SSE3 },
  { "-msse2",		OPTION_MASK_ISA_SSE2 },
  { "-msse",		OPTION_MASK_ISA_SSE },
  { "-m3dnow",		OPTION_MASK_ISA_3DNOW },
  { "-m3dnowa",		OPTION_MASK_ISA_3DNOW_A },
  { "-mmmx",		OPTION_MASK_ISA_MMX },
  { "-mabm",		OPTION_MASK_ISA_ABM },
  { "-mbmi",		OPTION_MASK_ISA_BMI },
  { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
  { "-mlzcnt",		OPTION_MASK_ISA_LZCNT },
  { "-mhle",		OPTION_MASK_ISA_HLE },
  { "-mfxsr",		OPTION_MASK_ISA_FXSR },
  { "-mrdseed",		OPTION_MASK_ISA_RDSEED },
  { "-mprfchw",		OPTION_MASK_ISA_PRFCHW },
  { "-madx",		OPTION_MASK_ISA_ADX },
  { "-mtbm",		OPTION_MASK_ISA_TBM },
  { "-mpopcnt",		OPTION_MASK_ISA_POPCNT },
  { "-mmovbe",		OPTION_MASK_ISA_MOVBE },
  { "-mcrc32",		OPTION_MASK_ISA_CRC32 },
  { "-maes",		OPTION_MASK_ISA_AES },
  { "-mpclmul",		OPTION_MASK_ISA_PCLMUL },
  { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
  { "-mrdrnd",		OPTION_MASK_ISA_RDRND },
  { "-mf16c",		OPTION_MASK_ISA_F16C },
  { "-mrtm",		OPTION_MASK_ISA_RTM },
  { "-mxsave",		OPTION_MASK_ISA_XSAVE },
  { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
  { "-mmpx",		OPTION_MASK_ISA_MPX },
};
/* Non-ISA target flag options, matched against target_flags when
   reconstructing an option string.  */
static struct ix86_target_opts flag_opts[] =
{
  { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
  { "-mlong-double-64",			MASK_LONG_DOUBLE_64 },
  { "-m80387",				MASK_80387 },
  { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
  { "-malign-double",			MASK_ALIGN_DOUBLE },
  { "-mcld",				MASK_CLD },
  { "-mfp-ret-in-387",			MASK_FLOAT_RETURNS },
  { "-mieee-fp",			MASK_IEEE_FP },
  { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
  { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
  { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
  { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
  { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
  { "-mno-push-args",			MASK_NO_PUSH_ARGS },
  { "-mno-red-zone",			MASK_NO_RED_ZONE },
  { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
  { "-mrecip",				MASK_RECIP },
  { "-mrtd",				MASK_RTD },
  { "-msseregparm",			MASK_SSEREGPARM },
  { "-mstack-arg-probe",		MASK_STACK_PROBE },
  { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
  { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
  { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
  { "-mvzeroupper",			MASK_VZEROUPPER },
  { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD },
  { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE },
  { "-mprefer-avx128",			MASK_PREFER_AVX128 },
};
2606 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2609 char target_other
[40];
2619 memset (opts
, '\0', sizeof (opts
));
2621 /* Add -march= option. */
2624 opts
[num
][0] = "-march=";
2625 opts
[num
++][1] = arch
;
2628 /* Add -mtune= option. */
2631 opts
[num
][0] = "-mtune=";
2632 opts
[num
++][1] = tune
;
2635 /* Add -m32/-m64/-mx32. */
2636 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2638 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2642 isa
&= ~ (OPTION_MASK_ISA_64BIT
2643 | OPTION_MASK_ABI_64
2644 | OPTION_MASK_ABI_X32
);
2648 opts
[num
++][0] = abi
;
2650 /* Pick out the options in isa options. */
2651 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2653 if ((isa
& isa_opts
[i
].mask
) != 0)
2655 opts
[num
++][0] = isa_opts
[i
].option
;
2656 isa
&= ~ isa_opts
[i
].mask
;
2660 if (isa
&& add_nl_p
)
2662 opts
[num
++][0] = isa_other
;
2663 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2667 /* Add flag options. */
2668 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2670 if ((flags
& flag_opts
[i
].mask
) != 0)
2672 opts
[num
++][0] = flag_opts
[i
].option
;
2673 flags
&= ~ flag_opts
[i
].mask
;
2677 if (flags
&& add_nl_p
)
2679 opts
[num
++][0] = target_other
;
2680 sprintf (target_other
, "(other flags: %#x)", flags
);
2683 /* Add -fpmath= option. */
2686 opts
[num
][0] = "-mfpmath=";
2687 switch ((int) fpmath
)
2690 opts
[num
++][1] = "387";
2694 opts
[num
++][1] = "sse";
2697 case FPMATH_387
| FPMATH_SSE
:
2698 opts
[num
++][1] = "sse+387";
2710 gcc_assert (num
< ARRAY_SIZE (opts
));
2712 /* Size the string. */
2714 sep_len
= (add_nl_p
) ? 3 : 1;
2715 for (i
= 0; i
< num
; i
++)
2718 for (j
= 0; j
< 2; j
++)
2720 len
+= strlen (opts
[i
][j
]);
2723 /* Build the string. */
2724 ret
= ptr
= (char *) xmalloc (len
);
2727 for (i
= 0; i
< num
; i
++)
2731 for (j
= 0; j
< 2; j
++)
2732 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2739 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2747 for (j
= 0; j
< 2; j
++)
2750 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2752 line_len
+= len2
[j
];
2757 gcc_assert (ret
+ len
>= ptr
);
2762 /* Return true, if profiling code should be emitted before
2763 prologue. Otherwise it returns false.
2764 Note: For x86 with "hotfix" it is sorried. */
2766 ix86_profile_before_prologue (void)
2768 return flag_fentry
!= 0;
2771 /* Function that is callable from the debugger to print the current
2773 void ATTRIBUTE_UNUSED
2774 ix86_debug_options (void)
2776 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2777 ix86_arch_string
, ix86_tune_string
,
2782 fprintf (stderr
, "%s\n\n", opts
);
2786 fputs ("<no options>\n\n", stderr
);
2791 static const char *stringop_alg_names
[] = {
2793 #define DEF_ALG(alg, name) #name,
2794 #include "stringop.def"
2799 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2800 The string is of the following form (or comma separated list of it):
2802 strategy_alg:max_size:[align|noalign]
2804 where the full size range for the strategy is either [0, max_size] or
2805 [min_size, max_size], in which min_size is the max_size + 1 of the
2806 preceding range. The last size range must have max_size == -1.
2811 -mmemcpy-strategy=libcall:-1:noalign
2813 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2817 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2819 This is to tell the compiler to use the following strategy for memset
2820 1) when the expected size is between [1, 16], use rep_8byte strategy;
2821 2) when the size is between [17, 2048], use vector_loop;
2822 3) when the size is > 2048, use libcall. */
2824 struct stringop_size_range
2832 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
2834 const struct stringop_algs
*default_algs
;
2835 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
2836 char *curr_range_str
, *next_range_str
;
2840 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
2842 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
2844 curr_range_str
= strategy_str
;
2852 next_range_str
= strchr (curr_range_str
, ',');
2854 *next_range_str
++ = '\0';
2856 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
2857 alg_name
, &maxs
, align
))
2859 error ("wrong arg %s to option %s", curr_range_str
,
2860 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2864 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
2866 error ("size ranges of option %s should be increasing",
2867 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2871 for (i
= 0; i
< last_alg
; i
++)
2873 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
2875 alg
= (stringop_alg
) i
;
2882 error ("wrong stringop strategy name %s specified for option %s",
2884 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2888 input_ranges
[n
].max
= maxs
;
2889 input_ranges
[n
].alg
= alg
;
2890 if (!strcmp (align
, "align"))
2891 input_ranges
[n
].noalign
= false;
2892 else if (!strcmp (align
, "noalign"))
2893 input_ranges
[n
].noalign
= true;
2896 error ("unknown alignment %s specified for option %s",
2897 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2901 curr_range_str
= next_range_str
;
2903 while (curr_range_str
);
2905 if (input_ranges
[n
- 1].max
!= -1)
2907 error ("the max value for the last size range should be -1"
2909 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2913 if (n
> MAX_STRINGOP_ALGS
)
2915 error ("too many size ranges specified in option %s",
2916 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2920 /* Now override the default algs array. */
2921 for (i
= 0; i
< n
; i
++)
2923 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
2924 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
2925 = input_ranges
[i
].alg
;
2926 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
2927 = input_ranges
[i
].noalign
;
2932 /* parse -mtune-ctrl= option. When DUMP is true,
2933 print the features that are explicitly set. */
2936 parse_mtune_ctrl_str (bool dump
)
2938 if (!ix86_tune_ctrl_string
)
2941 char *next_feature_string
= NULL
;
2942 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
2943 char *orig
= curr_feature_string
;
2949 next_feature_string
= strchr (curr_feature_string
, ',');
2950 if (next_feature_string
)
2951 *next_feature_string
++ = '\0';
2952 if (*curr_feature_string
== '^')
2954 curr_feature_string
++;
2957 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2959 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
2961 ix86_tune_features
[i
] = !clear
;
2963 fprintf (stderr
, "Explicitly %s feature %s\n",
2964 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
2968 if (i
== X86_TUNE_LAST
)
2969 error ("Unknown parameter to option -mtune-ctrl: %s",
2970 clear
? curr_feature_string
- 1 : curr_feature_string
);
2971 curr_feature_string
= next_feature_string
;
2973 while (curr_feature_string
);
2977 /* Helper function to set ix86_tune_features. IX86_TUNE is the
2981 set_ix86_tune_features (enum processor_type ix86_tune
, bool dump
)
2983 unsigned int ix86_tune_mask
= 1u << ix86_tune
;
2986 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2988 if (ix86_tune_no_default
)
2989 ix86_tune_features
[i
] = 0;
2991 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
2996 fprintf (stderr
, "List of x86 specific tuning parameter names:\n");
2997 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2998 fprintf (stderr
, "%s : %s\n", ix86_tune_feature_names
[i
],
2999 ix86_tune_features
[i
] ? "on" : "off");
3002 parse_mtune_ctrl_str (dump
);
3006 /* Override various settings based on options. If MAIN_ARGS_P, the
3007 options are from the command line, otherwise they are from
3011 ix86_option_override_internal (bool main_args_p
,
3012 struct gcc_options
*opts
,
3013 struct gcc_options
*opts_set
)
3016 unsigned int ix86_arch_mask
;
3017 const bool ix86_tune_specified
= (opts
->x_ix86_tune_string
!= NULL
);
3022 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3023 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3024 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3025 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3026 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3027 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3028 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3029 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3030 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3031 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3032 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3033 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3034 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3035 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3036 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3037 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3038 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3039 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3040 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3041 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3042 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3043 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3044 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3045 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3046 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3047 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3048 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3049 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3050 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3051 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3052 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3053 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3054 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3055 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3056 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3057 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3058 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3059 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3060 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3061 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3062 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3063 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3064 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3065 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3066 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3068 /* if this reaches 64, need to widen struct pta flags below */
3072 const char *const name
; /* processor name or nickname. */
3073 const enum processor_type processor
;
3074 const enum attr_cpu schedule
;
3075 const unsigned HOST_WIDE_INT flags
;
3077 const processor_alias_table
[] =
3079 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3080 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3081 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3082 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3083 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3084 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3085 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3086 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3087 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3088 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3089 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3090 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3091 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3092 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3093 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3094 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3095 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3096 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3097 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3098 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3099 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3100 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3101 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3102 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3103 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3104 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3105 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3106 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3107 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3108 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3109 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3110 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3111 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3112 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3113 {"corei7-avx", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3114 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3115 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3116 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3117 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3118 {"core-avx-i", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3119 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3120 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3121 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3122 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3123 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3124 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3125 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3126 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3127 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3128 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3130 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3131 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3132 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3133 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3134 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3135 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3137 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3138 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3139 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3140 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3141 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3142 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3143 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3144 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3145 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3146 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3147 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3148 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3149 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3150 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3151 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3152 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3153 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3154 {"k8", PROCESSOR_K8
, CPU_K8
,
3155 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3156 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3157 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3158 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3159 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3160 {"opteron", PROCESSOR_K8
, CPU_K8
,
3161 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3162 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3163 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3164 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3165 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3166 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3167 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3168 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3169 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3170 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3171 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3172 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3173 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3174 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3175 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3176 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3177 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3178 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3179 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3180 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3181 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3182 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3183 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3184 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3185 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3186 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3187 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3188 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3189 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3190 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3191 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3192 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3193 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3194 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3195 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3196 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3197 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3198 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3199 {"bdver4", PROCESSOR_BDVER4
, CPU_BDVER4
,
3200 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3201 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3202 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_AVX2
3203 | PTA_FMA4
| PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_BMI2
3204 | PTA_TBM
| PTA_F16C
| PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
3205 | PTA_XSAVE
| PTA_XSAVEOPT
| PTA_FSGSBASE
},
3206 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3207 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3208 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3209 | PTA_FXSR
| PTA_XSAVE
},
3210 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3211 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3212 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3213 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3214 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3215 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3217 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3219 | PTA_HLE
/* flags are only used for -march switch. */ },
3222 /* -mrecip options. */
3225 const char *string
; /* option name */
3226 unsigned int mask
; /* mask bits to set */
3228 const recip_options
[] =
3230 { "all", RECIP_MASK_ALL
},
3231 { "none", RECIP_MASK_NONE
},
3232 { "div", RECIP_MASK_DIV
},
3233 { "sqrt", RECIP_MASK_SQRT
},
3234 { "vec-div", RECIP_MASK_VEC_DIV
},
3235 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3238 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3240 /* Set up prefix/suffix so the error messages refer to either the command
3241 line argument, or the attribute(target). */
3250 prefix
= "option(\"";
3255 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3256 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3257 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3258 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3259 #ifdef TARGET_BI_ARCH
3262 #if TARGET_BI_ARCH == 1
3263 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3264 is on and OPTION_MASK_ABI_X32 is off. We turn off
3265 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3267 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3268 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3270 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3271 on and OPTION_MASK_ABI_64 is off. We turn off
3272 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3274 if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3275 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3280 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3282 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3283 OPTION_MASK_ABI_64 for TARGET_X32. */
3284 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3285 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3287 else if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3289 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3290 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3291 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3292 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3295 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3296 SUBTARGET_OVERRIDE_OPTIONS
;
3299 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3300 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3303 /* -fPIC is the default for x86_64. */
3304 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3305 opts
->x_flag_pic
= 2;
3307 /* Need to check -mtune=generic first. */
3308 if (opts
->x_ix86_tune_string
)
3310 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3311 || !strcmp (opts
->x_ix86_tune_string
, "i686")
3312 /* As special support for cross compilers we read -mtune=native
3313 as -mtune=generic. With native compilers we won't see the
3314 -mtune=native, as it was changed by the driver. */
3315 || !strcmp (opts
->x_ix86_tune_string
, "native"))
3317 opts
->x_ix86_tune_string
= "generic";
3319 /* If this call is for setting the option attribute, allow the
3320 generic that was previously set. */
3321 else if (!main_args_p
3322 && !strcmp (opts
->x_ix86_tune_string
, "generic"))
3324 else if (!strncmp (opts
->x_ix86_tune_string
, "generic", 7))
3325 error ("bad value (%s) for %stune=%s %s",
3326 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3327 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3328 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3329 "%stune=k8%s or %stune=generic%s instead as appropriate",
3330 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3334 if (opts
->x_ix86_arch_string
)
3335 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
3336 if (!opts
->x_ix86_tune_string
)
3338 opts
->x_ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3339 ix86_tune_defaulted
= 1;
3342 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3343 or defaulted. We need to use a sensible tune option. */
3344 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3345 || !strcmp (opts
->x_ix86_tune_string
, "x86-64")
3346 || !strcmp (opts
->x_ix86_tune_string
, "i686"))
3348 opts
->x_ix86_tune_string
= "generic";
3352 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
3353 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3355 /* rep; movq isn't available in 32-bit code. */
3356 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3357 opts
->x_ix86_stringop_alg
= no_stringop
;
3360 if (!opts
->x_ix86_arch_string
)
3361 opts
->x_ix86_arch_string
3362 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3363 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3365 ix86_arch_specified
= 1;
3367 if (opts_set
->x_ix86_pmode
)
3369 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3370 && opts
->x_ix86_pmode
== PMODE_SI
)
3371 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3372 && opts
->x_ix86_pmode
== PMODE_DI
))
3373 error ("address mode %qs not supported in the %s bit mode",
3374 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
3375 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
3378 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3379 ? PMODE_DI
: PMODE_SI
;
3381 if (!opts_set
->x_ix86_abi
)
3382 opts
->x_ix86_abi
= DEFAULT_ABI
;
3384 /* For targets using ms ABI enable ms-extensions, if not
3385 explicit turned off. For non-ms ABI we turn off this
3387 if (!opts_set
->x_flag_ms_extensions
)
3388 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3390 if (opts_set
->x_ix86_cmodel
)
3392 switch (opts
->x_ix86_cmodel
)
3396 if (opts
->x_flag_pic
)
3397 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
3398 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3399 error ("code model %qs not supported in the %s bit mode",
3405 if (opts
->x_flag_pic
)
3406 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
3407 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3408 error ("code model %qs not supported in the %s bit mode",
3410 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3411 error ("code model %qs not supported in x32 mode",
3417 if (opts
->x_flag_pic
)
3418 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
3419 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3420 error ("code model %qs not supported in the %s bit mode",
3422 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3423 error ("code model %qs not supported in x32 mode",
3428 if (opts
->x_flag_pic
)
3429 error ("code model %s does not support PIC mode", "32");
3430 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3431 error ("code model %qs not supported in the %s bit mode",
3436 if (opts
->x_flag_pic
)
3438 error ("code model %s does not support PIC mode", "kernel");
3439 opts
->x_ix86_cmodel
= CM_32
;
3441 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3442 error ("code model %qs not supported in the %s bit mode",
3452 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3453 use of rip-relative addressing. This eliminates fixups that
3454 would otherwise be needed if this object is to be placed in a
3455 DLL, and is essentially just as efficient as direct addressing. */
3456 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3457 && (TARGET_RDOS
|| TARGET_PECOFF
))
3458 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
3459 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3460 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3462 opts
->x_ix86_cmodel
= CM_32
;
3464 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
3466 error ("-masm=intel not supported in this configuration");
3467 opts
->x_ix86_asm_dialect
= ASM_ATT
;
3469 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
3470 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3471 sorry ("%i-bit mode not compiled in",
3472 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3474 for (i
= 0; i
< pta_size
; i
++)
3475 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
3477 ix86_schedule
= processor_alias_table
[i
].schedule
;
3478 ix86_arch
= processor_alias_table
[i
].processor
;
3479 /* Default cpu tuning to the architecture. */
3480 ix86_tune
= ix86_arch
;
3482 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3483 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3484 error ("CPU you selected does not support x86-64 "
3487 if (processor_alias_table
[i
].flags
& PTA_MMX
3488 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3489 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3490 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3491 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3492 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3493 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3494 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3495 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3496 if (processor_alias_table
[i
].flags
& PTA_SSE
3497 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3498 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3499 if (processor_alias_table
[i
].flags
& PTA_SSE2
3500 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3501 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3502 if (processor_alias_table
[i
].flags
& PTA_SSE3
3503 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3504 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3505 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3506 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3507 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3508 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3509 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3510 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3511 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3512 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3513 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3514 if (processor_alias_table
[i
].flags
& PTA_AVX
3515 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3516 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3517 if (processor_alias_table
[i
].flags
& PTA_AVX2
3518 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3519 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3520 if (processor_alias_table
[i
].flags
& PTA_FMA
3521 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3522 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3523 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3524 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3525 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3526 if (processor_alias_table
[i
].flags
& PTA_FMA4
3527 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3528 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3529 if (processor_alias_table
[i
].flags
& PTA_XOP
3530 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3531 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3532 if (processor_alias_table
[i
].flags
& PTA_LWP
3533 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3534 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3535 if (processor_alias_table
[i
].flags
& PTA_ABM
3536 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3537 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3538 if (processor_alias_table
[i
].flags
& PTA_BMI
3539 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3540 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3541 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3542 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3543 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3544 if (processor_alias_table
[i
].flags
& PTA_TBM
3545 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3546 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3547 if (processor_alias_table
[i
].flags
& PTA_BMI2
3548 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3549 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3550 if (processor_alias_table
[i
].flags
& PTA_CX16
3551 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3552 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3553 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3554 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3555 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3556 if (!(TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3557 && (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3558 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3559 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3560 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3561 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3562 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3563 if (processor_alias_table
[i
].flags
& PTA_AES
3564 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3565 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3566 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3567 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3568 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3569 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3570 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3571 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3572 if (processor_alias_table
[i
].flags
& PTA_RDRND
3573 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3574 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3575 if (processor_alias_table
[i
].flags
& PTA_F16C
3576 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3577 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3578 if (processor_alias_table
[i
].flags
& PTA_RTM
3579 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3580 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3581 if (processor_alias_table
[i
].flags
& PTA_HLE
3582 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3583 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3584 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3585 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3586 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3587 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3588 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3589 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3590 if (processor_alias_table
[i
].flags
& PTA_ADX
3591 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3592 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3593 if (processor_alias_table
[i
].flags
& PTA_FXSR
3594 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3595 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3596 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3597 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3598 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3599 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3600 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3601 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3602 if (processor_alias_table
[i
].flags
& PTA_AVX512F
3603 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
3604 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
3605 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
3606 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
3607 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
3608 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
3609 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
3610 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
3611 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
3612 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
3613 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
3614 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3615 x86_prefetch_sse
= true;
3620 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
3621 error ("generic CPU can be used only for %stune=%s %s",
3622 prefix
, suffix
, sw
);
3623 else if (!strncmp (opts
->x_ix86_arch_string
, "generic", 7) || i
== pta_size
)
3624 error ("bad value (%s) for %sarch=%s %s",
3625 opts
->x_ix86_arch_string
, prefix
, suffix
, sw
);
3627 ix86_arch_mask
= 1u << ix86_arch
;
3628 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3629 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3631 for (i
= 0; i
< pta_size
; i
++)
3632 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
3634 ix86_schedule
= processor_alias_table
[i
].schedule
;
3635 ix86_tune
= processor_alias_table
[i
].processor
;
3636 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3638 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3640 if (ix86_tune_defaulted
)
3642 opts
->x_ix86_tune_string
= "x86-64";
3643 for (i
= 0; i
< pta_size
; i
++)
3644 if (! strcmp (opts
->x_ix86_tune_string
,
3645 processor_alias_table
[i
].name
))
3647 ix86_schedule
= processor_alias_table
[i
].schedule
;
3648 ix86_tune
= processor_alias_table
[i
].processor
;
3651 error ("CPU you selected does not support x86-64 "
3655 /* Intel CPUs have always interpreted SSE prefetch instructions as
3656 NOPs; so, we can enable SSE prefetch instructions even when
3657 -mtune (rather than -march) points us to a processor that has them.
3658 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3659 higher processors. */
3661 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3662 x86_prefetch_sse
= true;
3666 if (ix86_tune_specified
&& i
== pta_size
)
3667 error ("bad value (%s) for %stune=%s %s",
3668 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3670 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
3672 #ifndef USE_IX86_FRAME_POINTER
3673 #define USE_IX86_FRAME_POINTER 0
3676 #ifndef USE_X86_64_FRAME_POINTER
3677 #define USE_X86_64_FRAME_POINTER 0
3680 /* Set the default values for switches whose default depends on TARGET_64BIT
3681 in case they weren't overwritten by command line options. */
3682 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3684 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3685 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3686 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3687 opts
->x_flag_unwind_tables
3688 = opts
->x_flag_asynchronous_unwind_tables
= 1;
3689 if (opts
->x_flag_pcc_struct_return
== 2)
3690 opts
->x_flag_pcc_struct_return
= 0;
3694 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3695 opts
->x_flag_omit_frame_pointer
3696 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
3697 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3698 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3699 if (opts
->x_flag_pcc_struct_return
== 2)
3700 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3703 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3704 if (opts
->x_optimize_size
)
3705 ix86_cost
= &ix86_size_cost
;
3707 ix86_cost
= ix86_tune_cost
;
3709 /* Arrange to set up i386_stack_locals for all functions. */
3710 init_machine_status
= ix86_init_machine_status
;
3712 /* Validate -mregparm= value. */
3713 if (opts_set
->x_ix86_regparm
)
3715 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3716 warning (0, "-mregparm is ignored in 64-bit mode");
3717 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
3719 error ("-mregparm=%d is not between 0 and %d",
3720 opts
->x_ix86_regparm
, REGPARM_MAX
);
3721 opts
->x_ix86_regparm
= 0;
3724 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3725 opts
->x_ix86_regparm
= REGPARM_MAX
;
3727 /* Default align_* from the processor table. */
3728 if (opts
->x_align_loops
== 0)
3730 opts
->x_align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3731 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3733 if (opts
->x_align_jumps
== 0)
3735 opts
->x_align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3736 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3738 if (opts
->x_align_functions
== 0)
3740 opts
->x_align_functions
= processor_target_table
[ix86_tune
].align_func
;
3743 /* Provide default for -mbranch-cost= value. */
3744 if (!opts_set
->x_ix86_branch_cost
)
3745 opts
->x_ix86_branch_cost
= ix86_cost
->branch_cost
;
3747 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3749 opts
->x_target_flags
3750 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
3752 /* Enable by default the SSE and MMX builtins. Do allow the user to
3753 explicitly disable any of these. In particular, disabling SSE and
3754 MMX for kernel code is extremely useful. */
3755 if (!ix86_arch_specified
)
3756 opts
->x_ix86_isa_flags
3757 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3758 | TARGET_SUBTARGET64_ISA_DEFAULT
)
3759 & ~opts
->x_ix86_isa_flags_explicit
);
3761 if (TARGET_RTD_P (opts
->x_target_flags
))
3762 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3766 opts
->x_target_flags
3767 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
3769 if (!ix86_arch_specified
)
3770 opts
->x_ix86_isa_flags
3771 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
3773 /* i386 ABI does not specify red zone. It still makes sense to use it
3774 when programmer takes care to stack from being destroyed. */
3775 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
3776 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
3779 /* Keep nonleaf frame pointers. */
3780 if (opts
->x_flag_omit_frame_pointer
)
3781 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3782 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
3783 opts
->x_flag_omit_frame_pointer
= 1;
3785 /* If we're doing fast math, we don't care about comparison order
3786 wrt NaNs. This lets us use a shorter comparison sequence. */
3787 if (opts
->x_flag_finite_math_only
)
3788 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
3790 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3791 since the insns won't need emulation. */
3792 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
3793 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3795 /* Likewise, if the target doesn't have a 387, or we've specified
3796 software floating point, don't use 387 inline intrinsics. */
3797 if (!TARGET_80387_P (opts
->x_target_flags
))
3798 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
3800 /* Turn on MMX builtins for -msse. */
3801 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3802 opts
->x_ix86_isa_flags
3803 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
3805 /* Enable SSE prefetch. */
3806 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
3807 || (TARGET_PRFCHW
&& !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
)))
3808 x86_prefetch_sse
= true;
3810 /* Enable prefetch{,w} instructions for -m3dnow. */
3811 if (TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
3812 opts
->x_ix86_isa_flags
3813 |= OPTION_MASK_ISA_PRFCHW
& ~opts
->x_ix86_isa_flags_explicit
;
3815 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3816 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
3817 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
3818 opts
->x_ix86_isa_flags
3819 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3821 /* Enable lzcnt instruction for -mabm. */
3822 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
3823 opts
->x_ix86_isa_flags
3824 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3826 /* Validate -mpreferred-stack-boundary= value or default it to
3827 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3828 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3829 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
3831 int min
= (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3832 ? (TARGET_SSE_P (opts
->x_ix86_isa_flags
) ? 4 : 3) : 2);
3833 int max
= (TARGET_SEH
? 4 : 12);
3835 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
3836 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
3839 error ("-mpreferred-stack-boundary is not supported "
3842 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3843 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
3846 ix86_preferred_stack_boundary
3847 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3850 /* Set the default value for -mstackrealign. */
3851 if (opts
->x_ix86_force_align_arg_pointer
== -1)
3852 opts
->x_ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3854 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3856 /* Validate -mincoming-stack-boundary= value or default it to
3857 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3858 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3859 if (opts_set
->x_ix86_incoming_stack_boundary_arg
)
3861 if (ix86_incoming_stack_boundary_arg
3862 < (TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2)
3863 || ix86_incoming_stack_boundary_arg
> 12)
3864 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3865 ix86_incoming_stack_boundary_arg
,
3866 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2);
3869 ix86_user_incoming_stack_boundary
3870 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3871 ix86_incoming_stack_boundary
3872 = ix86_user_incoming_stack_boundary
;
3876 /* Accept -msseregparm only if at least SSE support is enabled. */
3877 if (TARGET_SSEREGPARM_P (opts
->x_target_flags
)
3878 && ! TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3879 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3881 if (opts_set
->x_ix86_fpmath
)
3883 if (opts
->x_ix86_fpmath
& FPMATH_SSE
)
3885 if (!TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3887 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3888 opts
->x_ix86_fpmath
= FPMATH_387
;
3890 else if ((opts
->x_ix86_fpmath
& FPMATH_387
)
3891 && !TARGET_80387_P (opts
->x_target_flags
))
3893 warning (0, "387 instruction set disabled, using SSE arithmetics");
3894 opts
->x_ix86_fpmath
= FPMATH_SSE
;
3898 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3899 fpmath=387. The second is however default at many targets since the
3900 extra 80bit precision of temporaries is considered to be part of ABI.
3901 Overwrite the default at least for -ffast-math.
3902 TODO: -mfpmath=both seems to produce same performing code with bit
3903 smaller binaries. It is however not clear if register allocation is
3904 ready for this setting.
3905 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
3906 codegen. We may switch to 387 with -ffast-math for size optimized
3908 else if (fast_math_flags_set_p (&global_options
)
3909 && TARGET_SSE2_P (opts
->x_ix86_isa_flags
))
3910 ix86_fpmath
= FPMATH_SSE
;
3912 opts
->x_ix86_fpmath
= TARGET_FPMATH_DEFAULT_P (opts
->x_ix86_isa_flags
);
3914 /* If the i387 is disabled, then do not return values in it. */
3915 if (!TARGET_80387_P (opts
->x_target_flags
))
3916 opts
->x_target_flags
&= ~MASK_FLOAT_RETURNS
;
3918 /* Use external vectorized library in vectorizing intrinsics. */
3919 if (opts_set
->x_ix86_veclibabi_type
)
3920 switch (opts
->x_ix86_veclibabi_type
)
3922 case ix86_veclibabi_type_svml
:
3923 ix86_veclib_handler
= ix86_veclibabi_svml
;
3926 case ix86_veclibabi_type_acml
:
3927 ix86_veclib_handler
= ix86_veclibabi_acml
;
3934 if (ix86_tune_features
[X86_TUNE_ACCUMULATE_OUTGOING_ARGS
]
3935 && !(opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3936 && !opts
->x_optimize_size
)
3937 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3939 /* If stack probes are required, the space used for large function
3940 arguments on the stack must also be probed, so enable
3941 -maccumulate-outgoing-args so this happens in the prologue. */
3942 if (TARGET_STACK_PROBE_P (opts
->x_target_flags
)
3943 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3945 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3946 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3947 "for correctness", prefix
, suffix
);
3948 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3951 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3954 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3955 p
= strchr (internal_label_prefix
, 'X');
3956 internal_label_prefix_len
= p
- internal_label_prefix
;
3960 /* When scheduling description is not available, disable scheduler pass
3961 so it won't slow down the compilation and make x87 code slower. */
3962 if (!TARGET_SCHEDULE
)
3963 opts
->x_flag_schedule_insns_after_reload
= opts
->x_flag_schedule_insns
= 0;
3965 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3966 ix86_tune_cost
->simultaneous_prefetches
,
3967 opts
->x_param_values
,
3968 opts_set
->x_param_values
);
3969 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3970 ix86_tune_cost
->prefetch_block
,
3971 opts
->x_param_values
,
3972 opts_set
->x_param_values
);
3973 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3974 ix86_tune_cost
->l1_cache_size
,
3975 opts
->x_param_values
,
3976 opts_set
->x_param_values
);
3977 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3978 ix86_tune_cost
->l2_cache_size
,
3979 opts
->x_param_values
,
3980 opts_set
->x_param_values
);
3982 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3983 if (opts
->x_flag_prefetch_loop_arrays
< 0
3985 && (opts
->x_optimize
>= 3 || opts
->x_flag_profile_use
)
3986 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3987 opts
->x_flag_prefetch_loop_arrays
= 1;
3989 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3990 can be opts->x_optimized to ap = __builtin_next_arg (0). */
3991 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && !opts
->x_flag_split_stack
)
3992 targetm
.expand_builtin_va_start
= NULL
;
3994 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3996 ix86_gen_leave
= gen_leave_rex64
;
3997 if (Pmode
== DImode
)
3999 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
4000 ix86_gen_tls_local_dynamic_base_64
4001 = gen_tls_local_dynamic_base_64_di
;
4005 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
4006 ix86_gen_tls_local_dynamic_base_64
4007 = gen_tls_local_dynamic_base_64_si
;
4011 ix86_gen_leave
= gen_leave
;
4013 if (Pmode
== DImode
)
4015 ix86_gen_add3
= gen_adddi3
;
4016 ix86_gen_sub3
= gen_subdi3
;
4017 ix86_gen_sub3_carry
= gen_subdi3_carry
;
4018 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
4019 ix86_gen_andsp
= gen_anddi3
;
4020 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
4021 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
4022 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
4023 ix86_gen_monitor
= gen_sse3_monitor_di
;
4027 ix86_gen_add3
= gen_addsi3
;
4028 ix86_gen_sub3
= gen_subsi3
;
4029 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4030 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4031 ix86_gen_andsp
= gen_andsi3
;
4032 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4033 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4034 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4035 ix86_gen_monitor
= gen_sse3_monitor_si
;
4039 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4040 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4041 opts
->x_target_flags
|= MASK_CLD
& ~opts_set
->x_target_flags
;
4044 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && opts
->x_flag_pic
)
4046 if (opts
->x_flag_fentry
> 0)
4047 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4049 opts
->x_flag_fentry
= 0;
4051 else if (TARGET_SEH
)
4053 if (opts
->x_flag_fentry
== 0)
4054 sorry ("-mno-fentry isn%'t compatible with SEH");
4055 opts
->x_flag_fentry
= 1;
4057 else if (opts
->x_flag_fentry
< 0)
4059 #if defined(PROFILE_BEFORE_PROLOGUE)
4060 opts
->x_flag_fentry
= 1;
4062 opts
->x_flag_fentry
= 0;
4066 /* When not opts->x_optimize for size, enable vzeroupper optimization for
4067 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4068 AVX unaligned load/store. */
4069 if (!opts
->x_optimize_size
)
4071 if (flag_expensive_optimizations
4072 && !(opts_set
->x_target_flags
& MASK_VZEROUPPER
))
4073 opts
->x_target_flags
|= MASK_VZEROUPPER
;
4074 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL
]
4075 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4076 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4077 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL
]
4078 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4079 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4080 /* Enable 128-bit AVX instruction generation
4081 for the auto-vectorizer. */
4082 if (TARGET_AVX128_OPTIMAL
4083 && !(opts_set
->x_target_flags
& MASK_PREFER_AVX128
))
4084 opts
->x_target_flags
|= MASK_PREFER_AVX128
;
4087 if (opts
->x_ix86_recip_name
)
4089 char *p
= ASTRDUP (opts
->x_ix86_recip_name
);
4091 unsigned int mask
, i
;
4094 while ((q
= strtok (p
, ",")) != NULL
)
4105 if (!strcmp (q
, "default"))
4106 mask
= RECIP_MASK_ALL
;
4109 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4110 if (!strcmp (q
, recip_options
[i
].string
))
4112 mask
= recip_options
[i
].mask
;
4116 if (i
== ARRAY_SIZE (recip_options
))
4118 error ("unknown option for -mrecip=%s", q
);
4120 mask
= RECIP_MASK_NONE
;
4124 opts
->x_recip_mask_explicit
|= mask
;
4126 opts
->x_recip_mask
&= ~mask
;
4128 opts
->x_recip_mask
|= mask
;
4132 if (TARGET_RECIP_P (opts
->x_target_flags
))
4133 opts
->x_recip_mask
|= RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
;
4134 else if (opts_set
->x_target_flags
& MASK_RECIP
)
4135 opts
->x_recip_mask
&= ~(RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
);
4137 /* Default long double to 64-bit for Bionic. */
4138 if (TARGET_HAS_BIONIC
4139 && !(opts_set
->x_target_flags
& MASK_LONG_DOUBLE_64
))
4140 opts
->x_target_flags
|= MASK_LONG_DOUBLE_64
;
4142 /* Save the initial options in case the user does function specific
4145 target_option_default_node
= target_option_current_node
4146 = build_target_option_node (opts
);
4148 /* Handle stack protector */
4149 if (!opts_set
->x_ix86_stack_protector_guard
)
4150 opts
->x_ix86_stack_protector_guard
4151 = TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4153 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4154 if (opts
->x_ix86_tune_memcpy_strategy
)
4156 char *str
= xstrdup (opts
->x_ix86_tune_memcpy_strategy
);
4157 ix86_parse_stringop_strategy_string (str
, false);
4161 if (opts
->x_ix86_tune_memset_strategy
)
4163 char *str
= xstrdup (opts
->x_ix86_tune_memset_strategy
);
4164 ix86_parse_stringop_strategy_string (str
, true);
4169 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4172 ix86_option_override (void)
4174 opt_pass
*pass_insert_vzeroupper
= make_pass_insert_vzeroupper (g
);
4175 static struct register_pass_info insert_vzeroupper_info
4176 = { pass_insert_vzeroupper
, "reload",
4177 1, PASS_POS_INSERT_AFTER
4180 ix86_option_override_internal (true, &global_options
, &global_options_set
);
4183 /* This needs to be done at start up. It's convenient to do it here. */
4184 register_pass (&insert_vzeroupper_info
);
4187 /* Update register usage after having seen the compiler flags. */
4190 ix86_conditional_register_usage (void)
4195 /* The PIC register, if it exists, is fixed. */
4196 j
= PIC_OFFSET_TABLE_REGNUM
;
4197 if (j
!= INVALID_REGNUM
)
4198 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4200 /* For 32-bit targets, squash the REX registers. */
4203 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4204 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4205 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4206 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4207 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4208 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4211 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4212 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4213 : TARGET_64BIT
? (1 << 2)
4216 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4218 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4220 /* Set/reset conditionally defined registers from
4221 CALL_USED_REGISTERS initializer. */
4222 if (call_used_regs
[i
] > 1)
4223 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4225 /* Calculate registers of CLOBBERED_REGS register set
4226 as call used registers from GENERAL_REGS register set. */
4227 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4228 && call_used_regs
[i
])
4229 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4232 /* If MMX is disabled, squash the registers. */
4234 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4235 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4236 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4238 /* If SSE is disabled, squash the registers. */
4240 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4241 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4242 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4244 /* If the FPU is disabled, squash the registers. */
4245 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4246 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4247 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4248 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4250 /* If AVX512F is disabled, squash the registers. */
4251 if (! TARGET_AVX512F
)
4253 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4254 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4256 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
4257 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4260 /* If MPX is disabled, squash the registers. */
4262 for (i
= FIRST_BND_REG
; i
<= LAST_BND_REG
; i
++)
4263 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4267 /* Save the current options */
4270 ix86_function_specific_save (struct cl_target_option
*ptr
,
4271 struct gcc_options
*opts
)
4273 ptr
->arch
= ix86_arch
;
4274 ptr
->schedule
= ix86_schedule
;
4275 ptr
->tune
= ix86_tune
;
4276 ptr
->branch_cost
= ix86_branch_cost
;
4277 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4278 ptr
->arch_specified
= ix86_arch_specified
;
4279 ptr
->x_ix86_isa_flags_explicit
= opts
->x_ix86_isa_flags_explicit
;
4280 ptr
->x_ix86_target_flags_explicit
= opts
->x_ix86_target_flags_explicit
;
4281 ptr
->x_recip_mask_explicit
= opts
->x_recip_mask_explicit
;
4283 /* The fields are char but the variables are not; make sure the
4284 values fit in the fields. */
4285 gcc_assert (ptr
->arch
== ix86_arch
);
4286 gcc_assert (ptr
->schedule
== ix86_schedule
);
4287 gcc_assert (ptr
->tune
== ix86_tune
);
4288 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4291 /* Restore the current options */
4294 ix86_function_specific_restore (struct gcc_options
*opts
,
4295 struct cl_target_option
*ptr
)
4297 enum processor_type old_tune
= ix86_tune
;
4298 enum processor_type old_arch
= ix86_arch
;
4299 unsigned int ix86_arch_mask
;
4302 ix86_arch
= (enum processor_type
) ptr
->arch
;
4303 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4304 ix86_tune
= (enum processor_type
) ptr
->tune
;
4305 opts
->x_ix86_branch_cost
= ptr
->branch_cost
;
4306 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4307 ix86_arch_specified
= ptr
->arch_specified
;
4308 opts
->x_ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4309 opts
->x_ix86_target_flags_explicit
= ptr
->x_ix86_target_flags_explicit
;
4310 opts
->x_recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4312 /* Recreate the arch feature tests if the arch changed */
4313 if (old_arch
!= ix86_arch
)
4315 ix86_arch_mask
= 1u << ix86_arch
;
4316 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4317 ix86_arch_features
[i
]
4318 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4321 /* Recreate the tune optimization tests */
4322 if (old_tune
!= ix86_tune
)
4323 set_ix86_tune_features (ix86_tune
, false);
4326 /* Print the current options */
4329 ix86_function_specific_print (FILE *file
, int indent
,
4330 struct cl_target_option
*ptr
)
4333 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4334 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4336 fprintf (file
, "%*sarch = %d (%s)\n",
4339 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4340 ? cpu_names
[ptr
->arch
]
4343 fprintf (file
, "%*stune = %d (%s)\n",
4346 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4347 ? cpu_names
[ptr
->tune
]
4350 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4354 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4355 free (target_string
);
4360 /* Inner function to process the attribute((target(...))), take an argument and
4361 set the current options from the argument. If we have a list, recursively go
4365 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4366 struct gcc_options
*opts
,
4367 struct gcc_options
*opts_set
,
4368 struct gcc_options
*enum_opts_set
)
4373 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4374 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4375 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4376 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4377 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4393 enum ix86_opt_type type
;
4398 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4399 IX86_ATTR_ISA ("abm", OPT_mabm
),
4400 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4401 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4402 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4403 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4404 IX86_ATTR_ISA ("aes", OPT_maes
),
4405 IX86_ATTR_ISA ("avx", OPT_mavx
),
4406 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4407 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4408 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4409 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4410 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4411 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4412 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4413 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4414 IX86_ATTR_ISA ("sse", OPT_msse
),
4415 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4416 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4417 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4418 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4419 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4420 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4421 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4422 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4423 IX86_ATTR_ISA ("fma", OPT_mfma
),
4424 IX86_ATTR_ISA ("xop", OPT_mxop
),
4425 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4426 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4427 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4428 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4429 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4430 IX86_ATTR_ISA ("hle", OPT_mhle
),
4431 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4432 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4433 IX86_ATTR_ISA ("adx", OPT_madx
),
4434 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4435 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4436 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4439 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4441 /* string options */
4442 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4443 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4446 IX86_ATTR_YES ("cld",
4450 IX86_ATTR_NO ("fancy-math-387",
4451 OPT_mfancy_math_387
,
4452 MASK_NO_FANCY_MATH_387
),
4454 IX86_ATTR_YES ("ieee-fp",
4458 IX86_ATTR_YES ("inline-all-stringops",
4459 OPT_minline_all_stringops
,
4460 MASK_INLINE_ALL_STRINGOPS
),
4462 IX86_ATTR_YES ("inline-stringops-dynamically",
4463 OPT_minline_stringops_dynamically
,
4464 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4466 IX86_ATTR_NO ("align-stringops",
4467 OPT_mno_align_stringops
,
4468 MASK_NO_ALIGN_STRINGOPS
),
4470 IX86_ATTR_YES ("recip",
4476 /* If this is a list, recurse to get the options. */
4477 if (TREE_CODE (args
) == TREE_LIST
)
4481 for (; args
; args
= TREE_CHAIN (args
))
4482 if (TREE_VALUE (args
)
4483 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4484 p_strings
, opts
, opts_set
,
4491 else if (TREE_CODE (args
) != STRING_CST
)
4493 error ("attribute %<target%> argument not a string");
4497 /* Handle multiple arguments separated by commas. */
4498 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4500 while (next_optstr
&& *next_optstr
!= '\0')
4502 char *p
= next_optstr
;
4504 char *comma
= strchr (next_optstr
, ',');
4505 const char *opt_string
;
4506 size_t len
, opt_len
;
4511 enum ix86_opt_type type
= ix86_opt_unknown
;
4517 len
= comma
- next_optstr
;
4518 next_optstr
= comma
+ 1;
4526 /* Recognize no-xxx. */
4527 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4536 /* Find the option. */
4539 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4541 type
= attrs
[i
].type
;
4542 opt_len
= attrs
[i
].len
;
4543 if (ch
== attrs
[i
].string
[0]
4544 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4547 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4550 mask
= attrs
[i
].mask
;
4551 opt_string
= attrs
[i
].string
;
4556 /* Process the option. */
4559 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4563 else if (type
== ix86_opt_isa
)
4565 struct cl_decoded_option decoded
;
4567 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4568 ix86_handle_option (opts
, opts_set
,
4569 &decoded
, input_location
);
4572 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4574 if (type
== ix86_opt_no
)
4575 opt_set_p
= !opt_set_p
;
4578 opts
->x_target_flags
|= mask
;
4580 opts
->x_target_flags
&= ~mask
;
4583 else if (type
== ix86_opt_str
)
4587 error ("option(\"%s\") was already specified", opt_string
);
4591 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4594 else if (type
== ix86_opt_enum
)
4599 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4601 set_option (opts
, enum_opts_set
, opt
, value
,
4602 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4606 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4618 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4621 ix86_valid_target_attribute_tree (tree args
,
4622 struct gcc_options
*opts
,
4623 struct gcc_options
*opts_set
)
4625 const char *orig_arch_string
= ix86_arch_string
;
4626 const char *orig_tune_string
= ix86_tune_string
;
4627 enum fpmath_unit orig_fpmath_set
= opts_set
->x_ix86_fpmath
;
4628 int orig_tune_defaulted
= ix86_tune_defaulted
;
4629 int orig_arch_specified
= ix86_arch_specified
;
4630 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4633 struct cl_target_option
*def
4634 = TREE_TARGET_OPTION (target_option_default_node
);
4635 struct gcc_options enum_opts_set
;
4637 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4639 /* Process each of the options on the chain. */
4640 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
, opts
,
4641 opts_set
, &enum_opts_set
))
4642 return error_mark_node
;
4644 /* If the changed options are different from the default, rerun
4645 ix86_option_override_internal, and then save the options away.
4646 The string options are are attribute options, and will be undone
4647 when we copy the save structure. */
4648 if (opts
->x_ix86_isa_flags
!= def
->x_ix86_isa_flags
4649 || opts
->x_target_flags
!= def
->x_target_flags
4650 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4651 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4652 || enum_opts_set
.x_ix86_fpmath
)
4654 /* If we are using the default tune= or arch=, undo the string assigned,
4655 and use the default. */
4656 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4657 opts
->x_ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4658 else if (!orig_arch_specified
)
4659 opts
->x_ix86_arch_string
= NULL
;
4661 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4662 opts
->x_ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4663 else if (orig_tune_defaulted
)
4664 opts
->x_ix86_tune_string
= NULL
;
4666 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4667 if (enum_opts_set
.x_ix86_fpmath
)
4668 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4669 else if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
4670 && TARGET_SSE_P (opts
->x_ix86_isa_flags
))
4672 opts
->x_ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4673 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4676 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4677 ix86_option_override_internal (false, opts
, opts_set
);
4679 /* Add any builtin functions with the new isa if any. */
4680 ix86_add_new_builtins (opts
->x_ix86_isa_flags
);
4682 /* Save the current options unless we are validating options for
4684 t
= build_target_option_node (opts
);
4686 opts
->x_ix86_arch_string
= orig_arch_string
;
4687 opts
->x_ix86_tune_string
= orig_tune_string
;
4688 opts_set
->x_ix86_fpmath
= orig_fpmath_set
;
4690 /* Free up memory allocated to hold the strings */
4691 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4692 free (option_strings
[i
]);
4698 /* Hook to validate attribute((target("string"))). */
4701 ix86_valid_target_attribute_p (tree fndecl
,
4702 tree
ARG_UNUSED (name
),
4704 int ARG_UNUSED (flags
))
4706 struct gcc_options func_options
;
4707 tree new_target
, new_optimize
;
4710 /* attribute((target("default"))) does nothing, beyond
4711 affecting multi-versioning. */
4712 if (TREE_VALUE (args
)
4713 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4714 && TREE_CHAIN (args
) == NULL_TREE
4715 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4718 tree old_optimize
= build_optimization_node (&global_options
);
4720 /* Get the optimization options of the current function. */
4721 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4724 func_optimize
= old_optimize
;
4726 /* Init func_options. */
4727 memset (&func_options
, 0, sizeof (func_options
));
4728 init_options_struct (&func_options
, NULL
);
4729 lang_hooks
.init_options_struct (&func_options
);
4731 cl_optimization_restore (&func_options
,
4732 TREE_OPTIMIZATION (func_optimize
));
4734 /* Initialize func_options to the default before its target options can
4736 cl_target_option_restore (&func_options
,
4737 TREE_TARGET_OPTION (target_option_default_node
));
4739 new_target
= ix86_valid_target_attribute_tree (args
, &func_options
,
4740 &global_options_set
);
4742 new_optimize
= build_optimization_node (&func_options
);
4744 if (new_target
== error_mark_node
)
4747 else if (fndecl
&& new_target
)
4749 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4751 if (old_optimize
!= new_optimize
)
4752 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4759 /* Hook to determine if one function can safely inline another. */
4762 ix86_can_inline_p (tree caller
, tree callee
)
4765 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4766 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4768 /* If callee has no option attributes, then it is ok to inline. */
4772 /* If caller has no option attributes, but callee does then it is not ok to
4774 else if (!caller_tree
)
4779 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4780 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4782 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4783 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4785 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4786 != callee_opts
->x_ix86_isa_flags
)
4789 /* See if we have the same non-isa options. */
4790 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4793 /* See if arch, tune, etc. are the same. */
4794 else if (caller_opts
->arch
!= callee_opts
->arch
)
4797 else if (caller_opts
->tune
!= callee_opts
->tune
)
4800 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4803 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4814 /* Remember the last target of ix86_set_current_function. */
4815 static GTY(()) tree ix86_previous_fndecl
;
4817 /* Invalidate ix86_previous_fndecl cache. */
4819 ix86_reset_previous_fndecl (void)
4821 ix86_previous_fndecl
= NULL_TREE
;
4824 /* Establish appropriate back-end context for processing the function
4825 FNDECL. The argument might be NULL to indicate processing at top
4826 level, outside of any function scope. */
4828 ix86_set_current_function (tree fndecl
)
4830 /* Only change the context if the function changes. This hook is called
4831 several times in the course of compiling a function, and we don't want to
4832 slow things down too much or call target_reinit when it isn't safe. */
4833 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4835 tree old_tree
= (ix86_previous_fndecl
4836 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4839 tree new_tree
= (fndecl
4840 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4843 ix86_previous_fndecl
= fndecl
;
4844 if (old_tree
== new_tree
)
4849 cl_target_option_restore (&global_options
,
4850 TREE_TARGET_OPTION (new_tree
));
4856 struct cl_target_option
*def
4857 = TREE_TARGET_OPTION (target_option_current_node
);
4859 cl_target_option_restore (&global_options
, def
);
4866 /* Return true if this goes in large data/bss. */
4869 ix86_in_large_data_p (tree exp
)
4871 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4874 /* Functions are never large data. */
4875 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4878 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4880 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4881 if (strcmp (section
, ".ldata") == 0
4882 || strcmp (section
, ".lbss") == 0)
4888 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4890 /* If this is an incomplete type with size 0, then we can't put it
4891 in data because it might be too big when completed. */
4892 if (!size
|| size
> ix86_section_threshold
)
4899 /* Switch to the appropriate section for output of DECL.
4900 DECL is either a `VAR_DECL' node or a constant of some sort.
4901 RELOC indicates whether forming the initial value of DECL requires
4902 link-time relocations. */
4904 ATTRIBUTE_UNUSED
static section
*
4905 x86_64_elf_select_section (tree decl
, int reloc
,
4906 unsigned HOST_WIDE_INT align
)
4908 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4909 && ix86_in_large_data_p (decl
))
4911 const char *sname
= NULL
;
4912 unsigned int flags
= SECTION_WRITE
;
4913 switch (categorize_decl_for_section (decl
, reloc
))
4918 case SECCAT_DATA_REL
:
4919 sname
= ".ldata.rel";
4921 case SECCAT_DATA_REL_LOCAL
:
4922 sname
= ".ldata.rel.local";
4924 case SECCAT_DATA_REL_RO
:
4925 sname
= ".ldata.rel.ro";
4927 case SECCAT_DATA_REL_RO_LOCAL
:
4928 sname
= ".ldata.rel.ro.local";
4932 flags
|= SECTION_BSS
;
4935 case SECCAT_RODATA_MERGE_STR
:
4936 case SECCAT_RODATA_MERGE_STR_INIT
:
4937 case SECCAT_RODATA_MERGE_CONST
:
4941 case SECCAT_SRODATA
:
4948 /* We don't split these for medium model. Place them into
4949 default sections and hope for best. */
4954 /* We might get called with string constants, but get_named_section
4955 doesn't like them as they are not DECLs. Also, we need to set
4956 flags in that case. */
4958 return get_section (sname
, flags
, NULL
);
4959 return get_named_section (decl
, sname
, reloc
);
4962 return default_elf_select_section (decl
, reloc
, align
);
4965 /* Select a set of attributes for section NAME based on the properties
4966 of DECL and whether or not RELOC indicates that DECL's initializer
4967 might contain runtime relocations. */
4969 static unsigned int ATTRIBUTE_UNUSED
4970 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
4972 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
4974 if (decl
== NULL_TREE
4975 && (strcmp (name
, ".ldata.rel.ro") == 0
4976 || strcmp (name
, ".ldata.rel.ro.local") == 0))
4977 flags
|= SECTION_RELRO
;
4979 if (strcmp (name
, ".lbss") == 0
4980 || strncmp (name
, ".lbss.", 5) == 0
4981 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
4982 flags
|= SECTION_BSS
;
4987 /* Build up a unique section name, expressed as a
4988 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4989 RELOC indicates whether the initial value of EXP requires
4990 link-time relocations. */
4992 static void ATTRIBUTE_UNUSED
4993 x86_64_elf_unique_section (tree decl
, int reloc
)
4995 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4996 && ix86_in_large_data_p (decl
))
4998 const char *prefix
= NULL
;
4999 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5000 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
5002 switch (categorize_decl_for_section (decl
, reloc
))
5005 case SECCAT_DATA_REL
:
5006 case SECCAT_DATA_REL_LOCAL
:
5007 case SECCAT_DATA_REL_RO
:
5008 case SECCAT_DATA_REL_RO_LOCAL
:
5009 prefix
= one_only
? ".ld" : ".ldata";
5012 prefix
= one_only
? ".lb" : ".lbss";
5015 case SECCAT_RODATA_MERGE_STR
:
5016 case SECCAT_RODATA_MERGE_STR_INIT
:
5017 case SECCAT_RODATA_MERGE_CONST
:
5018 prefix
= one_only
? ".lr" : ".lrodata";
5020 case SECCAT_SRODATA
:
5027 /* We don't split these for medium model. Place them into
5028 default sections and hope for best. */
5033 const char *name
, *linkonce
;
5036 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
5037 name
= targetm
.strip_name_encoding (name
);
5039 /* If we're using one_only, then there needs to be a .gnu.linkonce
5040 prefix to the section name. */
5041 linkonce
= one_only
? ".gnu.linkonce" : "";
5043 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
5045 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
5049 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Large objects under the medium code models live in .largecomm so the
     linker can place them out of the small-data region.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int) ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
/* NOTE(review): the closing brace and #endif were lost in the damaged
   extraction and are reconstructed here — confirm against upstream.  */
#endif
5074 /* Utility function for targets to use in implementing
5075 ASM_OUTPUT_ALIGNED_BSS. */
5078 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
5079 const char *name
, unsigned HOST_WIDE_INT size
,
5082 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5083 && size
> (unsigned int)ix86_section_threshold
)
5084 switch_to_section (get_named_section (decl
, ".lbss", 0));
5086 switch_to_section (bss_section
);
5087 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
5088 #ifdef ASM_DECLARE_OBJECT_NAME
5089 last_assemble_variable_decl
= decl
;
5090 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
5092 /* Standard thing is just output label for the object. */
5093 ASM_OUTPUT_LABEL (file
, name
);
5094 #endif /* ASM_DECLARE_OBJECT_NAME */
5095 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
5098 /* Decide whether we must probe the stack before any space allocation
5099 on this target. It's essentially TARGET_STACK_PROBE except when
5100 -fstack-check causes the stack to be already probed differently. */
5103 ix86_target_stack_probe (void)
5105 /* Do not probe the stack twice if static stack checking is enabled. */
5106 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5109 return TARGET_STACK_PROBE
;
5112 /* Decide whether we can make a sibling call to a function. DECL is the
5113 declaration of the function being targeted by the call and EXP is the
5114 CALL_EXPR representing the call. */
5117 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5119 tree type
, decl_or_type
;
5122 /* If we are generating position-independent code, we cannot sibcall
5123 optimize any indirect call, or a direct call to a global function,
5124 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5128 && (!decl
|| !targetm
.binds_local_p (decl
)))
5131 /* If we need to align the outgoing stack, then sibcalling would
5132 unalign the stack, which may break the called function. */
5133 if (ix86_minimum_incoming_stack_boundary (true)
5134 < PREFERRED_STACK_BOUNDARY
)
5139 decl_or_type
= decl
;
5140 type
= TREE_TYPE (decl
);
5144 /* We're looking at the CALL_EXPR, we need the type of the function. */
5145 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5146 type
= TREE_TYPE (type
); /* pointer type */
5147 type
= TREE_TYPE (type
); /* function type */
5148 decl_or_type
= type
;
5151 /* Check that the return value locations are the same. Like
5152 if we are returning floats on the 80387 register stack, we cannot
5153 make a sibcall from a function that doesn't return a float to a
5154 function that does or, conversely, from a function that does return
5155 a float to a function that doesn't; the necessary stack adjustment
5156 would not be executed. This is also the place we notice
5157 differences in the return value ABI. Note that it is ok for one
5158 of the functions to have void return type as long as the return
5159 value of the other is passed in a register. */
5160 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5161 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5163 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5165 if (!rtx_equal_p (a
, b
))
5168 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5170 else if (!rtx_equal_p (a
, b
))
5175 /* The SYSV ABI has more call-clobbered registers;
5176 disallow sibcalls from MS to SYSV. */
5177 if (cfun
->machine
->call_abi
== MS_ABI
5178 && ix86_function_type_abi (type
) == SYSV_ABI
)
5183 /* If this call is indirect, we'll need to be able to use a
5184 call-clobbered register for the address of the target function.
5185 Make sure that all such registers are not used for passing
5186 parameters. Note that DLLIMPORT functions are indirect. */
5188 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5190 if (ix86_function_regparm (type
, NULL
) >= 3)
5192 /* ??? Need to count the actual number of registers to be used,
5193 not the possible number of registers. Fix later. */
5199 /* Otherwise okay. That also includes certain types of indirect calls. */
5203 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5204 and "sseregparm" calling convention attributes;
5205 arguments as in struct attribute_spec.handler. */
5208 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5210 int flags ATTRIBUTE_UNUSED
,
5213 if (TREE_CODE (*node
) != FUNCTION_TYPE
5214 && TREE_CODE (*node
) != METHOD_TYPE
5215 && TREE_CODE (*node
) != FIELD_DECL
5216 && TREE_CODE (*node
) != TYPE_DECL
)
5218 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5220 *no_add_attrs
= true;
5224 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5225 if (is_attribute_p ("regparm", name
))
5229 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5231 error ("fastcall and regparm attributes are not compatible");
5234 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5236 error ("regparam and thiscall attributes are not compatible");
5239 cst
= TREE_VALUE (args
);
5240 if (TREE_CODE (cst
) != INTEGER_CST
)
5242 warning (OPT_Wattributes
,
5243 "%qE attribute requires an integer constant argument",
5245 *no_add_attrs
= true;
5247 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5249 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5251 *no_add_attrs
= true;
5259 /* Do not warn when emulating the MS ABI. */
5260 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5261 && TREE_CODE (*node
) != METHOD_TYPE
)
5262 || ix86_function_type_abi (*node
) != MS_ABI
)
5263 warning (OPT_Wattributes
, "%qE attribute ignored",
5265 *no_add_attrs
= true;
5269 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5270 if (is_attribute_p ("fastcall", name
))
5272 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5274 error ("fastcall and cdecl attributes are not compatible");
5276 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5278 error ("fastcall and stdcall attributes are not compatible");
5280 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5282 error ("fastcall and regparm attributes are not compatible");
5284 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5286 error ("fastcall and thiscall attributes are not compatible");
5290 /* Can combine stdcall with fastcall (redundant), regparm and
5292 else if (is_attribute_p ("stdcall", name
))
5294 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5296 error ("stdcall and cdecl attributes are not compatible");
5298 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5300 error ("stdcall and fastcall attributes are not compatible");
5302 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5304 error ("stdcall and thiscall attributes are not compatible");
5308 /* Can combine cdecl with regparm and sseregparm. */
5309 else if (is_attribute_p ("cdecl", name
))
5311 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5313 error ("stdcall and cdecl attributes are not compatible");
5315 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5317 error ("fastcall and cdecl attributes are not compatible");
5319 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5321 error ("cdecl and thiscall attributes are not compatible");
5324 else if (is_attribute_p ("thiscall", name
))
5326 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5327 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5329 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5331 error ("stdcall and thiscall attributes are not compatible");
5333 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5335 error ("fastcall and thiscall attributes are not compatible");
5337 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5339 error ("cdecl and thiscall attributes are not compatible");
5343 /* Can combine sseregparm with all attributes. */
5348 /* The transactional memory builtins are implicitly regparm or fastcall
5349 depending on the ABI. Override the generic do-nothing attribute that
5350 these builtins were declared with, and replace it with one of the two
5351 attributes that we expect elsewhere. */
5354 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5355 tree args ATTRIBUTE_UNUSED
,
5356 int flags
, bool *no_add_attrs
)
5360 /* In no case do we want to add the placeholder attribute. */
5361 *no_add_attrs
= true;
5363 /* The 64-bit ABI is unchanged for transactional memory. */
5367 /* ??? Is there a better way to validate 32-bit windows? We have
5368 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5369 if (CHECK_STACK_LIMIT
> 0)
5370 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5373 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5374 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5376 decl_attributes (node
, alt
, flags
);
5381 /* This function determines from TYPE the calling-convention. */
5384 ix86_get_callcvt (const_tree type
)
5386 unsigned int ret
= 0;
5391 return IX86_CALLCVT_CDECL
;
5393 attrs
= TYPE_ATTRIBUTES (type
);
5394 if (attrs
!= NULL_TREE
)
5396 if (lookup_attribute ("cdecl", attrs
))
5397 ret
|= IX86_CALLCVT_CDECL
;
5398 else if (lookup_attribute ("stdcall", attrs
))
5399 ret
|= IX86_CALLCVT_STDCALL
;
5400 else if (lookup_attribute ("fastcall", attrs
))
5401 ret
|= IX86_CALLCVT_FASTCALL
;
5402 else if (lookup_attribute ("thiscall", attrs
))
5403 ret
|= IX86_CALLCVT_THISCALL
;
5405 /* Regparam isn't allowed for thiscall and fastcall. */
5406 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5408 if (lookup_attribute ("regparm", attrs
))
5409 ret
|= IX86_CALLCVT_REGPARM
;
5410 if (lookup_attribute ("sseregparm", attrs
))
5411 ret
|= IX86_CALLCVT_SSEREGPARM
;
5414 if (IX86_BASE_CALLCVT(ret
) != 0)
5418 is_stdarg
= stdarg_p (type
);
5419 if (TARGET_RTD
&& !is_stdarg
)
5420 return IX86_CALLCVT_STDCALL
| ret
;
5424 || TREE_CODE (type
) != METHOD_TYPE
5425 || ix86_function_type_abi (type
) != MS_ABI
)
5426 return IX86_CALLCVT_CDECL
| ret
;
5428 return IX86_CALLCVT_THISCALL
;
5431 /* Return 0 if the attributes for two types are incompatible, 1 if they
5432 are compatible, and 2 if they are nearly compatible (which causes a
5433 warning to be generated). */
5436 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5438 unsigned int ccvt1
, ccvt2
;
5440 if (TREE_CODE (type1
) != FUNCTION_TYPE
5441 && TREE_CODE (type1
) != METHOD_TYPE
)
5444 ccvt1
= ix86_get_callcvt (type1
);
5445 ccvt2
= ix86_get_callcvt (type2
);
5448 if (ix86_function_regparm (type1
, NULL
)
5449 != ix86_function_regparm (type2
, NULL
))
5455 /* Return the regparm value for a function with the indicated TYPE and DECL.
5456 DECL may be NULL when calling function indirectly
5457 or considering a libcall. */
5460 ix86_function_regparm (const_tree type
, const_tree decl
)
5467 return (ix86_function_type_abi (type
) == SYSV_ABI
5468 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5469 ccvt
= ix86_get_callcvt (type
);
5470 regparm
= ix86_regparm
;
5472 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5474 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5477 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5481 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5483 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5486 /* Use register calling convention for local functions when possible. */
5488 && TREE_CODE (decl
) == FUNCTION_DECL
5490 && !(profile_flag
&& !flag_fentry
))
5492 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5493 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5494 if (i
&& i
->local
&& i
->can_change_signature
)
5496 int local_regparm
, globals
= 0, regno
;
5498 /* Make sure no regparm register is taken by a
5499 fixed register variable. */
5500 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5501 if (fixed_regs
[local_regparm
])
5504 /* We don't want to use regparm(3) for nested functions as
5505 these use a static chain pointer in the third argument. */
5506 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5509 /* In 32-bit mode save a register for the split stack. */
5510 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5513 /* Each fixed register usage increases register pressure,
5514 so less registers should be used for argument passing.
5515 This functionality can be overriden by an explicit
5517 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5518 if (fixed_regs
[regno
])
5522 = globals
< local_regparm
? local_regparm
- globals
: 0;
5524 if (local_regparm
> regparm
)
5525 regparm
= local_regparm
;
5532 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5533 DFmode (2) arguments in SSE registers for a function with the
5534 indicated TYPE and DECL. DECL may be NULL when calling function
5535 indirectly or considering a libcall. Otherwise return 0. */
5538 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5540 gcc_assert (!TARGET_64BIT
);
5542 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5543 by the sseregparm attribute. */
5544 if (TARGET_SSEREGPARM
5545 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5552 error ("calling %qD with attribute sseregparm without "
5553 "SSE/SSE2 enabled", decl
);
5555 error ("calling %qT with attribute sseregparm without "
5556 "SSE/SSE2 enabled", type
);
5564 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5565 (and DFmode for SSE2) arguments in SSE registers. */
5566 if (decl
&& TARGET_SSE_MATH
&& optimize
5567 && !(profile_flag
&& !flag_fentry
))
5569 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5570 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5571 if (i
&& i
->local
&& i
->can_change_signature
)
5572 return TARGET_SSE2
? 2 : 1;
5578 /* Return true if EAX is live at the start of the function. Used by
5579 ix86_expand_prologue to determine if we need special help before
5580 calling allocate_stack_worker. */
5583 ix86_eax_live_at_start_p (void)
5585 /* Cheat. Don't bother working forward from ix86_function_regparm
5586 to the function type to whether an actual argument is located in
5587 eax. Instead just look at cfg info, which is still close enough
5588 to correct at this point. This gives false positives for broken
5589 functions that might use uninitialized data that happens to be
5590 allocated in eax, but who cares? */
5591 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5595 ix86_keep_aggregate_return_pointer (tree fntype
)
5601 attr
= lookup_attribute ("callee_pop_aggregate_return",
5602 TYPE_ATTRIBUTES (fntype
));
5604 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5606 /* For 32-bit MS-ABI the default is to keep aggregate
5608 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5611 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5614 /* Value is the number of bytes of arguments automatically
5615 popped when returning from a subroutine call.
5616 FUNDECL is the declaration node of the function (as a tree),
5617 FUNTYPE is the data type of the function (as a tree),
5618 or for a library call it is an identifier node for the subroutine name.
5619 SIZE is the number of bytes of arguments passed on the stack.
5621 On the 80386, the RTD insn may be used to pop them if the number
5622 of args is fixed, but if the number is variable then the caller
5623 must pop them all. RTD can't be used for library calls now
5624 because the library is compiled with the Unix compiler.
5625 Use of RTD is a selectable option, since it is incompatible with
5626 standard Unix calling sequences. If the option is not selected,
5627 the caller must always pop the args.
5629 The attribute stdcall is equivalent to RTD on a per module basis. */
5632 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5636 /* None of the 64-bit ABIs pop arguments. */
5640 ccvt
= ix86_get_callcvt (funtype
);
5642 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5643 | IX86_CALLCVT_THISCALL
)) != 0
5644 && ! stdarg_p (funtype
))
5647 /* Lose any fake structure return argument if it is passed on the stack. */
5648 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5649 && !ix86_keep_aggregate_return_pointer (funtype
))
5651 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5653 return GET_MODE_SIZE (Pmode
);
5659 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5662 ix86_legitimate_combined_insn (rtx insn
)
5664 /* Check operand constraints in case hard registers were propagated
5665 into insn pattern. This check prevents combine pass from
5666 generating insn patterns with invalid hard register operands.
5667 These invalid insns can eventually confuse reload to error out
5668 with a spill failure. See also PRs 46829 and 46843. */
5669 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5673 extract_insn (insn
);
5674 preprocess_constraints ();
5676 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5678 rtx op
= recog_data
.operand
[i
];
5679 enum machine_mode mode
= GET_MODE (op
);
5680 struct operand_alternative
*op_alt
;
5685 /* A unary operator may be accepted by the predicate, but it
5686 is irrelevant for matching constraints. */
5690 if (GET_CODE (op
) == SUBREG
)
5692 if (REG_P (SUBREG_REG (op
))
5693 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5694 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5695 GET_MODE (SUBREG_REG (op
)),
5698 op
= SUBREG_REG (op
);
5701 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5704 op_alt
= recog_op_alt
[i
];
5706 /* Operand has no constraints, anything is OK. */
5707 win
= !recog_data
.n_alternatives
;
5709 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5711 if (op_alt
[j
].anything_ok
5712 || (op_alt
[j
].matches
!= -1
5714 (recog_data
.operand
[i
],
5715 recog_data
.operand
[op_alt
[j
].matches
]))
5716 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5731 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5733 static unsigned HOST_WIDE_INT
5734 ix86_asan_shadow_offset (void)
5736 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
5737 : HOST_WIDE_INT_C (0x7fff8000))
5738 : (HOST_WIDE_INT_1
<< 29);
5741 /* Argument support functions. */
5743 /* Return true when register may be used to pass function parameters. */
5745 ix86_function_arg_regno_p (int regno
)
5748 const int *parm_regs
;
5753 return (regno
< REGPARM_MAX
5754 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5756 return (regno
< REGPARM_MAX
5757 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5758 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5759 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5760 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5763 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5764 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5767 /* TODO: The function should depend on current function ABI but
5768 builtins.c would need updating then. Therefore we use the
5771 /* RAX is used as hidden argument to va_arg functions. */
5772 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5775 if (ix86_abi
== MS_ABI
)
5776 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5778 parm_regs
= x86_64_int_parameter_registers
;
5779 for (i
= 0; i
< (ix86_abi
== MS_ABI
5780 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5781 if (regno
== parm_regs
[i
])
5786 /* Return if we do not know how to pass TYPE solely in registers. */
5789 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5791 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5794 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5795 The layout_type routine is crafty and tries to trick us into passing
5796 currently unsupported vector types on the stack by using TImode. */
5797 return (!TARGET_64BIT
&& mode
== TImode
5798 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5801 /* It returns the size, in bytes, of the area reserved for arguments passed
5802 in registers for the function represented by fndecl dependent to the used
5805 ix86_reg_parm_stack_space (const_tree fndecl
)
5807 enum calling_abi call_abi
= SYSV_ABI
;
5808 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5809 call_abi
= ix86_function_abi (fndecl
);
5811 call_abi
= ix86_function_type_abi (fndecl
);
5812 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5817 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5820 ix86_function_type_abi (const_tree fntype
)
5822 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5824 enum calling_abi abi
= ix86_abi
;
5825 if (abi
== SYSV_ABI
)
5827 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5830 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5837 /* We add this as a workaround in order to use libc_has_function
5840 ix86_libc_has_function (enum function_class fn_class
)
5842 return targetm
.libc_has_function (fn_class
);
5846 ix86_function_ms_hook_prologue (const_tree fn
)
5848 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5850 if (decl_function_context (fn
) != NULL_TREE
)
5851 error_at (DECL_SOURCE_LOCATION (fn
),
5852 "ms_hook_prologue is not compatible with nested function");
5859 static enum calling_abi
5860 ix86_function_abi (const_tree fndecl
)
5864 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5867 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5870 ix86_cfun_abi (void)
5874 return cfun
->machine
->call_abi
;
5877 /* Write the extra assembler code needed to declare a function properly. */
5880 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5883 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5887 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5888 unsigned int filler_cc
= 0xcccccccc;
5890 for (i
= 0; i
< filler_count
; i
+= 4)
5891 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5894 #ifdef SUBTARGET_ASM_UNWIND_INIT
5895 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5898 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5900 /* Output magic byte marker, if hot-patch attribute is set. */
5905 /* leaq [%rsp + 0], %rsp */
5906 asm_fprintf (asm_out_file
, ASM_BYTE
5907 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5911 /* movl.s %edi, %edi
5913 movl.s %esp, %ebp */
5914 asm_fprintf (asm_out_file
, ASM_BYTE
5915 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5921 extern void init_regs (void);
5923 /* Implementation of call abi switching target hook. Specific to FNDECL
5924 the specific call register sets are set. See also
5925 ix86_conditional_register_usage for more details. */
5927 ix86_call_abi_override (const_tree fndecl
)
5929 if (fndecl
== NULL_TREE
)
5930 cfun
->machine
->call_abi
= ix86_abi
;
5932 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5935 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5936 expensive re-initialization of init_regs each time we switch function context
5937 since this is needed only during RTL expansion. */
5939 ix86_maybe_switch_abi (void)
5942 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5946 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5947 for a call to a function whose data type is FNTYPE.
5948 For a library call, FNTYPE is 0. */
5951 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5952 tree fntype
, /* tree ptr for function decl */
5953 rtx libname
, /* SYMBOL_REF of library name or 0 */
5957 struct cgraph_local_info
*i
;
5959 memset (cum
, 0, sizeof (*cum
));
5963 i
= cgraph_local_info (fndecl
);
5964 cum
->call_abi
= ix86_function_abi (fndecl
);
5969 cum
->call_abi
= ix86_function_type_abi (fntype
);
5972 cum
->caller
= caller
;
5974 /* Set up the number of registers to use for passing arguments. */
5976 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5977 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5978 "or subtarget optimization implying it");
5979 cum
->nregs
= ix86_regparm
;
5982 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5983 ? X86_64_REGPARM_MAX
5984 : X86_64_MS_REGPARM_MAX
);
5988 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5991 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5992 ? X86_64_SSE_REGPARM_MAX
5993 : X86_64_MS_SSE_REGPARM_MAX
);
5997 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5998 cum
->warn_avx
= true;
5999 cum
->warn_sse
= true;
6000 cum
->warn_mmx
= true;
6002 /* Because type might mismatch in between caller and callee, we need to
6003 use actual type of function for local calls.
6004 FIXME: cgraph_analyze can be told to actually record if function uses
6005 va_start so for local functions maybe_vaarg can be made aggressive
6007 FIXME: once typesytem is fixed, we won't need this code anymore. */
6008 if (i
&& i
->local
&& i
->can_change_signature
)
6009 fntype
= TREE_TYPE (fndecl
);
6010 cum
->maybe_vaarg
= (fntype
6011 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
6016 /* If there are variable arguments, then we won't pass anything
6017 in registers in 32-bit mode. */
6018 if (stdarg_p (fntype
))
6029 /* Use ecx and edx registers if function has fastcall attribute,
6030 else look for regparm information. */
6033 unsigned int ccvt
= ix86_get_callcvt (fntype
);
6034 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
6037 cum
->fastcall
= 1; /* Same first register as in fastcall. */
6039 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
6045 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
6048 /* Set up the number of SSE registers used for passing SFmode
6049 and DFmode arguments. Warn for mismatching ABI. */
6050 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
6054 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6055 But in the case of vector types, it is some vector mode.
6057 When we have only some of our vector isa extensions enabled, then there
6058 are some modes for which vector_mode_supported_p is false. For these
6059 modes, the generic vector support in gcc will choose some non-vector mode
6060 in order to implement the type. By computing the natural mode, we'll
6061 select the proper ABI location for the operand and not depend on whatever
6062 the middle-end decides to do with these vector types.
6064 The midde-end can't deal with the vector types > 16 bytes. In this
6065 case, we return the original mode and warn ABI change if CUM isn't
6068 static enum machine_mode
6069 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
6071 enum machine_mode mode
= TYPE_MODE (type
);
6073 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
6075 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6076 if ((size
== 8 || size
== 16 || size
== 32)
6077 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6078 && TYPE_VECTOR_SUBPARTS (type
) > 1)
6080 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
6082 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
6083 mode
= MIN_MODE_VECTOR_FLOAT
;
6085 mode
= MIN_MODE_VECTOR_INT
;
6087 /* Get the mode which has this inner mode and number of units. */
6088 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
6089 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
6090 && GET_MODE_INNER (mode
) == innermode
)
6092 if (size
== 32 && !TARGET_AVX
)
6094 static bool warnedavx
;
6101 warning (0, "AVX vector argument without AVX "
6102 "enabled changes the ABI");
6104 return TYPE_MODE (type
);
6106 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
6108 static bool warnedsse
;
6115 warning (0, "SSE vector argument without SSE "
6116 "enabled changes the ABI");
6131 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6132 this may not agree with the mode that the type system has chosen for the
6133 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6134 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6137 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6142 if (orig_mode
!= BLKmode
)
6143 tmp
= gen_rtx_REG (orig_mode
, regno
);
6146 tmp
= gen_rtx_REG (mode
, regno
);
6147 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6148 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6154 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6155 of this code is to classify each 8bytes of incoming argument by the register
6156 class and assign registers accordingly. */
6158 /* Return the union class of CLASS1 and CLASS2.
6159 See the x86-64 PS ABI for details. */
6161 static enum x86_64_reg_class
6162 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6164 /* Rule #1: If both classes are equal, this is the resulting class. */
6165 if (class1
== class2
)
6168 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6170 if (class1
== X86_64_NO_CLASS
)
6172 if (class2
== X86_64_NO_CLASS
)
6175 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6176 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6177 return X86_64_MEMORY_CLASS
;
6179 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6180 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6181 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6182 return X86_64_INTEGERSI_CLASS
;
6183 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6184 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6185 return X86_64_INTEGER_CLASS
;
6187 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6189 if (class1
== X86_64_X87_CLASS
6190 || class1
== X86_64_X87UP_CLASS
6191 || class1
== X86_64_COMPLEX_X87_CLASS
6192 || class2
== X86_64_X87_CLASS
6193 || class2
== X86_64_X87UP_CLASS
6194 || class2
== X86_64_COMPLEX_X87_CLASS
)
6195 return X86_64_MEMORY_CLASS
;
6197 /* Rule #6: Otherwise class SSE is used. */
6198 return X86_64_SSE_CLASS
;
6201 /* Classify the argument of type TYPE and mode MODE.
6202 CLASSES will be filled by the register class used to pass each word
6203 of the operand. The number of words is returned. In case the parameter
6204 should be passed in memory, 0 is returned. As a special case for zero
6205 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6207 BIT_OFFSET is used internally for handling records and specifies offset
6208 of the offset in bits modulo 256 to avoid overflow cases.
6210 See the x86-64 PS ABI for details.
/* NOTE(review): this region is a garbled extraction -- each original
   physical line is split across several lines below, stale original line
   numbers are fused into the text, and some lines (case labels, braces,
   returns) are missing entirely.  Tokens are preserved verbatim; only
   comments were added.  Recover the true text from upstream i386.c.  */
6214 classify_argument (enum machine_mode mode
, const_tree type
,
6215 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6217 HOST_WIDE_INT bytes
=
6218 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6220 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6222 /* Variable sized entities are always passed/returned in memory. */
6226 if (mode
!= VOIDmode
6227 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6230 if (type
&& AGGREGATE_TYPE_P (type
))
6234 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6236 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6240 for (i
= 0; i
< words
; i
++)
6241 classes
[i
] = X86_64_NO_CLASS
;
6243 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6244 signalize memory class, so handle it as special case. */
6247 classes
[0] = X86_64_NO_CLASS
;
6251 /* Classify each field of record and merge classes. */
6252 switch (TREE_CODE (type
))
6255 /* And now merge the fields of structure. */
6256 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6258 if (TREE_CODE (field
) == FIELD_DECL
)
6262 if (TREE_TYPE (field
) == error_mark_node
)
6265 /* Bitfields are always classified as integer. Handle them
6266 early, since later code would consider them to be
6267 misaligned integers. */
6268 if (DECL_BIT_FIELD (field
))
6270 for (i
= (int_bit_position (field
)
6271 + (bit_offset
% 64)) / 8 / 8;
6272 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6273 + tree_low_cst (DECL_SIZE (field
), 0)
6276 merge_classes (X86_64_INTEGER_CLASS
,
6283 type
= TREE_TYPE (field
);
6285 /* Flexible array member is ignored. */
6286 if (TYPE_MODE (type
) == BLKmode
6287 && TREE_CODE (type
) == ARRAY_TYPE
6288 && TYPE_SIZE (type
) == NULL_TREE
6289 && TYPE_DOMAIN (type
) != NULL_TREE
6290 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6295 if (!warned
&& warn_psabi
)
6298 inform (input_location
,
6299 "the ABI of passing struct with"
6300 " a flexible array member has"
6301 " changed in GCC 4.4");
/* NOTE(review): recursive classification of a non-bitfield member at its
   bit position modulo 256.  */
6305 num
= classify_argument (TYPE_MODE (type
), type
,
6307 (int_bit_position (field
)
6308 + bit_offset
) % 256);
6311 pos
= (int_bit_position (field
)
6312 + (bit_offset
% 64)) / 8 / 8;
6313 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6315 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6322 /* Arrays are handled as small records. */
6325 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6326 TREE_TYPE (type
), subclasses
, bit_offset
);
6330 /* The partial classes are now full classes. */
6331 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6332 subclasses
[0] = X86_64_SSE_CLASS
;
6333 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6334 && !((bit_offset
% 64) == 0 && bytes
== 4))
6335 subclasses
[0] = X86_64_INTEGER_CLASS
;
6337 for (i
= 0; i
< words
; i
++)
6338 classes
[i
] = subclasses
[i
% num
];
6343 case QUAL_UNION_TYPE
:
6344 /* Unions are similar to RECORD_TYPE but offset is always 0.
6346 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6348 if (TREE_CODE (field
) == FIELD_DECL
)
6352 if (TREE_TYPE (field
) == error_mark_node
)
6355 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6356 TREE_TYPE (field
), subclasses
,
6360 for (i
= 0; i
< num
; i
++)
6361 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6372 /* When size > 16 bytes, if the first one isn't
6373 X86_64_SSE_CLASS or any other ones aren't
6374 X86_64_SSEUP_CLASS, everything should be passed in
6376 if (classes
[0] != X86_64_SSE_CLASS
)
6379 for (i
= 1; i
< words
; i
++)
6380 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6384 /* Final merger cleanup. */
6385 for (i
= 0; i
< words
; i
++)
6387 /* If one class is MEMORY, everything should be passed in
6389 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6392 /* The X86_64_SSEUP_CLASS should be always preceded by
6393 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6394 if (classes
[i
] == X86_64_SSEUP_CLASS
6395 && classes
[i
- 1] != X86_64_SSE_CLASS
6396 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6398 /* The first one should never be X86_64_SSEUP_CLASS. */
6399 gcc_assert (i
!= 0);
6400 classes
[i
] = X86_64_SSE_CLASS
;
6403 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6404 everything should be passed in memory. */
6405 if (classes
[i
] == X86_64_X87UP_CLASS
6406 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6410 /* The first one should never be X86_64_X87UP_CLASS. */
6411 gcc_assert (i
!= 0);
6412 if (!warned
&& warn_psabi
)
6415 inform (input_location
,
6416 "the ABI of passing union with long double"
6417 " has changed in GCC 4.4");
6425 /* Compute alignment needed. We align all types to natural boundaries with
6426 exception of XFmode that is aligned to 64bits. */
6427 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6429 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6432 mode_alignment
= 128;
6433 else if (mode
== XCmode
)
6434 mode_alignment
= 256;
6435 if (COMPLEX_MODE_P (mode
))
6436 mode_alignment
/= 2;
6437 /* Misaligned fields are always returned in memory. */
6438 if (bit_offset
% mode_alignment
)
6442 /* for V1xx modes, just use the base mode */
6443 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6444 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6445 mode
= GET_MODE_INNER (mode
);
6447 /* Classification of atomic types. */
/* NOTE(review): the switch's case labels for the scalar/vector modes below
   were lost in extraction; only the class assignments survive.  */
6452 classes
[0] = X86_64_SSE_CLASS
;
6455 classes
[0] = X86_64_SSE_CLASS
;
6456 classes
[1] = X86_64_SSEUP_CLASS
;
6466 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6470 classes
[0] = X86_64_INTEGERSI_CLASS
;
6473 else if (size
<= 64)
6475 classes
[0] = X86_64_INTEGER_CLASS
;
6478 else if (size
<= 64+32)
6480 classes
[0] = X86_64_INTEGER_CLASS
;
6481 classes
[1] = X86_64_INTEGERSI_CLASS
;
6484 else if (size
<= 64+64)
6486 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6494 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6498 /* OImode shouldn't be used directly. */
6503 if (!(bit_offset
% 64))
6504 classes
[0] = X86_64_SSESF_CLASS
;
6506 classes
[0] = X86_64_SSE_CLASS
;
6509 classes
[0] = X86_64_SSEDF_CLASS
;
6512 classes
[0] = X86_64_X87_CLASS
;
6513 classes
[1] = X86_64_X87UP_CLASS
;
6516 classes
[0] = X86_64_SSE_CLASS
;
6517 classes
[1] = X86_64_SSEUP_CLASS
;
6520 classes
[0] = X86_64_SSE_CLASS
;
6521 if (!(bit_offset
% 64))
6527 if (!warned
&& warn_psabi
)
6530 inform (input_location
,
6531 "the ABI of passing structure with complex float"
6532 " member has changed in GCC 4.4");
6534 classes
[1] = X86_64_SSESF_CLASS
;
6538 classes
[0] = X86_64_SSEDF_CLASS
;
6539 classes
[1] = X86_64_SSEDF_CLASS
;
6542 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6545 /* This modes is larger than 16 bytes. */
6553 classes
[0] = X86_64_SSE_CLASS
;
6554 classes
[1] = X86_64_SSEUP_CLASS
;
6555 classes
[2] = X86_64_SSEUP_CLASS
;
6556 classes
[3] = X86_64_SSEUP_CLASS
;
6564 classes
[0] = X86_64_SSE_CLASS
;
6565 classes
[1] = X86_64_SSEUP_CLASS
;
6573 classes
[0] = X86_64_SSE_CLASS
;
6579 gcc_assert (VECTOR_MODE_P (mode
));
6584 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6586 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6587 classes
[0] = X86_64_INTEGERSI_CLASS
;
6589 classes
[0] = X86_64_INTEGER_CLASS
;
6590 classes
[1] = X86_64_INTEGER_CLASS
;
6591 return 1 + (bytes
> 8);
6595 /* Examine the argument and return set number of register required in each
6596 class. Return 0 iff parameter should be passed in memory. */
6598 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6599 int *int_nregs
, int *sse_nregs
)
6601 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6602 int n
= classify_argument (mode
, type
, regclass
, 0);
6608 for (n
--; n
>= 0; n
--)
6609 switch (regclass
[n
])
6611 case X86_64_INTEGER_CLASS
:
6612 case X86_64_INTEGERSI_CLASS
:
6615 case X86_64_SSE_CLASS
:
6616 case X86_64_SSESF_CLASS
:
6617 case X86_64_SSEDF_CLASS
:
6620 case X86_64_NO_CLASS
:
6621 case X86_64_SSEUP_CLASS
:
6623 case X86_64_X87_CLASS
:
6624 case X86_64_X87UP_CLASS
:
6628 case X86_64_COMPLEX_X87_CLASS
:
6629 return in_return
? 2 : 0;
6630 case X86_64_MEMORY_CLASS
:
6636 /* Construct container for the argument used by GCC interface. See
6637 FUNCTION_ARG for the detailed description. */
/* NOTE(review): garbled extraction -- original physical lines are split
   across several lines below, stale original line numbers are fused into
   the text, and some lines (braces, declarations, case labels) are missing
   entirely.  Tokens are preserved verbatim; only comments were added.
   Recover the true text from upstream i386.c.  */
6640 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6641 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6642 const int *intreg
, int sse_regno
)
6644 /* The following variables hold the static issued_error state. */
6645 static bool issued_sse_arg_error
;
6646 static bool issued_sse_ret_error
;
6647 static bool issued_x87_ret_error
;
6649 enum machine_mode tmpmode
;
6651 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6652 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6656 int needed_sseregs
, needed_intregs
;
6657 rtx exp
[MAX_CLASSES
];
6660 n
= classify_argument (mode
, type
, regclass
, 0);
6663 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6666 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6669 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6670 some less clueful developer tries to use floating-point anyway. */
6671 if (needed_sseregs
&& !TARGET_SSE
)
6675 if (!issued_sse_ret_error
)
6677 error ("SSE register return with SSE disabled");
6678 issued_sse_ret_error
= true;
6681 else if (!issued_sse_arg_error
)
6683 error ("SSE register argument with SSE disabled");
6684 issued_sse_arg_error
= true;
6689 /* Likewise, error if the ABI requires us to return values in the
6690 x87 registers and the user specified -mno-80387. */
6691 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6692 for (i
= 0; i
< n
; i
++)
6693 if (regclass
[i
] == X86_64_X87_CLASS
6694 || regclass
[i
] == X86_64_X87UP_CLASS
6695 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6697 if (!issued_x87_ret_error
)
6699 error ("x87 register return with x87 disabled");
6700 issued_x87_ret_error
= true;
6705 /* First construct simple cases. Avoid SCmode, since we want to use
6706 single register to pass this type. */
6707 if (n
== 1 && mode
!= SCmode
)
6708 switch (regclass
[0])
6710 case X86_64_INTEGER_CLASS
:
6711 case X86_64_INTEGERSI_CLASS
:
6712 return gen_rtx_REG (mode
, intreg
[0]);
6713 case X86_64_SSE_CLASS
:
6714 case X86_64_SSESF_CLASS
:
6715 case X86_64_SSEDF_CLASS
:
6716 if (mode
!= BLKmode
)
6717 return gen_reg_or_parallel (mode
, orig_mode
,
6718 SSE_REGNO (sse_regno
));
6720 case X86_64_X87_CLASS
:
6721 case X86_64_COMPLEX_X87_CLASS
:
6722 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6723 case X86_64_NO_CLASS
:
6724 /* Zero sized array, struct or class. */
/* NOTE(review): the n == 2 / n == 4 guard conditions for the multi-word
   special cases below were lost in extraction.  */
6730 && regclass
[0] == X86_64_SSE_CLASS
6731 && regclass
[1] == X86_64_SSEUP_CLASS
6733 return gen_reg_or_parallel (mode
, orig_mode
,
6734 SSE_REGNO (sse_regno
));
6736 && regclass
[0] == X86_64_SSE_CLASS
6737 && regclass
[1] == X86_64_SSEUP_CLASS
6738 && regclass
[2] == X86_64_SSEUP_CLASS
6739 && regclass
[3] == X86_64_SSEUP_CLASS
6741 return gen_reg_or_parallel (mode
, orig_mode
,
6742 SSE_REGNO (sse_regno
));
6744 && regclass
[0] == X86_64_X87_CLASS
6745 && regclass
[1] == X86_64_X87UP_CLASS
)
6746 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6749 && regclass
[0] == X86_64_INTEGER_CLASS
6750 && regclass
[1] == X86_64_INTEGER_CLASS
6751 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6752 && intreg
[0] + 1 == intreg
[1])
6753 return gen_rtx_REG (mode
, intreg
[0]);
6755 /* Otherwise figure out the entries of the PARALLEL. */
6756 for (i
= 0; i
< n
; i
++)
6760 switch (regclass
[i
])
6762 case X86_64_NO_CLASS
:
6764 case X86_64_INTEGER_CLASS
:
6765 case X86_64_INTEGERSI_CLASS
:
6766 /* Merge TImodes on aligned occasions here too. */
6767 if (i
* 8 + 8 > bytes
)
6769 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6770 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6774 /* We've requested 24 bytes we
6775 don't have mode for. Use DImode. */
6776 if (tmpmode
== BLKmode
)
6779 = gen_rtx_EXPR_LIST (VOIDmode
,
6780 gen_rtx_REG (tmpmode
, *intreg
),
6784 case X86_64_SSESF_CLASS
:
6786 = gen_rtx_EXPR_LIST (VOIDmode
,
6787 gen_rtx_REG (SFmode
,
6788 SSE_REGNO (sse_regno
)),
6792 case X86_64_SSEDF_CLASS
:
6794 = gen_rtx_EXPR_LIST (VOIDmode
,
6795 gen_rtx_REG (DFmode
,
6796 SSE_REGNO (sse_regno
)),
6800 case X86_64_SSE_CLASS
:
6808 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6818 && regclass
[1] == X86_64_SSEUP_CLASS
6819 && regclass
[2] == X86_64_SSEUP_CLASS
6820 && regclass
[3] == X86_64_SSEUP_CLASS
);
6828 = gen_rtx_EXPR_LIST (VOIDmode
,
6829 gen_rtx_REG (tmpmode
,
6830 SSE_REGNO (sse_regno
)),
6839 /* Empty aligned struct, union or class. */
6843 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6844 for (i
= 0; i
< nexps
; i
++)
6845 XVECEXP (ret
, 0, i
) = exp
[i
];
6849 /* Update the data in CUM to advance over an argument of mode MODE
6850 and data type TYPE. (TYPE is null for libcalls where that information
6851 may not be available.) */
6854 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6855 const_tree type
, HOST_WIDE_INT bytes
,
6856 HOST_WIDE_INT words
)
6872 cum
->words
+= words
;
6873 cum
->nregs
-= words
;
6874 cum
->regno
+= words
;
6876 if (cum
->nregs
<= 0)
6884 /* OImode shouldn't be used directly. */
6888 if (cum
->float_in_sse
< 2)
6891 if (cum
->float_in_sse
< 1)
6908 if (!type
|| !AGGREGATE_TYPE_P (type
))
6910 cum
->sse_words
+= words
;
6911 cum
->sse_nregs
-= 1;
6912 cum
->sse_regno
+= 1;
6913 if (cum
->sse_nregs
<= 0)
6927 if (!type
|| !AGGREGATE_TYPE_P (type
))
6929 cum
->mmx_words
+= words
;
6930 cum
->mmx_nregs
-= 1;
6931 cum
->mmx_regno
+= 1;
6932 if (cum
->mmx_nregs
<= 0)
6943 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6944 const_tree type
, HOST_WIDE_INT words
, bool named
)
6946 int int_nregs
, sse_nregs
;
6948 /* Unnamed 256bit vector mode parameters are passed on stack. */
6949 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6952 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6953 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6955 cum
->nregs
-= int_nregs
;
6956 cum
->sse_nregs
-= sse_nregs
;
6957 cum
->regno
+= int_nregs
;
6958 cum
->sse_regno
+= sse_nregs
;
6962 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6963 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6964 cum
->words
+= words
;
6969 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6970 HOST_WIDE_INT words
)
6972 /* Otherwise, this should be passed indirect. */
6973 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6975 cum
->words
+= words
;
6983 /* Update the data in CUM to advance over an argument of mode MODE and
6984 data type TYPE. (TYPE is null for libcalls where that information
6985 may not be available.) */
6988 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6989 const_tree type
, bool named
)
6991 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6992 HOST_WIDE_INT bytes
, words
;
6994 if (mode
== BLKmode
)
6995 bytes
= int_size_in_bytes (type
);
6997 bytes
= GET_MODE_SIZE (mode
);
6998 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7001 mode
= type_natural_mode (type
, NULL
);
7003 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7004 function_arg_advance_ms_64 (cum
, bytes
, words
);
7005 else if (TARGET_64BIT
)
7006 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
7008 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
7011 /* Define where to put the arguments to a function.
7012 Value is zero to push the argument on the stack,
7013 or a hard register in which to store the argument.
7015 MODE is the argument's machine mode.
7016 TYPE is the data type of the argument (as a tree).
7017 This is null for libcalls where that information may
7019 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7020 the preceding args and about the function being called.
7021 NAMED is nonzero if this argument is a named parameter
7022 (otherwise it is an extra parameter matching an ellipsis). */
7025 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7026 enum machine_mode orig_mode
, const_tree type
,
7027 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
7029 static bool warnedsse
, warnedmmx
;
7031 /* Avoid the AL settings for the Unix64 ABI. */
7032 if (mode
== VOIDmode
)
7048 if (words
<= cum
->nregs
)
7050 int regno
= cum
->regno
;
7052 /* Fastcall allocates the first two DWORD (SImode) or
7053 smaller arguments to ECX and EDX if it isn't an
7059 || (type
&& AGGREGATE_TYPE_P (type
)))
7062 /* ECX not EAX is the first allocated register. */
7063 if (regno
== AX_REG
)
7066 return gen_rtx_REG (mode
, regno
);
7071 if (cum
->float_in_sse
< 2)
7074 if (cum
->float_in_sse
< 1)
7078 /* In 32bit, we pass TImode in xmm registers. */
7085 if (!type
|| !AGGREGATE_TYPE_P (type
))
7087 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
7090 warning (0, "SSE vector argument without SSE enabled "
7094 return gen_reg_or_parallel (mode
, orig_mode
,
7095 cum
->sse_regno
+ FIRST_SSE_REG
);
7100 /* OImode shouldn't be used directly. */
7109 if (!type
|| !AGGREGATE_TYPE_P (type
))
7112 return gen_reg_or_parallel (mode
, orig_mode
,
7113 cum
->sse_regno
+ FIRST_SSE_REG
);
7123 if (!type
|| !AGGREGATE_TYPE_P (type
))
7125 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7128 warning (0, "MMX vector argument without MMX enabled "
7132 return gen_reg_or_parallel (mode
, orig_mode
,
7133 cum
->mmx_regno
+ FIRST_MMX_REG
);
7142 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7143 enum machine_mode orig_mode
, const_tree type
, bool named
)
7145 /* Handle a hidden AL argument containing number of registers
7146 for varargs x86-64 functions. */
7147 if (mode
== VOIDmode
)
7148 return GEN_INT (cum
->maybe_vaarg
7149 ? (cum
->sse_nregs
< 0
7150 ? X86_64_SSE_REGPARM_MAX
7165 /* Unnamed 256bit vector mode parameters are passed on stack. */
7171 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7173 &x86_64_int_parameter_registers
[cum
->regno
],
7178 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7179 enum machine_mode orig_mode
, bool named
,
7180 HOST_WIDE_INT bytes
)
7184 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7185 We use value of -2 to specify that current function call is MSABI. */
7186 if (mode
== VOIDmode
)
7187 return GEN_INT (-2);
7189 /* If we've run out of registers, it goes on the stack. */
7190 if (cum
->nregs
== 0)
7193 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7195 /* Only floating point modes are passed in anything but integer regs. */
7196 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7199 regno
= cum
->regno
+ FIRST_SSE_REG
;
7204 /* Unnamed floating parameters are passed in both the
7205 SSE and integer registers. */
7206 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7207 t2
= gen_rtx_REG (mode
, regno
);
7208 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7209 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7210 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7213 /* Handle aggregated types passed in register. */
7214 if (orig_mode
== BLKmode
)
7216 if (bytes
> 0 && bytes
<= 8)
7217 mode
= (bytes
> 4 ? DImode
: SImode
);
7218 if (mode
== BLKmode
)
7222 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7225 /* Return where to put the arguments to a function.
7226 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7228 MODE is the argument's machine mode. TYPE is the data type of the
7229 argument. It is null for libcalls where that information may not be
7230 available. CUM gives information about the preceding args and about
7231 the function being called. NAMED is nonzero if this argument is a
7232 named parameter (otherwise it is an extra parameter matching an
7236 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7237 const_tree type
, bool named
)
7239 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7240 enum machine_mode mode
= omode
;
7241 HOST_WIDE_INT bytes
, words
;
7244 if (mode
== BLKmode
)
7245 bytes
= int_size_in_bytes (type
);
7247 bytes
= GET_MODE_SIZE (mode
);
7248 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7250 /* To simplify the code below, represent vector types with a vector mode
7251 even if MMX/SSE are not active. */
7252 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7253 mode
= type_natural_mode (type
, cum
);
7255 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7256 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7257 else if (TARGET_64BIT
)
7258 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7260 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7265 /* A C expression that indicates when an argument must be passed by
7266 reference. If nonzero for an argument, a copy of that argument is
7267 made in memory and a pointer to the argument is passed instead of
7268 the argument itself. The pointer is passed in whatever way is
7269 appropriate for passing a pointer to that type. */
7272 ix86_pass_by_reference (cumulative_args_t cum_v
, enum machine_mode mode
,
7273 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7275 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7277 /* See Windows x64 Software Convention. */
7278 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7280 int msize
= (int) GET_MODE_SIZE (mode
);
7283 /* Arrays are passed by reference. */
7284 if (TREE_CODE (type
) == ARRAY_TYPE
)
7287 if (AGGREGATE_TYPE_P (type
))
7289 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7290 are passed by reference. */
7291 msize
= int_size_in_bytes (type
);
7295 /* __m128 is passed by reference. */
7297 case 1: case 2: case 4: case 8:
7303 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7309 /* Return true when TYPE should be 128bit aligned for 32bit argument
7310 passing ABI. XXX: This function is obsolete and is only used for
7311 checking psABI compatibility with previous versions of GCC. */
7314 ix86_compat_aligned_value_p (const_tree type
)
7316 enum machine_mode mode
= TYPE_MODE (type
);
7317 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7321 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7323 if (TYPE_ALIGN (type
) < 128)
7326 if (AGGREGATE_TYPE_P (type
))
7328 /* Walk the aggregates recursively. */
7329 switch (TREE_CODE (type
))
7333 case QUAL_UNION_TYPE
:
7337 /* Walk all the structure fields. */
7338 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7340 if (TREE_CODE (field
) == FIELD_DECL
7341 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7348 /* Just for use if some languages passes arrays by value. */
7349 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7360 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7361 XXX: This function is obsolete and is only used for checking psABI
7362 compatibility with previous versions of GCC. */
7365 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7366 const_tree type
, unsigned int align
)
7368 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7369 natural boundaries. */
7370 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7372 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7373 make an exception for SSE modes since these require 128bit
7376 The handling here differs from field_alignment. ICC aligns MMX
7377 arguments to 4 byte boundaries, while structure fields are aligned
7378 to 8 byte boundaries. */
7381 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7382 align
= PARM_BOUNDARY
;
7386 if (!ix86_compat_aligned_value_p (type
))
7387 align
= PARM_BOUNDARY
;
7390 if (align
> BIGGEST_ALIGNMENT
)
7391 align
= BIGGEST_ALIGNMENT
;
7395 /* Return true when TYPE should be 128bit aligned for 32bit argument
7399 ix86_contains_aligned_value_p (const_tree type
)
7401 enum machine_mode mode
= TYPE_MODE (type
);
7403 if (mode
== XFmode
|| mode
== XCmode
)
7406 if (TYPE_ALIGN (type
) < 128)
7409 if (AGGREGATE_TYPE_P (type
))
7411 /* Walk the aggregates recursively. */
7412 switch (TREE_CODE (type
))
7416 case QUAL_UNION_TYPE
:
7420 /* Walk all the structure fields. */
7421 for (field
= TYPE_FIELDS (type
);
7423 field
= DECL_CHAIN (field
))
7425 if (TREE_CODE (field
) == FIELD_DECL
7426 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7433 /* Just for use if some languages passes arrays by value. */
7434 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7443 return TYPE_ALIGN (type
) >= 128;
7448 /* Gives the alignment boundary, in bits, of an argument with the
7449 specified mode and type. */
7452 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7457 /* Since the main variant type is used for call, we convert it to
7458 the main variant type. */
7459 type
= TYPE_MAIN_VARIANT (type
);
7460 align
= TYPE_ALIGN (type
);
7463 align
= GET_MODE_ALIGNMENT (mode
);
7464 if (align
< PARM_BOUNDARY
)
7465 align
= PARM_BOUNDARY
;
7469 unsigned int saved_align
= align
;
7473 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7476 if (mode
== XFmode
|| mode
== XCmode
)
7477 align
= PARM_BOUNDARY
;
7479 else if (!ix86_contains_aligned_value_p (type
))
7480 align
= PARM_BOUNDARY
;
7483 align
= PARM_BOUNDARY
;
7488 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7492 inform (input_location
,
7493 "The ABI for passing parameters with %d-byte"
7494 " alignment has changed in GCC 4.6",
7495 align
/ BITS_PER_UNIT
);
7502 /* Return true if N is a possible register number of function value. */
7505 ix86_function_value_regno_p (const unsigned int regno
)
7514 return TARGET_64BIT
&& ix86_abi
!= MS_ABI
;
7516 /* Complex values are returned in %st(0)/%st(1) pair. */
7519 /* TODO: The function should depend on current function ABI but
7520 builtins.c would need updating then. Therefore we use the
7522 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7524 return TARGET_FLOAT_RETURNS_IN_80387
;
7526 /* Complex values are returned in %xmm0/%xmm1 pair. */
7532 if (TARGET_MACHO
|| TARGET_64BIT
)
7540 /* Define how to find the value returned by a function.
7541 VALTYPE is the data type of the value (as a tree).
7542 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7543 otherwise, FUNC is 0. */
7546 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7547 const_tree fntype
, const_tree fn
)
7551 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7552 we normally prevent this case when mmx is not available. However
7553 some ABIs may require the result to be returned like DImode. */
7554 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7555 regno
= FIRST_MMX_REG
;
7557 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7558 we prevent this case when sse is not available. However some ABIs
7559 may require the result to be returned like integer TImode. */
7560 else if (mode
== TImode
7561 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7562 regno
= FIRST_SSE_REG
;
7564 /* 32-byte vector modes in %ymm0. */
7565 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7566 regno
= FIRST_SSE_REG
;
7568 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7569 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7570 regno
= FIRST_FLOAT_REG
;
7572 /* Most things go in %eax. */
7575 /* Override FP return register with %xmm0 for local functions when
7576 SSE math is enabled or for functions with sseregparm attribute. */
7577 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7579 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7580 if ((sse_level
>= 1 && mode
== SFmode
)
7581 || (sse_level
== 2 && mode
== DFmode
))
7582 regno
= FIRST_SSE_REG
;
7585 /* OImode shouldn't be used directly. */
7586 gcc_assert (mode
!= OImode
);
7588 return gen_rtx_REG (orig_mode
, regno
);
7592 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7597 /* Handle libcalls, which don't provide a type node. */
7598 if (valtype
== NULL
)
7612 regno
= FIRST_SSE_REG
;
7616 regno
= FIRST_FLOAT_REG
;
7624 return gen_rtx_REG (mode
, regno
);
7626 else if (POINTER_TYPE_P (valtype
))
7628 /* Pointers are always returned in word_mode. */
7632 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7633 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7634 x86_64_int_return_registers
, 0);
7636 /* For zero sized structures, construct_container returns NULL, but we
7637 need to keep rest of compiler happy by returning meaningful value. */
7639 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7645 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7648 unsigned int regno
= AX_REG
;
7652 switch (GET_MODE_SIZE (mode
))
7655 if (valtype
!= NULL_TREE
7656 && !VECTOR_INTEGER_TYPE_P (valtype
)
7657 && !VECTOR_INTEGER_TYPE_P (valtype
)
7658 && !INTEGRAL_TYPE_P (valtype
)
7659 && !VECTOR_FLOAT_TYPE_P (valtype
))
7661 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7662 && !COMPLEX_MODE_P (mode
))
7663 regno
= FIRST_SSE_REG
;
7667 if (mode
== SFmode
|| mode
== DFmode
)
7668 regno
= FIRST_SSE_REG
;
7674 return gen_rtx_REG (orig_mode
, regno
);
7678 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7679 enum machine_mode orig_mode
, enum machine_mode mode
)
7681 const_tree fn
, fntype
;
7684 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7685 fn
= fntype_or_decl
;
7686 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7688 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7689 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7690 else if (TARGET_64BIT
)
7691 return function_value_64 (orig_mode
, mode
, valtype
);
7693 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7697 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7698 bool outgoing ATTRIBUTE_UNUSED
)
7700 enum machine_mode mode
, orig_mode
;
7702 orig_mode
= TYPE_MODE (valtype
);
7703 mode
= type_natural_mode (valtype
, NULL
);
7704 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7707 /* Pointer function arguments and return values are promoted to
7710 static enum machine_mode
7711 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7712 int *punsignedp
, const_tree fntype
,
7715 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7717 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7720 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7724 /* Return true if a structure, union or array with MODE containing FIELD
7725 should be accessed using BLKmode. */
7728 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7730 /* Union with XFmode must be in BLKmode. */
7731 return (mode
== XFmode
7732 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7733 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7737 ix86_libcall_value (enum machine_mode mode
)
7739 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7742 /* Return true iff type is returned in memory. */
7744 static bool ATTRIBUTE_UNUSED
7745 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7749 if (mode
== BLKmode
)
7752 size
= int_size_in_bytes (type
);
7754 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7757 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7759 /* User-created vectors small enough to fit in EAX. */
7763 /* MMX/3dNow values are returned in MM0,
7764 except when it doesn't exits or the ABI prescribes otherwise. */
7766 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7768 /* SSE values are returned in XMM0, except when it doesn't exist. */
7772 /* AVX values are returned in YMM0, except when it doesn't exist. */
7783 /* OImode shouldn't be used directly. */
7784 gcc_assert (mode
!= OImode
);
7789 static bool ATTRIBUTE_UNUSED
7790 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7792 int needed_intregs
, needed_sseregs
;
7793 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7796 static bool ATTRIBUTE_UNUSED
7797 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7799 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7801 /* __m128 is returned in xmm0. */
7802 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7803 || VECTOR_FLOAT_TYPE_P (type
))
7804 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7805 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7808 /* Otherwise, the size must be exactly in [1248]. */
7809 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7813 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7815 #ifdef SUBTARGET_RETURN_IN_MEMORY
7816 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7818 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7822 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7823 return return_in_memory_ms_64 (type
, mode
);
7825 return return_in_memory_64 (type
, mode
);
7828 return return_in_memory_32 (type
, mode
);
7832 /* When returning SSE vector types, we have a choice of either
7833 (1) being abi incompatible with a -march switch, or
7834 (2) generating an error.
7835 Given no good solution, I think the safest thing is one warning.
7836 The user won't be able to use -Werror, but....
7838 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7839 called in response to actually generating a caller or callee that
7840 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7841 via aggregate_value_p for general type probing from tree-ssa. */
7844 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7846 static bool warnedsse
, warnedmmx
;
7848 if (!TARGET_64BIT
&& type
)
7850 /* Look at the return type of the function, not the function type. */
7851 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7853 if (!TARGET_SSE
&& !warnedsse
)
7856 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7859 warning (0, "SSE vector return without SSE enabled "
7864 if (!TARGET_MMX
&& !warnedmmx
)
7866 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7869 warning (0, "MMX vector return without MMX enabled "
7879 /* Create the va_list data type. */
7881 /* Returns the calling convention specific va_list date type.
7882 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7885 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7887 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7889 /* For i386 we use plain pointer to argument area. */
7890 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7891 return build_pointer_type (char_type_node
);
7893 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7894 type_decl
= build_decl (BUILTINS_LOCATION
,
7895 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7897 f_gpr
= build_decl (BUILTINS_LOCATION
,
7898 FIELD_DECL
, get_identifier ("gp_offset"),
7899 unsigned_type_node
);
7900 f_fpr
= build_decl (BUILTINS_LOCATION
,
7901 FIELD_DECL
, get_identifier ("fp_offset"),
7902 unsigned_type_node
);
7903 f_ovf
= build_decl (BUILTINS_LOCATION
,
7904 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7906 f_sav
= build_decl (BUILTINS_LOCATION
,
7907 FIELD_DECL
, get_identifier ("reg_save_area"),
7910 va_list_gpr_counter_field
= f_gpr
;
7911 va_list_fpr_counter_field
= f_fpr
;
7913 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7914 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7915 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7916 DECL_FIELD_CONTEXT (f_sav
) = record
;
7918 TYPE_STUB_DECL (record
) = type_decl
;
7919 TYPE_NAME (record
) = type_decl
;
7920 TYPE_FIELDS (record
) = f_gpr
;
7921 DECL_CHAIN (f_gpr
) = f_fpr
;
7922 DECL_CHAIN (f_fpr
) = f_ovf
;
7923 DECL_CHAIN (f_ovf
) = f_sav
;
7925 layout_type (record
);
7927 /* The correct type is an array type of one element. */
7928 return build_array_type (record
, build_index_type (size_zero_node
));
7931 /* Setup the builtin va_list data type and for 64-bit the additional
7932 calling convention specific va_list data types. */
7935 ix86_build_builtin_va_list (void)
7937 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7939 /* Initialize abi specific va_list builtin types. */
7943 if (ix86_abi
== MS_ABI
)
7945 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7946 if (TREE_CODE (t
) != RECORD_TYPE
)
7947 t
= build_variant_type_copy (t
);
7948 sysv_va_list_type_node
= t
;
7953 if (TREE_CODE (t
) != RECORD_TYPE
)
7954 t
= build_variant_type_copy (t
);
7955 sysv_va_list_type_node
= t
;
7957 if (ix86_abi
!= MS_ABI
)
7959 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7960 if (TREE_CODE (t
) != RECORD_TYPE
)
7961 t
= build_variant_type_copy (t
);
7962 ms_va_list_type_node
= t
;
7967 if (TREE_CODE (t
) != RECORD_TYPE
)
7968 t
= build_variant_type_copy (t
);
7969 ms_va_list_type_node
= t
;
7976 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7979 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7985 /* GPR size of varargs save area. */
7986 if (cfun
->va_list_gpr_size
)
7987 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7989 ix86_varargs_gpr_size
= 0;
7991 /* FPR size of varargs save area. We don't need it if we don't pass
7992 anything in SSE registers. */
7993 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7994 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7996 ix86_varargs_fpr_size
= 0;
7998 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
8001 save_area
= frame_pointer_rtx
;
8002 set
= get_varargs_alias_set ();
8004 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
8005 if (max
> X86_64_REGPARM_MAX
)
8006 max
= X86_64_REGPARM_MAX
;
8008 for (i
= cum
->regno
; i
< max
; i
++)
8010 mem
= gen_rtx_MEM (word_mode
,
8011 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
8012 MEM_NOTRAP_P (mem
) = 1;
8013 set_mem_alias_set (mem
, set
);
8014 emit_move_insn (mem
,
8015 gen_rtx_REG (word_mode
,
8016 x86_64_int_parameter_registers
[i
]));
8019 if (ix86_varargs_fpr_size
)
8021 enum machine_mode smode
;
8024 /* Now emit code to save SSE registers. The AX parameter contains number
8025 of SSE parameter registers used to call this function, though all we
8026 actually check here is the zero/non-zero status. */
8028 label
= gen_label_rtx ();
8029 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
8030 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
8033 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8034 we used movdqa (i.e. TImode) instead? Perhaps even better would
8035 be if we could determine the real mode of the data, via a hook
8036 into pass_stdarg. Ignore all that for now. */
8038 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
8039 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
8041 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
8042 if (max
> X86_64_SSE_REGPARM_MAX
)
8043 max
= X86_64_SSE_REGPARM_MAX
;
8045 for (i
= cum
->sse_regno
; i
< max
; ++i
)
8047 mem
= plus_constant (Pmode
, save_area
,
8048 i
* 16 + ix86_varargs_gpr_size
);
8049 mem
= gen_rtx_MEM (smode
, mem
);
8050 MEM_NOTRAP_P (mem
) = 1;
8051 set_mem_alias_set (mem
, set
);
8052 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
8054 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
8062 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
8064 alias_set_type set
= get_varargs_alias_set ();
8067 /* Reset to zero, as there might be a sysv vaarg used
8069 ix86_varargs_gpr_size
= 0;
8070 ix86_varargs_fpr_size
= 0;
8072 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
8076 mem
= gen_rtx_MEM (Pmode
,
8077 plus_constant (Pmode
, virtual_incoming_args_rtx
,
8078 i
* UNITS_PER_WORD
));
8079 MEM_NOTRAP_P (mem
) = 1;
8080 set_mem_alias_set (mem
, set
);
8082 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
8083 emit_move_insn (mem
, reg
);
8088 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
8089 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8092 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8093 CUMULATIVE_ARGS next_cum
;
8096 /* This argument doesn't appear to be used anymore. Which is good,
8097 because the old code here didn't suppress rtl generation. */
8098 gcc_assert (!no_rtl
);
8103 fntype
= TREE_TYPE (current_function_decl
);
8105 /* For varargs, we do not want to skip the dummy va_dcl argument.
8106 For stdargs, we do want to skip the last named argument. */
8108 if (stdarg_p (fntype
))
8109 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
8112 if (cum
->call_abi
== MS_ABI
)
8113 setup_incoming_varargs_ms_64 (&next_cum
);
8115 setup_incoming_varargs_64 (&next_cum
);
8118 /* Checks if TYPE is of kind va_list char *. */
8121 is_va_list_char_pointer (tree type
)
8125 /* For 32-bit it is always true. */
8128 canonic
= ix86_canonical_va_list_type (type
);
8129 return (canonic
== ms_va_list_type_node
8130 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8133 /* Implement va_start. */
8136 ix86_va_start (tree valist
, rtx nextarg
)
8138 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8139 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8140 tree gpr
, fpr
, ovf
, sav
, t
;
8144 if (flag_split_stack
8145 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8147 unsigned int scratch_regno
;
8149 /* When we are splitting the stack, we can't refer to the stack
8150 arguments using internal_arg_pointer, because they may be on
8151 the old stack. The split stack prologue will arrange to
8152 leave a pointer to the old stack arguments in a scratch
8153 register, which we here copy to a pseudo-register. The split
8154 stack prologue can't set the pseudo-register directly because
8155 it (the prologue) runs before any registers have been saved. */
8157 scratch_regno
= split_stack_prologue_scratch_regno ();
8158 if (scratch_regno
!= INVALID_REGNUM
)
8162 reg
= gen_reg_rtx (Pmode
);
8163 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8166 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8170 push_topmost_sequence ();
8171 emit_insn_after (seq
, entry_of_function ());
8172 pop_topmost_sequence ();
8176 /* Only 64bit target needs something special. */
8177 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8179 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8180 std_expand_builtin_va_start (valist
, nextarg
);
8185 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8186 next
= expand_binop (ptr_mode
, add_optab
,
8187 cfun
->machine
->split_stack_varargs_pointer
,
8188 crtl
->args
.arg_offset_rtx
,
8189 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8190 convert_move (va_r
, next
, 0);
8195 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8196 f_fpr
= DECL_CHAIN (f_gpr
);
8197 f_ovf
= DECL_CHAIN (f_fpr
);
8198 f_sav
= DECL_CHAIN (f_ovf
);
8200 valist
= build_simple_mem_ref (valist
);
8201 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8202 /* The following should be folded into the MEM_REF offset. */
8203 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8205 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8207 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8209 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8212 /* Count number of gp and fp argument registers used. */
8213 words
= crtl
->args
.info
.words
;
8214 n_gpr
= crtl
->args
.info
.regno
;
8215 n_fpr
= crtl
->args
.info
.sse_regno
;
8217 if (cfun
->va_list_gpr_size
)
8219 type
= TREE_TYPE (gpr
);
8220 t
= build2 (MODIFY_EXPR
, type
,
8221 gpr
, build_int_cst (type
, n_gpr
* 8));
8222 TREE_SIDE_EFFECTS (t
) = 1;
8223 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8226 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8228 type
= TREE_TYPE (fpr
);
8229 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8230 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8231 TREE_SIDE_EFFECTS (t
) = 1;
8232 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8235 /* Find the overflow area. */
8236 type
= TREE_TYPE (ovf
);
8237 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8238 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8240 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8241 t
= make_tree (type
, ovf_rtx
);
8243 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8244 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8245 TREE_SIDE_EFFECTS (t
) = 1;
8246 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8248 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8250 /* Find the register save area.
8251 Prologue of the function save it right above stack frame. */
8252 type
= TREE_TYPE (sav
);
8253 t
= make_tree (type
, frame_pointer_rtx
);
8254 if (!ix86_varargs_gpr_size
)
8255 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8256 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8257 TREE_SIDE_EFFECTS (t
) = 1;
8258 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8262 /* Implement va_arg. */
8265 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8268 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8269 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8270 tree gpr
, fpr
, ovf
, sav
, t
;
8272 tree lab_false
, lab_over
= NULL_TREE
;
8277 enum machine_mode nat_mode
;
8278 unsigned int arg_boundary
;
8280 /* Only 64bit target needs something special. */
8281 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8282 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8284 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8285 f_fpr
= DECL_CHAIN (f_gpr
);
8286 f_ovf
= DECL_CHAIN (f_fpr
);
8287 f_sav
= DECL_CHAIN (f_ovf
);
8289 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8290 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8291 valist
= build_va_arg_indirect_ref (valist
);
8292 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8293 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8294 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8296 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8298 type
= build_pointer_type (type
);
8299 size
= int_size_in_bytes (type
);
8300 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8302 nat_mode
= type_natural_mode (type
, NULL
);
8311 /* Unnamed 256bit vector mode parameters are passed on stack. */
8312 if (!TARGET_64BIT_MS_ABI
)
8319 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8320 type
, 0, X86_64_REGPARM_MAX
,
8321 X86_64_SSE_REGPARM_MAX
, intreg
,
8326 /* Pull the value out of the saved registers. */
8328 addr
= create_tmp_var (ptr_type_node
, "addr");
8332 int needed_intregs
, needed_sseregs
;
8334 tree int_addr
, sse_addr
;
8336 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8337 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8339 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8341 need_temp
= (!REG_P (container
)
8342 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8343 || TYPE_ALIGN (type
) > 128));
8345 /* In case we are passing structure, verify that it is consecutive block
8346 on the register save area. If not we need to do moves. */
8347 if (!need_temp
&& !REG_P (container
))
8349 /* Verify that all registers are strictly consecutive */
8350 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8354 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8356 rtx slot
= XVECEXP (container
, 0, i
);
8357 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8358 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8366 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8368 rtx slot
= XVECEXP (container
, 0, i
);
8369 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8370 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8382 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8383 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8386 /* First ensure that we fit completely in registers. */
8389 t
= build_int_cst (TREE_TYPE (gpr
),
8390 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8391 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8392 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8393 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8394 gimplify_and_add (t
, pre_p
);
8398 t
= build_int_cst (TREE_TYPE (fpr
),
8399 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8400 + X86_64_REGPARM_MAX
* 8);
8401 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8402 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8403 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8404 gimplify_and_add (t
, pre_p
);
8407 /* Compute index to start of area used for integer regs. */
8410 /* int_addr = gpr + sav; */
8411 t
= fold_build_pointer_plus (sav
, gpr
);
8412 gimplify_assign (int_addr
, t
, pre_p
);
8416 /* sse_addr = fpr + sav; */
8417 t
= fold_build_pointer_plus (sav
, fpr
);
8418 gimplify_assign (sse_addr
, t
, pre_p
);
8422 int i
, prev_size
= 0;
8423 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8426 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8427 gimplify_assign (addr
, t
, pre_p
);
8429 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8431 rtx slot
= XVECEXP (container
, 0, i
);
8432 rtx reg
= XEXP (slot
, 0);
8433 enum machine_mode mode
= GET_MODE (reg
);
8439 tree dest_addr
, dest
;
8440 int cur_size
= GET_MODE_SIZE (mode
);
8442 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8443 prev_size
= INTVAL (XEXP (slot
, 1));
8444 if (prev_size
+ cur_size
> size
)
8446 cur_size
= size
- prev_size
;
8447 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8448 if (mode
== BLKmode
)
8451 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8452 if (mode
== GET_MODE (reg
))
8453 addr_type
= build_pointer_type (piece_type
);
8455 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8457 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8460 if (SSE_REGNO_P (REGNO (reg
)))
8462 src_addr
= sse_addr
;
8463 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8467 src_addr
= int_addr
;
8468 src_offset
= REGNO (reg
) * 8;
8470 src_addr
= fold_convert (addr_type
, src_addr
);
8471 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8473 dest_addr
= fold_convert (daddr_type
, addr
);
8474 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8475 if (cur_size
== GET_MODE_SIZE (mode
))
8477 src
= build_va_arg_indirect_ref (src_addr
);
8478 dest
= build_va_arg_indirect_ref (dest_addr
);
8480 gimplify_assign (dest
, src
, pre_p
);
8485 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8486 3, dest_addr
, src_addr
,
8487 size_int (cur_size
));
8488 gimplify_and_add (copy
, pre_p
);
8490 prev_size
+= cur_size
;
8496 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8497 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8498 gimplify_assign (gpr
, t
, pre_p
);
8503 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8504 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8505 gimplify_assign (fpr
, t
, pre_p
);
8508 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8510 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8513 /* ... otherwise out of the overflow area. */
8515 /* When we align parameter on stack for caller, if the parameter
8516 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8517 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8518 here with caller. */
8519 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8520 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8521 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8523 /* Care for on-stack alignment if needed. */
8524 if (arg_boundary
<= 64 || size
== 0)
8528 HOST_WIDE_INT align
= arg_boundary
/ 8;
8529 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8530 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8531 build_int_cst (TREE_TYPE (t
), -align
));
8534 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8535 gimplify_assign (addr
, t
, pre_p
);
8537 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8538 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8541 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8543 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8544 addr
= fold_convert (ptrtype
, addr
);
8547 addr
= build_va_arg_indirect_ref (addr
);
8548 return build_va_arg_indirect_ref (addr
);
8551 /* Return true if OPNUM's MEM should be matched
8552 in movabs* patterns. */
8555 ix86_check_movabs (rtx insn
, int opnum
)
8559 set
= PATTERN (insn
);
8560 if (GET_CODE (set
) == PARALLEL
)
8561 set
= XVECEXP (set
, 0, 0);
8562 gcc_assert (GET_CODE (set
) == SET
);
8563 mem
= XEXP (set
, opnum
);
8564 while (GET_CODE (mem
) == SUBREG
)
8565 mem
= SUBREG_REG (mem
);
8566 gcc_assert (MEM_P (mem
));
8567 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8570 /* Initialize the table of extra 80387 mathematical constants. */
8573 init_ext_80387_constants (void)
8575 static const char * cst
[5] =
8577 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8578 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8579 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8580 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8581 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8585 for (i
= 0; i
< 5; i
++)
8587 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8588 /* Ensure each constant is rounded to XFmode precision. */
8589 real_convert (&ext_80387_constants_table
[i
],
8590 XFmode
, &ext_80387_constants_table
[i
]);
8593 ext_80387_constants_init
= 1;
8596 /* Return non-zero if the constant is something that
8597 can be loaded with a special instruction. */
8600 standard_80387_constant_p (rtx x
)
8602 enum machine_mode mode
= GET_MODE (x
);
8606 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8609 if (x
== CONST0_RTX (mode
))
8611 if (x
== CONST1_RTX (mode
))
8614 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8616 /* For XFmode constants, try to find a special 80387 instruction when
8617 optimizing for size or on those CPUs that benefit from them. */
8619 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8623 if (! ext_80387_constants_init
)
8624 init_ext_80387_constants ();
8626 for (i
= 0; i
< 5; i
++)
8627 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8631 /* Load of the constant -0.0 or -1.0 will be split as
8632 fldz;fchs or fld1;fchs sequence. */
8633 if (real_isnegzero (&r
))
8635 if (real_identical (&r
, &dconstm1
))
8641 /* Return the opcode of the special instruction to be used to load
8645 standard_80387_constant_opcode (rtx x
)
8647 switch (standard_80387_constant_p (x
))
8671 /* Return the CONST_DOUBLE representing the 80387 constant that is
8672 loaded by the specified special instruction. The argument IDX
8673 matches the return value from standard_80387_constant_p. */
8676 standard_80387_constant_rtx (int idx
)
8680 if (! ext_80387_constants_init
)
8681 init_ext_80387_constants ();
8697 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8701 /* Return 1 if X is all 0s and 2 if x is all 1s
8702 in supported SSE/AVX vector mode. */
8705 standard_sse_constant_p (rtx x
)
8707 enum machine_mode mode
= GET_MODE (x
);
8709 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8711 if (vector_all_ones_operand (x
, mode
))
8733 /* Return the opcode of the special instruction to be used to load
8737 standard_sse_constant_opcode (rtx insn
, rtx x
)
8739 switch (standard_sse_constant_p (x
))
8742 switch (get_attr_mode (insn
))
8745 return "%vpxor\t%0, %d0";
8747 return "%vxorpd\t%0, %d0";
8749 return "%vxorps\t%0, %d0";
8752 return "vpxor\t%x0, %x0, %x0";
8754 return "vxorpd\t%x0, %x0, %x0";
8756 return "vxorps\t%x0, %x0, %x0";
8763 if (get_attr_mode (insn
) == MODE_XI
8764 || get_attr_mode (insn
) == MODE_V8DF
8765 || get_attr_mode (insn
) == MODE_V16SF
)
8766 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8768 return "vpcmpeqd\t%0, %0, %0";
8770 return "pcmpeqd\t%0, %0";
8778 /* Returns true if OP contains a symbol reference */
8781 symbolic_reference_mentioned_p (rtx op
)
8786 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8789 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8790 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8796 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8797 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8801 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8808 /* Return true if it is appropriate to emit `ret' instructions in the
8809 body of a function. Do this only if the epilogue is simple, needing a
8810 couple of insns. Prior to reloading, we can't tell how many registers
8811 must be saved, so return false then. Return false if there is no frame
8812 marker to de-allocate. */
8815 ix86_can_use_return_insn_p (void)
8817 struct ix86_frame frame
;
8819 if (! reload_completed
|| frame_pointer_needed
)
8822 /* Don't allow more than 32k pop, since that's all we can do
8823 with one instruction. */
8824 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8827 ix86_compute_frame_layout (&frame
);
8828 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8829 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8832 /* Value should be nonzero if functions must have frame pointers.
8833 Zero means the frame pointer need not be set up (and parms may
8834 be accessed via the stack pointer) in functions that seem suitable. */
8837 ix86_frame_pointer_required (void)
8839 /* If we accessed previous frames, then the generated code expects
8840 to be able to access the saved ebp value in our frame. */
8841 if (cfun
->machine
->accesses_prev_frame
)
8844 /* Several x86 os'es need a frame pointer for other reasons,
8845 usually pertaining to setjmp. */
8846 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8849 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8850 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8853 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8854 allocation is 4GB. */
8855 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8858 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8859 turns off the frame pointer by default. Turn it back on now if
8860 we've not got a leaf function. */
8861 if (TARGET_OMIT_LEAF_FRAME_POINTER
8863 || ix86_current_function_calls_tls_descriptor
))
8866 if (crtl
->profile
&& !flag_fentry
)
8872 /* Record that the current function accesses previous call frames. */
8875 ix86_setup_frame_addresses (void)
8877 cfun
->machine
->accesses_prev_frame
= 1;
8880 #ifndef USE_HIDDEN_LINKONCE
8881 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8882 # define USE_HIDDEN_LINKONCE 1
8884 # define USE_HIDDEN_LINKONCE 0
8888 static int pic_labels_used
;
8890 /* Fills in the label name that should be used for a pc thunk for
8891 the given register. */
8894 get_pc_thunk_name (char name
[32], unsigned int regno
)
8896 gcc_assert (!TARGET_64BIT
);
8898 if (USE_HIDDEN_LINKONCE
)
8899 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8901 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8905 /* This function generates code for -fpic that loads %ebx with
8906 the return address of the caller and then returns. */
8909 ix86_code_end (void)
8914 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8919 if (!(pic_labels_used
& (1 << regno
)))
8922 get_pc_thunk_name (name
, regno
);
8924 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8925 get_identifier (name
),
8926 build_function_type_list (void_type_node
, NULL_TREE
));
8927 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8928 NULL_TREE
, void_type_node
);
8929 TREE_PUBLIC (decl
) = 1;
8930 TREE_STATIC (decl
) = 1;
8931 DECL_IGNORED_P (decl
) = 1;
8936 switch_to_section (darwin_sections
[text_coal_section
]);
8937 fputs ("\t.weak_definition\t", asm_out_file
);
8938 assemble_name (asm_out_file
, name
);
8939 fputs ("\n\t.private_extern\t", asm_out_file
);
8940 assemble_name (asm_out_file
, name
);
8941 putc ('\n', asm_out_file
);
8942 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8943 DECL_WEAK (decl
) = 1;
8947 if (USE_HIDDEN_LINKONCE
)
8949 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8951 targetm
.asm_out
.unique_section (decl
, 0);
8952 switch_to_section (get_named_section (decl
, NULL
, 0));
8954 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8955 fputs ("\t.hidden\t", asm_out_file
);
8956 assemble_name (asm_out_file
, name
);
8957 putc ('\n', asm_out_file
);
8958 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8962 switch_to_section (text_section
);
8963 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8966 DECL_INITIAL (decl
) = make_node (BLOCK
);
8967 current_function_decl
= decl
;
8968 init_function_start (decl
);
8969 first_function_block_is_cold
= false;
8970 /* Make sure unwind info is emitted for the thunk if needed. */
8971 final_start_function (emit_barrier (), asm_out_file
, 1);
8973 /* Pad stack IP move with 4 instructions (two NOPs count
8974 as one instruction). */
8975 if (TARGET_PAD_SHORT_FUNCTION
)
8980 fputs ("\tnop\n", asm_out_file
);
8983 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8984 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8985 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8986 output_asm_insn ("%!ret", NULL
);
8987 final_end_function ();
8988 init_insn_lengths ();
8989 free_after_compilation (cfun
);
8991 current_function_decl
= NULL
;
8994 if (flag_split_stack
)
8995 file_end_indicate_split_stack ();
8998 /* Emit code for the SET_GOT patterns. */
9001 output_set_got (rtx dest
, rtx label
)
9007 if (TARGET_VXWORKS_RTP
&& flag_pic
)
9009 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9010 xops
[2] = gen_rtx_MEM (Pmode
,
9011 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
9012 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
9014 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9015 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9016 an unadorned address. */
9017 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
9018 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
9019 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
9023 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
9028 /* We don't need a pic base, we're not producing pic. */
9031 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
9032 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
9033 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9034 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
9039 get_pc_thunk_name (name
, REGNO (dest
));
9040 pic_labels_used
|= 1 << REGNO (dest
);
9042 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
9043 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
9044 output_asm_insn ("%!call\t%X2", xops
);
9047 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9048 This is what will be referenced by the Mach-O PIC subsystem. */
9049 if (machopic_should_output_picbase_label () || !label
)
9050 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
9052 /* When we are restoring the pic base at the site of a nonlocal label,
9053 and we decided to emit the pic base above, we will still output a
9054 local label used for calculating the correction offset (even though
9055 the offset will be 0 in that case). */
9057 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9058 CODE_LABEL_NUMBER (label
));
9063 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
9068 /* Generate an "push" pattern for input ARG. */
9073 struct machine_function
*m
= cfun
->machine
;
9075 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9076 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
9077 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
9079 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9080 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9082 return gen_rtx_SET (VOIDmode
,
9083 gen_rtx_MEM (word_mode
,
9084 gen_rtx_PRE_DEC (Pmode
,
9085 stack_pointer_rtx
)),
9089 /* Generate an "pop" pattern for input ARG. */
9094 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9095 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9097 return gen_rtx_SET (VOIDmode
,
9099 gen_rtx_MEM (word_mode
,
9100 gen_rtx_POST_INC (Pmode
,
9101 stack_pointer_rtx
)));
9104 /* Return >= 0 if there is an unused call-clobbered register available
9105 for the entire function. */
9108 ix86_select_alt_pic_regnum (void)
9112 && !ix86_current_function_calls_tls_descriptor
)
9115 /* Can't use the same register for both PIC and DRAP. */
9117 drap
= REGNO (crtl
->drap_reg
);
9120 for (i
= 2; i
>= 0; --i
)
9121 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9125 return INVALID_REGNUM
;
9128 /* Return TRUE if we need to save REGNO. */
9131 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9133 if (pic_offset_table_rtx
9134 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9135 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9137 || crtl
->calls_eh_return
9138 || crtl
->uses_const_pool
9139 || cfun
->has_nonlocal_label
))
9140 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9142 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9147 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9148 if (test
== INVALID_REGNUM
)
9155 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9158 return (df_regs_ever_live_p (regno
)
9159 && !call_used_regs
[regno
]
9160 && !fixed_regs
[regno
]
9161 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9164 /* Return number of saved general prupose registers. */
9167 ix86_nsaved_regs (void)
9172 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9173 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9178 /* Return number of saved SSE registrers. */
9181 ix86_nsaved_sseregs (void)
9186 if (!TARGET_64BIT_MS_ABI
)
9188 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9189 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9194 /* Given FROM and TO register numbers, say whether this elimination is
9195 allowed. If stack alignment is needed, we can only replace argument
9196 pointer with hard frame pointer, or replace frame pointer with stack
9197 pointer. Otherwise, frame pointer elimination is automatically
9198 handled and all other eliminations are valid. */
9201 ix86_can_eliminate (const int from
, const int to
)
9203 if (stack_realign_fp
)
9204 return ((from
== ARG_POINTER_REGNUM
9205 && to
== HARD_FRAME_POINTER_REGNUM
)
9206 || (from
== FRAME_POINTER_REGNUM
9207 && to
== STACK_POINTER_REGNUM
));
9209 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9212 /* Return the offset between two registers, one to be eliminated, and the other
9213 its replacement, at the start of a routine. */
9216 ix86_initial_elimination_offset (int from
, int to
)
9218 struct ix86_frame frame
;
9219 ix86_compute_frame_layout (&frame
);
9221 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9222 return frame
.hard_frame_pointer_offset
;
9223 else if (from
== FRAME_POINTER_REGNUM
9224 && to
== HARD_FRAME_POINTER_REGNUM
)
9225 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9228 gcc_assert (to
== STACK_POINTER_REGNUM
);
9230 if (from
== ARG_POINTER_REGNUM
)
9231 return frame
.stack_pointer_offset
;
9233 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9234 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9238 /* In a dynamically-aligned function, we can't know the offset from
9239 stack pointer to frame pointer, so we must ensure that setjmp
9240 eliminates fp against the hard fp (%ebp) rather than trying to
9241 index from %esp up to the top of the frame across a gap that is
9242 of unknown (at compile-time) size. */
9244 ix86_builtin_setjmp_frame_value (void)
9246 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9249 /* When using -fsplit-stack, the allocation routines set a field in
9250 the TCB to the bottom of the stack plus this much space, measured
9253 #define SPLIT_STACK_AVAILABLE 256
9255 /* Fill structure ix86_frame about frame of currently computed function. */
9258 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9260 unsigned HOST_WIDE_INT stack_alignment_needed
;
9261 HOST_WIDE_INT offset
;
9262 unsigned HOST_WIDE_INT preferred_alignment
;
9263 HOST_WIDE_INT size
= get_frame_size ();
9264 HOST_WIDE_INT to_allocate
;
9266 frame
->nregs
= ix86_nsaved_regs ();
9267 frame
->nsseregs
= ix86_nsaved_sseregs ();
9269 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9270 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9272 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9273 function prologues and leaf. */
9274 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9275 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9276 || ix86_current_function_calls_tls_descriptor
))
9278 preferred_alignment
= 16;
9279 stack_alignment_needed
= 16;
9280 crtl
->preferred_stack_boundary
= 128;
9281 crtl
->stack_alignment_needed
= 128;
9284 gcc_assert (!size
|| stack_alignment_needed
);
9285 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9286 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9288 /* For SEH we have to limit the amount of code movement into the prologue.
9289 At present we do this via a BLOCKAGE, at which point there's very little
9290 scheduling that can be done, which means that there's very little point
9291 in doing anything except PUSHs. */
9293 cfun
->machine
->use_fast_prologue_epilogue
= false;
9295 /* During reload iteration the amount of registers saved can change.
9296 Recompute the value as needed. Do not recompute when amount of registers
9297 didn't change as reload does multiple calls to the function and does not
9298 expect the decision to change within single iteration. */
9299 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR
)
9300 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9302 int count
= frame
->nregs
;
9303 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9305 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9307 /* The fast prologue uses move instead of push to save registers. This
9308 is significantly longer, but also executes faster as modern hardware
9309 can execute the moves in parallel, but can't do that for push/pop.
9311 Be careful about choosing what prologue to emit: When function takes
9312 many instructions to execute we may use slow version as well as in
9313 case function is known to be outside hot spot (this is known with
9314 feedback only). Weight the size of function by number of registers
9315 to save as it is cheap to use one or two push instructions but very
9316 slow to use many of them. */
9318 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9319 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9320 || (flag_branch_probabilities
9321 && node
->frequency
< NODE_FREQUENCY_HOT
))
9322 cfun
->machine
->use_fast_prologue_epilogue
= false;
9324 cfun
->machine
->use_fast_prologue_epilogue
9325 = !expensive_function_p (count
);
9328 frame
->save_regs_using_mov
9329 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9330 /* If static stack checking is enabled and done with probes,
9331 the registers need to be saved before allocating the frame. */
9332 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9334 /* Skip return address. */
9335 offset
= UNITS_PER_WORD
;
9337 /* Skip pushed static chain. */
9338 if (ix86_static_chain_on_stack
)
9339 offset
+= UNITS_PER_WORD
;
9341 /* Skip saved base pointer. */
9342 if (frame_pointer_needed
)
9343 offset
+= UNITS_PER_WORD
;
9344 frame
->hfp_save_offset
= offset
;
9346 /* The traditional frame pointer location is at the top of the frame. */
9347 frame
->hard_frame_pointer_offset
= offset
;
9349 /* Register save area */
9350 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9351 frame
->reg_save_offset
= offset
;
9353 /* On SEH target, registers are pushed just before the frame pointer
9356 frame
->hard_frame_pointer_offset
= offset
;
9358 /* Align and set SSE register save area. */
9359 if (frame
->nsseregs
)
9361 /* The only ABI that has saved SSE registers (Win64) also has a
9362 16-byte aligned default stack, and thus we don't need to be
9363 within the re-aligned local stack frame to save them. */
9364 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9365 offset
= (offset
+ 16 - 1) & -16;
9366 offset
+= frame
->nsseregs
* 16;
9368 frame
->sse_reg_save_offset
= offset
;
9370 /* The re-aligned stack starts here. Values before this point are not
9371 directly comparable with values below this point. In order to make
9372 sure that no value happens to be the same before and after, force
9373 the alignment computation below to add a non-zero value. */
9374 if (stack_realign_fp
)
9375 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9378 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9379 offset
+= frame
->va_arg_size
;
9381 /* Align start of frame for local function. */
9382 if (stack_realign_fp
9383 || offset
!= frame
->sse_reg_save_offset
9386 || cfun
->calls_alloca
9387 || ix86_current_function_calls_tls_descriptor
)
9388 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9390 /* Frame pointer points here. */
9391 frame
->frame_pointer_offset
= offset
;
9395 /* Add outgoing arguments area. Can be skipped if we eliminated
9396 all the function calls as dead code.
9397 Skipping is however impossible when function calls alloca. Alloca
9398 expander assumes that last crtl->outgoing_args_size
9399 of stack frame are unused. */
9400 if (ACCUMULATE_OUTGOING_ARGS
9401 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9402 || ix86_current_function_calls_tls_descriptor
))
9404 offset
+= crtl
->outgoing_args_size
;
9405 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9408 frame
->outgoing_arguments_size
= 0;
9410 /* Align stack boundary. Only needed if we're calling another function
9412 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9413 || ix86_current_function_calls_tls_descriptor
)
9414 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9416 /* We've reached end of stack frame. */
9417 frame
->stack_pointer_offset
= offset
;
9419 /* Size prologue needs to allocate. */
9420 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9422 if ((!to_allocate
&& frame
->nregs
<= 1)
9423 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9424 frame
->save_regs_using_mov
= false;
9426 if (ix86_using_red_zone ()
9427 && crtl
->sp_is_unchanging
9429 && !ix86_current_function_calls_tls_descriptor
)
9431 frame
->red_zone_size
= to_allocate
;
9432 if (frame
->save_regs_using_mov
)
9433 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9434 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9435 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9438 frame
->red_zone_size
= 0;
9439 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9441 /* The SEH frame pointer location is near the bottom of the frame.
9442 This is enforced by the fact that the difference between the
9443 stack pointer and the frame pointer is limited to 240 bytes in
9444 the unwind data structure. */
9449 /* If we can leave the frame pointer where it is, do so. Also, returns
9450 the establisher frame for __builtin_frame_address (0). */
9451 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9452 if (diff
<= SEH_MAX_FRAME_SIZE
9453 && (diff
> 240 || (diff
& 15) != 0)
9454 && !crtl
->accesses_prior_frames
)
9456 /* Ideally we'd determine what portion of the local stack frame
9457 (within the constraint of the lowest 240) is most heavily used.
9458 But without that complication, simply bias the frame pointer
9459 by 128 bytes so as to maximize the amount of the local stack
9460 frame that is addressable with 8-bit offsets. */
9461 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9466 /* This is semi-inlined memory_address_length, but simplified
9467 since we know that we're always dealing with reg+offset, and
9468 to avoid having to create and discard all that rtl. */
9471 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9477 /* EBP and R13 cannot be encoded without an offset. */
9478 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9480 else if (IN_RANGE (offset
, -128, 127))
9483 /* ESP and R12 must be encoded with a SIB byte. */
9484 if (regno
== SP_REG
|| regno
== R12_REG
)
9490 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9491 The valid base registers are taken from CFUN->MACHINE->FS. */
9494 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9496 const struct machine_function
*m
= cfun
->machine
;
9497 rtx base_reg
= NULL
;
9498 HOST_WIDE_INT base_offset
= 0;
9500 if (m
->use_fast_prologue_epilogue
)
9502 /* Choose the base register most likely to allow the most scheduling
9503 opportunities. Generally FP is valid throughout the function,
9504 while DRAP must be reloaded within the epilogue. But choose either
9505 over the SP due to increased encoding size. */
9509 base_reg
= hard_frame_pointer_rtx
;
9510 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9512 else if (m
->fs
.drap_valid
)
9514 base_reg
= crtl
->drap_reg
;
9515 base_offset
= 0 - cfa_offset
;
9517 else if (m
->fs
.sp_valid
)
9519 base_reg
= stack_pointer_rtx
;
9520 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9525 HOST_WIDE_INT toffset
;
9528 /* Choose the base register with the smallest address encoding.
9529 With a tie, choose FP > DRAP > SP. */
9532 base_reg
= stack_pointer_rtx
;
9533 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9534 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9536 if (m
->fs
.drap_valid
)
9538 toffset
= 0 - cfa_offset
;
9539 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9542 base_reg
= crtl
->drap_reg
;
9543 base_offset
= toffset
;
9549 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9550 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9553 base_reg
= hard_frame_pointer_rtx
;
9554 base_offset
= toffset
;
9559 gcc_assert (base_reg
!= NULL
);
9561 return plus_constant (Pmode
, base_reg
, base_offset
);
9564 /* Emit code to save registers in the prologue. */
9567 ix86_emit_save_regs (void)
9572 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9573 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9575 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9576 RTX_FRAME_RELATED_P (insn
) = 1;
9580 /* Emit a single register save at CFA - CFA_OFFSET. */
9583 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9584 HOST_WIDE_INT cfa_offset
)
9586 struct machine_function
*m
= cfun
->machine
;
9587 rtx reg
= gen_rtx_REG (mode
, regno
);
9588 rtx mem
, addr
, base
, insn
;
9590 addr
= choose_baseaddr (cfa_offset
);
9591 mem
= gen_frame_mem (mode
, addr
);
9593 /* For SSE saves, we need to indicate the 128-bit alignment. */
9594 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9596 insn
= emit_move_insn (mem
, reg
);
9597 RTX_FRAME_RELATED_P (insn
) = 1;
9600 if (GET_CODE (base
) == PLUS
)
9601 base
= XEXP (base
, 0);
9602 gcc_checking_assert (REG_P (base
));
9604 /* When saving registers into a re-aligned local stack frame, avoid
9605 any tricky guessing by dwarf2out. */
9606 if (m
->fs
.realigned
)
9608 gcc_checking_assert (stack_realign_drap
);
9610 if (regno
== REGNO (crtl
->drap_reg
))
9612 /* A bit of a hack. We force the DRAP register to be saved in
9613 the re-aligned stack frame, which provides us with a copy
9614 of the CFA that will last past the prologue. Install it. */
9615 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9616 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9617 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9618 mem
= gen_rtx_MEM (mode
, addr
);
9619 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9623 /* The frame pointer is a stable reference within the
9624 aligned frame. Use it. */
9625 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9626 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9627 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9628 mem
= gen_rtx_MEM (mode
, addr
);
9629 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9630 gen_rtx_SET (VOIDmode
, mem
, reg
));
9634 /* The memory may not be relative to the current CFA register,
9635 which means that we may need to generate a new pattern for
9636 use by the unwind info. */
9637 else if (base
!= m
->fs
.cfa_reg
)
9639 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9640 m
->fs
.cfa_offset
- cfa_offset
);
9641 mem
= gen_rtx_MEM (mode
, addr
);
9642 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9646 /* Emit code to save registers using MOV insns.
9647 First register is stored at CFA - CFA_OFFSET. */
9649 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9653 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9654 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9656 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9657 cfa_offset
-= UNITS_PER_WORD
;
9661 /* Emit code to save SSE registers using MOV insns.
9662 First register is stored at CFA - CFA_OFFSET. */
9664 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9668 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9669 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9671 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9676 static GTY(()) rtx queued_cfa_restores
;
9678 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9679 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9680 Don't add the note if the previously saved value will be left untouched
9681 within stack red-zone till return, as unwinders can find the same value
9682 in the register and on the stack. */
9685 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9687 if (!crtl
->shrink_wrapped
9688 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9693 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9694 RTX_FRAME_RELATED_P (insn
) = 1;
9698 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9701 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9704 ix86_add_queued_cfa_restore_notes (rtx insn
)
9707 if (!queued_cfa_restores
)
9709 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9711 XEXP (last
, 1) = REG_NOTES (insn
);
9712 REG_NOTES (insn
) = queued_cfa_restores
;
9713 queued_cfa_restores
= NULL_RTX
;
9714 RTX_FRAME_RELATED_P (insn
) = 1;
9717 /* Expand prologue or epilogue stack adjustment.
9718 The pattern exist to put a dependency on all ebp-based memory accesses.
9719 STYLE should be negative if instructions should be marked as frame related,
9720 zero if %r11 register is live and cannot be freely used and positive
9724 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9725 int style
, bool set_cfa
)
9727 struct machine_function
*m
= cfun
->machine
;
9729 bool add_frame_related_expr
= false;
9731 if (Pmode
== SImode
)
9732 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9733 else if (x86_64_immediate_operand (offset
, DImode
))
9734 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9738 /* r11 is used by indirect sibcall return as well, set before the
9739 epilogue and used after the epilogue. */
9741 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9744 gcc_assert (src
!= hard_frame_pointer_rtx
9745 && dest
!= hard_frame_pointer_rtx
);
9746 tmp
= hard_frame_pointer_rtx
;
9748 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9750 add_frame_related_expr
= true;
9752 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9755 insn
= emit_insn (insn
);
9757 ix86_add_queued_cfa_restore_notes (insn
);
9763 gcc_assert (m
->fs
.cfa_reg
== src
);
9764 m
->fs
.cfa_offset
+= INTVAL (offset
);
9765 m
->fs
.cfa_reg
= dest
;
9767 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9768 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9769 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9770 RTX_FRAME_RELATED_P (insn
) = 1;
9774 RTX_FRAME_RELATED_P (insn
) = 1;
9775 if (add_frame_related_expr
)
9777 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9778 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9779 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9783 if (dest
== stack_pointer_rtx
)
9785 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9786 bool valid
= m
->fs
.sp_valid
;
9788 if (src
== hard_frame_pointer_rtx
)
9790 valid
= m
->fs
.fp_valid
;
9791 ooffset
= m
->fs
.fp_offset
;
9793 else if (src
== crtl
->drap_reg
)
9795 valid
= m
->fs
.drap_valid
;
9800 /* Else there are two possibilities: SP itself, which we set
9801 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9802 taken care of this by hand along the eh_return path. */
9803 gcc_checking_assert (src
== stack_pointer_rtx
9804 || offset
== const0_rtx
);
9807 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9808 m
->fs
.sp_valid
= valid
;
9812 /* Find an available register to be used as dynamic realign argument
9813 pointer regsiter. Such a register will be written in prologue and
9814 used in begin of body, so it must not be
9815 1. parameter passing register.
9817 We reuse static-chain register if it is available. Otherwise, we
9818 use DI for i386 and R13 for x86-64. We chose R13 since it has
9821 Return: the regno of chosen register. */
9824 find_drap_reg (void)
9826 tree decl
= cfun
->decl
;
9830 /* Use R13 for nested function or function need static chain.
9831 Since function with tail call may use any caller-saved
9832 registers in epilogue, DRAP must not use caller-saved
9833 register in such case. */
9834 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9841 /* Use DI for nested function or function need static chain.
9842 Since function with tail call may use any caller-saved
9843 registers in epilogue, DRAP must not use caller-saved
9844 register in such case. */
9845 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9848 /* Reuse static chain register if it isn't used for parameter
9850 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9852 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9853 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9860 /* Return minimum incoming stack alignment. */
9863 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9865 unsigned int incoming_stack_boundary
;
9867 /* Prefer the one specified at command line. */
9868 if (ix86_user_incoming_stack_boundary
)
9869 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9870 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9871 if -mstackrealign is used, it isn't used for sibcall check and
9872 estimated stack alignment is 128bit. */
9875 && ix86_force_align_arg_pointer
9876 && crtl
->stack_alignment_estimated
== 128)
9877 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9879 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9881 /* Incoming stack alignment can be changed on individual functions
9882 via force_align_arg_pointer attribute. We use the smallest
9883 incoming stack boundary. */
9884 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9885 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9886 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9887 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9889 /* The incoming stack frame has to be aligned at least at
9890 parm_stack_boundary. */
9891 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9892 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9894 /* Stack at entrance of main is aligned by runtime. We use the
9895 smallest incoming stack boundary. */
9896 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9897 && DECL_NAME (current_function_decl
)
9898 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9899 && DECL_FILE_SCOPE_P (current_function_decl
))
9900 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9902 return incoming_stack_boundary
;
9905 /* Update incoming stack boundary and estimated stack alignment. */
9908 ix86_update_stack_boundary (void)
9910 ix86_incoming_stack_boundary
9911 = ix86_minimum_incoming_stack_boundary (false);
9913 /* x86_64 vararg needs 16byte stack alignment for register save
9917 && crtl
->stack_alignment_estimated
< 128)
9918 crtl
->stack_alignment_estimated
= 128;
9921 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9922 needed or an rtx for DRAP otherwise. */
9925 ix86_get_drap_rtx (void)
9927 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9928 crtl
->need_drap
= true;
9930 if (stack_realign_drap
)
9932 /* Assign DRAP to vDRAP and returns vDRAP */
9933 unsigned int regno
= find_drap_reg ();
9938 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9939 crtl
->drap_reg
= arg_ptr
;
9942 drap_vreg
= copy_to_reg (arg_ptr
);
9946 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9949 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9950 RTX_FRAME_RELATED_P (insn
) = 1;
9958 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9961 ix86_internal_arg_pointer (void)
9963 return virtual_incoming_args_rtx
;
9966 struct scratch_reg
{
9971 /* Return a short-lived scratch register for use on function entry.
9972 In 32-bit mode, it is valid only after the registers are saved
9973 in the prologue. This register must be released by means of
9974 release_scratch_register_on_entry once it is dead. */
9977 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9985 /* We always use R11 in 64-bit mode. */
9990 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9992 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9994 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9995 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9996 int regparm
= ix86_function_regparm (fntype
, decl
);
9998 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
10000 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10001 for the static chain register. */
10002 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
10003 && drap_regno
!= AX_REG
)
10005 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10006 for the static chain register. */
10007 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
10009 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
10011 /* ecx is the static chain register. */
10012 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
10014 && drap_regno
!= CX_REG
)
10016 else if (ix86_save_reg (BX_REG
, true))
10018 /* esi is the static chain register. */
10019 else if (!(regparm
== 3 && static_chain_p
)
10020 && ix86_save_reg (SI_REG
, true))
10022 else if (ix86_save_reg (DI_REG
, true))
10026 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
10031 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
10034 rtx insn
= emit_insn (gen_push (sr
->reg
));
10035 RTX_FRAME_RELATED_P (insn
) = 1;
10039 /* Release a scratch register obtained from the preceding function. */
10042 release_scratch_register_on_entry (struct scratch_reg
*sr
)
10046 struct machine_function
*m
= cfun
->machine
;
10047 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
10049 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10050 RTX_FRAME_RELATED_P (insn
) = 1;
10051 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
10052 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10053 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
10054 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10058 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10060 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10063 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
10065 /* We skip the probe for the first interval + a small dope of 4 words and
10066 probe that many bytes past the specified size to maintain a protection
10067 area at the botton of the stack. */
10068 const int dope
= 4 * UNITS_PER_WORD
;
10069 rtx size_rtx
= GEN_INT (size
), last
;
10071 /* See if we have a constant small number of probes to generate. If so,
10072 that's the easy case. The run-time loop is made up of 11 insns in the
10073 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10074 for n # of intervals. */
10075 if (size
<= 5 * PROBE_INTERVAL
)
10077 HOST_WIDE_INT i
, adjust
;
10078 bool first_probe
= true;
10080 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10081 values of N from 1 until it exceeds SIZE. If only one probe is
10082 needed, this will not generate any code. Then adjust and probe
10083 to PROBE_INTERVAL + SIZE. */
10084 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10088 adjust
= 2 * PROBE_INTERVAL
+ dope
;
10089 first_probe
= false;
10092 adjust
= PROBE_INTERVAL
;
10094 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10095 plus_constant (Pmode
, stack_pointer_rtx
,
10097 emit_stack_probe (stack_pointer_rtx
);
10101 adjust
= size
+ PROBE_INTERVAL
+ dope
;
10103 adjust
= size
+ PROBE_INTERVAL
- i
;
10105 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10106 plus_constant (Pmode
, stack_pointer_rtx
,
10108 emit_stack_probe (stack_pointer_rtx
);
10110 /* Adjust back to account for the additional first interval. */
10111 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10112 plus_constant (Pmode
, stack_pointer_rtx
,
10113 PROBE_INTERVAL
+ dope
)));
10116 /* Otherwise, do the same as above, but in a loop. Note that we must be
10117 extra careful with variables wrapping around because we might be at
10118 the very top (or the very bottom) of the address space and we have
10119 to be able to handle this case properly; in particular, we use an
10120 equality test for the loop condition. */
10123 HOST_WIDE_INT rounded_size
;
10124 struct scratch_reg sr
;
10126 get_scratch_register_on_entry (&sr
);
10129 /* Step 1: round SIZE to the previous multiple of the interval. */
10131 rounded_size
= size
& -PROBE_INTERVAL
;
10134 /* Step 2: compute initial and final value of the loop counter. */
10136 /* SP = SP_0 + PROBE_INTERVAL. */
10137 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10138 plus_constant (Pmode
, stack_pointer_rtx
,
10139 - (PROBE_INTERVAL
+ dope
))));
10141 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10142 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10143 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10144 gen_rtx_PLUS (Pmode
, sr
.reg
,
10145 stack_pointer_rtx
)));
10148 /* Step 3: the loop
10150 while (SP != LAST_ADDR)
10152 SP = SP + PROBE_INTERVAL
10156 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10157 values of N from 1 until it is equal to ROUNDED_SIZE. */
10159 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10162 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10163 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10165 if (size
!= rounded_size
)
10167 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10168 plus_constant (Pmode
, stack_pointer_rtx
,
10169 rounded_size
- size
)));
10170 emit_stack_probe (stack_pointer_rtx
);
10173 /* Adjust back to account for the additional first interval. */
10174 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10175 plus_constant (Pmode
, stack_pointer_rtx
,
10176 PROBE_INTERVAL
+ dope
)));
10178 release_scratch_register_on_entry (&sr
);
10181 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10183 /* Even if the stack pointer isn't the CFA register, we need to correctly
10184 describe the adjustments made to it, in particular differentiate the
10185 frame-related ones from the frame-unrelated ones. */
10188 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10189 XVECEXP (expr
, 0, 0)
10190 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10191 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10192 XVECEXP (expr
, 0, 1)
10193 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10194 plus_constant (Pmode
, stack_pointer_rtx
,
10195 PROBE_INTERVAL
+ dope
+ size
));
10196 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10197 RTX_FRAME_RELATED_P (last
) = 1;
10199 cfun
->machine
->fs
.sp_offset
+= size
;
10202 /* Make sure nothing is scheduled before we are done. */
10203 emit_insn (gen_blockage ());
10206 /* Adjust the stack pointer up to REG while probing it. */
10209 output_adjust_stack_and_probe (rtx reg
)
10211 static int labelno
= 0;
10212 char loop_lab
[32], end_lab
[32];
10215 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10216 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10218 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10220 /* Jump to END_LAB if SP == LAST_ADDR. */
10221 xops
[0] = stack_pointer_rtx
;
10223 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10224 fputs ("\tje\t", asm_out_file
);
10225 assemble_name_raw (asm_out_file
, end_lab
);
10226 fputc ('\n', asm_out_file
);
10228 /* SP = SP + PROBE_INTERVAL. */
10229 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10230 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10233 xops
[1] = const0_rtx
;
10234 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10236 fprintf (asm_out_file
, "\tjmp\t");
10237 assemble_name_raw (asm_out_file
, loop_lab
);
10238 fputc ('\n', asm_out_file
);
10240 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10245 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10246 inclusive. These are offsets from the current stack pointer. */
10249 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10251 /* See if we have a constant small number of probes to generate. If so,
10252 that's the easy case. The run-time loop is made up of 7 insns in the
10253 generic case while the compile-time loop is made up of n insns for n #
10255 if (size
<= 7 * PROBE_INTERVAL
)
10259 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10260 it exceeds SIZE. If only one probe is needed, this will not
10261 generate any code. Then probe at FIRST + SIZE. */
10262 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10263 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10266 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10270 /* Otherwise, do the same as above, but in a loop. Note that we must be
10271 extra careful with variables wrapping around because we might be at
10272 the very top (or the very bottom) of the address space and we have
10273 to be able to handle this case properly; in particular, we use an
10274 equality test for the loop condition. */
10277 HOST_WIDE_INT rounded_size
, last
;
10278 struct scratch_reg sr
;
10280 get_scratch_register_on_entry (&sr
);
10283 /* Step 1: round SIZE to the previous multiple of the interval. */
10285 rounded_size
= size
& -PROBE_INTERVAL
;
10288 /* Step 2: compute initial and final value of the loop counter. */
10290 /* TEST_OFFSET = FIRST. */
10291 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10293 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10294 last
= first
+ rounded_size
;
10297 /* Step 3: the loop
10299 while (TEST_ADDR != LAST_ADDR)
10301 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10305 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10306 until it is equal to ROUNDED_SIZE. */
10308 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10311 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10312 that SIZE is equal to ROUNDED_SIZE. */
10314 if (size
!= rounded_size
)
10315 emit_stack_probe (plus_constant (Pmode
,
10316 gen_rtx_PLUS (Pmode
,
10319 rounded_size
- size
));
10321 release_scratch_register_on_entry (&sr
);
10324 /* Make sure nothing is scheduled before we are done. */
10325 emit_insn (gen_blockage ());
10328 /* Probe a range of stack addresses from REG to END, inclusive. These are
10329 offsets from the current stack pointer. */
10332 output_probe_stack_range (rtx reg
, rtx end
)
10334 static int labelno
= 0;
10335 char loop_lab
[32], end_lab
[32];
10338 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10339 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10341 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10343 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10346 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10347 fputs ("\tje\t", asm_out_file
);
10348 assemble_name_raw (asm_out_file
, end_lab
);
10349 fputc ('\n', asm_out_file
);
10351 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10352 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10353 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10355 /* Probe at TEST_ADDR. */
10356 xops
[0] = stack_pointer_rtx
;
10358 xops
[2] = const0_rtx
;
10359 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10361 fprintf (asm_out_file
, "\tjmp\t");
10362 assemble_name_raw (asm_out_file
, loop_lab
);
10363 fputc ('\n', asm_out_file
);
10365 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10370 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10371 to be generated in correct form. */
10373 ix86_finalize_stack_realign_flags (void)
10375 /* Check if stack realign is really needed after reload, and
10376 stores result in cfun */
10377 unsigned int incoming_stack_boundary
10378 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10379 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10380 unsigned int stack_realign
= (incoming_stack_boundary
10382 ? crtl
->max_used_stack_slot_alignment
10383 : crtl
->stack_alignment_needed
));
10385 if (crtl
->stack_realign_finalized
)
10387 /* After stack_realign_needed is finalized, we can't no longer
10389 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10393 /* If the only reason for frame_pointer_needed is that we conservatively
10394 assumed stack realignment might be needed, but in the end nothing that
10395 needed the stack alignment had been spilled, clear frame_pointer_needed
10396 and say we don't need stack realignment. */
10398 && !crtl
->need_drap
10399 && frame_pointer_needed
10401 && flag_omit_frame_pointer
10402 && crtl
->sp_is_unchanging
10403 && !ix86_current_function_calls_tls_descriptor
10404 && !crtl
->accesses_prior_frames
10405 && !cfun
->calls_alloca
10406 && !crtl
->calls_eh_return
10407 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10408 && !ix86_frame_pointer_required ()
10409 && get_frame_size () == 0
10410 && ix86_nsaved_sseregs () == 0
10411 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10413 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10416 CLEAR_HARD_REG_SET (prologue_used
);
10417 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10418 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10419 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10420 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10421 HARD_FRAME_POINTER_REGNUM
);
10425 FOR_BB_INSNS (bb
, insn
)
10426 if (NONDEBUG_INSN_P (insn
)
10427 && requires_stack_frame_p (insn
, prologue_used
,
10428 set_up_by_prologue
))
10430 crtl
->stack_realign_needed
= stack_realign
;
10431 crtl
->stack_realign_finalized
= true;
10436 frame_pointer_needed
= false;
10437 stack_realign
= false;
10438 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10439 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10440 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10441 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10442 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10443 df_finish_pass (true);
10444 df_scan_alloc (NULL
);
10446 df_compute_regs_ever_live (true);
10450 crtl
->stack_realign_needed
= stack_realign
;
10451 crtl
->stack_realign_finalized
= true;
10454 /* Expand the prologue into a bunch of separate insns. */
10457 ix86_expand_prologue (void)
10459 struct machine_function
*m
= cfun
->machine
;
10462 struct ix86_frame frame
;
10463 HOST_WIDE_INT allocate
;
10464 bool int_registers_saved
;
10465 bool sse_registers_saved
;
10467 ix86_finalize_stack_realign_flags ();
10469 /* DRAP should not coexist with stack_realign_fp */
10470 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10472 memset (&m
->fs
, 0, sizeof (m
->fs
));
10474 /* Initialize CFA state for before the prologue. */
10475 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10476 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10478 /* Track SP offset to the CFA. We continue tracking this after we've
10479 swapped the CFA register away from SP. In the case of re-alignment
10480 this is fudged; we're interested to offsets within the local frame. */
10481 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10482 m
->fs
.sp_valid
= true;
10484 ix86_compute_frame_layout (&frame
);
10486 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10488 /* We should have already generated an error for any use of
10489 ms_hook on a nested function. */
10490 gcc_checking_assert (!ix86_static_chain_on_stack
);
10492 /* Check if profiling is active and we shall use profiling before
10493 prologue variant. If so sorry. */
10494 if (crtl
->profile
&& flag_fentry
!= 0)
10495 sorry ("ms_hook_prologue attribute isn%'t compatible "
10496 "with -mfentry for 32-bit");
10498 /* In ix86_asm_output_function_label we emitted:
10499 8b ff movl.s %edi,%edi
10501 8b ec movl.s %esp,%ebp
10503 This matches the hookable function prologue in Win32 API
10504 functions in Microsoft Windows XP Service Pack 2 and newer.
10505 Wine uses this to enable Windows apps to hook the Win32 API
10506 functions provided by Wine.
10508 What that means is that we've already set up the frame pointer. */
10510 if (frame_pointer_needed
10511 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10515 /* We've decided to use the frame pointer already set up.
10516 Describe this to the unwinder by pretending that both
10517 push and mov insns happen right here.
10519 Putting the unwind info here at the end of the ms_hook
10520 is done so that we can make absolutely certain we get
10521 the required byte sequence at the start of the function,
10522 rather than relying on an assembler that can produce
10523 the exact encoding required.
10525 However it does mean (in the unpatched case) that we have
10526 a 1 insn window where the asynchronous unwind info is
10527 incorrect. However, if we placed the unwind info at
10528 its correct location we would have incorrect unwind info
10529 in the patched case. Which is probably all moot since
10530 I don't expect Wine generates dwarf2 unwind info for the
10531 system libraries that use this feature. */
10533 insn
= emit_insn (gen_blockage ());
10535 push
= gen_push (hard_frame_pointer_rtx
);
10536 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10537 stack_pointer_rtx
);
10538 RTX_FRAME_RELATED_P (push
) = 1;
10539 RTX_FRAME_RELATED_P (mov
) = 1;
10541 RTX_FRAME_RELATED_P (insn
) = 1;
10542 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10543 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10545 /* Note that gen_push incremented m->fs.cfa_offset, even
10546 though we didn't emit the push insn here. */
10547 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10548 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10549 m
->fs
.fp_valid
= true;
10553 /* The frame pointer is not needed so pop %ebp again.
10554 This leaves us with a pristine state. */
10555 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10559 /* The first insn of a function that accepts its static chain on the
10560 stack is to push the register that would be filled in by a direct
10561 call. This insn will be skipped by the trampoline. */
10562 else if (ix86_static_chain_on_stack
)
10564 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10565 emit_insn (gen_blockage ());
10567 /* We don't want to interpret this push insn as a register save,
10568 only as a stack adjustment. The real copy of the register as
10569 a save will be done later, if needed. */
10570 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10571 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10572 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10573 RTX_FRAME_RELATED_P (insn
) = 1;
10576 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10577 of DRAP is needed and stack realignment is really needed after reload */
10578 if (stack_realign_drap
)
10580 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10582 /* Only need to push parameter pointer reg if it is caller saved. */
10583 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10585 /* Push arg pointer reg */
10586 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10587 RTX_FRAME_RELATED_P (insn
) = 1;
10590 /* Grab the argument pointer. */
10591 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10592 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10593 RTX_FRAME_RELATED_P (insn
) = 1;
10594 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10595 m
->fs
.cfa_offset
= 0;
10597 /* Align the stack. */
10598 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10600 GEN_INT (-align_bytes
)));
10601 RTX_FRAME_RELATED_P (insn
) = 1;
10603 /* Replicate the return address on the stack so that return
10604 address can be reached via (argp - 1) slot. This is needed
10605 to implement macro RETURN_ADDR_RTX and intrinsic function
10606 expand_builtin_return_addr etc. */
10607 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10608 t
= gen_frame_mem (word_mode
, t
);
10609 insn
= emit_insn (gen_push (t
));
10610 RTX_FRAME_RELATED_P (insn
) = 1;
10612 /* For the purposes of frame and register save area addressing,
10613 we've started over with a new frame. */
10614 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10615 m
->fs
.realigned
= true;
10618 int_registers_saved
= (frame
.nregs
== 0);
10619 sse_registers_saved
= (frame
.nsseregs
== 0);
10621 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10623 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10624 slower on all targets. Also sdb doesn't like it. */
10625 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10626 RTX_FRAME_RELATED_P (insn
) = 1;
10628 /* Push registers now, before setting the frame pointer
10630 if (!int_registers_saved
10632 && !frame
.save_regs_using_mov
)
10634 ix86_emit_save_regs ();
10635 int_registers_saved
= true;
10636 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10639 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10641 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10642 RTX_FRAME_RELATED_P (insn
) = 1;
10644 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10645 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10646 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10647 m
->fs
.fp_valid
= true;
10651 if (!int_registers_saved
)
10653 /* If saving registers via PUSH, do so now. */
10654 if (!frame
.save_regs_using_mov
)
10656 ix86_emit_save_regs ();
10657 int_registers_saved
= true;
10658 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10661 /* When using red zone we may start register saving before allocating
10662 the stack frame saving one cycle of the prologue. However, avoid
10663 doing this if we have to probe the stack; at least on x86_64 the
10664 stack probe can turn into a call that clobbers a red zone location. */
10665 else if (ix86_using_red_zone ()
10666 && (! TARGET_STACK_PROBE
10667 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10669 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10670 int_registers_saved
= true;
10674 if (stack_realign_fp
)
10676 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10677 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10679 /* The computation of the size of the re-aligned stack frame means
10680 that we must allocate the size of the register save area before
10681 performing the actual alignment. Otherwise we cannot guarantee
10682 that there's enough storage above the realignment point. */
10683 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10684 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10685 GEN_INT (m
->fs
.sp_offset
10686 - frame
.sse_reg_save_offset
),
10689 /* Align the stack. */
10690 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10692 GEN_INT (-align_bytes
)));
10694 /* For the purposes of register save area addressing, the stack
10695 pointer is no longer valid. As for the value of sp_offset,
10696 see ix86_compute_frame_layout, which we need to match in order
10697 to pass verification of stack_pointer_offset at the end. */
10698 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10699 m
->fs
.sp_valid
= false;
10702 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10704 if (flag_stack_usage_info
)
10706 /* We start to count from ARG_POINTER. */
10707 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10709 /* If it was realigned, take into account the fake frame. */
10710 if (stack_realign_drap
)
10712 if (ix86_static_chain_on_stack
)
10713 stack_size
+= UNITS_PER_WORD
;
10715 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10716 stack_size
+= UNITS_PER_WORD
;
10718 /* This over-estimates by 1 minimal-stack-alignment-unit but
10719 mitigates that by counting in the new return address slot. */
10720 current_function_dynamic_stack_size
10721 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10724 current_function_static_stack_size
= stack_size
;
10727 /* On SEH target with very large frame size, allocate an area to save
10728 SSE registers (as the very large allocation won't be described). */
10730 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10731 && !sse_registers_saved
)
10733 HOST_WIDE_INT sse_size
=
10734 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10736 gcc_assert (int_registers_saved
);
10738 /* No need to do stack checking as the area will be immediately
10740 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10741 GEN_INT (-sse_size
), -1,
10742 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10743 allocate
-= sse_size
;
10744 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10745 sse_registers_saved
= true;
10748 /* The stack has already been decremented by the instruction calling us
10749 so probe if the size is non-negative to preserve the protection area. */
10750 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10752 /* We expect the registers to be saved when probes are used. */
10753 gcc_assert (int_registers_saved
);
10755 if (STACK_CHECK_MOVING_SP
)
10757 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
10758 && allocate
<= PROBE_INTERVAL
))
10760 ix86_adjust_stack_and_probe (allocate
);
10766 HOST_WIDE_INT size
= allocate
;
10768 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10769 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10771 if (TARGET_STACK_PROBE
)
10773 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10775 if (size
> PROBE_INTERVAL
)
10776 ix86_emit_probe_stack_range (0, size
);
10779 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10783 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10785 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
10786 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
,
10787 size
- STACK_CHECK_PROTECT
);
10790 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10797 else if (!ix86_target_stack_probe ()
10798 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10800 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10801 GEN_INT (-allocate
), -1,
10802 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10806 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10808 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10809 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10810 bool eax_live
= false;
10811 bool r10_live
= false;
10814 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10815 if (!TARGET_64BIT_MS_ABI
)
10816 eax_live
= ix86_eax_live_at_start_p ();
10818 /* Note that SEH directives need to continue tracking the stack
10819 pointer even after the frame pointer has been set up. */
10822 insn
= emit_insn (gen_push (eax
));
10823 allocate
-= UNITS_PER_WORD
;
10824 if (sp_is_cfa_reg
|| TARGET_SEH
)
10827 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10828 RTX_FRAME_RELATED_P (insn
) = 1;
10834 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10835 insn
= emit_insn (gen_push (r10
));
10836 allocate
-= UNITS_PER_WORD
;
10837 if (sp_is_cfa_reg
|| TARGET_SEH
)
10840 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10841 RTX_FRAME_RELATED_P (insn
) = 1;
10845 emit_move_insn (eax
, GEN_INT (allocate
));
10846 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10848 /* Use the fact that AX still contains ALLOCATE. */
10849 adjust_stack_insn
= (Pmode
== DImode
10850 ? gen_pro_epilogue_adjust_stack_di_sub
10851 : gen_pro_epilogue_adjust_stack_si_sub
);
10853 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10854 stack_pointer_rtx
, eax
));
10856 if (sp_is_cfa_reg
|| TARGET_SEH
)
10859 m
->fs
.cfa_offset
+= allocate
;
10860 RTX_FRAME_RELATED_P (insn
) = 1;
10861 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10862 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10863 plus_constant (Pmode
, stack_pointer_rtx
,
10866 m
->fs
.sp_offset
+= allocate
;
10868 if (r10_live
&& eax_live
)
10870 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10871 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10872 gen_frame_mem (word_mode
, t
));
10873 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10874 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10875 gen_frame_mem (word_mode
, t
));
10877 else if (eax_live
|| r10_live
)
10879 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10880 emit_move_insn (gen_rtx_REG (word_mode
,
10881 (eax_live
? AX_REG
: R10_REG
)),
10882 gen_frame_mem (word_mode
, t
));
10885 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10887 /* If we havn't already set up the frame pointer, do so now. */
10888 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10890 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10891 GEN_INT (frame
.stack_pointer_offset
10892 - frame
.hard_frame_pointer_offset
));
10893 insn
= emit_insn (insn
);
10894 RTX_FRAME_RELATED_P (insn
) = 1;
10895 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10897 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10898 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10899 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10900 m
->fs
.fp_valid
= true;
10903 if (!int_registers_saved
)
10904 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10905 if (!sse_registers_saved
)
10906 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10908 pic_reg_used
= false;
10909 /* We don't use pic-register for pe-coff target. */
10910 if (pic_offset_table_rtx
10912 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10915 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10917 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10918 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10920 pic_reg_used
= true;
10927 if (ix86_cmodel
== CM_LARGE_PIC
)
10929 rtx label
, tmp_reg
;
10931 gcc_assert (Pmode
== DImode
);
10932 label
= gen_label_rtx ();
10933 emit_label (label
);
10934 LABEL_PRESERVE_P (label
) = 1;
10935 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10936 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10937 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10939 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10940 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10941 pic_offset_table_rtx
, tmp_reg
));
10944 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10948 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10949 RTX_FRAME_RELATED_P (insn
) = 1;
10950 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10954 /* In the pic_reg_used case, make sure that the got load isn't deleted
10955 when mcount needs it. Blockage to avoid call movement across mcount
10956 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10958 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10959 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10961 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10963 /* vDRAP is setup but after reload it turns out stack realign
10964 isn't necessary, here we will emit prologue to setup DRAP
10965 without stack realign adjustment */
10966 t
= choose_baseaddr (0);
10967 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10970 /* Prevent instructions from being scheduled into register save push
10971 sequence when access to the redzone area is done through frame pointer.
10972 The offset between the frame pointer and the stack pointer is calculated
10973 relative to the value of the stack pointer at the end of the function
10974 prologue, and moving instructions that access redzone area via frame
10975 pointer inside push sequence violates this assumption. */
10976 if (frame_pointer_needed
&& frame
.red_zone_size
)
10977 emit_insn (gen_memory_blockage ());
10979 /* Emit cld instruction if stringops are used in the function. */
10980 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10981 emit_insn (gen_cld ());
10983 /* SEH requires that the prologue end within 256 bytes of the start of
10984 the function. Prevent instruction schedules that would extend that.
10985 Further, prevent alloca modifications to the stack pointer from being
10986 combined with prologue modifications. */
10988 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10991 /* Emit code to restore REG using a POP insn. */
/* NOTE(review): this region is a lossy extraction -- the return-type line,
   braces and blank lines of the definition were dropped.  Code text below is
   preserved byte-for-byte; only comments are added.  */
10994 ix86_emit_restore_reg_using_pop (rtx reg
)
10996 struct machine_function
*m
= cfun
->machine
;
10997 rtx insn
= emit_insn (gen_pop (reg
));
/* Queue the CFA-restore note for REG at the current SP offset, then account
   for the word the pop removed from the stack.  */
10999 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
11000 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
/* Case 1: we just popped the DRAP register while it is the CFA register.  */
11002 if (m
->fs
.cfa_reg
== crtl
->drap_reg
11003 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
11005 /* Previously we'd represented the CFA as an expression
11006 like *(%ebp - 8). We've just popped that value from
11007 the stack, which means we need to reset the CFA to
11008 the drap register. This will remain until we restore
11009 the stack pointer. */
11010 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11011 RTX_FRAME_RELATED_P (insn
) = 1;
11013 /* This means that the DRAP register is valid for addressing too. */
11014 m
->fs
.drap_valid
= true;
/* Case 2: the CFA is tracked via the stack pointer; the pop moves SP by one
   word, so emit a matching CFA-adjust note and update the bookkeeping.  */
11018 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
11020 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
11021 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
11022 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
11023 RTX_FRAME_RELATED_P (insn
) = 1;
11025 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11028 /* When the frame pointer is the CFA, and we pop it, we are
11029 swapping back to the stack pointer as the CFA. This happens
11030 for stack frames that don't allocate other data, so we assume
11031 the stack pointer is now pointing at the return address, i.e.
11032 the function entry state, which makes the offset be 1 word. */
11033 if (reg
== hard_frame_pointer_rtx
)
11035 m
->fs
.fp_valid
= false;
11036 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11038 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11039 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11041 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11042 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11043 GEN_INT (m
->fs
.cfa_offset
)));
11044 RTX_FRAME_RELATED_P (insn
) = 1;
11049 /* Emit code to restore saved registers using POP insns. */
/* NOTE(review): lossy extraction -- the return-type line and braces of this
   definition were dropped; surviving code text is byte-identical.  */
11052 ix86_emit_restore_regs_using_pop (void)
11054 unsigned int regno
;
/* Pop each non-SSE hard register for which ix86_save_reg reports a save
   (SSE registers cannot be popped; they are restored via moves elsewhere).  */
11056 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11057 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
11058 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
11061 /* Emit code and notes for the LEAVE instruction. */
/* NOTE(review): lossy extraction -- the return-type line, braces, and some
   trailing argument lines (the offsets passed to plus_constant and to
   ix86_add_cfa_restore_note at the end) were dropped; surviving code text
   is byte-identical.  */
11064 ix86_emit_leave (void)
11066 struct machine_function
*m
= cfun
->machine
;
11067 rtx insn
= emit_insn (ix86_gen_leave ());
11069 ix86_add_queued_cfa_restore_notes (insn
);
/* After "leave" the stack pointer is derived from the frame pointer
   (one word above the saved FP slot), so SP becomes valid and FP does not.  */
11071 gcc_assert (m
->fs
.fp_valid
);
11072 m
->fs
.sp_valid
= true;
11073 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
11074 m
->fs
.fp_valid
= false;
/* If the CFA was being tracked via the frame pointer, move it to the stack
   pointer and emit the corresponding unwind notes.  */
11076 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11078 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11079 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
11081 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11082 plus_constant (Pmode
, stack_pointer_rtx
,
11084 RTX_FRAME_RELATED_P (insn
) = 1;
11086 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
11090 /* Emit code to restore saved registers using MOV insns.
11091 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): lossy extraction -- return type, braces, blank lines and the
   declarations of MEM/INSN locals were dropped; surviving code text is
   byte-identical.  */
11093 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11094 bool maybe_eh_return
)
11096 struct machine_function
*m
= cfun
->machine
;
11097 unsigned int regno
;
11099 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11100 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
/* Reload the register from its save slot; choose_baseaddr picks the best
   currently-valid base register (SP/FP/DRAP) for the given CFA offset.  */
11102 rtx reg
= gen_rtx_REG (word_mode
, regno
);
11105 mem
= choose_baseaddr (cfa_offset
);
11106 mem
= gen_frame_mem (word_mode
, mem
);
11107 insn
= emit_move_insn (reg
, mem
);
11109 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
11111 /* Previously we'd represented the CFA as an expression
11112 like *(%ebp - 8). We've just popped that value from
11113 the stack, which means we need to reset the CFA to
11114 the drap register. This will remain until we restore
11115 the stack pointer. */
11116 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11117 RTX_FRAME_RELATED_P (insn
) = 1;
11119 /* This means that the DRAP register is valid for addressing. */
11120 m
->fs
.drap_valid
= true;
/* Queue a restore note and step to the next (lower) save slot.  */
11123 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11125 cfa_offset
-= UNITS_PER_WORD
;
11129 /* Emit code to restore saved registers using MOV insns.
11130 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): lossy extraction -- return type, braces, the MEM local
   declaration and the per-iteration offset step were dropped; surviving
   code text is byte-identical.  SSE registers are restored as V4SF moves
   from 128-bit-aligned frame slots.  */
11132 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11133 bool maybe_eh_return
)
11135 unsigned int regno
;
11137 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11138 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11140 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
/* The save area slots are 16-byte aligned; record that on the MEM so the
   aligned move patterns can be used.  */
11143 mem
= choose_baseaddr (cfa_offset
);
11144 mem
= gen_rtx_MEM (V4SFmode
, mem
);
11145 set_mem_align (mem
, 128);
11146 emit_move_insn (reg
, mem
);
11148 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11154 /* Restore function stack, frame, and registers. */
/* NOTE(review): this definition is a lossy extraction -- the return-type
   line, braces, `else` keywords, several condition and argument lines, and
   whole statements are missing (visible as gaps in the embedded original
   line numbers).  The surviving code text is preserved byte-for-byte; only
   comments are added or corrected.  */
11157 ix86_expand_epilogue (int style
)
11159 struct machine_function
*m
= cfun
->machine
;
11160 struct machine_frame_state frame_state_save
= m
->fs
;
11161 struct ix86_frame frame
;
11162 bool restore_regs_via_mov
;
11165 ix86_finalize_stack_realign_flags ();
11166 ix86_compute_frame_layout (&frame
);
/* Establish the frame state on entry to the epilogue: SP is a valid base
   unless a frame pointer (or FP-based realignment) is in use, and the
   recorded offsets must agree with the computed frame layout.  */
11168 m
->fs
.sp_valid
= (!frame_pointer_needed
11169 || (crtl
->sp_is_unchanging
11170 && !stack_realign_fp
));
11171 gcc_assert (!m
->fs
.sp_valid
11172 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11174 /* The FP must be valid if the frame pointer is present. */
11175 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11176 gcc_assert (!m
->fs
.fp_valid
11177 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11179 /* We must have *some* valid pointer to the stack frame. */
11180 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11182 /* The DRAP is never valid at this point. */
11183 gcc_assert (!m
->fs
.drap_valid
);
11185 /* See the comment about red zone and frame
11186 pointer usage in ix86_expand_prologue. */
11187 if (frame_pointer_needed
&& frame
.red_zone_size
)
11188 emit_insn (gen_memory_blockage ());
11190 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11191 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11193 /* Determine the CFA offset of the end of the red-zone. */
11194 m
->fs
.red_zone_offset
= 0;
11195 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11197 /* The red-zone begins below the return address. */
11198 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11200 /* When the register save area is in the aligned portion of
11201 the stack, determine the maximum runtime displacement that
11202 matches up with the aligned frame. */
11203 if (stack_realign_drap
)
11204 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11208 /* Special care must be taken for the normal return case of a function
11209 using eh_return: the eax and edx registers are marked as saved, but
11210 not restored along this path. Adjust the save location to match. */
11211 if (crtl
->calls_eh_return
&& style
!= 2)
11212 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
11214 /* EH_RETURN requires the use of moves to function properly. */
11215 if (crtl
->calls_eh_return
)
11216 restore_regs_via_mov
= true;
11217 /* SEH requires the use of pops to identify the epilogue. */
11218 else if (TARGET_SEH
)
11219 restore_regs_via_mov
= false;
11220 /* If we're only restoring one register and sp is not valid then
11221 using a move instruction to restore the register since it's
11222 less work than reloading sp and popping the register. */
11223 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11224 restore_regs_via_mov
= true;
11225 else if (TARGET_EPILOGUE_USING_MOVE
11226 && cfun
->machine
->use_fast_prologue_epilogue
11227 && (frame
.nregs
> 1
11228 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11229 restore_regs_via_mov
= true;
11230 else if (frame_pointer_needed
11232 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11233 restore_regs_via_mov
= true;
11234 else if (frame_pointer_needed
11235 && TARGET_USE_LEAVE
11236 && cfun
->machine
->use_fast_prologue_epilogue
11237 && frame
.nregs
== 1)
11238 restore_regs_via_mov
= true;
11240 restore_regs_via_mov
= false;
/* Restore saved registers via moves (and SSE moves) as chosen above.  */
11242 if (restore_regs_via_mov
|| frame
.nsseregs
)
11244 /* Ensure that the entire register save area is addressable via
11245 the stack pointer, if we will restore via sp. */
11247 && m
->fs
.sp_offset
> 0x7fffffff
11248 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11249 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11251 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11252 GEN_INT (m
->fs
.sp_offset
11253 - frame
.sse_reg_save_offset
),
11255 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11259 /* If there are any SSE registers to restore, then we have to do it
11260 via moves, since there's obviously no pop for SSE regs. */
11261 if (frame
.nsseregs
)
11262 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11265 if (restore_regs_via_mov
)
11270 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11272 /* eh_return epilogues need %ecx added to the stack pointer. */
11275 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11277 /* Stack align doesn't work with eh_return. */
11278 gcc_assert (!stack_realign_drap
);
11279 /* Neither does regparm nested functions. */
11280 gcc_assert (!ix86_static_chain_on_stack
);
11282 if (frame_pointer_needed
)
11284 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11285 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11286 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11288 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11289 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11291 /* Note that we use SA as a temporary CFA, as the return
11292 address is at the proper place relative to it. We
11293 pretend this happens at the FP restore insn because
11294 prior to this insn the FP would be stored at the wrong
11295 offset relative to SA, and after this insn we have no
11296 other reasonable register to use for the CFA. We don't
11297 bother resetting the CFA to the SP for the duration of
11298 the return insn. */
11299 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11300 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11301 ix86_add_queued_cfa_restore_notes (insn
);
11302 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11303 RTX_FRAME_RELATED_P (insn
) = 1;
11305 m
->fs
.cfa_reg
= sa
;
11306 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11307 m
->fs
.fp_valid
= false;
11309 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11310 const0_rtx
, style
, false);
11314 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11315 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11316 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11317 ix86_add_queued_cfa_restore_notes (insn
);
11319 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11320 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11322 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11323 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11324 plus_constant (Pmode
, stack_pointer_rtx
,
11326 RTX_FRAME_RELATED_P (insn
) = 1;
11329 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11330 m
->fs
.sp_valid
= true;
11335 /* SEH requires that the function end with (1) a stack adjustment
11336 if necessary, (2) a sequence of pops, and (3) a return or
11337 jump instruction. Prevent insns from the function body from
11338 being scheduled into this sequence. */
11341 /* Prevent a catch region from being adjacent to the standard
11342 epilogue sequence. Unfortunately crtl->uses_eh_lsda nor
11343 several other flags that would be interesting to test are
11345 if (flag_non_call_exceptions
)
11346 emit_insn (gen_nops (const1_rtx
));
11348 emit_insn (gen_blockage ());
11351 /* First step is to deallocate the stack frame so that we can
11352 pop the registers. Also do it on SEH target for very large
11353 frame as the emitted instructions aren't allowed by the ABI in
11355 if (!m
->fs
.sp_valid
11357 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11358 >= SEH_MAX_FRAME_SIZE
)))
11360 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11361 GEN_INT (m
->fs
.fp_offset
11362 - frame
.reg_save_offset
),
11365 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11367 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11368 GEN_INT (m
->fs
.sp_offset
11369 - frame
.reg_save_offset
),
11371 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11374 ix86_emit_restore_regs_using_pop ();
11377 /* If we used a stack pointer and haven't already got rid of it,
11379 if (m
->fs
.fp_valid
)
11381 /* If the stack pointer is valid and pointing at the frame
11382 pointer store address, then we only need a pop. */
11383 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11384 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11385 /* Leave results in shorter dependency chains on CPUs that are
11386 able to grok it fast. */
11387 else if (TARGET_USE_LEAVE
11388 || optimize_bb_for_size_p (EXIT_BLOCK_PTR
)
11389 || !cfun
->machine
->use_fast_prologue_epilogue
)
11390 ix86_emit_leave ();
11393 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11394 hard_frame_pointer_rtx
,
11395 const0_rtx
, style
, !using_drap
);
11396 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
/* DRAP teardown: recompute SP from the DRAP register, accounting for the
   static chain slot and a possibly-saved DRAP register.  */
11402 int param_ptr_offset
= UNITS_PER_WORD
;
11405 gcc_assert (stack_realign_drap
);
11407 if (ix86_static_chain_on_stack
)
11408 param_ptr_offset
+= UNITS_PER_WORD
;
11409 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11410 param_ptr_offset
+= UNITS_PER_WORD
;
11412 insn
= emit_insn (gen_rtx_SET
11413 (VOIDmode
, stack_pointer_rtx
,
11414 gen_rtx_PLUS (Pmode
,
11416 GEN_INT (-param_ptr_offset
))));
11417 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11418 m
->fs
.cfa_offset
= param_ptr_offset
;
11419 m
->fs
.sp_offset
= param_ptr_offset
;
11420 m
->fs
.realigned
= false;
11422 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11423 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11424 GEN_INT (param_ptr_offset
)));
11425 RTX_FRAME_RELATED_P (insn
) = 1;
11427 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11428 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11431 /* At this point the stack pointer must be valid, and we must have
11432 restored all of the registers. We may not have deallocated the
11433 entire stack frame. We've delayed this until now because it may
11434 be possible to merge the local stack deallocation with the
11435 deallocation forced by ix86_static_chain_on_stack. */
11436 gcc_assert (m
->fs
.sp_valid
);
11437 gcc_assert (!m
->fs
.fp_valid
);
11438 gcc_assert (!m
->fs
.realigned
);
11439 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11441 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11442 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11446 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11448 /* Sibcall epilogues don't want a return instruction. */
11451 m
->fs
= frame_state_save
;
/* Emit the return: either a plain return, a "ret $N" when the callee pops
   its arguments, or (for pops >= 64K, 32-bit only) pop the return address
   into %ecx, adjust SP explicitly, and jump through %ecx.  */
11455 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11457 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11459 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11460 address, do explicit add, and jump indirectly to the caller. */
11462 if (crtl
->args
.pops_args
>= 65536)
11464 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11467 /* There is no "pascal" calling convention in any 64bit ABI. */
11468 gcc_assert (!TARGET_64BIT
);
11470 insn
= emit_insn (gen_pop (ecx
));
11471 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11472 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11474 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11475 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11476 add_reg_note (insn
, REG_CFA_REGISTER
,
11477 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11478 RTX_FRAME_RELATED_P (insn
) = 1;
11480 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11482 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11485 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11488 emit_jump_insn (gen_simple_return_internal ());
11490 /* Restore the state back to the state from the prologue,
11491 so that it's correct for the next epilogue. */
11492 m
->fs
= frame_state_save
;
11495 /* Reset from the function's potential modifications. */
/* NOTE(review): lossy extraction -- the return-type line, braces, and
   several condition heads (the TARGET_MACHO guard and the loop/if around
   the backwards NOTE scan) are missing; surviving code text is
   byte-identical.  */
11498 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11499 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
/* Undo any PIC-register renumbering performed during prologue expansion.  */
11501 if (pic_offset_table_rtx
)
11502 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
)
;
11504 /* Mach-O doesn't support labels at the end of objects, so if
11505 it looks like we might want one, insert a NOP. */
/* Scan backwards from the last insn looking for deleted-label notes;
   the enclosing loop/condition lines are missing from this extraction --
   see the full file for the exact guard.  */
11507 rtx insn
= get_last_insn ();
11508 rtx deleted_debug_label
= NULL_RTX
;
11511 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11513 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11514 notes only, instead set their CODE_LABEL_NUMBER to -1,
11515 otherwise there would be code generation differences
11516 in between -g and -g0. */
11517 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11518 deleted_debug_label
= insn
;
11519 insn
= PREV_INSN (insn
);
11524 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11525 fputs ("\tnop\n", file
);
11526 else if (deleted_debug_label
)
11527 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11528 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11529 CODE_LABEL_NUMBER (insn
) = -1;
11535 /* Return a scratch register to use in the split stack prologue. The
11536 split stack prologue is used for -fsplit-stack. It is the first
11537 instructions in the function, even before the regular prologue.
11538 The scratch register can be any caller-saved register which is not
11539 used for parameters or for the static chain. */
/* NOTE(review): lossy extraction -- braces, the TARGET_64BIT fast path,
   the `regparm` declaration and the concrete register-number returns
   (e.g. CX_REG/AX_REG branches) are missing; surviving code text is
   byte-identical.  Returns INVALID_REGNUM (after a sorry()) for the
   unsupported combinations visible below.  */
11541 static unsigned int
11542 split_stack_prologue_scratch_regno (void)
11548 bool is_fastcall
, is_thiscall
;
/* Detect calling conventions that consume the usual scratch registers.  */
11551 is_fastcall
= (lookup_attribute ("fastcall",
11552 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11554 is_thiscall
= (lookup_attribute ("thiscall",
11555 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11557 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11561 if (DECL_STATIC_CHAIN (cfun
->decl
))
11563 sorry ("-fsplit-stack does not support fastcall with "
11564 "nested function");
11565 return INVALID_REGNUM
;
11569 else if (is_thiscall
)
11571 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11575 else if (regparm
< 3)
11577 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11583 sorry ("-fsplit-stack does not support 2 register "
11584 " parameters for a nested function");
11585 return INVALID_REGNUM
;
11592 /* FIXME: We could make this work by pushing a register
11593 around the addition and comparison. */
11594 sorry ("-fsplit-stack does not support 3 register parameters");
11595 return INVALID_REGNUM
;
11600 /* A SYMBOL_REF for the function which allocates new stackspace for
11603 static GTY(()) rtx split_stack_fn
;
11605 /* A SYMBOL_REF for the more stack function when using the large
11608 static GTY(()) rtx split_stack_fn_large
;
11610 /* Handle -fsplit-stack. These are the first instructions in the
11611 function, even before the regular prologue. */
11614 ix86_expand_split_stack_prologue (void)
11616 struct ix86_frame frame
;
11617 HOST_WIDE_INT allocate
;
11618 unsigned HOST_WIDE_INT args_size
;
11619 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11620 rtx scratch_reg
= NULL_RTX
;
11621 rtx varargs_label
= NULL_RTX
;
11624 gcc_assert (flag_split_stack
&& reload_completed
);
11626 ix86_finalize_stack_realign_flags ();
11627 ix86_compute_frame_layout (&frame
);
11628 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11630 /* This is the label we will branch to if we have enough stack
11631 space. We expect the basic block reordering pass to reverse this
11632 branch if optimizing, so that we branch in the unlikely case. */
11633 label
= gen_label_rtx ();
11635 /* We need to compare the stack pointer minus the frame size with
11636 the stack boundary in the TCB. The stack boundary always gives
11637 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11638 can compare directly. Otherwise we need to do an addition. */
11640 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11641 UNSPEC_STACK_CHECK
);
11642 limit
= gen_rtx_CONST (Pmode
, limit
);
11643 limit
= gen_rtx_MEM (Pmode
, limit
);
11644 if (allocate
< SPLIT_STACK_AVAILABLE
)
11645 current
= stack_pointer_rtx
;
11648 unsigned int scratch_regno
;
11651 /* We need a scratch register to hold the stack pointer minus
11652 the required frame size. Since this is the very start of the
11653 function, the scratch register can be any caller-saved
11654 register which is not used for parameters. */
11655 offset
= GEN_INT (- allocate
);
11656 scratch_regno
= split_stack_prologue_scratch_regno ();
11657 if (scratch_regno
== INVALID_REGNUM
)
11659 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11660 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11662 /* We don't use ix86_gen_add3 in this case because it will
11663 want to split to lea, but when not optimizing the insn
11664 will not be split after this point. */
11665 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11666 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11671 emit_move_insn (scratch_reg
, offset
);
11672 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11673 stack_pointer_rtx
));
11675 current
= scratch_reg
;
11678 ix86_expand_branch (GEU
, current
, limit
, label
);
11679 jump_insn
= get_last_insn ();
11680 JUMP_LABEL (jump_insn
) = label
;
11682 /* Mark the jump as very likely to be taken. */
11683 add_int_reg_note (jump_insn
, REG_BR_PROB
,
11684 REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100);
11686 if (split_stack_fn
== NULL_RTX
)
11687 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11688 fn
= split_stack_fn
;
11690 /* Get more stack space. We pass in the desired stack space and the
11691 size of the arguments to copy to the new stack. In 32-bit mode
11692 we push the parameters; __morestack will return on a new stack
11693 anyhow. In 64-bit mode we pass the parameters in r10 and
11695 allocate_rtx
= GEN_INT (allocate
);
11696 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11697 call_fusage
= NULL_RTX
;
11702 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11703 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11705 /* If this function uses a static chain, it will be in %r10.
11706 Preserve it across the call to __morestack. */
11707 if (DECL_STATIC_CHAIN (cfun
->decl
))
11711 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11712 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11713 use_reg (&call_fusage
, rax
);
11716 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11719 HOST_WIDE_INT argval
;
11721 gcc_assert (Pmode
== DImode
);
11722 /* When using the large model we need to load the address
11723 into a register, and we've run out of registers. So we
11724 switch to a different calling convention, and we call a
11725 different function: __morestack_large. We pass the
11726 argument size in the upper 32 bits of r10 and pass the
11727 frame size in the lower 32 bits. */
11728 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11729 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11731 if (split_stack_fn_large
== NULL_RTX
)
11732 split_stack_fn_large
=
11733 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11735 if (ix86_cmodel
== CM_LARGE_PIC
)
11739 label
= gen_label_rtx ();
11740 emit_label (label
);
11741 LABEL_PRESERVE_P (label
) = 1;
11742 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11743 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11744 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11745 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11747 x
= gen_rtx_CONST (Pmode
, x
);
11748 emit_move_insn (reg11
, x
);
11749 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11750 x
= gen_const_mem (Pmode
, x
);
11751 emit_move_insn (reg11
, x
);
11754 emit_move_insn (reg11
, split_stack_fn_large
);
11758 argval
= ((args_size
<< 16) << 16) + allocate
;
11759 emit_move_insn (reg10
, GEN_INT (argval
));
11763 emit_move_insn (reg10
, allocate_rtx
);
11764 emit_move_insn (reg11
, GEN_INT (args_size
));
11765 use_reg (&call_fusage
, reg11
);
11768 use_reg (&call_fusage
, reg10
);
11772 emit_insn (gen_push (GEN_INT (args_size
)));
11773 emit_insn (gen_push (allocate_rtx
));
11775 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11776 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11778 add_function_usage_to (call_insn
, call_fusage
);
11780 /* In order to make call/return prediction work right, we now need
11781 to execute a return instruction. See
11782 libgcc/config/i386/morestack.S for the details on how this works.
11784 For flow purposes gcc must not see this as a return
11785 instruction--we need control flow to continue at the subsequent
11786 label. Therefore, we use an unspec. */
11787 gcc_assert (crtl
->args
.pops_args
< 65536);
11788 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11790 /* If we are in 64-bit mode and this function uses a static chain,
11791 we saved %r10 in %rax before calling _morestack. */
11792 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11793 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11794 gen_rtx_REG (word_mode
, AX_REG
));
11796 /* If this function calls va_start, we need to store a pointer to
11797 the arguments on the old stack, because they may not have been
11798 all copied to the new stack. At this point the old stack can be
11799 found at the frame pointer value used by __morestack, because
11800 __morestack has set that up before calling back to us. Here we
11801 store that pointer in a scratch register, and in
11802 ix86_expand_prologue we store the scratch register in a stack
11804 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11806 unsigned int scratch_regno
;
11810 scratch_regno
= split_stack_prologue_scratch_regno ();
11811 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11812 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11816 return address within this function
11817 return address of caller of this function
11819 So we add three words to get to the stack arguments.
11823 return address within this function
11824 first argument to __morestack
11825 second argument to __morestack
11826 return address of caller of this function
11828 So we add five words to get to the stack arguments.
11830 words
= TARGET_64BIT
? 3 : 5;
11831 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11832 gen_rtx_PLUS (Pmode
, frame_reg
,
11833 GEN_INT (words
* UNITS_PER_WORD
))));
11835 varargs_label
= gen_label_rtx ();
11836 emit_jump_insn (gen_jump (varargs_label
));
11837 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11842 emit_label (label
);
11843 LABEL_NUSES (label
) = 1;
11845 /* If this function calls va_start, we now have to set the scratch
11846 register for the case where we do not call __morestack. In this
11847 case we need to set it based on the stack pointer. */
11848 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11850 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11851 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11852 GEN_INT (UNITS_PER_WORD
))));
11854 emit_label (varargs_label
);
11855 LABEL_NUSES (varargs_label
) = 1;
11859 /* We may have to tell the dataflow pass that the split stack prologue
11860 is initializing a scratch register. */
11863 ix86_live_on_entry (bitmap regs
)
11865 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11867 gcc_assert (flag_split_stack
);
11868 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11872 /* Determine if op is suitable SUBREG RTX for address. */
11875 ix86_address_subreg_operand (rtx op
)
11877 enum machine_mode mode
;
11882 mode
= GET_MODE (op
);
11884 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11885 failures when the register is one word out of a two word structure. */
11886 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11889 /* Allow only SUBREGs of non-eliminable hard registers. */
11890 return register_no_elim_operand (op
, mode
);
11893 /* Extract the parts of an RTL expression that is a valid memory address
11894 for an instruction. Return 0 if the structure of the address is
11895 grossly off. Return -1 if the address contains ASHIFT, so it is not
11896 strictly valid, but still used for computing length of lea instruction. */
11899 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11901 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11902 rtx base_reg
, index_reg
;
11903 HOST_WIDE_INT scale
= 1;
11904 rtx scale_rtx
= NULL_RTX
;
11907 enum ix86_address_seg seg
= SEG_DEFAULT
;
11909 /* Allow zero-extended SImode addresses,
11910 they will be emitted with addr32 prefix. */
11911 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11913 if (GET_CODE (addr
) == ZERO_EXTEND
11914 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11916 addr
= XEXP (addr
, 0);
11917 if (CONST_INT_P (addr
))
11920 else if (GET_CODE (addr
) == AND
11921 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11923 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11924 if (addr
== NULL_RTX
)
11927 if (CONST_INT_P (addr
))
11932 /* Allow SImode subregs of DImode addresses,
11933 they will be emitted with addr32 prefix. */
11934 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11936 if (GET_CODE (addr
) == SUBREG
11937 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11939 addr
= SUBREG_REG (addr
);
11940 if (CONST_INT_P (addr
))
11947 else if (GET_CODE (addr
) == SUBREG
)
11949 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11954 else if (GET_CODE (addr
) == PLUS
)
11956 rtx addends
[4], op
;
11964 addends
[n
++] = XEXP (op
, 1);
11967 while (GET_CODE (op
) == PLUS
);
11972 for (i
= n
; i
>= 0; --i
)
11975 switch (GET_CODE (op
))
11980 index
= XEXP (op
, 0);
11981 scale_rtx
= XEXP (op
, 1);
11987 index
= XEXP (op
, 0);
11988 tmp
= XEXP (op
, 1);
11989 if (!CONST_INT_P (tmp
))
11991 scale
= INTVAL (tmp
);
11992 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11994 scale
= 1 << scale
;
11999 if (GET_CODE (op
) != UNSPEC
)
12004 if (XINT (op
, 1) == UNSPEC_TP
12005 && TARGET_TLS_DIRECT_SEG_REFS
12006 && seg
== SEG_DEFAULT
)
12007 seg
= DEFAULT_TLS_SEG_REG
;
12013 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
12040 else if (GET_CODE (addr
) == MULT
)
12042 index
= XEXP (addr
, 0); /* index*scale */
12043 scale_rtx
= XEXP (addr
, 1);
12045 else if (GET_CODE (addr
) == ASHIFT
)
12047 /* We're called for lea too, which implements ashift on occasion. */
12048 index
= XEXP (addr
, 0);
12049 tmp
= XEXP (addr
, 1);
12050 if (!CONST_INT_P (tmp
))
12052 scale
= INTVAL (tmp
);
12053 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12055 scale
= 1 << scale
;
12059 disp
= addr
; /* displacement */
12065 else if (GET_CODE (index
) == SUBREG
12066 && ix86_address_subreg_operand (SUBREG_REG (index
)))
12072 /* Address override works only on the (%reg) part of %fs:(%reg). */
12073 if (seg
!= SEG_DEFAULT
12074 && ((base
&& GET_MODE (base
) != word_mode
)
12075 || (index
&& GET_MODE (index
) != word_mode
)))
12078 /* Extract the integral value of scale. */
12081 if (!CONST_INT_P (scale_rtx
))
12083 scale
= INTVAL (scale_rtx
);
12086 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
12087 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
12089 /* Avoid useless 0 displacement. */
12090 if (disp
== const0_rtx
&& (base
|| index
))
12093 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12094 if (base_reg
&& index_reg
&& scale
== 1
12095 && (index_reg
== arg_pointer_rtx
12096 || index_reg
== frame_pointer_rtx
12097 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
12100 tmp
= base
, base
= index
, index
= tmp
;
12101 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
12104 /* Special case: %ebp cannot be encoded as a base without a displacement.
12108 && (base_reg
== hard_frame_pointer_rtx
12109 || base_reg
== frame_pointer_rtx
12110 || base_reg
== arg_pointer_rtx
12111 || (REG_P (base_reg
)
12112 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
12113 || REGNO (base_reg
) == R13_REG
))))
12116 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12117 Avoid this by transforming to [%esi+0].
12118 Reload calls address legitimization without cfun defined, so we need
12119 to test cfun for being non-NULL. */
12120 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
12121 && base_reg
&& !index_reg
&& !disp
12122 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
12125 /* Special case: encode reg+reg instead of reg*2. */
12126 if (!base
&& index
&& scale
== 2)
12127 base
= index
, base_reg
= index_reg
, scale
= 1;
12129 /* Special case: scaling cannot be encoded without base or displacement. */
12130 if (!base
&& !disp
&& index
&& scale
!= 1)
12134 out
->index
= index
;
12136 out
->scale
= scale
;
12142 /* Return cost of the memory address x.
12143 For i386, it is better to use a complex address than let gcc copy
12144 the address into a reg and make a new pseudo. But not if the address
12145 requires to two regs - that would mean more pseudos with longer
12148 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12149 addr_space_t as ATTRIBUTE_UNUSED
,
12150 bool speed ATTRIBUTE_UNUSED
)
12152 struct ix86_address parts
;
12154 int ok
= ix86_decompose_address (x
, &parts
);
12158 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12159 parts
.base
= SUBREG_REG (parts
.base
);
12160 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12161 parts
.index
= SUBREG_REG (parts
.index
);
12163 /* Attempt to minimize number of registers in the address. */
12165 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12167 && (!REG_P (parts
.index
)
12168 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12172 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12174 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12175 && parts
.base
!= parts
.index
)
12178 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12179 since it's predecode logic can't detect the length of instructions
12180 and it degenerates to vector decoded. Increase cost of such
12181 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12182 to split such addresses or even refuse such addresses at all.
12184 Following addressing modes are affected:
12189 The first and last case may be avoidable by explicitly coding the zero in
12190 memory address, but I don't have AMD-K6 machine handy to check this
12194 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12195 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12196 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12202 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12203 this is used for to form addresses to local data when -fPIC is in
12207 darwin_local_data_pic (rtx disp
)
12209 return (GET_CODE (disp
) == UNSPEC
12210 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12213 /* Determine if a given RTX is a valid constant. We already know this
12214 satisfies CONSTANT_P. */
12217 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12219 switch (GET_CODE (x
))
12224 if (GET_CODE (x
) == PLUS
)
12226 if (!CONST_INT_P (XEXP (x
, 1)))
12231 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12234 /* Only some unspecs are valid as "constants". */
12235 if (GET_CODE (x
) == UNSPEC
)
12236 switch (XINT (x
, 1))
12239 case UNSPEC_GOTOFF
:
12240 case UNSPEC_PLTOFF
:
12241 return TARGET_64BIT
;
12243 case UNSPEC_NTPOFF
:
12244 x
= XVECEXP (x
, 0, 0);
12245 return (GET_CODE (x
) == SYMBOL_REF
12246 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12247 case UNSPEC_DTPOFF
:
12248 x
= XVECEXP (x
, 0, 0);
12249 return (GET_CODE (x
) == SYMBOL_REF
12250 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12255 /* We must have drilled down to a symbol. */
12256 if (GET_CODE (x
) == LABEL_REF
)
12258 if (GET_CODE (x
) != SYMBOL_REF
)
12263 /* TLS symbols are never valid. */
12264 if (SYMBOL_REF_TLS_MODEL (x
))
12267 /* DLLIMPORT symbols are never valid. */
12268 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12269 && SYMBOL_REF_DLLIMPORT_P (x
))
12273 /* mdynamic-no-pic */
12274 if (MACHO_DYNAMIC_NO_PIC_P
)
12275 return machopic_symbol_defined_p (x
);
12280 if (GET_MODE (x
) == TImode
12281 && x
!= CONST0_RTX (TImode
)
12287 if (!standard_sse_constant_p (x
))
12294 /* Otherwise we handle everything else in the move patterns. */
12298 /* Determine if it's legal to put X into the constant pool. This
12299 is not possible for the address of thread-local symbols, which
12300 is checked above. */
12303 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12305 /* We can always put integral constants and vectors in memory. */
12306 switch (GET_CODE (x
))
12316 return !ix86_legitimate_constant_p (mode
, x
);
12319 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12323 is_imported_p (rtx x
)
12325 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12326 || GET_CODE (x
) != SYMBOL_REF
)
12329 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
12333 /* Nonzero if the constant value X is a legitimate general operand
12334 when generating PIC code. It is given that flag_pic is on and
12335 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12338 legitimate_pic_operand_p (rtx x
)
12342 switch (GET_CODE (x
))
12345 inner
= XEXP (x
, 0);
12346 if (GET_CODE (inner
) == PLUS
12347 && CONST_INT_P (XEXP (inner
, 1)))
12348 inner
= XEXP (inner
, 0);
12350 /* Only some unspecs are valid as "constants". */
12351 if (GET_CODE (inner
) == UNSPEC
)
12352 switch (XINT (inner
, 1))
12355 case UNSPEC_GOTOFF
:
12356 case UNSPEC_PLTOFF
:
12357 return TARGET_64BIT
;
12359 x
= XVECEXP (inner
, 0, 0);
12360 return (GET_CODE (x
) == SYMBOL_REF
12361 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12362 case UNSPEC_MACHOPIC_OFFSET
:
12363 return legitimate_pic_address_disp_p (x
);
12371 return legitimate_pic_address_disp_p (x
);
12378 /* Determine if a given CONST RTX is a valid memory displacement
12382 legitimate_pic_address_disp_p (rtx disp
)
12386 /* In 64bit mode we can allow direct addresses of symbols and labels
12387 when they are not dynamic symbols. */
12390 rtx op0
= disp
, op1
;
12392 switch (GET_CODE (disp
))
12398 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12400 op0
= XEXP (XEXP (disp
, 0), 0);
12401 op1
= XEXP (XEXP (disp
, 0), 1);
12402 if (!CONST_INT_P (op1
)
12403 || INTVAL (op1
) >= 16*1024*1024
12404 || INTVAL (op1
) < -16*1024*1024)
12406 if (GET_CODE (op0
) == LABEL_REF
)
12408 if (GET_CODE (op0
) == CONST
12409 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12410 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12412 if (GET_CODE (op0
) == UNSPEC
12413 && XINT (op0
, 1) == UNSPEC_PCREL
)
12415 if (GET_CODE (op0
) != SYMBOL_REF
)
12420 /* TLS references should always be enclosed in UNSPEC.
12421 The dllimported symbol needs always to be resolved. */
12422 if (SYMBOL_REF_TLS_MODEL (op0
)
12423 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12428 if (is_imported_p (op0
))
12431 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12432 || !SYMBOL_REF_LOCAL_P (op0
))
12435 /* Function-symbols need to be resolved only for
12437 For the small-model we don't need to resolve anything
12439 if ((ix86_cmodel
!= CM_LARGE_PIC
12440 && SYMBOL_REF_FUNCTION_P (op0
))
12441 || ix86_cmodel
== CM_SMALL_PIC
)
12443 /* Non-external symbols don't need to be resolved for
12444 large, and medium-model. */
12445 if ((ix86_cmodel
== CM_LARGE_PIC
12446 || ix86_cmodel
== CM_MEDIUM_PIC
)
12447 && !SYMBOL_REF_EXTERNAL_P (op0
))
12450 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12451 && SYMBOL_REF_LOCAL_P (op0
)
12452 && ix86_cmodel
!= CM_LARGE_PIC
)
12460 if (GET_CODE (disp
) != CONST
)
12462 disp
= XEXP (disp
, 0);
12466 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12467 of GOT tables. We should not need these anyway. */
12468 if (GET_CODE (disp
) != UNSPEC
12469 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12470 && XINT (disp
, 1) != UNSPEC_GOTOFF
12471 && XINT (disp
, 1) != UNSPEC_PCREL
12472 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12475 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12476 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12482 if (GET_CODE (disp
) == PLUS
)
12484 if (!CONST_INT_P (XEXP (disp
, 1)))
12486 disp
= XEXP (disp
, 0);
12490 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12493 if (GET_CODE (disp
) != UNSPEC
)
12496 switch (XINT (disp
, 1))
12501 /* We need to check for both symbols and labels because VxWorks loads
12502 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12504 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12505 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12506 case UNSPEC_GOTOFF
:
12507 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12508 While ABI specify also 32bit relocation but we don't produce it in
12509 small PIC model at all. */
12510 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12511 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12513 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12515 case UNSPEC_GOTTPOFF
:
12516 case UNSPEC_GOTNTPOFF
:
12517 case UNSPEC_INDNTPOFF
:
12520 disp
= XVECEXP (disp
, 0, 0);
12521 return (GET_CODE (disp
) == SYMBOL_REF
12522 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12523 case UNSPEC_NTPOFF
:
12524 disp
= XVECEXP (disp
, 0, 0);
12525 return (GET_CODE (disp
) == SYMBOL_REF
12526 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12527 case UNSPEC_DTPOFF
:
12528 disp
= XVECEXP (disp
, 0, 0);
12529 return (GET_CODE (disp
) == SYMBOL_REF
12530 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12536 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12537 replace the input X, or the original X if no replacement is called for.
12538 The output parameter *WIN is 1 if the calling macro should goto WIN,
12539 0 if it should not. */
12542 ix86_legitimize_reload_address (rtx x
,
12543 enum machine_mode mode ATTRIBUTE_UNUSED
,
12544 int opnum
, int type
,
12545 int ind_levels ATTRIBUTE_UNUSED
)
12547 /* Reload can generate:
12549 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12553 This RTX is rejected from ix86_legitimate_address_p due to
12554 non-strictness of base register 97. Following this rejection,
12555 reload pushes all three components into separate registers,
12556 creating invalid memory address RTX.
12558 Following code reloads only the invalid part of the
12559 memory address RTX. */
12561 if (GET_CODE (x
) == PLUS
12562 && REG_P (XEXP (x
, 1))
12563 && GET_CODE (XEXP (x
, 0)) == PLUS
12564 && REG_P (XEXP (XEXP (x
, 0), 1)))
12567 bool something_reloaded
= false;
12569 base
= XEXP (XEXP (x
, 0), 1);
12570 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12572 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12573 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12574 opnum
, (enum reload_type
) type
);
12575 something_reloaded
= true;
12578 index
= XEXP (x
, 1);
12579 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12581 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12582 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12583 opnum
, (enum reload_type
) type
);
12584 something_reloaded
= true;
12587 gcc_assert (something_reloaded
);
12594 /* Recognizes RTL expressions that are valid memory addresses for an
12595 instruction. The MODE argument is the machine mode for the MEM
12596 expression that wants to use this address.
12598 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12599 convert common non-canonical forms to canonical form so that they will
12603 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12604 rtx addr
, bool strict
)
12606 struct ix86_address parts
;
12607 rtx base
, index
, disp
;
12608 HOST_WIDE_INT scale
;
12610 if (ix86_decompose_address (addr
, &parts
) <= 0)
12611 /* Decomposition failed. */
12615 index
= parts
.index
;
12617 scale
= parts
.scale
;
12619 /* Validate base register. */
12626 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12627 reg
= SUBREG_REG (base
);
12629 /* Base is not a register. */
12632 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12635 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12636 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12637 /* Base is not valid. */
12641 /* Validate index register. */
12648 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12649 reg
= SUBREG_REG (index
);
12651 /* Index is not a register. */
12654 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12657 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12658 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12659 /* Index is not valid. */
12663 /* Index and base should have the same mode. */
12665 && GET_MODE (base
) != GET_MODE (index
))
12668 /* Validate scale factor. */
12672 /* Scale without index. */
12675 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12676 /* Scale is not a valid multiplier. */
12680 /* Validate displacement. */
12683 if (GET_CODE (disp
) == CONST
12684 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12685 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12686 switch (XINT (XEXP (disp
, 0), 1))
12688 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12689 used. While ABI specify also 32bit relocations, we don't produce
12690 them at all and use IP relative instead. */
12692 case UNSPEC_GOTOFF
:
12693 gcc_assert (flag_pic
);
12695 goto is_legitimate_pic
;
12697 /* 64bit address unspec. */
12700 case UNSPEC_GOTPCREL
:
12702 gcc_assert (flag_pic
);
12703 goto is_legitimate_pic
;
12705 case UNSPEC_GOTTPOFF
:
12706 case UNSPEC_GOTNTPOFF
:
12707 case UNSPEC_INDNTPOFF
:
12708 case UNSPEC_NTPOFF
:
12709 case UNSPEC_DTPOFF
:
12712 case UNSPEC_STACK_CHECK
:
12713 gcc_assert (flag_split_stack
);
12717 /* Invalid address unspec. */
12721 else if (SYMBOLIC_CONST (disp
)
12725 && MACHOPIC_INDIRECT
12726 && !machopic_operand_p (disp
)
12732 if (TARGET_64BIT
&& (index
|| base
))
12734 /* foo@dtpoff(%rX) is ok. */
12735 if (GET_CODE (disp
) != CONST
12736 || GET_CODE (XEXP (disp
, 0)) != PLUS
12737 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12738 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12739 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12740 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12741 /* Non-constant pic memory reference. */
12744 else if ((!TARGET_MACHO
|| flag_pic
)
12745 && ! legitimate_pic_address_disp_p (disp
))
12746 /* Displacement is an invalid pic construct. */
12749 else if (MACHO_DYNAMIC_NO_PIC_P
12750 && !ix86_legitimate_constant_p (Pmode
, disp
))
12751 /* displacment must be referenced via non_lazy_pointer */
12755 /* This code used to verify that a symbolic pic displacement
12756 includes the pic_offset_table_rtx register.
12758 While this is good idea, unfortunately these constructs may
12759 be created by "adds using lea" optimization for incorrect
12768 This code is nonsensical, but results in addressing
12769 GOT table with pic_offset_table_rtx base. We can't
12770 just refuse it easily, since it gets matched by
12771 "addsi3" pattern, that later gets split to lea in the
12772 case output register differs from input. While this
12773 can be handled by separate addsi pattern for this case
12774 that never results in lea, this seems to be easier and
12775 correct fix for crash to disable this test. */
12777 else if (GET_CODE (disp
) != LABEL_REF
12778 && !CONST_INT_P (disp
)
12779 && (GET_CODE (disp
) != CONST
12780 || !ix86_legitimate_constant_p (Pmode
, disp
))
12781 && (GET_CODE (disp
) != SYMBOL_REF
12782 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12783 /* Displacement is not constant. */
12785 else if (TARGET_64BIT
12786 && !x86_64_immediate_operand (disp
, VOIDmode
))
12787 /* Displacement is out of range. */
12789 /* In x32 mode, constant addresses are sign extended to 64bit, so
12790 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12791 else if (TARGET_X32
&& !(index
|| base
)
12792 && CONST_INT_P (disp
)
12793 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
12797 /* Everything looks valid. */
12801 /* Determine if a given RTX is a valid constant address. */
12804 constant_address_p (rtx x
)
12806 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12809 /* Return a unique alias set for the GOT. */
12811 static alias_set_type
12812 ix86_GOT_alias_set (void)
12814 static alias_set_type set
= -1;
12816 set
= new_alias_set ();
12820 /* Return a legitimate reference for ORIG (an address) using the
12821 register REG. If REG is 0, a new pseudo is generated.
12823 There are two types of references that must be handled:
12825 1. Global data references must load the address from the GOT, via
12826 the PIC reg. An insn is emitted to do this load, and the reg is
12829 2. Static data references, constant pool addresses, and code labels
12830 compute the address as an offset from the GOT, whose base is in
12831 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12832 differentiate them from global data objects. The returned
12833 address is the PIC reg + an unspec constant.
12835 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12836 reg also appears in the address. */
12839 legitimize_pic_address (rtx orig
, rtx reg
)
12842 rtx new_rtx
= orig
;
12845 if (TARGET_MACHO
&& !TARGET_64BIT
)
12848 reg
= gen_reg_rtx (Pmode
);
12849 /* Use the generic Mach-O PIC machinery. */
12850 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12854 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12856 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12861 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12863 else if (TARGET_64BIT
&& !TARGET_PECOFF
12864 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12867 /* This symbol may be referenced via a displacement from the PIC
12868 base address (@GOTOFF). */
12870 if (reload_in_progress
)
12871 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12872 if (GET_CODE (addr
) == CONST
)
12873 addr
= XEXP (addr
, 0);
12874 if (GET_CODE (addr
) == PLUS
)
12876 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12878 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12881 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12882 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12884 tmpreg
= gen_reg_rtx (Pmode
);
12887 emit_move_insn (tmpreg
, new_rtx
);
12891 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12892 tmpreg
, 1, OPTAB_DIRECT
);
12896 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12898 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
12900 /* This symbol may be referenced via a displacement from the PIC
12901 base address (@GOTOFF). */
12903 if (reload_in_progress
)
12904 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12905 if (GET_CODE (addr
) == CONST
)
12906 addr
= XEXP (addr
, 0);
12907 if (GET_CODE (addr
) == PLUS
)
12909 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12911 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12914 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12915 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12916 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12920 emit_move_insn (reg
, new_rtx
);
12924 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12925 /* We can't use @GOTOFF for text labels on VxWorks;
12926 see gotoff_operand. */
12927 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12929 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12933 /* For x64 PE-COFF there is no GOT table. So we use address
12935 if (TARGET_64BIT
&& TARGET_PECOFF
)
12937 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12938 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12941 reg
= gen_reg_rtx (Pmode
);
12942 emit_move_insn (reg
, new_rtx
);
12945 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12947 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12948 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12949 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12950 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12953 reg
= gen_reg_rtx (Pmode
);
12954 /* Use directly gen_movsi, otherwise the address is loaded
12955 into register for CSE. We don't want to CSE this addresses,
12956 instead we CSE addresses from the GOT table, so skip this. */
12957 emit_insn (gen_movsi (reg
, new_rtx
));
12962 /* This symbol must be referenced via a load from the
12963 Global Offset Table (@GOT). */
12965 if (reload_in_progress
)
12966 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12967 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12968 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12970 new_rtx
= force_reg (Pmode
, new_rtx
);
12971 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12972 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12973 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12976 reg
= gen_reg_rtx (Pmode
);
12977 emit_move_insn (reg
, new_rtx
);
12983 if (CONST_INT_P (addr
)
12984 && !x86_64_immediate_operand (addr
, VOIDmode
))
12988 emit_move_insn (reg
, addr
);
12992 new_rtx
= force_reg (Pmode
, addr
);
12994 else if (GET_CODE (addr
) == CONST
)
12996 addr
= XEXP (addr
, 0);
12998 /* We must match stuff we generate before. Assume the only
12999 unspecs that can get here are ours. Not that we could do
13000 anything with them anyway.... */
13001 if (GET_CODE (addr
) == UNSPEC
13002 || (GET_CODE (addr
) == PLUS
13003 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
13005 gcc_assert (GET_CODE (addr
) == PLUS
);
13007 if (GET_CODE (addr
) == PLUS
)
13009 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
13011 /* Check first to see if this is a constant offset from a @GOTOFF
13012 symbol reference. */
13013 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
13014 && CONST_INT_P (op1
))
13018 if (reload_in_progress
)
13019 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13020 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
13022 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
13023 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13024 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13028 emit_move_insn (reg
, new_rtx
);
13034 if (INTVAL (op1
) < -16*1024*1024
13035 || INTVAL (op1
) >= 16*1024*1024)
13037 if (!x86_64_immediate_operand (op1
, Pmode
))
13038 op1
= force_reg (Pmode
, op1
);
13039 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
13045 rtx base
= legitimize_pic_address (op0
, reg
);
13046 enum machine_mode mode
= GET_MODE (base
);
13048 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
13050 if (CONST_INT_P (new_rtx
))
13052 if (INTVAL (new_rtx
) < -16*1024*1024
13053 || INTVAL (new_rtx
) >= 16*1024*1024)
13055 if (!x86_64_immediate_operand (new_rtx
, mode
))
13056 new_rtx
= force_reg (mode
, new_rtx
);
13058 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
13061 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
13065 if (GET_CODE (new_rtx
) == PLUS
13066 && CONSTANT_P (XEXP (new_rtx
, 1)))
13068 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
13069 new_rtx
= XEXP (new_rtx
, 1);
13071 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
13079 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13082 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
13084 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
13086 if (GET_MODE (tp
) != tp_mode
)
13088 gcc_assert (GET_MODE (tp
) == SImode
);
13089 gcc_assert (tp_mode
== DImode
);
13091 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
13095 tp
= copy_to_mode_reg (tp_mode
, tp
);
13100 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13102 static GTY(()) rtx ix86_tls_symbol
;
13105 ix86_tls_get_addr (void)
13107 if (!ix86_tls_symbol
)
13110 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
13111 ? "___tls_get_addr" : "__tls_get_addr");
13113 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
13116 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
13118 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
13120 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
13121 gen_rtx_CONST (Pmode
, unspec
));
13124 return ix86_tls_symbol
;
13127 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13129 static GTY(()) rtx ix86_tls_module_base_symbol
;
13132 ix86_tls_module_base (void)
13134 if (!ix86_tls_module_base_symbol
)
13136 ix86_tls_module_base_symbol
13137 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
13139 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13140 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13143 return ix86_tls_module_base_symbol
;
13146 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13147 false if we expect this to be used for a memory address and true if
13148 we expect to load the address into a register. */
13151 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
13153 rtx dest
, base
, off
;
13154 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
13155 enum machine_mode tp_mode
= Pmode
;
13160 case TLS_MODEL_GLOBAL_DYNAMIC
:
13161 dest
= gen_reg_rtx (Pmode
);
13165 if (flag_pic
&& !TARGET_PECOFF
)
13166 pic
= pic_offset_table_rtx
;
13169 pic
= gen_reg_rtx (Pmode
);
13170 emit_insn (gen_set_got (pic
));
13174 if (TARGET_GNU2_TLS
)
13177 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13179 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13181 tp
= get_thread_pointer (Pmode
, true);
13182 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13184 if (GET_MODE (x
) != Pmode
)
13185 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13187 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13191 rtx caddr
= ix86_tls_get_addr ();
13195 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13200 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13201 insns
= get_insns ();
13204 if (GET_MODE (x
) != Pmode
)
13205 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13207 RTL_CONST_CALL_P (insns
) = 1;
13208 emit_libcall_block (insns
, dest
, rax
, x
);
13211 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13215 case TLS_MODEL_LOCAL_DYNAMIC
:
13216 base
= gen_reg_rtx (Pmode
);
13221 pic
= pic_offset_table_rtx
;
13224 pic
= gen_reg_rtx (Pmode
);
13225 emit_insn (gen_set_got (pic
));
13229 if (TARGET_GNU2_TLS
)
13231 rtx tmp
= ix86_tls_module_base ();
13234 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13236 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13238 tp
= get_thread_pointer (Pmode
, true);
13239 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13240 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13244 rtx caddr
= ix86_tls_get_addr ();
13248 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13253 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13254 insns
= get_insns ();
13257 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13258 share the LD_BASE result with other LD model accesses. */
13259 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13260 UNSPEC_TLS_LD_BASE
);
13262 RTL_CONST_CALL_P (insns
) = 1;
13263 emit_libcall_block (insns
, base
, rax
, eqv
);
13266 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13269 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13270 off
= gen_rtx_CONST (Pmode
, off
);
13272 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13274 if (TARGET_GNU2_TLS
)
13276 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13278 if (GET_MODE (x
) != Pmode
)
13279 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13281 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13285 case TLS_MODEL_INITIAL_EXEC
:
13288 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13290 /* The Sun linker took the AMD64 TLS spec literally
13291 and can only handle %rax as destination of the
13292 initial executable code sequence. */
13294 dest
= gen_reg_rtx (DImode
);
13295 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13299 /* Generate DImode references to avoid %fs:(%reg32)
13300 problems and linker IE->LE relaxation bug. */
13303 type
= UNSPEC_GOTNTPOFF
;
13307 if (reload_in_progress
)
13308 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13309 pic
= pic_offset_table_rtx
;
13310 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13312 else if (!TARGET_ANY_GNU_TLS
)
13314 pic
= gen_reg_rtx (Pmode
);
13315 emit_insn (gen_set_got (pic
));
13316 type
= UNSPEC_GOTTPOFF
;
13321 type
= UNSPEC_INDNTPOFF
;
13324 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13325 off
= gen_rtx_CONST (tp_mode
, off
);
13327 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13328 off
= gen_const_mem (tp_mode
, off
);
13329 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13331 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13333 base
= get_thread_pointer (tp_mode
,
13334 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13335 off
= force_reg (tp_mode
, off
);
13336 return gen_rtx_PLUS (tp_mode
, base
, off
);
13340 base
= get_thread_pointer (Pmode
, true);
13341 dest
= gen_reg_rtx (Pmode
);
13342 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13346 case TLS_MODEL_LOCAL_EXEC
:
13347 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13348 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13349 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13350 off
= gen_rtx_CONST (Pmode
, off
);
13352 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13354 base
= get_thread_pointer (Pmode
,
13355 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13356 return gen_rtx_PLUS (Pmode
, base
, off
);
13360 base
= get_thread_pointer (Pmode
, true);
13361 dest
= gen_reg_rtx (Pmode
);
13362 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13367 gcc_unreachable ();
13373 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13374 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13375 unique refptr-DECL symbol corresponding to symbol DECL. */
13377 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13378 htab_t dllimport_map
;
13381 get_dllimport_decl (tree decl
, bool beimport
)
13383 struct tree_map
*h
, in
;
13386 const char *prefix
;
13387 size_t namelen
, prefixlen
;
13392 if (!dllimport_map
)
13393 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13395 in
.hash
= htab_hash_pointer (decl
);
13396 in
.base
.from
= decl
;
13397 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13398 h
= (struct tree_map
*) *loc
;
13402 *loc
= h
= ggc_alloc_tree_map ();
13404 h
->base
.from
= decl
;
13405 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13406 VAR_DECL
, NULL
, ptr_type_node
);
13407 DECL_ARTIFICIAL (to
) = 1;
13408 DECL_IGNORED_P (to
) = 1;
13409 DECL_EXTERNAL (to
) = 1;
13410 TREE_READONLY (to
) = 1;
13412 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13413 name
= targetm
.strip_name_encoding (name
);
13415 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13416 ? "*__imp_" : "*__imp__";
13418 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13419 namelen
= strlen (name
);
13420 prefixlen
= strlen (prefix
);
13421 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13422 memcpy (imp_name
, prefix
, prefixlen
);
13423 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13425 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13426 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13427 SET_SYMBOL_REF_DECL (rtl
, to
);
13428 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13431 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13432 #ifdef SUB_TARGET_RECORD_STUB
13433 SUB_TARGET_RECORD_STUB (name
);
13437 rtl
= gen_const_mem (Pmode
, rtl
);
13438 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13440 SET_DECL_RTL (to
, rtl
);
13441 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13446 /* Expand SYMBOL into its corresponding far-addresse symbol.
13447 WANT_REG is true if we require the result be a register. */
13450 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13455 gcc_assert (SYMBOL_REF_DECL (symbol
));
13456 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13458 x
= DECL_RTL (imp_decl
);
13460 x
= force_reg (Pmode
, x
);
13464 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13465 true if we require the result be a register. */
13468 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13473 gcc_assert (SYMBOL_REF_DECL (symbol
));
13474 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13476 x
= DECL_RTL (imp_decl
);
13478 x
= force_reg (Pmode
, x
);
13482 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13483 is true if we require the result be a register. */
13486 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13488 if (!TARGET_PECOFF
)
13491 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13493 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13494 return legitimize_dllimport_symbol (addr
, inreg
);
13495 if (GET_CODE (addr
) == CONST
13496 && GET_CODE (XEXP (addr
, 0)) == PLUS
13497 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13498 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13500 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13501 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13505 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13507 if (GET_CODE (addr
) == SYMBOL_REF
13508 && !is_imported_p (addr
)
13509 && SYMBOL_REF_EXTERNAL_P (addr
)
13510 && SYMBOL_REF_DECL (addr
))
13511 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13513 if (GET_CODE (addr
) == CONST
13514 && GET_CODE (XEXP (addr
, 0)) == PLUS
13515 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13516 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13517 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13518 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13520 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13521 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13526 /* Try machine-dependent ways of modifying an illegitimate address
13527 to be legitimate. If we find one, return the new, valid address.
13528 This macro is used in only one place: `memory_address' in explow.c.
13530 OLDX is the address as it was before break_out_memory_refs was called.
13531 In some cases it is useful to look at this to decide what needs to be done.
13533 It is always safe for this macro to do nothing. It exists to recognize
13534 opportunities to optimize the output.
13536 For the 80386, we handle X+REG by loading X into a register R and
13537 using R+REG. R will go in a general reg and indexing will be used.
13538 However, if REG is a broken-out memory address or multiplication,
13539 nothing needs to be done because REG can certainly go in a general reg.
13541 When -fpic is used, special handling is needed for symbolic references.
13542 See comments by legitimize_pic_address in i386.c for details. */
13545 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13546 enum machine_mode mode
)
13551 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13553 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13554 if (GET_CODE (x
) == CONST
13555 && GET_CODE (XEXP (x
, 0)) == PLUS
13556 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13557 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13559 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13560 (enum tls_model
) log
, false);
13561 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13564 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13566 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13571 if (flag_pic
&& SYMBOLIC_CONST (x
))
13572 return legitimize_pic_address (x
, 0);
13575 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13576 return machopic_indirect_data_reference (x
, 0);
13579 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13580 if (GET_CODE (x
) == ASHIFT
13581 && CONST_INT_P (XEXP (x
, 1))
13582 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13585 log
= INTVAL (XEXP (x
, 1));
13586 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13587 GEN_INT (1 << log
));
13590 if (GET_CODE (x
) == PLUS
)
13592 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13594 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13595 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13596 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13599 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13600 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13601 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13602 GEN_INT (1 << log
));
13605 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13606 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13607 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13610 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13611 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13612 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13613 GEN_INT (1 << log
));
13616 /* Put multiply first if it isn't already. */
13617 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13619 rtx tmp
= XEXP (x
, 0);
13620 XEXP (x
, 0) = XEXP (x
, 1);
13625 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13626 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13627 created by virtual register instantiation, register elimination, and
13628 similar optimizations. */
13629 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13632 x
= gen_rtx_PLUS (Pmode
,
13633 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13634 XEXP (XEXP (x
, 1), 0)),
13635 XEXP (XEXP (x
, 1), 1));
13639 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13640 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13641 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13642 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13643 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13644 && CONSTANT_P (XEXP (x
, 1)))
13647 rtx other
= NULL_RTX
;
13649 if (CONST_INT_P (XEXP (x
, 1)))
13651 constant
= XEXP (x
, 1);
13652 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13654 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13656 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13657 other
= XEXP (x
, 1);
13665 x
= gen_rtx_PLUS (Pmode
,
13666 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13667 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13668 plus_constant (Pmode
, other
,
13669 INTVAL (constant
)));
13673 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13676 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13679 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13682 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13685 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13689 && REG_P (XEXP (x
, 1))
13690 && REG_P (XEXP (x
, 0)))
13693 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13696 x
= legitimize_pic_address (x
, 0);
13699 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13702 if (REG_P (XEXP (x
, 0)))
13704 rtx temp
= gen_reg_rtx (Pmode
);
13705 rtx val
= force_operand (XEXP (x
, 1), temp
);
13708 val
= convert_to_mode (Pmode
, val
, 1);
13709 emit_move_insn (temp
, val
);
13712 XEXP (x
, 1) = temp
;
13716 else if (REG_P (XEXP (x
, 1)))
13718 rtx temp
= gen_reg_rtx (Pmode
);
13719 rtx val
= force_operand (XEXP (x
, 0), temp
);
13722 val
= convert_to_mode (Pmode
, val
, 1);
13723 emit_move_insn (temp
, val
);
13726 XEXP (x
, 0) = temp
;
13734 /* Print an integer constant expression in assembler syntax. Addition
13735 and subtraction are the only arithmetic that may appear in these
13736 expressions. FILE is the stdio stream to write to, X is the rtx, and
13737 CODE is the operand print code from the output string. */
13740 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13744 switch (GET_CODE (x
))
13747 gcc_assert (flag_pic
);
13752 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13753 output_addr_const (file
, x
);
13756 const char *name
= XSTR (x
, 0);
13758 /* Mark the decl as referenced so that cgraph will
13759 output the function. */
13760 if (SYMBOL_REF_DECL (x
))
13761 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13764 if (MACHOPIC_INDIRECT
13765 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13766 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13768 assemble_name (file
, name
);
13770 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13771 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13772 fputs ("@PLT", file
);
13779 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13780 assemble_name (asm_out_file
, buf
);
13784 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13788 /* This used to output parentheses around the expression,
13789 but that does not work on the 386 (either ATT or BSD assembler). */
13790 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13794 if (GET_MODE (x
) == VOIDmode
)
13796 /* We can use %d if the number is <32 bits and positive. */
13797 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13798 fprintf (file
, "0x%lx%08lx",
13799 (unsigned long) CONST_DOUBLE_HIGH (x
),
13800 (unsigned long) CONST_DOUBLE_LOW (x
));
13802 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13805 /* We can't handle floating point constants;
13806 TARGET_PRINT_OPERAND must handle them. */
13807 output_operand_lossage ("floating constant misused");
13811 /* Some assemblers need integer constants to appear first. */
13812 if (CONST_INT_P (XEXP (x
, 0)))
13814 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13816 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13820 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13821 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13823 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13829 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13830 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13832 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13834 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13838 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13840 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13845 gcc_assert (XVECLEN (x
, 0) == 1);
13846 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13847 switch (XINT (x
, 1))
13850 fputs ("@GOT", file
);
13852 case UNSPEC_GOTOFF
:
13853 fputs ("@GOTOFF", file
);
13855 case UNSPEC_PLTOFF
:
13856 fputs ("@PLTOFF", file
);
13859 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13860 "(%rip)" : "[rip]", file
);
13862 case UNSPEC_GOTPCREL
:
13863 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13864 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13866 case UNSPEC_GOTTPOFF
:
13867 /* FIXME: This might be @TPOFF in Sun ld too. */
13868 fputs ("@gottpoff", file
);
13871 fputs ("@tpoff", file
);
13873 case UNSPEC_NTPOFF
:
13875 fputs ("@tpoff", file
);
13877 fputs ("@ntpoff", file
);
13879 case UNSPEC_DTPOFF
:
13880 fputs ("@dtpoff", file
);
13882 case UNSPEC_GOTNTPOFF
:
13884 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13885 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13887 fputs ("@gotntpoff", file
);
13889 case UNSPEC_INDNTPOFF
:
13890 fputs ("@indntpoff", file
);
13893 case UNSPEC_MACHOPIC_OFFSET
:
13895 machopic_output_function_base_name (file
);
13899 output_operand_lossage ("invalid UNSPEC as operand");
13905 output_operand_lossage ("invalid expression as operand");
13909 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13910 We need to emit DTP-relative relocations. */
13912 static void ATTRIBUTE_UNUSED
13913 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13915 fputs (ASM_LONG
, file
);
13916 output_addr_const (file
, x
);
13917 fputs ("@dtpoff", file
);
13923 fputs (", 0", file
);
13926 gcc_unreachable ();
13930 /* Return true if X is a representation of the PIC register. This copes
13931 with calls from ix86_find_base_term, where the register might have
13932 been replaced by a cselib value. */
13935 ix86_pic_register_p (rtx x
)
13937 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13938 return (pic_offset_table_rtx
13939 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13941 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13944 /* Helper function for ix86_delegitimize_address.
13945 Attempt to delegitimize TLS local-exec accesses. */
13948 ix86_delegitimize_tls_address (rtx orig_x
)
13950 rtx x
= orig_x
, unspec
;
13951 struct ix86_address addr
;
13953 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13957 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13959 if (ix86_decompose_address (x
, &addr
) == 0
13960 || addr
.seg
!= DEFAULT_TLS_SEG_REG
13961 || addr
.disp
== NULL_RTX
13962 || GET_CODE (addr
.disp
) != CONST
)
13964 unspec
= XEXP (addr
.disp
, 0);
13965 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13966 unspec
= XEXP (unspec
, 0);
13967 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13969 x
= XVECEXP (unspec
, 0, 0);
13970 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13971 if (unspec
!= XEXP (addr
.disp
, 0))
13972 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13975 rtx idx
= addr
.index
;
13976 if (addr
.scale
!= 1)
13977 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13978 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13981 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13982 if (MEM_P (orig_x
))
13983 x
= replace_equiv_address_nv (orig_x
, x
);
13987 /* In the name of slightly smaller debug output, and to cater to
13988 general assembler lossage, recognize PIC+GOTOFF and turn it back
13989 into a direct symbol reference.
13991 On Darwin, this is necessary to avoid a crash, because Darwin
13992 has a different PIC label for each routine but the DWARF debugging
13993 information is not associated with any particular routine, so it's
13994 necessary to remove references to the PIC label from RTL stored by
13995 the DWARF output code. */
13998 ix86_delegitimize_address (rtx x
)
14000 rtx orig_x
= delegitimize_mem_from_attrs (x
);
14001 /* addend is NULL or some rtx if x is something+GOTOFF where
14002 something doesn't include the PIC register. */
14003 rtx addend
= NULL_RTX
;
14004 /* reg_addend is NULL or a multiple of some register. */
14005 rtx reg_addend
= NULL_RTX
;
14006 /* const_addend is NULL or a const_int. */
14007 rtx const_addend
= NULL_RTX
;
14008 /* This is the result, or NULL. */
14009 rtx result
= NULL_RTX
;
14018 if (GET_CODE (x
) == CONST
14019 && GET_CODE (XEXP (x
, 0)) == PLUS
14020 && GET_MODE (XEXP (x
, 0)) == Pmode
14021 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
14022 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
14023 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
14025 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
14026 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
14027 if (MEM_P (orig_x
))
14028 x
= replace_equiv_address_nv (orig_x
, x
);
14032 if (GET_CODE (x
) == CONST
14033 && GET_CODE (XEXP (x
, 0)) == UNSPEC
14034 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
14035 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
14036 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
14038 x
= XVECEXP (XEXP (x
, 0), 0, 0);
14039 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
14041 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
14049 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
14050 return ix86_delegitimize_tls_address (orig_x
);
14052 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14053 and -mcmodel=medium -fpic. */
14056 if (GET_CODE (x
) != PLUS
14057 || GET_CODE (XEXP (x
, 1)) != CONST
)
14058 return ix86_delegitimize_tls_address (orig_x
);
14060 if (ix86_pic_register_p (XEXP (x
, 0)))
14061 /* %ebx + GOT/GOTOFF */
14063 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14065 /* %ebx + %reg * scale + GOT/GOTOFF */
14066 reg_addend
= XEXP (x
, 0);
14067 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
14068 reg_addend
= XEXP (reg_addend
, 1);
14069 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
14070 reg_addend
= XEXP (reg_addend
, 0);
14073 reg_addend
= NULL_RTX
;
14074 addend
= XEXP (x
, 0);
14078 addend
= XEXP (x
, 0);
14080 x
= XEXP (XEXP (x
, 1), 0);
14081 if (GET_CODE (x
) == PLUS
14082 && CONST_INT_P (XEXP (x
, 1)))
14084 const_addend
= XEXP (x
, 1);
14088 if (GET_CODE (x
) == UNSPEC
14089 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
14090 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
14091 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
14092 && !MEM_P (orig_x
) && !addend
)))
14093 result
= XVECEXP (x
, 0, 0);
14095 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
14096 && !MEM_P (orig_x
))
14097 result
= XVECEXP (x
, 0, 0);
14100 return ix86_delegitimize_tls_address (orig_x
);
14103 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
14105 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
14108 /* If the rest of original X doesn't involve the PIC register, add
14109 addend and subtract pic_offset_table_rtx. This can happen e.g.
14111 leal (%ebx, %ecx, 4), %ecx
14113 movl foo@GOTOFF(%ecx), %edx
14114 in which case we return (%ecx - %ebx) + foo. */
14115 if (pic_offset_table_rtx
)
14116 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
14117 pic_offset_table_rtx
),
14122 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
14124 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
14125 if (result
== NULL_RTX
)
14131 /* If X is a machine specific address (i.e. a symbol or label being
14132 referenced as a displacement from the GOT implemented using an
14133 UNSPEC), then return the base term. Otherwise return X. */
14136 ix86_find_base_term (rtx x
)
14142 if (GET_CODE (x
) != CONST
)
14144 term
= XEXP (x
, 0);
14145 if (GET_CODE (term
) == PLUS
14146 && (CONST_INT_P (XEXP (term
, 1))
14147 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
14148 term
= XEXP (term
, 0);
14149 if (GET_CODE (term
) != UNSPEC
14150 || (XINT (term
, 1) != UNSPEC_GOTPCREL
14151 && XINT (term
, 1) != UNSPEC_PCREL
))
14154 return XVECEXP (term
, 0, 0);
14157 return ix86_delegitimize_address (x
);
14161 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14162 bool fp
, FILE *file
)
14164 const char *suffix
;
14166 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14168 code
= ix86_fp_compare_code_to_integer (code
);
14172 code
= reverse_condition (code
);
14223 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14227 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14228 Those same assemblers have the same but opposite lossage on cmov. */
14229 if (mode
== CCmode
)
14230 suffix
= fp
? "nbe" : "a";
14232 gcc_unreachable ();
14248 gcc_unreachable ();
14252 if (mode
== CCmode
)
14254 else if (mode
== CCCmode
)
14257 gcc_unreachable ();
14273 gcc_unreachable ();
14277 if (mode
== CCmode
)
14278 suffix
= fp
? "nb" : "ae";
14279 else if (mode
== CCCmode
)
14282 gcc_unreachable ();
14285 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14289 if (mode
== CCmode
)
14292 gcc_unreachable ();
14295 suffix
= fp
? "u" : "p";
14298 suffix
= fp
? "nu" : "np";
14301 gcc_unreachable ();
14303 fputs (suffix
, file
);
14306 /* Print the name of register X to FILE based on its machine mode and number.
14307 If CODE is 'w', pretend the mode is HImode.
14308 If CODE is 'b', pretend the mode is QImode.
14309 If CODE is 'k', pretend the mode is SImode.
14310 If CODE is 'q', pretend the mode is DImode.
14311 If CODE is 'x', pretend the mode is V4SFmode.
14312 If CODE is 't', pretend the mode is V8SFmode.
14313 If CODE is 'g', pretend the mode is V16SFmode.
14314 If CODE is 'h', pretend the reg is the 'high' byte register.
14315 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14316 If CODE is 'd', duplicate the operand for AVX instruction.
14320 print_reg (rtx x
, int code
, FILE *file
)
14323 unsigned int regno
;
14324 bool duplicated
= code
== 'd' && TARGET_AVX
;
14326 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14331 gcc_assert (TARGET_64BIT
);
14332 fputs ("rip", file
);
14336 regno
= true_regnum (x
);
14337 gcc_assert (regno
!= ARG_POINTER_REGNUM
14338 && regno
!= FRAME_POINTER_REGNUM
14339 && regno
!= FLAGS_REG
14340 && regno
!= FPSR_REG
14341 && regno
!= FPCR_REG
);
14343 if (code
== 'w' || MMX_REG_P (x
))
14345 else if (code
== 'b')
14347 else if (code
== 'k')
14349 else if (code
== 'q')
14351 else if (code
== 'y')
14353 else if (code
== 'h')
14355 else if (code
== 'x')
14357 else if (code
== 't')
14359 else if (code
== 'g')
14362 code
= GET_MODE_SIZE (GET_MODE (x
));
14364 /* Irritatingly, AMD extended registers use different naming convention
14365 from the normal registers: "r%d[bwd]" */
14366 if (REX_INT_REGNO_P (regno
))
14368 gcc_assert (TARGET_64BIT
);
14370 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14374 error ("extended registers have no high halves");
14389 error ("unsupported operand size for extended register");
14399 if (STACK_TOP_P (x
))
14408 if (! ANY_FP_REG_P (x
) && ! ANY_BND_REG_P (x
))
14409 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14414 reg
= hi_reg_name
[regno
];
14417 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14419 reg
= qi_reg_name
[regno
];
14422 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14424 reg
= qi_high_reg_name
[regno
];
14429 gcc_assert (!duplicated
);
14431 fputs (hi_reg_name
[regno
] + 1, file
);
14437 gcc_assert (!duplicated
);
14439 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14444 gcc_unreachable ();
14450 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14451 fprintf (file
, ", %%%s", reg
);
14453 fprintf (file
, ", %s", reg
);
14457 /* Locate some local-dynamic symbol still in use by this function
14458 so that we can print its name in some tls_local_dynamic_base
14462 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14466 if (GET_CODE (x
) == SYMBOL_REF
14467 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14469 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14476 static const char *
14477 get_some_local_dynamic_name (void)
14481 if (cfun
->machine
->some_ld_name
)
14482 return cfun
->machine
->some_ld_name
;
14484 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14485 if (NONDEBUG_INSN_P (insn
)
14486 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14487 return cfun
->machine
->some_ld_name
;
14492 /* Meaning of CODE:
14493 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14494 C -- print opcode suffix for set/cmov insn.
14495 c -- like C, but print reversed condition
14496 F,f -- likewise, but for floating-point.
14497 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14499 R -- print the prefix for register names.
14500 z -- print the opcode suffix for the size of the current operand.
14501 Z -- likewise, with special suffixes for x87 instructions.
14502 * -- print a star (in certain assembler syntax)
14503 A -- print an absolute memory reference.
14504 E -- print address with DImode register names if TARGET_64BIT.
14505 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14506 s -- print a shift double count, followed by the assemblers argument
14508 b -- print the QImode name of the register for the indicated operand.
14509 %b0 would print %al if operands[0] is reg 0.
14510 w -- likewise, print the HImode name of the register.
14511 k -- likewise, print the SImode name of the register.
14512 q -- likewise, print the DImode name of the register.
14513 x -- likewise, print the V4SFmode name of the register.
14514 t -- likewise, print the V8SFmode name of the register.
14515 g -- likewise, print the V16SFmode name of the register.
14516 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14517 y -- print "st(0)" instead of "st" as a register.
14518 d -- print duplicated register operand for AVX instruction.
14519 D -- print condition for SSE cmp instruction.
14520 P -- if PIC, print an @PLT suffix.
14521 p -- print raw symbol name.
14522 X -- don't print any sort of PIC '@' suffix for a symbol.
14523 & -- print some in-use local-dynamic symbol name.
14524 H -- print a memory address offset by 8; used for sse high-parts
14525 Y -- print condition for XOP pcom* instruction.
14526 + -- print a branch hint as 'cs' or 'ds' prefix
14527 ; -- print a semicolon (after prefixes due to bug in older gas).
14528 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14529 @ -- print a segment register of thread base pointer load
14530 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14531 ! -- print MPX prefix for jxx/call/ret instructions if required.
14535 ix86_print_operand (FILE *file
, rtx x
, int code
)
14542 switch (ASSEMBLER_DIALECT
)
14549 /* Intel syntax. For absolute addresses, registers should not
14550 be surrounded by braces. */
14554 ix86_print_operand (file
, x
, 0);
14561 gcc_unreachable ();
14564 ix86_print_operand (file
, x
, 0);
14568 /* Wrap address in an UNSPEC to declare special handling. */
14570 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14572 output_address (x
);
14576 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14581 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14586 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14591 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14596 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14601 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14606 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14607 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14610 switch (GET_MODE_SIZE (GET_MODE (x
)))
14625 output_operand_lossage
14626 ("invalid operand size for operand code 'O'");
14635 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14637 /* Opcodes don't get size suffixes if using Intel opcodes. */
14638 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14641 switch (GET_MODE_SIZE (GET_MODE (x
)))
14660 output_operand_lossage
14661 ("invalid operand size for operand code 'z'");
14666 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14668 (0, "non-integer operand used with operand code 'z'");
14672 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14673 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14676 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14678 switch (GET_MODE_SIZE (GET_MODE (x
)))
14681 #ifdef HAVE_AS_IX86_FILDS
14691 #ifdef HAVE_AS_IX86_FILDQ
14694 fputs ("ll", file
);
14702 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14704 /* 387 opcodes don't get size suffixes
14705 if the operands are registers. */
14706 if (STACK_REG_P (x
))
14709 switch (GET_MODE_SIZE (GET_MODE (x
)))
14730 output_operand_lossage
14731 ("invalid operand type used with operand code 'Z'");
14735 output_operand_lossage
14736 ("invalid operand size for operand code 'Z'");
14755 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14757 ix86_print_operand (file
, x
, 0);
14758 fputs (", ", file
);
14763 switch (GET_CODE (x
))
14766 fputs ("neq", file
);
14769 fputs ("eq", file
);
14773 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14777 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14781 fputs ("le", file
);
14785 fputs ("lt", file
);
14788 fputs ("unord", file
);
14791 fputs ("ord", file
);
14794 fputs ("ueq", file
);
14797 fputs ("nlt", file
);
14800 fputs ("nle", file
);
14803 fputs ("ule", file
);
14806 fputs ("ult", file
);
14809 fputs ("une", file
);
14812 output_operand_lossage ("operand is not a condition code, "
14813 "invalid operand code 'Y'");
14819 /* Little bit of braindamage here. The SSE compare instructions
14820 use completely different names for the comparisons than the
14821 fp conditional moves do. */
14822 switch (GET_CODE (x
))
14827 fputs ("eq_us", file
);
14831 fputs ("eq", file
);
14836 fputs ("nge", file
);
14840 fputs ("lt", file
);
14845 fputs ("ngt", file
);
14849 fputs ("le", file
);
14852 fputs ("unord", file
);
14857 fputs ("neq_oq", file
);
14861 fputs ("neq", file
);
14866 fputs ("ge", file
);
14870 fputs ("nlt", file
);
14875 fputs ("gt", file
);
14879 fputs ("nle", file
);
14882 fputs ("ord", file
);
14885 output_operand_lossage ("operand is not a condition code, "
14886 "invalid operand code 'D'");
14893 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14894 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14900 if (!COMPARISON_P (x
))
14902 output_operand_lossage ("operand is not a condition code, "
14903 "invalid operand code '%c'", code
);
14906 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14907 code
== 'c' || code
== 'f',
14908 code
== 'F' || code
== 'f',
14913 if (!offsettable_memref_p (x
))
14915 output_operand_lossage ("operand is not an offsettable memory "
14916 "reference, invalid operand code 'H'");
14919 /* It doesn't actually matter what mode we use here, as we're
14920 only going to use this for printing. */
14921 x
= adjust_address_nv (x
, DImode
, 8);
14922 /* Output 'qword ptr' for intel assembler dialect. */
14923 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14928 gcc_assert (CONST_INT_P (x
));
14930 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14931 #ifdef HAVE_AS_IX86_HLE
14932 fputs ("xacquire ", file
);
14934 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14936 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14937 #ifdef HAVE_AS_IX86_HLE
14938 fputs ("xrelease ", file
);
14940 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14942 /* We do not want to print the value of the operand. */
14946 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
14947 fputs ("{z}", file
);
14951 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14957 const char *name
= get_some_local_dynamic_name ();
14959 output_operand_lossage ("'%%&' used without any "
14960 "local dynamic TLS references");
14962 assemble_name (file
, name
);
14971 || optimize_function_for_size_p (cfun
)
14972 || !TARGET_BRANCH_PREDICTION_HINTS
)
14975 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14978 int pred_val
= XINT (x
, 0);
14980 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14981 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14983 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14985 = final_forward_branch_p (current_output_insn
) == 0;
14987 /* Emit hints only in the case default branch prediction
14988 heuristics would fail. */
14989 if (taken
!= cputaken
)
14991 /* We use 3e (DS) prefix for taken branches and
14992 2e (CS) prefix for not taken branches. */
14994 fputs ("ds ; ", file
);
14996 fputs ("cs ; ", file
);
15004 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15010 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15013 /* The kernel uses a different segment register for performance
15014 reasons; a system call would not have to trash the userspace
15015 segment register, which would be expensive. */
15016 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
15017 fputs ("fs", file
);
15019 fputs ("gs", file
);
15023 putc (TARGET_AVX2
? 'i' : 'f', file
);
15027 if (TARGET_64BIT
&& Pmode
!= word_mode
)
15028 fputs ("addr32 ", file
);
15032 if (ix86_bnd_prefixed_insn_p (NULL_RTX
))
15033 fputs ("bnd ", file
);
15037 output_operand_lossage ("invalid operand code '%c'", code
);
15042 print_reg (x
, code
, file
);
15044 else if (MEM_P (x
))
15046 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15047 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
15048 && GET_MODE (x
) != BLKmode
)
15051 switch (GET_MODE_SIZE (GET_MODE (x
)))
15053 case 1: size
= "BYTE"; break;
15054 case 2: size
= "WORD"; break;
15055 case 4: size
= "DWORD"; break;
15056 case 8: size
= "QWORD"; break;
15057 case 12: size
= "TBYTE"; break;
15059 if (GET_MODE (x
) == XFmode
)
15064 case 32: size
= "YMMWORD"; break;
15065 case 64: size
= "ZMMWORD"; break;
15067 gcc_unreachable ();
15070 /* Check for explicit size override (codes 'b', 'w', 'k',
15074 else if (code
== 'w')
15076 else if (code
== 'k')
15078 else if (code
== 'q')
15080 else if (code
== 'x')
15083 fputs (size
, file
);
15084 fputs (" PTR ", file
);
15088 /* Avoid (%rip) for call operands. */
15089 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
15090 && !CONST_INT_P (x
))
15091 output_addr_const (file
, x
);
15092 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
15093 output_operand_lossage ("invalid constraints for operand");
15095 output_address (x
);
15098 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
15103 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15104 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
15106 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15108 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15110 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
15111 (unsigned long long) (int) l
);
15113 fprintf (file
, "0x%08x", (unsigned int) l
);
15116 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
15121 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15122 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
15124 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15126 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
15129 /* These float cases don't actually occur as immediate operands. */
15130 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
15134 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
15135 fputs (dstr
, file
);
15140 /* We have patterns that allow zero sets of memory, for instance.
15141 In 64-bit mode, we should probably support all 8-byte vectors,
15142 since we can in fact encode that into an immediate. */
15143 if (GET_CODE (x
) == CONST_VECTOR
)
15145 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
15149 if (code
!= 'P' && code
!= 'p')
15151 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
15153 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15156 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
15157 || GET_CODE (x
) == LABEL_REF
)
15159 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15162 fputs ("OFFSET FLAT:", file
);
15165 if (CONST_INT_P (x
))
15166 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15167 else if (flag_pic
|| MACHOPIC_INDIRECT
)
15168 output_pic_addr_const (file
, x
, code
);
15170 output_addr_const (file
, x
);
/* Implements TARGET_PRINT_OPERAND_PUNCT_VALID_P: CODE is a valid
   punctuation operand code iff it is one of '@' '*' '+' '&' ';' '~'
   '^' '!' -- exactly the punctuation codes listed in the operand-code
   table above and handled by ix86_print_operand.
   NOTE(review): the extraction dropped the return-type line and the
   braces; the code text below is kept verbatim.  */
15175 ix86_print_operand_punct_valid_p (unsigned char code
)
15177 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
15178 || code
== ';' || code
== '~' || code
== '^' || code
== '!');
15181 /* Print a memory operand whose address is ADDR. */
15184 ix86_print_operand_address (FILE *file
, rtx addr
)
15186 struct ix86_address parts
;
15187 rtx base
, index
, disp
;
15193 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15195 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15196 gcc_assert (parts
.index
== NULL_RTX
);
15197 parts
.index
= XVECEXP (addr
, 0, 1);
15198 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15199 addr
= XVECEXP (addr
, 0, 0);
15202 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15204 gcc_assert (TARGET_64BIT
);
15205 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15208 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDMK_ADDR
)
15210 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 1), &parts
);
15211 gcc_assert (parts
.base
== NULL_RTX
|| parts
.index
== NULL_RTX
);
15212 if (parts
.base
!= NULL_RTX
)
15214 parts
.index
= parts
.base
;
15217 parts
.base
= XVECEXP (addr
, 0, 0);
15218 addr
= XVECEXP (addr
, 0, 0);
15220 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDLDX_ADDR
)
15222 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15223 gcc_assert (parts
.index
== NULL_RTX
);
15224 parts
.index
= XVECEXP (addr
, 0, 1);
15225 addr
= XVECEXP (addr
, 0, 0);
15228 ok
= ix86_decompose_address (addr
, &parts
);
15233 index
= parts
.index
;
15235 scale
= parts
.scale
;
15243 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15245 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15248 gcc_unreachable ();
15251 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15252 if (TARGET_64BIT
&& !base
&& !index
)
15256 if (GET_CODE (disp
) == CONST
15257 && GET_CODE (XEXP (disp
, 0)) == PLUS
15258 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15259 symbol
= XEXP (XEXP (disp
, 0), 0);
15261 if (GET_CODE (symbol
) == LABEL_REF
15262 || (GET_CODE (symbol
) == SYMBOL_REF
15263 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15266 if (!base
&& !index
)
15268 /* Displacement only requires special attention. */
15270 if (CONST_INT_P (disp
))
15272 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15273 fputs ("ds:", file
);
15274 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15277 output_pic_addr_const (file
, disp
, 0);
15279 output_addr_const (file
, disp
);
15283 /* Print SImode register names to force addr32 prefix. */
15284 if (SImode_address_operand (addr
, VOIDmode
))
15286 #ifdef ENABLE_CHECKING
15287 gcc_assert (TARGET_64BIT
);
15288 switch (GET_CODE (addr
))
15291 gcc_assert (GET_MODE (addr
) == SImode
);
15292 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15296 gcc_assert (GET_MODE (addr
) == DImode
);
15299 gcc_unreachable ();
15302 gcc_assert (!code
);
15308 && CONST_INT_P (disp
)
15309 && INTVAL (disp
) < -16*1024*1024)
15311 /* X32 runs in 64-bit mode, where displacement, DISP, in
15312 address DISP(%r64), is encoded as 32-bit immediate sign-
15313 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15314 address is %r64 + 0xffffffffbffffd00. When %r64 <
15315 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15316 which is invalid for x32. The correct address is %r64
15317 - 0x40000300 == 0xf7ffdd64. To properly encode
15318 -0x40000300(%r64) for x32, we zero-extend negative
15319 displacement by forcing addr32 prefix which truncates
15320 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15321 zero-extend all negative displacements, including -1(%rsp).
15322 However, for small negative displacements, sign-extension
15323 won't cause overflow. We only zero-extend negative
15324 displacements if they < -16*1024*1024, which is also used
15325 to check legitimate address displacements for PIC. */
15329 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15334 output_pic_addr_const (file
, disp
, 0);
15335 else if (GET_CODE (disp
) == LABEL_REF
)
15336 output_asm_label (disp
);
15338 output_addr_const (file
, disp
);
15343 print_reg (base
, code
, file
);
15347 print_reg (index
, vsib
? 0 : code
, file
);
15348 if (scale
!= 1 || vsib
)
15349 fprintf (file
, ",%d", scale
);
15355 rtx offset
= NULL_RTX
;
15359 /* Pull out the offset of a symbol; print any symbol itself. */
15360 if (GET_CODE (disp
) == CONST
15361 && GET_CODE (XEXP (disp
, 0)) == PLUS
15362 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15364 offset
= XEXP (XEXP (disp
, 0), 1);
15365 disp
= gen_rtx_CONST (VOIDmode
,
15366 XEXP (XEXP (disp
, 0), 0));
15370 output_pic_addr_const (file
, disp
, 0);
15371 else if (GET_CODE (disp
) == LABEL_REF
)
15372 output_asm_label (disp
);
15373 else if (CONST_INT_P (disp
))
15376 output_addr_const (file
, disp
);
15382 print_reg (base
, code
, file
);
15385 if (INTVAL (offset
) >= 0)
15387 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15391 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15398 print_reg (index
, vsib
? 0 : code
, file
);
15399 if (scale
!= 1 || vsib
)
15400 fprintf (file
, "*%d", scale
);
15407 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
/* For an UNSPEC wrapped around a symbolic constant, print the operand
   followed by the matching TLS relocation suffix (@gottpoff, @tpoff,
   @ntpoff, @dtpoff, @gotntpoff, @indntpoff), a Mach-O PIC base name,
   or the %fs/%gs split-stack guard reference.
   NOTE(review): the extraction dropped the 'return true/false'
   statements, several TARGET_64BIT conditionals, '#else' lines and
   braces; the code text below is kept verbatim.  */
15410 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15414 if (GET_CODE (x
) != UNSPEC
)
15417 op
= XVECEXP (x
, 0, 0);
15418 switch (XINT (x
, 1))
15420 case UNSPEC_GOTTPOFF
:
15421 output_addr_const (file
, op
);
15422 /* FIXME: This might be @TPOFF in Sun ld. */
15423 fputs ("@gottpoff", file
);
15426 output_addr_const (file
, op
);
15427 fputs ("@tpoff", file
);
15429 case UNSPEC_NTPOFF
:
15430 output_addr_const (file
, op
);
15432 fputs ("@tpoff", file
);
15434 fputs ("@ntpoff", file
);
15436 case UNSPEC_DTPOFF
:
15437 output_addr_const (file
, op
);
15438 fputs ("@dtpoff", file
);
15440 case UNSPEC_GOTNTPOFF
:
15441 output_addr_const (file
, op
);
15443 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15444 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15446 fputs ("@gotntpoff", file
);
15448 case UNSPEC_INDNTPOFF
:
15449 output_addr_const (file
, op
);
15450 fputs ("@indntpoff", file
);
15453 case UNSPEC_MACHOPIC_OFFSET
:
15454 output_addr_const (file
, op
);
15456 machopic_output_function_base_name (file
);
15460 case UNSPEC_STACK_CHECK
:
15464 gcc_assert (flag_split_stack
);
15466 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15467 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15469 gcc_unreachable ();
15472 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15483 /* Split one or more double-mode RTL references into pairs of half-mode
15484 references. The RTL can be REG, offsettable MEM, integer constant, or
15485 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15486 split and "num" is its length. lo_half and hi_half are output arrays
15487 that parallel "operands". */
/* NOTE(review): the extraction dropped the switch over MODE (the two
   half_mode assignments below presumably correspond to the
   TImode->DImode and DImode->SImode cases -- confirm against the full
   source), the per-operand loop header, the volatile-MEM test guarding
   the adjust_address branch, and several braces; the code text below
   is kept verbatim.  */
15490 split_double_mode (enum machine_mode mode
, rtx operands
[],
15491 int num
, rtx lo_half
[], rtx hi_half
[])
15493 enum machine_mode half_mode
;
15499 half_mode
= DImode
;
15502 half_mode
= SImode
;
15505 gcc_unreachable ();
15508 byte
= GET_MODE_SIZE (half_mode
);
15512 rtx op
= operands
[num
];
15514 /* simplify_subreg refuses to split volatile memory addresses,
15515 but we still have to handle it. */
15518 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15519 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15523 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15524 GET_MODE (op
) == VOIDmode
15525 ? mode
: GET_MODE (op
), 0);
15526 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15527 GET_MODE (op
) == VOIDmode
15528 ? mode
: GET_MODE (op
), byte
);
15533 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15534 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15535 is the expression of the binary operation. The output may either be
15536 emitted here, or returned to the caller, like all output_* functions.
15538 There is no guarantee that the operands are the same mode, as they
15539 might be within FLOAT or FLOAT_EXTEND expressions. */
15541 #ifndef SYSV386_COMPAT
15542 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15543 wants to fix the assemblers because that causes incompatibility
15544 with gcc. No-one wants to fix gcc because that causes
15545 incompatibility with assemblers... You can use the option of
15546 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15547 #define SYSV386_COMPAT 1
15551 output_387_binary_op (rtx insn
, rtx
*operands
)
15553 static char buf
[40];
15556 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15558 #ifdef ENABLE_CHECKING
15559 /* Even if we do not want to check the inputs, this documents input
15560 constraints. Which helps in understanding the following code. */
15561 if (STACK_REG_P (operands
[0])
15562 && ((REG_P (operands
[1])
15563 && REGNO (operands
[0]) == REGNO (operands
[1])
15564 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15565 || (REG_P (operands
[2])
15566 && REGNO (operands
[0]) == REGNO (operands
[2])
15567 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15568 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15571 gcc_assert (is_sse
);
15574 switch (GET_CODE (operands
[3]))
15577 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15578 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15586 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15587 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15595 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15596 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15604 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15605 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15613 gcc_unreachable ();
15620 strcpy (buf
, ssep
);
15621 if (GET_MODE (operands
[0]) == SFmode
)
15622 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15624 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15628 strcpy (buf
, ssep
+ 1);
15629 if (GET_MODE (operands
[0]) == SFmode
)
15630 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15632 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15638 switch (GET_CODE (operands
[3]))
15642 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15644 rtx temp
= operands
[2];
15645 operands
[2] = operands
[1];
15646 operands
[1] = temp
;
15649 /* know operands[0] == operands[1]. */
15651 if (MEM_P (operands
[2]))
15657 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15659 if (STACK_TOP_P (operands
[0]))
15660 /* How is it that we are storing to a dead operand[2]?
15661 Well, presumably operands[1] is dead too. We can't
15662 store the result to st(0) as st(0) gets popped on this
15663 instruction. Instead store to operands[2] (which I
15664 think has to be st(1)). st(1) will be popped later.
15665 gcc <= 2.8.1 didn't have this check and generated
15666 assembly code that the Unixware assembler rejected. */
15667 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15669 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15673 if (STACK_TOP_P (operands
[0]))
15674 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15676 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15681 if (MEM_P (operands
[1]))
15687 if (MEM_P (operands
[2]))
15693 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15696 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15697 derived assemblers, confusingly reverse the direction of
15698 the operation for fsub{r} and fdiv{r} when the
15699 destination register is not st(0). The Intel assembler
15700 doesn't have this brain damage. Read !SYSV386_COMPAT to
15701 figure out what the hardware really does. */
15702 if (STACK_TOP_P (operands
[0]))
15703 p
= "{p\t%0, %2|rp\t%2, %0}";
15705 p
= "{rp\t%2, %0|p\t%0, %2}";
15707 if (STACK_TOP_P (operands
[0]))
15708 /* As above for fmul/fadd, we can't store to st(0). */
15709 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15711 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15716 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15719 if (STACK_TOP_P (operands
[0]))
15720 p
= "{rp\t%0, %1|p\t%1, %0}";
15722 p
= "{p\t%1, %0|rp\t%0, %1}";
15724 if (STACK_TOP_P (operands
[0]))
15725 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15727 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15732 if (STACK_TOP_P (operands
[0]))
15734 if (STACK_TOP_P (operands
[1]))
15735 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15737 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15740 else if (STACK_TOP_P (operands
[1]))
15743 p
= "{\t%1, %0|r\t%0, %1}";
15745 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15751 p
= "{r\t%2, %0|\t%0, %2}";
15753 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15759 gcc_unreachable ();
15766 /* Check if a 256bit AVX register is referenced inside of EXP. */
/* for_each_rtx callback: strips an outer SUBREG, then tests the
   expression's mode with VALID_AVX256_REG_OR_OI_MODE.
   NOTE(review): the extraction dropped the return type, the
   'exp = *pexp' load and the register test that presumably precedes
   the mode check -- confirm against the full source; code text below
   is kept verbatim.  */
15769 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15773 if (GET_CODE (exp
) == SUBREG
)
15774 exp
= SUBREG_REG (exp
);
15777 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15783 /* Return needed mode for entity in optimize_mode_switching pass. */
/* AVX upper-128 "needed" mode for INSN: for calls, DIRTY if any
   256-bit AVX register is used as an argument (via
   CALL_INSN_FUNCTION_USAGE), else CLEAN; for other insns, DIRTY if a
   256-bit AVX register appears anywhere in the pattern, else ANY.
   NOTE(review): the CALL_P (insn) guard and the enclosing braces were
   dropped by the extraction; code text below is kept verbatim.  */
15786 ix86_avx_u128_mode_needed (rtx insn
)
15792 /* Needed mode is set to AVX_U128_CLEAN if there are
15793 no 256bit modes used in function arguments. */
15794 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15796 link
= XEXP (link
, 1))
15798 if (GET_CODE (XEXP (link
, 0)) == USE
)
15800 rtx arg
= XEXP (XEXP (link
, 0), 0);
15802 if (ix86_check_avx256_register (&arg
, NULL
))
15803 return AVX_U128_DIRTY
;
15807 return AVX_U128_CLEAN
;
15810 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15811 changes state only when a 256bit register is written to, but we need
15812 to prevent the compiler from moving optimal insertion point above
15813 eventual read from 256bit register. */
15814 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15815 return AVX_U128_DIRTY
;
15817 return AVX_U128_ANY
;
15820 /* Return mode that i387 must be switched into
15821 prior to the execution of insn. */
/* Returns I387_CW_UNINITIALIZED after calls/asms, I387_CW_ANY for
   unrecognizable insns, otherwise consults the insn's i387_cw
   attribute.  NOTE(review): the CALL_P test that presumably starts
   the first condition, and the entity-specific returns inside the
   four mode tests (original lines 15843-15869), were dropped by the
   extraction; code text below is kept verbatim.  */
15824 ix86_i387_mode_needed (int entity
, rtx insn
)
15826 enum attr_i387_cw mode
;
15828 /* The mode UNINITIALIZED is used to store control word after a
15829 function call or ASM pattern. The mode ANY specifies that the function
15830 has no requirements on the control word and makes no changes in the
15831 bits we are interested in. */
15834 || (NONJUMP_INSN_P (insn
)
15835 && (asm_noperands (PATTERN (insn
)) >= 0
15836 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15837 return I387_CW_UNINITIALIZED
;
15839 if (recog_memoized (insn
) < 0)
15840 return I387_CW_ANY
;
15842 mode
= get_attr_i387_cw (insn
);
15847 if (mode
== I387_CW_TRUNC
)
15852 if (mode
== I387_CW_FLOOR
)
15857 if (mode
== I387_CW_CEIL
)
15862 if (mode
== I387_CW_MASK_PM
)
15867 gcc_unreachable ();
15870 return I387_CW_ANY
;
15873 /* Return mode that entity must be switched into
15874 prior to the execution of insn. */
/* Dispatch by ENTITY: the AVX_U128 entity defers to
   ix86_avx_u128_mode_needed, the i387 control-word entities to
   ix86_i387_mode_needed.  NOTE(review): the 'switch (entity)' line
   and case labels were dropped by the extraction.  */
15877 ix86_mode_needed (int entity
, rtx insn
)
15882 return ix86_avx_u128_mode_needed (insn
);
15887 return ix86_i387_mode_needed (entity
, insn
);
15889 gcc_unreachable ();
15894 /* Check if a 256bit AVX register is referenced in stores. */
/* note_stores callback: when DEST is (or contains) a 256-bit AVX
   register, record that fact through the bool pointed to by DATA.
   NOTE(review): the extraction dropped the return type and the
   assignment through '*used'; code text below is kept verbatim.  */
15897 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15899 if (ix86_check_avx256_register (&dest
, NULL
))
15901 bool *used
= (bool *) data
;
15906 /* Calculate mode of upper 128bit AVX registers after the insn. */
/* CLEAN after vzeroupper/vzeroall; after a call, DIRTY iff a 256-bit
   AVX register is among the stores (function return value), else
   CLEAN; otherwise the incoming MODE is preserved.
   NOTE(review): the CALL_P guard before the note_stores scan and the
   final 'return mode;' were dropped by the extraction; code text
   below is kept verbatim.  */
15909 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15911 rtx pat
= PATTERN (insn
);
15913 if (vzeroupper_operation (pat
, VOIDmode
)
15914 || vzeroall_operation (pat
, VOIDmode
))
15915 return AVX_U128_CLEAN
;
15917 /* We know that state is clean after CALL insn if there are no
15918 256bit registers used in the function return register. */
15921 bool avx_reg256_found
= false;
15922 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15924 return avx_reg256_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
15927 /* Otherwise, return current mode. Remember that if insn
15928 references AVX 256bit registers, the mode was already changed
15929 to DIRTY from MODE_NEEDED. */
15933 /* Return the mode that an insn results in. */
/* Dispatch by ENTITY: the AVX_U128 entity defers to
   ix86_avx_u128_mode_after.  NOTE(review): the 'switch (entity)'
   scaffolding and the i387 cases (which presumably return MODE) were
   dropped by the extraction.  */
15936 ix86_mode_after (int entity
, int mode
, rtx insn
)
15941 return ix86_avx_u128_mode_after (mode
, insn
);
15948 gcc_unreachable ();
/* Entry mode for the AVX upper-128 entity: DIRTY when any incoming
   function argument arrives in a 256-bit AVX register (checked via
   DECL_INCOMING_RTL on each DECL_ARGUMENTS entry), else CLEAN.
   NOTE(review): the return type and the 'tree arg;' declaration were
   dropped by the extraction; code text below is kept verbatim.  */
15953 ix86_avx_u128_mode_entry (void)
15957 /* Entry mode is set to AVX_U128_DIRTY if there are
15958 256bit modes used in function arguments. */
15959 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15960 arg
= TREE_CHAIN (arg
))
15962 rtx incoming
= DECL_INCOMING_RTL (arg
);
15964 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15965 return AVX_U128_DIRTY
;
15968 return AVX_U128_CLEAN
;
15971 /* Return a mode that ENTITY is assumed to be
15972 switched to at function entry. */
/* AVX_U128 entity -> computed entry mode; i387 control-word entities
   -> I387_CW_ANY.  NOTE(review): 'switch (entity)' and case labels
   were dropped by the extraction.  */
15975 ix86_mode_entry (int entity
)
15980 return ix86_avx_u128_mode_entry ();
15985 return I387_CW_ANY
;
15987 gcc_unreachable ();
/* Exit mode for the AVX upper-128 entity: DIRTY when the function's
   return value (crtl->return_rtx) lives in a 256-bit AVX register,
   else CLEAN.  NOTE(review): the '®' character below is a mojibake of
   '&reg' (the HTML entity '&reg;' was decoded during extraction); the
   byte sequence is preserved verbatim here -- restoring '&reg' needs
   a source-level fix, not a comment.  */
15992 ix86_avx_u128_mode_exit (void)
15994 rtx reg
= crtl
->return_rtx
;
15996 /* Exit mode is set to AVX_U128_DIRTY if there are
15997 256bit modes used in the function return register. */
15998 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15999 return AVX_U128_DIRTY
;
16001 return AVX_U128_CLEAN
;
16004 /* Return a mode that ENTITY is assumed to be
16005 switched to at function exit. */
/* AVX_U128 entity -> computed exit mode; i387 control-word entities
   -> I387_CW_ANY.  NOTE(review): 'switch (entity)' and case labels
   were dropped by the extraction.  */
16008 ix86_mode_exit (int entity
)
16013 return ix86_avx_u128_mode_exit ();
16018 return I387_CW_ANY
;
16020 gcc_unreachable ();
16024 /* Output code to initialize control word copies used by trunc?f?i and
16025 rounding patterns. CURRENT_MODE is set to current control word,
16026 while NEW_MODE is set to new control word. */
/* Saves the current x87 control word (fnstcw) into a stack slot,
   derives a modified copy in REG for the requested rounding/masking
   MODE, and stores it to the per-mode stack slot.  The first arm
   (TARGET_64BIT / partial-reg-stall / size) edits the full HImode
   word with and/ior; the second arm uses movsi_insv_1 to insert the
   rounding-control nibble.  Control word bits: 0x0c00 = RC field
   (both bits -> truncate), 0x0400 = round down, 0x0800 = round up,
   0x0020 = PM (precision exception mask).
   NOTE(review): the two 'switch (mode)' headers, 'break;' statements,
   braces, and the 'rtx new_mode' declaration were dropped by the
   extraction; code text below is kept verbatim.  */
16029 emit_i387_cw_initialization (int mode
)
16031 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
16034 enum ix86_stack_slot slot
;
16036 rtx reg
= gen_reg_rtx (HImode
);
16038 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
16039 emit_move_insn (reg
, copy_rtx (stored_mode
));
16041 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
16042 || optimize_insn_for_size_p ())
16046 case I387_CW_TRUNC
:
16047 /* round toward zero (truncate) */
16048 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
16049 slot
= SLOT_CW_TRUNC
;
16052 case I387_CW_FLOOR
:
16053 /* round down toward -oo */
16054 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16055 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
16056 slot
= SLOT_CW_FLOOR
;
16060 /* round up toward +oo */
16061 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16062 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
16063 slot
= SLOT_CW_CEIL
;
16066 case I387_CW_MASK_PM
:
16067 /* mask precision exception for nearbyint() */
16068 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16069 slot
= SLOT_CW_MASK_PM
;
16073 gcc_unreachable ();
16080 case I387_CW_TRUNC
:
16081 /* round toward zero (truncate) */
16082 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
16083 slot
= SLOT_CW_TRUNC
;
16086 case I387_CW_FLOOR
:
16087 /* round down toward -oo */
16088 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
16089 slot
= SLOT_CW_FLOOR
;
16093 /* round up toward +oo */
16094 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
16095 slot
= SLOT_CW_CEIL
;
16098 case I387_CW_MASK_PM
:
16099 /* mask precision exception for nearbyint() */
16100 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16101 slot
= SLOT_CW_MASK_PM
;
16105 gcc_unreachable ();
16109 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
16111 new_mode
= assign_386_stack_local (HImode
, slot
);
16112 emit_move_insn (new_mode
, reg
);
16115 /* Emit vzeroupper. */
/* Emit an AVX vzeroupper insn, unless a call-saved SSE register (in
   either the xmm0-xmm7 or REX xmm8-xmm15 range) is live in REGS_LIVE
   at the insertion point, in which case insertion is cancelled.
   NOTE(review): the early 'return;' bodies of the two scans were
   dropped by the extraction; code text below is kept verbatim.  */
16118 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
16122 /* Cancel automatic vzeroupper insertion if there are
16123 live call-saved SSE registers at the insertion point. */
16125 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
16126 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16130 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
16131 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16134 emit_insn (gen_avx_vzeroupper ());
16137 /* Generate one or more insns to set ENTITY to MODE. */
/* TARGET_MODE_SET hook: for the AVX upper-128 entity, a transition
   to CLEAN emits vzeroupper (subject to the live-register check);
   for i387 control-word entities, any mode other than ANY or
   UNINITIALIZED emits the control-word setup sequence.
   NOTE(review): the 'switch (entity)' scaffolding and case labels
   were dropped by the extraction; code text below is kept verbatim.  */
16140 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
16145 if (mode
== AVX_U128_CLEAN
)
16146 ix86_avx_emit_vzeroupper (regs_live
);
16152 if (mode
!= I387_CW_ANY
16153 && mode
!= I387_CW_UNINITIALIZED
)
16154 emit_i387_cw_initialization (mode
);
16157 gcc_unreachable ();
16161 /* Output code for INSN to convert a float to a signed int. OPERANDS
16162 are the insn operands. The output may be [HSD]Imode and the input
16163 operand may be [SDX]Fmode. */
/* Emits fld/fisttp or fldcw/fist(p)/fldcw sequences: the value is
   duplicated with 'fld %y1' when a popping store is needed but the
   stack top does not die; fisttp is used when FISTTP is available,
   otherwise the control word is swapped around the store via
   'fldcw %3' (new CW) ... 'fldcw %2' (saved CW) when a non-default
   rounding mode is required.
   NOTE(review): the if/else scaffolding around the fisttp branch and
   the closing 'return ""'/braces were dropped by the extraction; code
   text below is kept verbatim.  */
16166 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
16168 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16169 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
16170 int round_mode
= get_attr_i387_cw (insn
);
16172 /* Jump through a hoop or two for DImode, since the hardware has no
16173 non-popping instruction. We used to do this a different way, but
16174 that was somewhat fragile and broke with post-reload splitters. */
16175 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
16176 output_asm_insn ("fld\t%y1", operands
);
16178 gcc_assert (STACK_TOP_P (operands
[1]));
16179 gcc_assert (MEM_P (operands
[0]));
16180 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
16183 output_asm_insn ("fisttp%Z0\t%0", operands
);
16186 if (round_mode
!= I387_CW_ANY
)
16187 output_asm_insn ("fldcw\t%3", operands
);
16188 if (stack_top_dies
|| dimode_p
)
16189 output_asm_insn ("fistp%Z0\t%0", operands
);
16191 output_asm_insn ("fist%Z0\t%0", operands
);
16192 if (round_mode
!= I387_CW_ANY
)
16193 output_asm_insn ("fldcw\t%2", operands
);
16199 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16200 have the values zero or one, indicates the ffreep insn's operand
16201 from the OPERANDS array. */
/* When the assembler supports ffreep, emit it symbolically; otherwise
   emit the instruction as raw data via ASM_SHORT with the 0xc%ddf
   pattern (presumably encoding ffreep st(regno) -- confirm against
   the ffreep opcode table).  Without TARGET_USE_FFREEP, fall back to
   a plain popping fstp.
   NOTE(review): the '#else'/'#endif' lines, braces and the
   'return retval;' were dropped by the extraction; code text below is
   kept verbatim.  */
16203 static const char *
16204 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16206 if (TARGET_USE_FFREEP
)
16207 #ifdef HAVE_AS_IX86_FFREEP
16208 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
16211 static char retval
[32];
16212 int regno
= REGNO (operands
[opno
]);
16214 gcc_assert (STACK_REGNO_P (regno
));
16216 regno
-= FIRST_STACK_REG
;
16218 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16223 return opno
? "fstp\t%y1" : "fstp\t%y0";
16227 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16228 should be used. UNORDERED_P is true when fucom should be used. */
16231 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16233 int stack_top_dies
;
16234 rtx cmp_op0
, cmp_op1
;
16235 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16239 cmp_op0
= operands
[0];
16240 cmp_op1
= operands
[1];
16244 cmp_op0
= operands
[1];
16245 cmp_op1
= operands
[2];
16250 if (GET_MODE (operands
[0]) == SFmode
)
16252 return "%vucomiss\t{%1, %0|%0, %1}";
16254 return "%vcomiss\t{%1, %0|%0, %1}";
16257 return "%vucomisd\t{%1, %0|%0, %1}";
16259 return "%vcomisd\t{%1, %0|%0, %1}";
16262 gcc_assert (STACK_TOP_P (cmp_op0
));
16264 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16266 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16268 if (stack_top_dies
)
16270 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16271 return output_387_ffreep (operands
, 1);
16274 return "ftst\n\tfnstsw\t%0";
16277 if (STACK_REG_P (cmp_op1
)
16279 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16280 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16282 /* If both the top of the 387 stack dies, and the other operand
16283 is also a stack register that dies, then this must be a
16284 `fcompp' float compare */
16288 /* There is no double popping fcomi variant. Fortunately,
16289 eflags is immune from the fstp's cc clobbering. */
16291 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16293 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16294 return output_387_ffreep (operands
, 0);
16299 return "fucompp\n\tfnstsw\t%0";
16301 return "fcompp\n\tfnstsw\t%0";
16306 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16308 static const char * const alt
[16] =
16310 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16311 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16312 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16313 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16315 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16316 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16320 "fcomi\t{%y1, %0|%0, %y1}",
16321 "fcomip\t{%y1, %0|%0, %y1}",
16322 "fucomi\t{%y1, %0|%0, %y1}",
16323 "fucomip\t{%y1, %0|%0, %y1}",
16334 mask
= eflags_p
<< 3;
16335 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16336 mask
|= unordered_p
<< 1;
16337 mask
|= stack_top_dies
;
16339 gcc_assert (mask
< 16);
16348 ix86_output_addr_vec_elt (FILE *file
, int value
)
16350 const char *directive
= ASM_LONG
;
16354 directive
= ASM_QUAD
;
16356 gcc_assert (!TARGET_64BIT
);
16359 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16363 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16365 const char *directive
= ASM_LONG
;
16368 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16369 directive
= ASM_QUAD
;
16371 gcc_assert (!TARGET_64BIT
);
16373 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16374 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16375 fprintf (file
, "%s%s%d-%s%d\n",
16376 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16377 else if (HAVE_AS_GOTOFF_IN_DATA
)
16378 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16380 else if (TARGET_MACHO
)
16382 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16383 machopic_output_function_base_name (file
);
16388 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16389 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16392 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16396 ix86_expand_clear (rtx dest
)
16400 /* We play register width games, which are only valid after reload. */
16401 gcc_assert (reload_completed
);
16403 /* Avoid HImode and its attendant prefix byte. */
16404 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16405 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16406 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16408 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16409 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16411 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16412 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16418 /* X is an unchanging MEM. If it is a constant pool reference, return
16419 the constant pool rtx, else NULL. */
16422 maybe_get_pool_constant (rtx x
)
16424 x
= ix86_delegitimize_address (XEXP (x
, 0));
16426 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16427 return get_pool_constant (x
);
16433 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16436 enum tls_model model
;
16441 if (GET_CODE (op1
) == SYMBOL_REF
)
16445 model
= SYMBOL_REF_TLS_MODEL (op1
);
16448 op1
= legitimize_tls_address (op1
, model
, true);
16449 op1
= force_operand (op1
, op0
);
16452 op1
= convert_to_mode (mode
, op1
, 1);
16454 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16457 else if (GET_CODE (op1
) == CONST
16458 && GET_CODE (XEXP (op1
, 0)) == PLUS
16459 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16461 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16462 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16465 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16467 tmp
= legitimize_tls_address (symbol
, model
, true);
16469 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16473 tmp
= force_operand (tmp
, NULL
);
16474 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16475 op0
, 1, OPTAB_DIRECT
);
16478 op1
= convert_to_mode (mode
, tmp
, 1);
16482 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16483 && symbolic_operand (op1
, mode
))
16485 if (TARGET_MACHO
&& !TARGET_64BIT
)
16488 /* dynamic-no-pic */
16489 if (MACHOPIC_INDIRECT
)
16491 rtx temp
= ((reload_in_progress
16492 || ((op0
&& REG_P (op0
))
16494 ? op0
: gen_reg_rtx (Pmode
));
16495 op1
= machopic_indirect_data_reference (op1
, temp
);
16497 op1
= machopic_legitimize_pic_address (op1
, mode
,
16498 temp
== op1
? 0 : temp
);
16500 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16502 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16506 if (GET_CODE (op0
) == MEM
)
16507 op1
= force_reg (Pmode
, op1
);
16511 if (GET_CODE (temp
) != REG
)
16512 temp
= gen_reg_rtx (Pmode
);
16513 temp
= legitimize_pic_address (op1
, temp
);
16518 /* dynamic-no-pic */
16524 op1
= force_reg (mode
, op1
);
16525 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16527 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16528 op1
= legitimize_pic_address (op1
, reg
);
16531 op1
= convert_to_mode (mode
, op1
, 1);
16538 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16539 || !push_operand (op0
, mode
))
16541 op1
= force_reg (mode
, op1
);
16543 if (push_operand (op0
, mode
)
16544 && ! general_no_elim_operand (op1
, mode
))
16545 op1
= copy_to_mode_reg (mode
, op1
);
16547 /* Force large constants in 64bit compilation into register
16548 to get them CSEed. */
16549 if (can_create_pseudo_p ()
16550 && (mode
== DImode
) && TARGET_64BIT
16551 && immediate_operand (op1
, mode
)
16552 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16553 && !register_operand (op0
, mode
)
16555 op1
= copy_to_mode_reg (mode
, op1
);
16557 if (can_create_pseudo_p ()
16558 && FLOAT_MODE_P (mode
)
16559 && GET_CODE (op1
) == CONST_DOUBLE
)
16561 /* If we are loading a floating point constant to a register,
16562 force the value to memory now, since we'll get better code
16563 out the back end. */
16565 op1
= validize_mem (force_const_mem (mode
, op1
));
16566 if (!register_operand (op0
, mode
))
16568 rtx temp
= gen_reg_rtx (mode
);
16569 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16570 emit_move_insn (op0
, temp
);
16576 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16580 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16582 rtx op0
= operands
[0], op1
= operands
[1];
16583 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16585 /* Force constants other than zero into memory. We do not know how
16586 the instructions used to build constants modify the upper 64 bits
16587 of the register, once we have that information we may be able
16588 to handle some of them more efficiently. */
16589 if (can_create_pseudo_p ()
16590 && register_operand (op0
, mode
)
16591 && (CONSTANT_P (op1
)
16592 || (GET_CODE (op1
) == SUBREG
16593 && CONSTANT_P (SUBREG_REG (op1
))))
16594 && !standard_sse_constant_p (op1
))
16595 op1
= validize_mem (force_const_mem (mode
, op1
));
16597 /* We need to check memory alignment for SSE mode since attribute
16598 can make operands unaligned. */
16599 if (can_create_pseudo_p ()
16600 && SSE_REG_MODE_P (mode
)
16601 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16602 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16606 /* ix86_expand_vector_move_misalign() does not like constants ... */
16607 if (CONSTANT_P (op1
)
16608 || (GET_CODE (op1
) == SUBREG
16609 && CONSTANT_P (SUBREG_REG (op1
))))
16610 op1
= validize_mem (force_const_mem (mode
, op1
));
16612 /* ... nor both arguments in memory. */
16613 if (!register_operand (op0
, mode
)
16614 && !register_operand (op1
, mode
))
16615 op1
= force_reg (mode
, op1
);
16617 tmp
[0] = op0
; tmp
[1] = op1
;
16618 ix86_expand_vector_move_misalign (mode
, tmp
);
16622 /* Make operand1 a register if it isn't already. */
16623 if (can_create_pseudo_p ()
16624 && !register_operand (op0
, mode
)
16625 && !register_operand (op1
, mode
))
16627 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16631 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16634 /* Split 32-byte AVX unaligned load and store if needed. */
16637 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16640 rtx (*extract
) (rtx
, rtx
, rtx
);
16641 rtx (*load_unaligned
) (rtx
, rtx
);
16642 rtx (*store_unaligned
) (rtx
, rtx
);
16643 enum machine_mode mode
;
16645 switch (GET_MODE (op0
))
16648 gcc_unreachable ();
16650 extract
= gen_avx_vextractf128v32qi
;
16651 load_unaligned
= gen_avx_loaddquv32qi
;
16652 store_unaligned
= gen_avx_storedquv32qi
;
16656 extract
= gen_avx_vextractf128v8sf
;
16657 load_unaligned
= gen_avx_loadups256
;
16658 store_unaligned
= gen_avx_storeups256
;
16662 extract
= gen_avx_vextractf128v4df
;
16663 load_unaligned
= gen_avx_loadupd256
;
16664 store_unaligned
= gen_avx_storeupd256
;
16671 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16673 rtx r
= gen_reg_rtx (mode
);
16674 m
= adjust_address (op1
, mode
, 0);
16675 emit_move_insn (r
, m
);
16676 m
= adjust_address (op1
, mode
, 16);
16677 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16678 emit_move_insn (op0
, r
);
16680 /* Normal *mov<mode>_internal pattern will handle
16681 unaligned loads just fine if misaligned_operand
16682 is true, and without the UNSPEC it can be combined
16683 with arithmetic instructions. */
16684 else if (misaligned_operand (op1
, GET_MODE (op1
)))
16685 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16687 emit_insn (load_unaligned (op0
, op1
));
16689 else if (MEM_P (op0
))
16691 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16693 m
= adjust_address (op0
, mode
, 0);
16694 emit_insn (extract (m
, op1
, const0_rtx
));
16695 m
= adjust_address (op0
, mode
, 16);
16696 emit_insn (extract (m
, op1
, const1_rtx
));
16699 emit_insn (store_unaligned (op0
, op1
));
16702 gcc_unreachable ();
16705 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16706 straight to ix86_expand_vector_move. */
16707 /* Code generation for scalar reg-reg moves of single and double precision data:
16708 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16712 if (x86_sse_partial_reg_dependency == true)
16717 Code generation for scalar loads of double precision data:
16718 if (x86_sse_split_regs == true)
16719 movlpd mem, reg (gas syntax)
16723 Code generation for unaligned packed loads of single precision data
16724 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16725 if (x86_sse_unaligned_move_optimal)
16728 if (x86_sse_partial_reg_dependency == true)
16740 Code generation for unaligned packed loads of double precision data
16741 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16742 if (x86_sse_unaligned_move_optimal)
16745 if (x86_sse_split_regs == true)
16758 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16760 rtx op0
, op1
, orig_op0
= NULL_RTX
, m
;
16761 rtx (*load_unaligned
) (rtx
, rtx
);
16762 rtx (*store_unaligned
) (rtx
, rtx
);
16767 if (GET_MODE_SIZE (mode
) == 64)
16769 switch (GET_MODE_CLASS (mode
))
16771 case MODE_VECTOR_INT
:
16773 if (GET_MODE (op0
) != V16SImode
)
16778 op0
= gen_reg_rtx (V16SImode
);
16781 op0
= gen_lowpart (V16SImode
, op0
);
16783 op1
= gen_lowpart (V16SImode
, op1
);
16786 case MODE_VECTOR_FLOAT
:
16787 switch (GET_MODE (op0
))
16790 gcc_unreachable ();
16792 load_unaligned
= gen_avx512f_loaddquv16si
;
16793 store_unaligned
= gen_avx512f_storedquv16si
;
16796 load_unaligned
= gen_avx512f_loadups512
;
16797 store_unaligned
= gen_avx512f_storeups512
;
16800 load_unaligned
= gen_avx512f_loadupd512
;
16801 store_unaligned
= gen_avx512f_storeupd512
;
16806 emit_insn (load_unaligned (op0
, op1
));
16807 else if (MEM_P (op0
))
16808 emit_insn (store_unaligned (op0
, op1
));
16810 gcc_unreachable ();
16812 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16816 gcc_unreachable ();
16823 && GET_MODE_SIZE (mode
) == 32)
16825 switch (GET_MODE_CLASS (mode
))
16827 case MODE_VECTOR_INT
:
16829 if (GET_MODE (op0
) != V32QImode
)
16834 op0
= gen_reg_rtx (V32QImode
);
16837 op0
= gen_lowpart (V32QImode
, op0
);
16839 op1
= gen_lowpart (V32QImode
, op1
);
16842 case MODE_VECTOR_FLOAT
:
16843 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16845 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16849 gcc_unreachable ();
16857 /* Normal *mov<mode>_internal pattern will handle
16858 unaligned loads just fine if misaligned_operand
16859 is true, and without the UNSPEC it can be combined
16860 with arithmetic instructions. */
16862 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
16863 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
16864 && misaligned_operand (op1
, GET_MODE (op1
)))
16865 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16866 /* ??? If we have typed data, then it would appear that using
16867 movdqu is the only way to get unaligned data loaded with
16869 else if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16871 if (GET_MODE (op0
) != V16QImode
)
16874 op0
= gen_reg_rtx (V16QImode
);
16876 op1
= gen_lowpart (V16QImode
, op1
);
16877 /* We will eventually emit movups based on insn attributes. */
16878 emit_insn (gen_sse2_loaddquv16qi (op0
, op1
));
16880 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16882 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16887 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16888 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16889 || optimize_insn_for_size_p ())
16891 /* We will eventually emit movups based on insn attributes. */
16892 emit_insn (gen_sse2_loadupd (op0
, op1
));
16896 /* When SSE registers are split into halves, we can avoid
16897 writing to the top half twice. */
16898 if (TARGET_SSE_SPLIT_REGS
)
16900 emit_clobber (op0
);
16905 /* ??? Not sure about the best option for the Intel chips.
16906 The following would seem to satisfy; the register is
16907 entirely cleared, breaking the dependency chain. We
16908 then store to the upper half, with a dependency depth
16909 of one. A rumor has it that Intel recommends two movsd
16910 followed by an unpacklpd, but this is unconfirmed. And
16911 given that the dependency depth of the unpacklpd would
16912 still be one, I'm not sure why this would be better. */
16913 zero
= CONST0_RTX (V2DFmode
);
16916 m
= adjust_address (op1
, DFmode
, 0);
16917 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16918 m
= adjust_address (op1
, DFmode
, 8);
16919 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16926 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16927 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16928 || optimize_insn_for_size_p ())
16930 if (GET_MODE (op0
) != V4SFmode
)
16933 op0
= gen_reg_rtx (V4SFmode
);
16935 op1
= gen_lowpart (V4SFmode
, op1
);
16936 emit_insn (gen_sse_loadups (op0
, op1
));
16938 emit_move_insn (orig_op0
,
16939 gen_lowpart (GET_MODE (orig_op0
), op0
));
16943 if (mode
!= V4SFmode
)
16944 t
= gen_reg_rtx (V4SFmode
);
16948 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16949 emit_move_insn (t
, CONST0_RTX (V4SFmode
));
16953 m
= adjust_address (op1
, V2SFmode
, 0);
16954 emit_insn (gen_sse_loadlps (t
, t
, m
));
16955 m
= adjust_address (op1
, V2SFmode
, 8);
16956 emit_insn (gen_sse_loadhps (t
, t
, m
));
16957 if (mode
!= V4SFmode
)
16958 emit_move_insn (op0
, gen_lowpart (mode
, t
));
16961 else if (MEM_P (op0
))
16963 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16965 op0
= gen_lowpart (V16QImode
, op0
);
16966 op1
= gen_lowpart (V16QImode
, op1
);
16967 /* We will eventually emit movups based on insn attributes. */
16968 emit_insn (gen_sse2_storedquv16qi (op0
, op1
));
16970 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16973 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16974 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16975 || optimize_insn_for_size_p ())
16976 /* We will eventually emit movups based on insn attributes. */
16977 emit_insn (gen_sse2_storeupd (op0
, op1
));
16980 m
= adjust_address (op0
, DFmode
, 0);
16981 emit_insn (gen_sse2_storelpd (m
, op1
));
16982 m
= adjust_address (op0
, DFmode
, 8);
16983 emit_insn (gen_sse2_storehpd (m
, op1
));
16988 if (mode
!= V4SFmode
)
16989 op1
= gen_lowpart (V4SFmode
, op1
);
16992 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16993 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16994 || optimize_insn_for_size_p ())
16996 op0
= gen_lowpart (V4SFmode
, op0
);
16997 emit_insn (gen_sse_storeups (op0
, op1
));
17001 m
= adjust_address (op0
, V2SFmode
, 0);
17002 emit_insn (gen_sse_storelps (m
, op1
));
17003 m
= adjust_address (op0
, V2SFmode
, 8);
17004 emit_insn (gen_sse_storehps (m
, op1
));
17009 gcc_unreachable ();
17012 /* Expand a push in MODE. This is some mode for which we do not support
17013 proper push instructions, at least from the registers that we expect
17014 the value to live in. */
17017 ix86_expand_push (enum machine_mode mode
, rtx x
)
17021 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
17022 GEN_INT (-GET_MODE_SIZE (mode
)),
17023 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
17024 if (tmp
!= stack_pointer_rtx
)
17025 emit_move_insn (stack_pointer_rtx
, tmp
);
17027 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
17029 /* When we push an operand onto stack, it has to be aligned at least
17030 at the function argument boundary. However since we don't have
17031 the argument type, we can't determine the actual argument
17033 emit_move_insn (tmp
, x
);
17036 /* Helper function of ix86_fixup_binary_operands to canonicalize
17037 operand order. Returns true if the operands should be swapped. */
17040 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
17043 rtx dst
= operands
[0];
17044 rtx src1
= operands
[1];
17045 rtx src2
= operands
[2];
17047 /* If the operation is not commutative, we can't do anything. */
17048 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
17051 /* Highest priority is that src1 should match dst. */
17052 if (rtx_equal_p (dst
, src1
))
17054 if (rtx_equal_p (dst
, src2
))
17057 /* Next highest priority is that immediate constants come second. */
17058 if (immediate_operand (src2
, mode
))
17060 if (immediate_operand (src1
, mode
))
17063 /* Lowest priority is that memory references should come second. */
17073 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17074 destination to use for the operation. If different from the true
17075 destination in operands[0], a copy operation will be required. */
17078 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
17081 rtx dst
= operands
[0];
17082 rtx src1
= operands
[1];
17083 rtx src2
= operands
[2];
17085 /* Canonicalize operand order. */
17086 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17090 /* It is invalid to swap operands of different modes. */
17091 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
17098 /* Both source operands cannot be in memory. */
17099 if (MEM_P (src1
) && MEM_P (src2
))
17101 /* Optimization: Only read from memory once. */
17102 if (rtx_equal_p (src1
, src2
))
17104 src2
= force_reg (mode
, src2
);
17107 else if (rtx_equal_p (dst
, src1
))
17108 src2
= force_reg (mode
, src2
);
17110 src1
= force_reg (mode
, src1
);
17113 /* If the destination is memory, and we do not have matching source
17114 operands, do things in registers. */
17115 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17116 dst
= gen_reg_rtx (mode
);
17118 /* Source 1 cannot be a constant. */
17119 if (CONSTANT_P (src1
))
17120 src1
= force_reg (mode
, src1
);
17122 /* Source 1 cannot be a non-matching memory. */
17123 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17124 src1
= force_reg (mode
, src1
);
17126 /* Improve address combine. */
17128 && GET_MODE_CLASS (mode
) == MODE_INT
17130 src2
= force_reg (mode
, src2
);
17132 operands
[1] = src1
;
17133 operands
[2] = src2
;
17137 /* Similarly, but assume that the destination has already been
17138 set up properly. */
17141 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
17142 enum machine_mode mode
, rtx operands
[])
17144 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17145 gcc_assert (dst
== operands
[0]);
17148 /* Attempt to expand a binary operator. Make the expansion closer to the
17149 actual machine, then just general_operand, which will allow 3 separate
17150 memory references (one output, two input) in a single insn. */
17153 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
17156 rtx src1
, src2
, dst
, op
, clob
;
17158 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17159 src1
= operands
[1];
17160 src2
= operands
[2];
17162 /* Emit the instruction. */
17164 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
17165 if (reload_in_progress
)
17167 /* Reload doesn't know about the flags register, and doesn't know that
17168 it doesn't want to clobber it. We can only do this with PLUS. */
17169 gcc_assert (code
== PLUS
);
17172 else if (reload_completed
17174 && !rtx_equal_p (dst
, src1
))
17176 /* This is going to be an LEA; avoid splitting it later. */
17181 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17182 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17185 /* Fix up the destination if needed. */
17186 if (dst
!= operands
[0])
17187 emit_move_insn (operands
[0], dst
);
17190 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17191 the given OPERANDS. */
17194 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
17197 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
17198 if (GET_CODE (operands
[1]) == SUBREG
)
17203 else if (GET_CODE (operands
[2]) == SUBREG
)
17208 /* Optimize (__m128i) d | (__m128i) e and similar code
17209 when d and e are float vectors into float vector logical
17210 insn. In C/C++ without using intrinsics there is no other way
17211 to express vector logical operation on float vectors than
17212 to cast them temporarily to integer vectors. */
17214 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17215 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
17216 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
17217 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
17218 && SUBREG_BYTE (op1
) == 0
17219 && (GET_CODE (op2
) == CONST_VECTOR
17220 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
17221 && SUBREG_BYTE (op2
) == 0))
17222 && can_create_pseudo_p ())
17225 switch (GET_MODE (SUBREG_REG (op1
)))
17231 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
17232 if (GET_CODE (op2
) == CONST_VECTOR
)
17234 op2
= gen_lowpart (GET_MODE (dst
), op2
);
17235 op2
= force_reg (GET_MODE (dst
), op2
);
17240 op2
= SUBREG_REG (operands
[2]);
17241 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
17242 op2
= force_reg (GET_MODE (dst
), op2
);
17244 op1
= SUBREG_REG (op1
);
17245 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
17246 op1
= force_reg (GET_MODE (dst
), op1
);
17247 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
17248 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
17250 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
17256 if (!nonimmediate_operand (operands
[1], mode
))
17257 operands
[1] = force_reg (mode
, operands
[1]);
17258 if (!nonimmediate_operand (operands
[2], mode
))
17259 operands
[2] = force_reg (mode
, operands
[2]);
17260 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
17261 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
17262 gen_rtx_fmt_ee (code
, mode
, operands
[1],
17266 /* Return TRUE or FALSE depending on whether the binary operator meets the
17267 appropriate constraints. */
17270 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
17273 rtx dst
= operands
[0];
17274 rtx src1
= operands
[1];
17275 rtx src2
= operands
[2];
17277 /* Both source operands cannot be in memory. */
17278 if (MEM_P (src1
) && MEM_P (src2
))
17281 /* Canonicalize operand order for commutative operators. */
17282 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17289 /* If the destination is memory, we must have a matching source operand. */
17290 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17293 /* Source 1 cannot be a constant. */
17294 if (CONSTANT_P (src1
))
17297 /* Source 1 cannot be a non-matching memory. */
17298 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17299 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17300 return (code
== AND
17303 || (TARGET_64BIT
&& mode
== DImode
))
17304 && satisfies_constraint_L (src2
));
17309 /* Attempt to expand a unary operator. Make the expansion closer to the
17310 actual machine, then just general_operand, which will allow 2 separate
17311 memory references (one output, one input) in a single insn. */
17314 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17317 int matching_memory
;
17318 rtx src
, dst
, op
, clob
;
17323 /* If the destination is memory, and we do not have matching source
17324 operands, do things in registers. */
17325 matching_memory
= 0;
17328 if (rtx_equal_p (dst
, src
))
17329 matching_memory
= 1;
17331 dst
= gen_reg_rtx (mode
);
17334 /* When source operand is memory, destination must match. */
17335 if (MEM_P (src
) && !matching_memory
)
17336 src
= force_reg (mode
, src
);
17338 /* Emit the instruction. */
17340 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17341 if (reload_in_progress
|| code
== NOT
)
17343 /* Reload doesn't know about the flags register, and doesn't know that
17344 it doesn't want to clobber it. */
17345 gcc_assert (code
== NOT
);
17350 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17351 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17354 /* Fix up the destination if needed. */
17355 if (dst
!= operands
[0])
17356 emit_move_insn (operands
[0], dst
);
17359 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17360 divisor are within the range [0-255]. */
17363 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17366 rtx end_label
, qimode_label
;
17367 rtx insn
, div
, mod
;
17368 rtx scratch
, tmp0
, tmp1
, tmp2
;
17369 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17370 rtx (*gen_zero_extend
) (rtx
, rtx
);
17371 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17376 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17377 gen_test_ccno_1
= gen_testsi_ccno_1
;
17378 gen_zero_extend
= gen_zero_extendqisi2
;
17381 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17382 gen_test_ccno_1
= gen_testdi_ccno_1
;
17383 gen_zero_extend
= gen_zero_extendqidi2
;
17386 gcc_unreachable ();
17389 end_label
= gen_label_rtx ();
17390 qimode_label
= gen_label_rtx ();
17392 scratch
= gen_reg_rtx (mode
);
17394 /* Use 8bit unsigned divimod if dividend and divisor are within
17395 the range [0-255]. */
17396 emit_move_insn (scratch
, operands
[2]);
17397 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17398 scratch
, 1, OPTAB_DIRECT
);
17399 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17400 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17401 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17402 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17403 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17405 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
17406 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17407 JUMP_LABEL (insn
) = qimode_label
;
17409 /* Generate original signed/unsigned divimod. */
17410 div
= gen_divmod4_1 (operands
[0], operands
[1],
17411 operands
[2], operands
[3]);
17414 /* Branch to the end. */
17415 emit_jump_insn (gen_jump (end_label
));
17418 /* Generate 8bit unsigned divide. */
17419 emit_label (qimode_label
);
17420 /* Don't use operands[0] for result of 8bit divide since not all
17421 registers support QImode ZERO_EXTRACT. */
17422 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17423 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17424 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17425 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
17429 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17430 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17434 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17435 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17438 /* Extract remainder from AH. */
17439 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17440 if (REG_P (operands
[1]))
17441 insn
= emit_move_insn (operands
[1], tmp1
);
17444 /* Need a new scratch register since the old one has result
17446 scratch
= gen_reg_rtx (mode
);
17447 emit_move_insn (scratch
, tmp1
);
17448 insn
= emit_move_insn (operands
[1], scratch
);
17450 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17452 /* Zero extend quotient from AL. */
17453 tmp1
= gen_lowpart (QImode
, tmp0
);
17454 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17455 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17457 emit_label (end_label
);
17460 /* Whether it is OK to emit CFI directives when emitting asm code. */
17465 return dwarf2out_do_cfi_asm ();
17468 #define LEA_MAX_STALL (3)
17469 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17471 /* Increase given DISTANCE in half-cycles according to
17472 dependencies between PREV and NEXT instructions.
17473 Add 1 half-cycle if there is no dependency and
17474 go to next cycle if there is some dependecy. */
17476 static unsigned int
17477 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
17482 if (!prev
|| !next
)
17483 return distance
+ (distance
& 1) + 2;
17485 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17486 return distance
+ 1;
17488 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17489 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17490 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17491 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17492 return distance
+ (distance
& 1) + 2;
17494 return distance
+ 1;
17497 /* Function checks if instruction INSN defines register number
17498 REGNO1 or REGNO2. */
17501 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17506 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17507 if (DF_REF_REG_DEF_P (*def_rec
)
17508 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17509 && (regno1
== DF_REF_REGNO (*def_rec
)
17510 || regno2
== DF_REF_REGNO (*def_rec
)))
17518 /* Function checks if instruction INSN uses register number
17519 REGNO as a part of address expression. */
17522 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17526 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17527 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17533 /* Search backward for non-agu definition of register number REGNO1
17534 or register number REGNO2 in basic block starting from instruction
17535 START up to head of basic block or instruction INSN.
17537 Function puts true value into *FOUND var if definition was found
17538 and false otherwise.
17540 Distance in half-cycles between START and found instruction or head
17541 of BB is added to DISTANCE and returned. */
17544 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17545 rtx insn
, int distance
,
17546 rtx start
, bool *found
)
17548 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17556 && distance
< LEA_SEARCH_THRESHOLD
)
17558 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17560 distance
= increase_distance (prev
, next
, distance
);
17561 if (insn_defines_reg (regno1
, regno2
, prev
))
17563 if (recog_memoized (prev
) < 0
17564 || get_attr_type (prev
) != TYPE_LEA
)
17573 if (prev
== BB_HEAD (bb
))
17576 prev
= PREV_INSN (prev
);
17582 /* Search backward for non-agu definition of register number REGNO1
17583 or register number REGNO2 in INSN's basic block until
17584 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17585 2. Reach neighbour BBs boundary, or
17586 3. Reach agu definition.
17587 Returns the distance between the non-agu definition point and INSN.
17588 If no definition point, returns -1. */
17591 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17594 basic_block bb
= BLOCK_FOR_INSN (insn
);
17596 bool found
= false;
17598 if (insn
!= BB_HEAD (bb
))
17599 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17600 distance
, PREV_INSN (insn
),
17603 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17607 bool simple_loop
= false;
17609 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17612 simple_loop
= true;
17617 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17619 BB_END (bb
), &found
);
17622 int shortest_dist
= -1;
17623 bool found_in_bb
= false;
17625 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17628 = distance_non_agu_define_in_bb (regno1
, regno2
,
17634 if (shortest_dist
< 0)
17635 shortest_dist
= bb_dist
;
17636 else if (bb_dist
> 0)
17637 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17643 distance
= shortest_dist
;
17647 /* get_attr_type may modify recog data. We want to make sure
17648 that recog data is valid for instruction INSN, on which
17649 distance_non_agu_define is called. INSN is unchanged here. */
17650 extract_insn_cached (insn
);
17655 return distance
>> 1;
17658 /* Return the distance in half-cycles between INSN and the next
17659 insn that uses register number REGNO in memory address added
17660 to DISTANCE. Return -1 if REGNO0 is set.
17662 Put true value into *FOUND if register usage was found and
17664 Put true value into *REDEFINED if register redefinition was
17665 found and false otherwise. */
17668 distance_agu_use_in_bb (unsigned int regno
,
17669 rtx insn
, int distance
, rtx start
,
17670 bool *found
, bool *redefined
)
17672 basic_block bb
= NULL
;
17677 *redefined
= false;
17679 if (start
!= NULL_RTX
)
17681 bb
= BLOCK_FOR_INSN (start
);
17682 if (start
!= BB_HEAD (bb
))
17683 /* If insn and start belong to the same bb, set prev to insn,
17684 so the call to increase_distance will increase the distance
17685 between insns by 1. */
17691 && distance
< LEA_SEARCH_THRESHOLD
)
17693 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17695 distance
= increase_distance(prev
, next
, distance
);
17696 if (insn_uses_reg_mem (regno
, next
))
17698 /* Return DISTANCE if OP0 is used in memory
17699 address in NEXT. */
17704 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17706 /* Return -1 if OP0 is set in NEXT. */
17714 if (next
== BB_END (bb
))
17717 next
= NEXT_INSN (next
);
17723 /* Return the distance between INSN and the next insn that uses
17724 register number REGNO0 in memory address. Return -1 if no such
17725 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17728 distance_agu_use (unsigned int regno0
, rtx insn
)
17730 basic_block bb
= BLOCK_FOR_INSN (insn
);
17732 bool found
= false;
17733 bool redefined
= false;
17735 if (insn
!= BB_END (bb
))
17736 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17738 &found
, &redefined
);
17740 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17744 bool simple_loop
= false;
17746 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17749 simple_loop
= true;
17754 distance
= distance_agu_use_in_bb (regno0
, insn
,
17755 distance
, BB_HEAD (bb
),
17756 &found
, &redefined
);
17759 int shortest_dist
= -1;
17760 bool found_in_bb
= false;
17761 bool redefined_in_bb
= false;
17763 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17766 = distance_agu_use_in_bb (regno0
, insn
,
17767 distance
, BB_HEAD (e
->dest
),
17768 &found_in_bb
, &redefined_in_bb
);
17771 if (shortest_dist
< 0)
17772 shortest_dist
= bb_dist
;
17773 else if (bb_dist
> 0)
17774 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17780 distance
= shortest_dist
;
17784 if (!found
|| redefined
)
17787 return distance
>> 1;
17790 /* Define this macro to tune LEA priority vs ADD, it takes effect when
17791 there is a dilemma of choosing LEA or ADD
17792 Negative value: ADD is more preferred than LEA
17794 Positive value: LEA is more preferred than ADD*/
17795 #define IX86_LEA_PRIORITY 0
17797 /* Return true if usage of lea INSN has performance advantage
17798 over a sequence of instructions. Instructions sequence has
17799 SPLIT_COST cycles higher latency than lea latency. */
17802 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17803 unsigned int regno2
, int split_cost
, bool has_scale
)
17805 int dist_define
, dist_use
;
17807 /* For Silvermont if using a 2-source or 3-source LEA for
17808 non-destructive destination purposes, or due to wanting
17809 ability to use SCALE, the use of LEA is justified. */
17810 if (ix86_tune
== PROCESSOR_SLM
)
17814 if (split_cost
< 1)
17816 if (regno0
== regno1
|| regno0
== regno2
)
17821 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17822 dist_use
= distance_agu_use (regno0
, insn
);
17824 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17826 /* If there is no non AGU operand definition, no AGU
17827 operand usage and split cost is 0 then both lea
17828 and non lea variants have same priority. Currently
17829 we prefer lea for 64 bit code and non lea on 32 bit
17831 if (dist_use
< 0 && split_cost
== 0)
17832 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17837 /* With longer definitions distance lea is more preferable.
17838 Here we change it to take into account splitting cost and
17840 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17842 /* If there is no use in memory addess then we just check
17843 that split cost exceeds AGU stall. */
17845 return dist_define
> LEA_MAX_STALL
;
17847 /* If this insn has both backward non-agu dependence and forward
17848 agu dependence, the one with short distance takes effect. */
17849 return dist_define
>= dist_use
;
17852 /* Return true if it is legal to clobber flags by INSN and
17853 false otherwise. */
17856 ix86_ok_to_clobber_flags (rtx insn
)
17858 basic_block bb
= BLOCK_FOR_INSN (insn
);
17864 if (NONDEBUG_INSN_P (insn
))
17866 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17867 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17870 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17874 if (insn
== BB_END (bb
))
17877 insn
= NEXT_INSN (insn
);
17880 live
= df_get_live_out(bb
);
17881 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17884 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17885 move and add to avoid AGU stalls. */
17888 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17890 unsigned int regno0
, regno1
, regno2
;
17892 /* Check if we need to optimize. */
17893 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17896 /* Check it is correct to split here. */
17897 if (!ix86_ok_to_clobber_flags(insn
))
17900 regno0
= true_regnum (operands
[0]);
17901 regno1
= true_regnum (operands
[1]);
17902 regno2
= true_regnum (operands
[2]);
17904 /* We need to split only adds with non destructive
17905 destination operand. */
17906 if (regno0
== regno1
|| regno0
== regno2
)
17909 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17912 /* Return true if we should emit lea instruction instead of mov
17916 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17918 unsigned int regno0
, regno1
;
17920 /* Check if we need to optimize. */
17921 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17924 /* Use lea for reg to reg moves only. */
17925 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17928 regno0
= true_regnum (operands
[0]);
17929 regno1
= true_regnum (operands
[1]);
17931 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
17934 /* Return true if we need to split lea into a sequence of
17935 instructions to avoid AGU stalls. */
17938 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17940 unsigned int regno0
, regno1
, regno2
;
17942 struct ix86_address parts
;
17945 /* Check we need to optimize. */
17946 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17949 /* Check it is correct to split here. */
17950 if (!ix86_ok_to_clobber_flags(insn
))
17953 ok
= ix86_decompose_address (operands
[1], &parts
);
17956 /* There should be at least two components in the address. */
17957 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17958 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17961 /* We should not split into add if non legitimate pic
17962 operand is used as displacement. */
17963 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17966 regno0
= true_regnum (operands
[0]) ;
17967 regno1
= INVALID_REGNUM
;
17968 regno2
= INVALID_REGNUM
;
17971 regno1
= true_regnum (parts
.base
);
17973 regno2
= true_regnum (parts
.index
);
17977 /* Compute how many cycles we will add to execution time
17978 if split lea into a sequence of instructions. */
17979 if (parts
.base
|| parts
.index
)
17981 /* Have to use mov instruction if non desctructive
17982 destination form is used. */
17983 if (regno1
!= regno0
&& regno2
!= regno0
)
17986 /* Have to add index to base if both exist. */
17987 if (parts
.base
&& parts
.index
)
17990 /* Have to use shift and adds if scale is 2 or greater. */
17991 if (parts
.scale
> 1)
17993 if (regno0
!= regno1
)
17995 else if (regno2
== regno0
)
17998 split_cost
+= parts
.scale
;
18001 /* Have to use add instruction with immediate if
18002 disp is non zero. */
18003 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18006 /* Subtract the price of lea. */
18010 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
18014 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18015 matches destination. RTX includes clobber of FLAGS_REG. */
18018 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
18023 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
18024 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18026 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
18029 /* Return true if regno1 def is nearest to the insn. */
18032 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
18035 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
18039 while (prev
&& prev
!= start
)
18041 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
18043 prev
= PREV_INSN (prev
);
18046 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
18048 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
18050 prev
= PREV_INSN (prev
);
18053 /* None of the regs is defined in the bb. */
18057 /* Split lea instructions into a sequence of instructions
18058 which are executed on ALU to avoid AGU stalls.
18059 It is assumed that it is allowed to clobber flags register
18060 at lea position. */
18063 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
18065 unsigned int regno0
, regno1
, regno2
;
18066 struct ix86_address parts
;
18070 ok
= ix86_decompose_address (operands
[1], &parts
);
18073 target
= gen_lowpart (mode
, operands
[0]);
18075 regno0
= true_regnum (target
);
18076 regno1
= INVALID_REGNUM
;
18077 regno2
= INVALID_REGNUM
;
18081 parts
.base
= gen_lowpart (mode
, parts
.base
);
18082 regno1
= true_regnum (parts
.base
);
18087 parts
.index
= gen_lowpart (mode
, parts
.index
);
18088 regno2
= true_regnum (parts
.index
);
18092 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
18094 if (parts
.scale
> 1)
18096 /* Case r1 = r1 + ... */
18097 if (regno1
== regno0
)
18099 /* If we have a case r1 = r1 + C * r1 then we
18100 should use multiplication which is very
18101 expensive. Assume cost model is wrong if we
18102 have such case here. */
18103 gcc_assert (regno2
!= regno0
);
18105 for (adds
= parts
.scale
; adds
> 0; adds
--)
18106 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
18110 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18111 if (regno0
!= regno2
)
18112 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18114 /* Use shift for scaling. */
18115 ix86_emit_binop (ASHIFT
, mode
, target
,
18116 GEN_INT (exact_log2 (parts
.scale
)));
18119 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
18121 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18122 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18125 else if (!parts
.base
&& !parts
.index
)
18127 gcc_assert(parts
.disp
);
18128 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
18134 if (regno0
!= regno2
)
18135 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18137 else if (!parts
.index
)
18139 if (regno0
!= regno1
)
18140 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
18144 if (regno0
== regno1
)
18146 else if (regno0
== regno2
)
18152 /* Find better operand for SET instruction, depending
18153 on which definition is farther from the insn. */
18154 if (find_nearest_reg_def (insn
, regno1
, regno2
))
18155 tmp
= parts
.index
, tmp1
= parts
.base
;
18157 tmp
= parts
.base
, tmp1
= parts
.index
;
18159 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18161 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18162 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18164 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
18168 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
18171 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18172 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18176 /* Return true if it is ok to optimize an ADD operation to LEA
18177 operation to avoid flag register consumation. For most processors,
18178 ADD is faster than LEA. For the processors like ATOM, if the
18179 destination register of LEA holds an actual address which will be
18180 used soon, LEA is better and otherwise ADD is better. */
18183 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
18185 unsigned int regno0
= true_regnum (operands
[0]);
18186 unsigned int regno1
= true_regnum (operands
[1]);
18187 unsigned int regno2
= true_regnum (operands
[2]);
18189 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18190 if (regno0
!= regno1
&& regno0
!= regno2
)
18193 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
18196 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
18199 /* Return true if destination reg of SET_BODY is shift count of
18203 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
18209 /* Retrieve destination of SET_BODY. */
18210 switch (GET_CODE (set_body
))
18213 set_dest
= SET_DEST (set_body
);
18214 if (!set_dest
|| !REG_P (set_dest
))
18218 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
18219 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
18227 /* Retrieve shift count of USE_BODY. */
18228 switch (GET_CODE (use_body
))
18231 shift_rtx
= XEXP (use_body
, 1);
18234 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
18235 if (ix86_dep_by_shift_count_body (set_body
,
18236 XVECEXP (use_body
, 0, i
)))
18244 && (GET_CODE (shift_rtx
) == ASHIFT
18245 || GET_CODE (shift_rtx
) == LSHIFTRT
18246 || GET_CODE (shift_rtx
) == ASHIFTRT
18247 || GET_CODE (shift_rtx
) == ROTATE
18248 || GET_CODE (shift_rtx
) == ROTATERT
))
18250 rtx shift_count
= XEXP (shift_rtx
, 1);
18252 /* Return true if shift count is dest of SET_BODY. */
18253 if (REG_P (shift_count
))
18255 /* Add check since it can be invoked before register
18256 allocation in pre-reload schedule. */
18257 if (reload_completed
18258 && true_regnum (set_dest
) == true_regnum (shift_count
))
18260 else if (REGNO(set_dest
) == REGNO(shift_count
))
18268 /* Return true if destination reg of SET_INSN is shift count of
18272 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
18274 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
18275 PATTERN (use_insn
));
18278 /* Return TRUE or FALSE depending on whether the unary operator meets the
18279 appropriate constraints. */
18282 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
18283 enum machine_mode mode ATTRIBUTE_UNUSED
,
18286 /* If one of operands is memory, source and destination must match. */
18287 if ((MEM_P (operands
[0])
18288 || MEM_P (operands
[1]))
18289 && ! rtx_equal_p (operands
[0], operands
[1]))
18294 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18295 are ok, keeping in mind the possible movddup alternative. */
18298 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
18300 if (MEM_P (operands
[0]))
18301 return rtx_equal_p (operands
[0], operands
[1 + high
]);
18302 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
18303 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18307 /* Post-reload splitter for converting an SF or DFmode value in an
18308 SSE register into an unsigned SImode. */
18311 ix86_split_convert_uns_si_sse (rtx operands
[])
18313 enum machine_mode vecmode
;
18314 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18316 large
= operands
[1];
18317 zero_or_two31
= operands
[2];
18318 input
= operands
[3];
18319 two31
= operands
[4];
18320 vecmode
= GET_MODE (large
);
18321 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18323 /* Load up the value into the low element. We must ensure that the other
18324 elements are valid floats -- zero is the easiest such value. */
18327 if (vecmode
== V4SFmode
)
18328 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18330 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18334 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18335 emit_move_insn (value
, CONST0_RTX (vecmode
));
18336 if (vecmode
== V4SFmode
)
18337 emit_insn (gen_sse_movss (value
, value
, input
));
18339 emit_insn (gen_sse2_movsd (value
, value
, input
));
18342 emit_move_insn (large
, two31
);
18343 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18345 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18346 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18348 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18349 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18351 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18352 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18354 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18355 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18357 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18358 if (vecmode
== V4SFmode
)
18359 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18361 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18364 emit_insn (gen_xorv4si3 (value
, value
, large
));
18367 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18368 Expects the 64-bit DImode to be supplied in a pair of integral
18369 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18370 -mfpmath=sse, !optimize_size only. */
18373 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18375 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18376 rtx int_xmm
, fp_xmm
;
18377 rtx biases
, exponents
;
18380 int_xmm
= gen_reg_rtx (V4SImode
);
18381 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18382 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18383 else if (TARGET_SSE_SPLIT_REGS
)
18385 emit_clobber (int_xmm
);
18386 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18390 x
= gen_reg_rtx (V2DImode
);
18391 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18392 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18395 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18396 gen_rtvec (4, GEN_INT (0x43300000UL
),
18397 GEN_INT (0x45300000UL
),
18398 const0_rtx
, const0_rtx
));
18399 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18401 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18402 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18404 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18405 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18406 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18407 (0x1.0p84 + double(fp_value_hi_xmm)).
18408 Note these exponents differ by 32. */
18410 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18412 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18413 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18414 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18415 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18416 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18417 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18418 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18419 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18420 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18422 /* Add the upper and lower DFmode values together. */
18424 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18427 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18428 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18429 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18432 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18435 /* Not used, but eases macroization of patterns. */
18437 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18438 rtx input ATTRIBUTE_UNUSED
)
18440 gcc_unreachable ();
18443 /* Convert an unsigned SImode value into a DFmode. Only currently used
18444 for SSE, but applicable anywhere. */
18447 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18449 REAL_VALUE_TYPE TWO31r
;
18452 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18453 NULL
, 1, OPTAB_DIRECT
);
18455 fp
= gen_reg_rtx (DFmode
);
18456 emit_insn (gen_floatsidf2 (fp
, x
));
18458 real_ldexp (&TWO31r
, &dconst1
, 31);
18459 x
= const_double_from_real_value (TWO31r
, DFmode
);
18461 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18463 emit_move_insn (target
, x
);
18466 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18467 32-bit mode; otherwise we have a direct convert instruction. */
18470 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18472 REAL_VALUE_TYPE TWO32r
;
18473 rtx fp_lo
, fp_hi
, x
;
18475 fp_lo
= gen_reg_rtx (DFmode
);
18476 fp_hi
= gen_reg_rtx (DFmode
);
18478 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18480 real_ldexp (&TWO32r
, &dconst1
, 32);
18481 x
= const_double_from_real_value (TWO32r
, DFmode
);
18482 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18484 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18486 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18489 emit_move_insn (target
, x
);
18492 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18493 For x86_32, -mfpmath=sse, !optimize_size only. */
18495 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18497 REAL_VALUE_TYPE ONE16r
;
18498 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18500 real_ldexp (&ONE16r
, &dconst1
, 16);
18501 x
= const_double_from_real_value (ONE16r
, SFmode
);
18502 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18503 NULL
, 0, OPTAB_DIRECT
);
18504 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18505 NULL
, 0, OPTAB_DIRECT
);
18506 fp_hi
= gen_reg_rtx (SFmode
);
18507 fp_lo
= gen_reg_rtx (SFmode
);
18508 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18509 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18510 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18512 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18514 if (!rtx_equal_p (target
, fp_hi
))
18515 emit_move_insn (target
, fp_hi
);
18518 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18519 a vector of unsigned ints VAL to vector of floats TARGET. */
18522 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18525 REAL_VALUE_TYPE TWO16r
;
18526 enum machine_mode intmode
= GET_MODE (val
);
18527 enum machine_mode fltmode
= GET_MODE (target
);
18528 rtx (*cvt
) (rtx
, rtx
);
18530 if (intmode
== V4SImode
)
18531 cvt
= gen_floatv4siv4sf2
;
18533 cvt
= gen_floatv8siv8sf2
;
18534 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18535 tmp
[0] = force_reg (intmode
, tmp
[0]);
18536 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18538 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18539 NULL_RTX
, 1, OPTAB_DIRECT
);
18540 tmp
[3] = gen_reg_rtx (fltmode
);
18541 emit_insn (cvt (tmp
[3], tmp
[1]));
18542 tmp
[4] = gen_reg_rtx (fltmode
);
18543 emit_insn (cvt (tmp
[4], tmp
[2]));
18544 real_ldexp (&TWO16r
, &dconst1
, 16);
18545 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18546 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18547 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18549 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18551 if (tmp
[7] != target
)
18552 emit_move_insn (target
, tmp
[7]);
18555 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18556 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18557 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18558 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18561 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18563 REAL_VALUE_TYPE TWO31r
;
18564 rtx two31r
, tmp
[4];
18565 enum machine_mode mode
= GET_MODE (val
);
18566 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18567 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18568 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18571 for (i
= 0; i
< 3; i
++)
18572 tmp
[i
] = gen_reg_rtx (mode
);
18573 real_ldexp (&TWO31r
, &dconst1
, 31);
18574 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18575 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18576 two31r
= force_reg (mode
, two31r
);
18579 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18580 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18581 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18582 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18583 default: gcc_unreachable ();
18585 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18586 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18587 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18589 if (intmode
== V4SImode
|| TARGET_AVX2
)
18590 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18591 gen_lowpart (intmode
, tmp
[0]),
18592 GEN_INT (31), NULL_RTX
, 0,
18596 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18597 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18598 *xorp
= expand_simple_binop (intmode
, AND
,
18599 gen_lowpart (intmode
, tmp
[0]),
18600 two31
, NULL_RTX
, 0,
18603 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18607 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18608 then replicate the value for all elements of the vector
18612 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18616 enum machine_mode scalar_mode
;
18633 n_elt
= GET_MODE_NUNITS (mode
);
18634 v
= rtvec_alloc (n_elt
);
18635 scalar_mode
= GET_MODE_INNER (mode
);
18637 RTVEC_ELT (v
, 0) = value
;
18639 for (i
= 1; i
< n_elt
; ++i
)
18640 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18642 return gen_rtx_CONST_VECTOR (mode
, v
);
18645 gcc_unreachable ();
18649 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18650 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18651 for an SSE register. If VECT is true, then replicate the mask for
18652 all elements of the vector register. If INVERT is true, then create
18653 a mask excluding the sign bit. */
18656 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18658 enum machine_mode vec_mode
, imode
;
18659 HOST_WIDE_INT hi
, lo
;
18664 /* Find the sign bit, sign extended to 2*HWI. */
18672 mode
= GET_MODE_INNER (mode
);
18674 lo
= 0x80000000, hi
= lo
< 0;
18682 mode
= GET_MODE_INNER (mode
);
18684 if (HOST_BITS_PER_WIDE_INT
>= 64)
18685 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18687 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18692 vec_mode
= VOIDmode
;
18693 if (HOST_BITS_PER_WIDE_INT
>= 64)
18696 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18703 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18707 lo
= ~lo
, hi
= ~hi
;
18713 mask
= immed_double_const (lo
, hi
, imode
);
18715 vec
= gen_rtvec (2, v
, mask
);
18716 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18717 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18724 gcc_unreachable ();
18728 lo
= ~lo
, hi
= ~hi
;
18730 /* Force this value into the low part of a fp vector constant. */
18731 mask
= immed_double_const (lo
, hi
, imode
);
18732 mask
= gen_lowpart (mode
, mask
);
18734 if (vec_mode
== VOIDmode
)
18735 return force_reg (mode
, mask
);
18737 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18738 return force_reg (vec_mode
, v
);
18741 /* Generate code for floating point ABS or NEG. */
18744 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18747 rtx mask
, set
, dst
, src
;
18748 bool use_sse
= false;
18749 bool vector_mode
= VECTOR_MODE_P (mode
);
18750 enum machine_mode vmode
= mode
;
18754 else if (mode
== TFmode
)
18756 else if (TARGET_SSE_MATH
)
18758 use_sse
= SSE_FLOAT_MODE_P (mode
);
18759 if (mode
== SFmode
)
18761 else if (mode
== DFmode
)
18765 /* NEG and ABS performed with SSE use bitwise mask operations.
18766 Create the appropriate mask now. */
18768 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18775 set
= gen_rtx_fmt_e (code
, mode
, src
);
18776 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18783 use
= gen_rtx_USE (VOIDmode
, mask
);
18785 par
= gen_rtvec (2, set
, use
);
18788 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18789 par
= gen_rtvec (3, set
, use
, clob
);
18791 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18797 /* Expand a copysign operation. Special case operand 0 being a constant. */
18800 ix86_expand_copysign (rtx operands
[])
18802 enum machine_mode mode
, vmode
;
18803 rtx dest
, op0
, op1
, mask
, nmask
;
18805 dest
= operands
[0];
18809 mode
= GET_MODE (dest
);
18811 if (mode
== SFmode
)
18813 else if (mode
== DFmode
)
18818 if (GET_CODE (op0
) == CONST_DOUBLE
)
18820 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18822 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18823 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18825 if (mode
== SFmode
|| mode
== DFmode
)
18827 if (op0
== CONST0_RTX (mode
))
18828 op0
= CONST0_RTX (vmode
);
18831 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18833 op0
= force_reg (vmode
, v
);
18836 else if (op0
!= CONST0_RTX (mode
))
18837 op0
= force_reg (mode
, op0
);
18839 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18841 if (mode
== SFmode
)
18842 copysign_insn
= gen_copysignsf3_const
;
18843 else if (mode
== DFmode
)
18844 copysign_insn
= gen_copysigndf3_const
;
18846 copysign_insn
= gen_copysigntf3_const
;
18848 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18852 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18854 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18855 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18857 if (mode
== SFmode
)
18858 copysign_insn
= gen_copysignsf3_var
;
18859 else if (mode
== DFmode
)
18860 copysign_insn
= gen_copysigndf3_var
;
18862 copysign_insn
= gen_copysigntf3_var
;
18864 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18868 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18869 be a constant, and so has already been expanded into a vector constant. */
18872 ix86_split_copysign_const (rtx operands
[])
18874 enum machine_mode mode
, vmode
;
18875 rtx dest
, op0
, mask
, x
;
18877 dest
= operands
[0];
18879 mask
= operands
[3];
18881 mode
= GET_MODE (dest
);
18882 vmode
= GET_MODE (mask
);
18884 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18885 x
= gen_rtx_AND (vmode
, dest
, mask
);
18886 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18888 if (op0
!= CONST0_RTX (vmode
))
18890 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18891 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18895 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18896 so we have to do two masks. */
18899 ix86_split_copysign_var (rtx operands
[])
18901 enum machine_mode mode
, vmode
;
18902 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18904 dest
= operands
[0];
18905 scratch
= operands
[1];
18908 nmask
= operands
[4];
18909 mask
= operands
[5];
18911 mode
= GET_MODE (dest
);
18912 vmode
= GET_MODE (mask
);
18914 if (rtx_equal_p (op0
, op1
))
18916 /* Shouldn't happen often (it's useless, obviously), but when it does
18917 we'd generate incorrect code if we continue below. */
18918 emit_move_insn (dest
, op0
);
18922 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18924 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18926 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18927 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18930 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18931 x
= gen_rtx_NOT (vmode
, dest
);
18932 x
= gen_rtx_AND (vmode
, x
, op0
);
18933 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18937 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18939 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18941 else /* alternative 2,4 */
18943 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18944 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18945 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18947 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18949 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18951 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18952 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18954 else /* alternative 3,4 */
18956 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18958 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18959 x
= gen_rtx_AND (vmode
, dest
, op0
);
18961 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18964 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18965 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18968 /* Return TRUE or FALSE depending on whether the first SET in INSN
18969 has source and destination with matching CC modes, and that the
18970 CC mode is at least as constrained as REQ_MODE. */
18973 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18976 enum machine_mode set_mode
;
18978 set
= PATTERN (insn
);
18979 if (GET_CODE (set
) == PARALLEL
)
18980 set
= XVECEXP (set
, 0, 0);
18981 gcc_assert (GET_CODE (set
) == SET
);
18982 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18984 set_mode
= GET_MODE (SET_DEST (set
));
18988 if (req_mode
!= CCNOmode
18989 && (req_mode
!= CCmode
18990 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18994 if (req_mode
== CCGCmode
)
18998 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
19002 if (req_mode
== CCZmode
)
19012 if (set_mode
!= req_mode
)
19017 gcc_unreachable ();
19020 return GET_MODE (SET_SRC (set
)) == set_mode
;
19023 /* Generate insn patterns to do an integer compare of OPERANDS. */
19026 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19028 enum machine_mode cmpmode
;
19031 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
19032 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
19034 /* This is very simple, but making the interface the same as in the
19035 FP case makes the rest of the code easier. */
19036 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
19037 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
19039 /* Return the test that should be put into the flags user, i.e.
19040 the bcc, scc, or cmov instruction. */
19041 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
19044 /* Figure out whether to use ordered or unordered fp comparisons.
19045 Return the appropriate mode to use. */
19048 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
19050 /* ??? In order to make all comparisons reversible, we do all comparisons
19051 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19052 all forms trapping and nontrapping comparisons, we can make inequality
19053 comparisons trapping again, since it results in better code when using
19054 FCOM based compares. */
19055 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
19059 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
19061 enum machine_mode mode
= GET_MODE (op0
);
19063 if (SCALAR_FLOAT_MODE_P (mode
))
19065 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19066 return ix86_fp_compare_mode (code
);
19071 /* Only zero flag is needed. */
19072 case EQ
: /* ZF=0 */
19073 case NE
: /* ZF!=0 */
19075 /* Codes needing carry flag. */
19076 case GEU
: /* CF=0 */
19077 case LTU
: /* CF=1 */
19078 /* Detect overflow checks. They need just the carry flag. */
19079 if (GET_CODE (op0
) == PLUS
19080 && rtx_equal_p (op1
, XEXP (op0
, 0)))
19084 case GTU
: /* CF=0 & ZF=0 */
19085 case LEU
: /* CF=1 | ZF=1 */
19087 /* Codes possibly doable only with sign flag when
19088 comparing against zero. */
19089 case GE
: /* SF=OF or SF=0 */
19090 case LT
: /* SF<>OF or SF=1 */
19091 if (op1
== const0_rtx
)
19094 /* For other cases Carry flag is not required. */
19096 /* Codes doable only with sign flag when comparing
19097 against zero, but we miss jump instruction for it
19098 so we need to use relational tests against overflow
19099 that thus needs to be zero. */
19100 case GT
: /* ZF=0 & SF=OF */
19101 case LE
: /* ZF=1 | SF<>OF */
19102 if (op1
== const0_rtx
)
19106 /* strcmp pattern do (use flags) and combine may ask us for proper
19111 gcc_unreachable ();
19115 /* Return the fixed registers used for condition codes. */
19118 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
19125 /* If two condition code modes are compatible, return a condition code
19126 mode which is compatible with both. Otherwise, return
19129 static enum machine_mode
19130 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
19135 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
19138 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
19139 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
19142 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
19144 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
19150 gcc_unreachable ();
19180 /* These are only compatible with themselves, which we already
19187 /* Return a comparison we can do and that it is equivalent to
19188 swap_condition (code) apart possibly from orderedness.
19189 But, never change orderedness if TARGET_IEEE_FP, returning
19190 UNKNOWN in that case if necessary. */
19192 static enum rtx_code
19193 ix86_fp_swap_condition (enum rtx_code code
)
19197 case GT
: /* GTU - CF=0 & ZF=0 */
19198 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
19199 case GE
: /* GEU - CF=0 */
19200 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
19201 case UNLT
: /* LTU - CF=1 */
19202 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
19203 case UNLE
: /* LEU - CF=1 | ZF=1 */
19204 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
19206 return swap_condition (code
);
19210 /* Return cost of comparison CODE using the best strategy for performance.
19211 All following functions do use number of instructions as a cost metrics.
19212 In future this should be tweaked to compute bytes for optimize_size and
19213 take into account performance of various instructions on various CPUs. */
19216 ix86_fp_comparison_cost (enum rtx_code code
)
19220 /* The cost of code using bit-twiddling on %ah. */
19237 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
19241 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
19244 gcc_unreachable ();
19247 switch (ix86_fp_comparison_strategy (code
))
19249 case IX86_FPCMP_COMI
:
19250 return arith_cost
> 4 ? 3 : 2;
19251 case IX86_FPCMP_SAHF
:
19252 return arith_cost
> 4 ? 4 : 3;
19258 /* Return strategy to use for floating-point. We assume that fcomi is always
19259 preferrable where available, since that is also true when looking at size
19260 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19262 enum ix86_fpcmp_strategy
19263 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
19265 /* Do fcomi/sahf based test when profitable. */
19268 return IX86_FPCMP_COMI
;
19270 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
19271 return IX86_FPCMP_SAHF
;
19273 return IX86_FPCMP_ARITH
;
19276 /* Swap, force into registers, or otherwise massage the two operands
19277 to a fp comparison. The operands are updated in place; the new
19278 comparison code is returned. */
19280 static enum rtx_code
19281 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
19283 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
19284 rtx op0
= *pop0
, op1
= *pop1
;
19285 enum machine_mode op_mode
= GET_MODE (op0
);
19286 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
19288 /* All of the unordered compare instructions only work on registers.
19289 The same is true of the fcomi compare instructions. The XFmode
19290 compare instructions require registers except when comparing
19291 against zero or when converting operand 1 from fixed point to
19295 && (fpcmp_mode
== CCFPUmode
19296 || (op_mode
== XFmode
19297 && ! (standard_80387_constant_p (op0
) == 1
19298 || standard_80387_constant_p (op1
) == 1)
19299 && GET_CODE (op1
) != FLOAT
)
19300 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19302 op0
= force_reg (op_mode
, op0
);
19303 op1
= force_reg (op_mode
, op1
);
19307 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19308 things around if they appear profitable, otherwise force op0
19309 into a register. */
19311 if (standard_80387_constant_p (op0
) == 0
19313 && ! (standard_80387_constant_p (op1
) == 0
19316 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19317 if (new_code
!= UNKNOWN
)
19320 tmp
= op0
, op0
= op1
, op1
= tmp
;
19326 op0
= force_reg (op_mode
, op0
);
19328 if (CONSTANT_P (op1
))
19330 int tmp
= standard_80387_constant_p (op1
);
19332 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19336 op1
= force_reg (op_mode
, op1
);
19339 op1
= force_reg (op_mode
, op1
);
19343 /* Try to rearrange the comparison to make it cheaper. */
19344 if (ix86_fp_comparison_cost (code
)
19345 > ix86_fp_comparison_cost (swap_condition (code
))
19346 && (REG_P (op1
) || can_create_pseudo_p ()))
19349 tmp
= op0
, op0
= op1
, op1
= tmp
;
19350 code
= swap_condition (code
);
19352 op0
= force_reg (op_mode
, op0
);
19360 /* Convert comparison codes we use to represent FP comparison to integer
19361 code that will result in proper branch. Return UNKNOWN if no such code
19365 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19394 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19397 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19399 enum machine_mode fpcmp_mode
, intcmp_mode
;
19402 fpcmp_mode
= ix86_fp_compare_mode (code
);
19403 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19405 /* Do fcomi/sahf based test when profitable. */
19406 switch (ix86_fp_comparison_strategy (code
))
19408 case IX86_FPCMP_COMI
:
19409 intcmp_mode
= fpcmp_mode
;
19410 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19411 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19416 case IX86_FPCMP_SAHF
:
19417 intcmp_mode
= fpcmp_mode
;
19418 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19419 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19423 scratch
= gen_reg_rtx (HImode
);
19424 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19425 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19428 case IX86_FPCMP_ARITH
:
19429 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19430 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19431 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19433 scratch
= gen_reg_rtx (HImode
);
19434 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19436 /* In the unordered case, we have to check C2 for NaN's, which
19437 doesn't happen to work out to anything nice combination-wise.
19438 So do some bit twiddling on the value we've got in AH to come
19439 up with an appropriate set of condition codes. */
19441 intcmp_mode
= CCNOmode
;
19446 if (code
== GT
|| !TARGET_IEEE_FP
)
19448 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19453 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19454 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19455 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19456 intcmp_mode
= CCmode
;
19462 if (code
== LT
&& TARGET_IEEE_FP
)
19464 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19465 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19466 intcmp_mode
= CCmode
;
19471 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19477 if (code
== GE
|| !TARGET_IEEE_FP
)
19479 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19484 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19485 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19491 if (code
== LE
&& TARGET_IEEE_FP
)
19493 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19494 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19495 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19496 intcmp_mode
= CCmode
;
19501 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19507 if (code
== EQ
&& TARGET_IEEE_FP
)
19509 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19510 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19511 intcmp_mode
= CCmode
;
19516 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19522 if (code
== NE
&& TARGET_IEEE_FP
)
19524 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19525 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19531 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19537 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19541 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19546 gcc_unreachable ();
19554 /* Return the test that should be put into the flags user, i.e.
19555 the bcc, scc, or cmov instruction. */
19556 return gen_rtx_fmt_ee (code
, VOIDmode
,
19557 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
19562 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19566 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19567 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19569 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19571 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19572 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19575 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19581 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19583 enum machine_mode mode
= GET_MODE (op0
);
19595 tmp
= ix86_expand_compare (code
, op0
, op1
);
19596 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19597 gen_rtx_LABEL_REF (VOIDmode
, label
),
19599 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19606 /* Expand DImode branch into multiple compare+branch. */
19608 rtx lo
[2], hi
[2], label2
;
19609 enum rtx_code code1
, code2
, code3
;
19610 enum machine_mode submode
;
19612 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19614 tmp
= op0
, op0
= op1
, op1
= tmp
;
19615 code
= swap_condition (code
);
19618 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19619 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19621 submode
= mode
== DImode
? SImode
: DImode
;
19623 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19624 avoid two branches. This costs one extra insn, so disable when
19625 optimizing for size. */
19627 if ((code
== EQ
|| code
== NE
)
19628 && (!optimize_insn_for_size_p ()
19629 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19634 if (hi
[1] != const0_rtx
)
19635 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19636 NULL_RTX
, 0, OPTAB_WIDEN
);
19639 if (lo
[1] != const0_rtx
)
19640 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19641 NULL_RTX
, 0, OPTAB_WIDEN
);
19643 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19644 NULL_RTX
, 0, OPTAB_WIDEN
);
19646 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19650 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19651 op1 is a constant and the low word is zero, then we can just
19652 examine the high word. Similarly for low word -1 and
19653 less-or-equal-than or greater-than. */
19655 if (CONST_INT_P (hi
[1]))
19658 case LT
: case LTU
: case GE
: case GEU
:
19659 if (lo
[1] == const0_rtx
)
19661 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19665 case LE
: case LEU
: case GT
: case GTU
:
19666 if (lo
[1] == constm1_rtx
)
19668 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19676 /* Otherwise, we need two or three jumps. */
19678 label2
= gen_label_rtx ();
19681 code2
= swap_condition (code
);
19682 code3
= unsigned_condition (code
);
19686 case LT
: case GT
: case LTU
: case GTU
:
19689 case LE
: code1
= LT
; code2
= GT
; break;
19690 case GE
: code1
= GT
; code2
= LT
; break;
19691 case LEU
: code1
= LTU
; code2
= GTU
; break;
19692 case GEU
: code1
= GTU
; code2
= LTU
; break;
19694 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19695 case NE
: code2
= UNKNOWN
; break;
19698 gcc_unreachable ();
19703 * if (hi(a) < hi(b)) goto true;
19704 * if (hi(a) > hi(b)) goto false;
19705 * if (lo(a) < lo(b)) goto true;
19709 if (code1
!= UNKNOWN
)
19710 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19711 if (code2
!= UNKNOWN
)
19712 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19714 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19716 if (code2
!= UNKNOWN
)
19717 emit_label (label2
);
19722 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19727 /* Split branch based on floating point condition. */
19729 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19730 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19735 if (target2
!= pc_rtx
)
19738 code
= reverse_condition_maybe_unordered (code
);
19743 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19746 /* Remove pushed operand from stack. */
19748 ix86_free_from_memory (GET_MODE (pushed
));
19750 i
= emit_jump_insn (gen_rtx_SET
19752 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19753 condition
, target1
, target2
)));
19754 if (split_branch_probability
>= 0)
19755 add_int_reg_note (i
, REG_BR_PROB
, split_branch_probability
);
19759 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19763 gcc_assert (GET_MODE (dest
) == QImode
);
19765 ret
= ix86_expand_compare (code
, op0
, op1
);
19766 PUT_MODE (ret
, QImode
);
19767 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19770 /* Expand comparison setting or clearing carry flag. Return true when
19771 successful and set pop for the operation. */
19773 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19775 enum machine_mode mode
=
19776 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19778 /* Do not handle double-mode compares that go through special path. */
19779 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19782 if (SCALAR_FLOAT_MODE_P (mode
))
19784 rtx compare_op
, compare_seq
;
19786 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19788 /* Shortcut: following common codes never translate
19789 into carry flag compares. */
19790 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19791 || code
== ORDERED
|| code
== UNORDERED
)
19794 /* These comparisons require zero flag; swap operands so they won't. */
19795 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19796 && !TARGET_IEEE_FP
)
19801 code
= swap_condition (code
);
19804 /* Try to expand the comparison and verify that we end up with
19805 carry flag based comparison. This fails to be true only when
19806 we decide to expand comparison using arithmetic that is not
19807 too common scenario. */
19809 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19810 compare_seq
= get_insns ();
19813 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19814 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19815 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19817 code
= GET_CODE (compare_op
);
19819 if (code
!= LTU
&& code
!= GEU
)
19822 emit_insn (compare_seq
);
19827 if (!INTEGRAL_MODE_P (mode
))
19836 /* Convert a==0 into (unsigned)a<1. */
19839 if (op1
!= const0_rtx
)
19842 code
= (code
== EQ
? LTU
: GEU
);
19845 /* Convert a>b into b<a or a>=b-1. */
19848 if (CONST_INT_P (op1
))
19850 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19851 /* Bail out on overflow. We still can swap operands but that
19852 would force loading of the constant into register. */
19853 if (op1
== const0_rtx
19854 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19856 code
= (code
== GTU
? GEU
: LTU
);
19863 code
= (code
== GTU
? LTU
: GEU
);
19867 /* Convert a>=0 into (unsigned)a<0x80000000. */
19870 if (mode
== DImode
|| op1
!= const0_rtx
)
19872 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19873 code
= (code
== LT
? GEU
: LTU
);
19877 if (mode
== DImode
|| op1
!= constm1_rtx
)
19879 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19880 code
= (code
== LE
? GEU
: LTU
);
19886 /* Swapping operands may cause constant to appear as first operand. */
19887 if (!nonimmediate_operand (op0
, VOIDmode
))
19889 if (!can_create_pseudo_p ())
19891 op0
= force_reg (mode
, op0
);
19893 *pop
= ix86_expand_compare (code
, op0
, op1
);
19894 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19899 ix86_expand_int_movcc (rtx operands
[])
19901 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19902 rtx compare_seq
, compare_op
;
19903 enum machine_mode mode
= GET_MODE (operands
[0]);
19904 bool sign_bit_compare_p
= false;
19905 rtx op0
= XEXP (operands
[1], 0);
19906 rtx op1
= XEXP (operands
[1], 1);
19908 if (GET_MODE (op0
) == TImode
19909 || (GET_MODE (op0
) == DImode
19914 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19915 compare_seq
= get_insns ();
19918 compare_code
= GET_CODE (compare_op
);
19920 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19921 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19922 sign_bit_compare_p
= true;
19924 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19925 HImode insns, we'd be swallowed in word prefix ops. */
19927 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19928 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19929 && CONST_INT_P (operands
[2])
19930 && CONST_INT_P (operands
[3]))
19932 rtx out
= operands
[0];
19933 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19934 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19935 HOST_WIDE_INT diff
;
19938 /* Sign bit compares are better done using shifts than we do by using
19940 if (sign_bit_compare_p
19941 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19943 /* Detect overlap between destination and compare sources. */
19946 if (!sign_bit_compare_p
)
19949 bool fpcmp
= false;
19951 compare_code
= GET_CODE (compare_op
);
19953 flags
= XEXP (compare_op
, 0);
19955 if (GET_MODE (flags
) == CCFPmode
19956 || GET_MODE (flags
) == CCFPUmode
)
19960 = ix86_fp_compare_code_to_integer (compare_code
);
19963 /* To simplify rest of code, restrict to the GEU case. */
19964 if (compare_code
== LTU
)
19966 HOST_WIDE_INT tmp
= ct
;
19969 compare_code
= reverse_condition (compare_code
);
19970 code
= reverse_condition (code
);
19975 PUT_CODE (compare_op
,
19976 reverse_condition_maybe_unordered
19977 (GET_CODE (compare_op
)));
19979 PUT_CODE (compare_op
,
19980 reverse_condition (GET_CODE (compare_op
)));
19984 if (reg_overlap_mentioned_p (out
, op0
)
19985 || reg_overlap_mentioned_p (out
, op1
))
19986 tmp
= gen_reg_rtx (mode
);
19988 if (mode
== DImode
)
19989 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19991 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19992 flags
, compare_op
));
19996 if (code
== GT
|| code
== GE
)
19997 code
= reverse_condition (code
);
20000 HOST_WIDE_INT tmp
= ct
;
20005 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
20018 tmp
= expand_simple_binop (mode
, PLUS
,
20020 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20031 tmp
= expand_simple_binop (mode
, IOR
,
20033 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20035 else if (diff
== -1 && ct
)
20045 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20047 tmp
= expand_simple_binop (mode
, PLUS
,
20048 copy_rtx (tmp
), GEN_INT (cf
),
20049 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20057 * andl cf - ct, dest
20067 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20070 tmp
= expand_simple_binop (mode
, AND
,
20072 gen_int_mode (cf
- ct
, mode
),
20073 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20075 tmp
= expand_simple_binop (mode
, PLUS
,
20076 copy_rtx (tmp
), GEN_INT (ct
),
20077 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20080 if (!rtx_equal_p (tmp
, out
))
20081 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
20088 enum machine_mode cmp_mode
= GET_MODE (op0
);
20091 tmp
= ct
, ct
= cf
, cf
= tmp
;
20094 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20096 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20098 /* We may be reversing unordered compare to normal compare, that
20099 is not valid in general (we may convert non-trapping condition
20100 to trapping one), however on i386 we currently emit all
20101 comparisons unordered. */
20102 compare_code
= reverse_condition_maybe_unordered (compare_code
);
20103 code
= reverse_condition_maybe_unordered (code
);
20107 compare_code
= reverse_condition (compare_code
);
20108 code
= reverse_condition (code
);
20112 compare_code
= UNKNOWN
;
20113 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
20114 && CONST_INT_P (op1
))
20116 if (op1
== const0_rtx
20117 && (code
== LT
|| code
== GE
))
20118 compare_code
= code
;
20119 else if (op1
== constm1_rtx
)
20123 else if (code
== GT
)
20128 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20129 if (compare_code
!= UNKNOWN
20130 && GET_MODE (op0
) == GET_MODE (out
)
20131 && (cf
== -1 || ct
== -1))
20133 /* If lea code below could be used, only optimize
20134 if it results in a 2 insn sequence. */
20136 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20137 || diff
== 3 || diff
== 5 || diff
== 9)
20138 || (compare_code
== LT
&& ct
== -1)
20139 || (compare_code
== GE
&& cf
== -1))
20142 * notl op1 (if necessary)
20150 code
= reverse_condition (code
);
20153 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20155 out
= expand_simple_binop (mode
, IOR
,
20157 out
, 1, OPTAB_DIRECT
);
20158 if (out
!= operands
[0])
20159 emit_move_insn (operands
[0], out
);
20166 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20167 || diff
== 3 || diff
== 5 || diff
== 9)
20168 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
20170 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
20176 * lea cf(dest*(ct-cf)),dest
20180 * This also catches the degenerate setcc-only case.
20186 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20189 /* On x86_64 the lea instruction operates on Pmode, so we need
20190 to get arithmetics done in proper mode to match. */
20192 tmp
= copy_rtx (out
);
20196 out1
= copy_rtx (out
);
20197 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
20201 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
20207 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
20210 if (!rtx_equal_p (tmp
, out
))
20213 out
= force_operand (tmp
, copy_rtx (out
));
20215 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
20217 if (!rtx_equal_p (out
, operands
[0]))
20218 emit_move_insn (operands
[0], copy_rtx (out
));
20224 * General case: Jumpful:
20225 * xorl dest,dest cmpl op1, op2
20226 * cmpl op1, op2 movl ct, dest
20227 * setcc dest jcc 1f
20228 * decl dest movl cf, dest
20229 * andl (cf-ct),dest 1:
20232 * Size 20. Size 14.
20234 * This is reasonably steep, but branch mispredict costs are
20235 * high on modern cpus, so consider failing only if optimizing
20239 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20240 && BRANCH_COST (optimize_insn_for_speed_p (),
20245 enum machine_mode cmp_mode
= GET_MODE (op0
);
20250 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20252 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20254 /* We may be reversing unordered compare to normal compare,
20255 that is not valid in general (we may convert non-trapping
20256 condition to trapping one), however on i386 we currently
20257 emit all comparisons unordered. */
20258 code
= reverse_condition_maybe_unordered (code
);
20262 code
= reverse_condition (code
);
20263 if (compare_code
!= UNKNOWN
)
20264 compare_code
= reverse_condition (compare_code
);
20268 if (compare_code
!= UNKNOWN
)
20270 /* notl op1 (if needed)
20275 For x < 0 (resp. x <= -1) there will be no notl,
20276 so if possible swap the constants to get rid of the
20278 True/false will be -1/0 while code below (store flag
20279 followed by decrement) is 0/-1, so the constants need
20280 to be exchanged once more. */
20282 if (compare_code
== GE
|| !cf
)
20284 code
= reverse_condition (code
);
20289 HOST_WIDE_INT tmp
= cf
;
20294 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20298 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20300 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20302 copy_rtx (out
), 1, OPTAB_DIRECT
);
20305 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20306 gen_int_mode (cf
- ct
, mode
),
20307 copy_rtx (out
), 1, OPTAB_DIRECT
);
20309 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20310 copy_rtx (out
), 1, OPTAB_DIRECT
);
20311 if (!rtx_equal_p (out
, operands
[0]))
20312 emit_move_insn (operands
[0], copy_rtx (out
));
20318 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20320 /* Try a few things more with specific constants and a variable. */
20323 rtx var
, orig_out
, out
, tmp
;
20325 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20328 /* If one of the two operands is an interesting constant, load a
20329 constant with the above and mask it in with a logical operation. */
20331 if (CONST_INT_P (operands
[2]))
20334 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20335 operands
[3] = constm1_rtx
, op
= and_optab
;
20336 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20337 operands
[3] = const0_rtx
, op
= ior_optab
;
20341 else if (CONST_INT_P (operands
[3]))
20344 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20345 operands
[2] = constm1_rtx
, op
= and_optab
;
20346 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20347 operands
[2] = const0_rtx
, op
= ior_optab
;
20354 orig_out
= operands
[0];
20355 tmp
= gen_reg_rtx (mode
);
20358 /* Recurse to get the constant loaded. */
20359 if (ix86_expand_int_movcc (operands
) == 0)
20362 /* Mask in the interesting variable. */
20363 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20365 if (!rtx_equal_p (out
, orig_out
))
20366 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20372 * For comparison with above,
20382 if (! nonimmediate_operand (operands
[2], mode
))
20383 operands
[2] = force_reg (mode
, operands
[2]);
20384 if (! nonimmediate_operand (operands
[3], mode
))
20385 operands
[3] = force_reg (mode
, operands
[3]);
20387 if (! register_operand (operands
[2], VOIDmode
)
20389 || ! register_operand (operands
[3], VOIDmode
)))
20390 operands
[2] = force_reg (mode
, operands
[2]);
20393 && ! register_operand (operands
[3], VOIDmode
))
20394 operands
[3] = force_reg (mode
, operands
[3]);
20396 emit_insn (compare_seq
);
20397 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20398 gen_rtx_IF_THEN_ELSE (mode
,
20399 compare_op
, operands
[2],
20404 /* Swap, force into registers, or otherwise massage the two operands
20405 to an sse comparison with a mask result. Thus we differ a bit from
20406 ix86_prepare_fp_compare_args which expects to produce a flags result.
20408 The DEST operand exists to help determine whether to commute commutative
20409 operators. The POP0/POP1 operands are updated in place. The new
20410 comparison code is returned, or UNKNOWN if not implementable. */
20412 static enum rtx_code
20413 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20414 rtx
*pop0
, rtx
*pop1
)
20422 /* AVX supports all the needed comparisons. */
20425 /* We have no LTGT as an operator. We could implement it with
20426 NE & ORDERED, but this requires an extra temporary. It's
20427 not clear that it's worth it. */
20434 /* These are supported directly. */
20441 /* AVX has 3 operand comparisons, no need to swap anything. */
20444 /* For commutative operators, try to canonicalize the destination
20445 operand to be first in the comparison - this helps reload to
20446 avoid extra moves. */
20447 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20455 /* These are not supported directly before AVX, and furthermore
20456 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20457 comparison operands to transform into something that is
20462 code
= swap_condition (code
);
20466 gcc_unreachable ();
20472 /* Detect conditional moves that exactly match min/max operational
20473 semantics. Note that this is IEEE safe, as long as we don't
20474 interchange the operands.
20476 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20477 and TRUE if the operation is successful and instructions are emitted. */
20480 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20481 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20483 enum machine_mode mode
;
20489 else if (code
== UNGE
)
20492 if_true
= if_false
;
20498 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20500 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20505 mode
= GET_MODE (dest
);
20507 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20508 but MODE may be a vector mode and thus not appropriate. */
20509 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
20511 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20514 if_true
= force_reg (mode
, if_true
);
20515 v
= gen_rtvec (2, if_true
, if_false
);
20516 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
20520 code
= is_min
? SMIN
: SMAX
;
20521 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20524 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
20528 /* Expand an sse vector comparison. Return the register with the result. */
20531 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20532 rtx op_true
, rtx op_false
)
20534 enum machine_mode mode
= GET_MODE (dest
);
20535 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20538 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20539 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20540 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
20543 || reg_overlap_mentioned_p (dest
, op_true
)
20544 || reg_overlap_mentioned_p (dest
, op_false
))
20545 dest
= gen_reg_rtx (mode
);
20547 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20548 if (cmp_mode
!= mode
)
20550 x
= force_reg (cmp_mode
, x
);
20551 convert_move (dest
, x
, false);
20554 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20559 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20560 operations. This is used for both scalar and vector conditional moves. */
20563 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20565 enum machine_mode mode
= GET_MODE (dest
);
20568 if (vector_all_ones_operand (op_true
, mode
)
20569 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20571 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20573 else if (op_false
== CONST0_RTX (mode
))
20575 op_true
= force_reg (mode
, op_true
);
20576 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20577 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20579 else if (op_true
== CONST0_RTX (mode
))
20581 op_false
= force_reg (mode
, op_false
);
20582 x
= gen_rtx_NOT (mode
, cmp
);
20583 x
= gen_rtx_AND (mode
, x
, op_false
);
20584 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20586 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20588 op_false
= force_reg (mode
, op_false
);
20589 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20590 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20592 else if (TARGET_XOP
)
20594 op_true
= force_reg (mode
, op_true
);
20596 if (!nonimmediate_operand (op_false
, mode
))
20597 op_false
= force_reg (mode
, op_false
);
20599 emit_insn (gen_rtx_SET (mode
, dest
,
20600 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20606 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20609 if (!nonimmediate_operand (op_true
, mode
))
20610 op_true
= force_reg (mode
, op_true
);
20612 op_false
= force_reg (mode
, op_false
);
20618 gen
= gen_sse4_1_blendvps
;
20622 gen
= gen_sse4_1_blendvpd
;
20630 gen
= gen_sse4_1_pblendvb
;
20631 if (mode
!= V16QImode
)
20632 d
= gen_reg_rtx (V16QImode
);
20633 op_false
= gen_lowpart (V16QImode
, op_false
);
20634 op_true
= gen_lowpart (V16QImode
, op_true
);
20635 cmp
= gen_lowpart (V16QImode
, cmp
);
20640 gen
= gen_avx_blendvps256
;
20644 gen
= gen_avx_blendvpd256
;
20652 gen
= gen_avx2_pblendvb
;
20653 if (mode
!= V32QImode
)
20654 d
= gen_reg_rtx (V32QImode
);
20655 op_false
= gen_lowpart (V32QImode
, op_false
);
20656 op_true
= gen_lowpart (V32QImode
, op_true
);
20657 cmp
= gen_lowpart (V32QImode
, cmp
);
20666 emit_insn (gen (d
, op_false
, op_true
, cmp
));
20668 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
20672 op_true
= force_reg (mode
, op_true
);
20674 t2
= gen_reg_rtx (mode
);
20676 t3
= gen_reg_rtx (mode
);
20680 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20681 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20683 x
= gen_rtx_NOT (mode
, cmp
);
20684 x
= gen_rtx_AND (mode
, x
, op_false
);
20685 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20687 x
= gen_rtx_IOR (mode
, t3
, t2
);
20688 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20693 /* Expand a floating-point conditional move. Return true if successful. */
20696 ix86_expand_fp_movcc (rtx operands
[])
20698 enum machine_mode mode
= GET_MODE (operands
[0]);
20699 enum rtx_code code
= GET_CODE (operands
[1]);
20700 rtx tmp
, compare_op
;
20701 rtx op0
= XEXP (operands
[1], 0);
20702 rtx op1
= XEXP (operands
[1], 1);
20704 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20706 enum machine_mode cmode
;
20708 /* Since we've no cmove for sse registers, don't force bad register
20709 allocation just to gain access to it. Deny movcc when the
20710 comparison mode doesn't match the move mode. */
20711 cmode
= GET_MODE (op0
);
20712 if (cmode
== VOIDmode
)
20713 cmode
= GET_MODE (op1
);
20717 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20718 if (code
== UNKNOWN
)
20721 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20722 operands
[2], operands
[3]))
20725 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20726 operands
[2], operands
[3]);
20727 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20731 if (GET_MODE (op0
) == TImode
20732 || (GET_MODE (op0
) == DImode
20736 /* The floating point conditional move instructions don't directly
20737 support conditions resulting from a signed integer comparison. */
20739 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20740 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20742 tmp
= gen_reg_rtx (QImode
);
20743 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20745 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20748 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20749 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20750 operands
[2], operands
[3])));
20755 /* Expand a floating-point vector conditional move; a vcond operation
20756 rather than a movcc operation. */
20759 ix86_expand_fp_vcond (rtx operands
[])
20761 enum rtx_code code
= GET_CODE (operands
[3]);
20764 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20765 &operands
[4], &operands
[5]);
20766 if (code
== UNKNOWN
)
20769 switch (GET_CODE (operands
[3]))
20772 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20773 operands
[5], operands
[0], operands
[0]);
20774 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20775 operands
[5], operands
[1], operands
[2]);
20779 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20780 operands
[5], operands
[0], operands
[0]);
20781 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20782 operands
[5], operands
[1], operands
[2]);
20786 gcc_unreachable ();
20788 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20790 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20794 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20795 operands
[5], operands
[1], operands
[2]))
20798 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20799 operands
[1], operands
[2]);
20800 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20804 /* Expand a signed/unsigned integral vector conditional move. */
20807 ix86_expand_int_vcond (rtx operands
[])
20809 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20810 enum machine_mode mode
= GET_MODE (operands
[4]);
20811 enum rtx_code code
= GET_CODE (operands
[3]);
20812 bool negate
= false;
20815 cop0
= operands
[4];
20816 cop1
= operands
[5];
20818 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20819 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20820 if ((code
== LT
|| code
== GE
)
20821 && data_mode
== mode
20822 && cop1
== CONST0_RTX (mode
)
20823 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20824 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20825 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20826 && (GET_MODE_SIZE (data_mode
) == 16
20827 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20829 rtx negop
= operands
[2 - (code
== LT
)];
20830 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20831 if (negop
== CONST1_RTX (data_mode
))
20833 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20834 operands
[0], 1, OPTAB_DIRECT
);
20835 if (res
!= operands
[0])
20836 emit_move_insn (operands
[0], res
);
20839 else if (GET_MODE_INNER (data_mode
) != DImode
20840 && vector_all_ones_operand (negop
, data_mode
))
20842 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20843 operands
[0], 0, OPTAB_DIRECT
);
20844 if (res
!= operands
[0])
20845 emit_move_insn (operands
[0], res
);
20850 if (!nonimmediate_operand (cop1
, mode
))
20851 cop1
= force_reg (mode
, cop1
);
20852 if (!general_operand (operands
[1], data_mode
))
20853 operands
[1] = force_reg (data_mode
, operands
[1]);
20854 if (!general_operand (operands
[2], data_mode
))
20855 operands
[2] = force_reg (data_mode
, operands
[2]);
20857 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20859 && (mode
== V16QImode
|| mode
== V8HImode
20860 || mode
== V4SImode
|| mode
== V2DImode
))
20864 /* Canonicalize the comparison to EQ, GT, GTU. */
20875 code
= reverse_condition (code
);
20881 code
= reverse_condition (code
);
20887 code
= swap_condition (code
);
20888 x
= cop0
, cop0
= cop1
, cop1
= x
;
20892 gcc_unreachable ();
20895 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20896 if (mode
== V2DImode
)
20901 /* SSE4.1 supports EQ. */
20902 if (!TARGET_SSE4_1
)
20908 /* SSE4.2 supports GT/GTU. */
20909 if (!TARGET_SSE4_2
)
20914 gcc_unreachable ();
20918 /* Unsigned parallel compare is not supported by the hardware.
20919 Play some tricks to turn this into a signed comparison
20923 cop0
= force_reg (mode
, cop0
);
20933 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20937 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20938 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20939 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20940 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20942 gcc_unreachable ();
20944 /* Subtract (-(INT MAX) - 1) from both operands to make
20946 mask
= ix86_build_signbit_mask (mode
, true, false);
20947 t1
= gen_reg_rtx (mode
);
20948 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20950 t2
= gen_reg_rtx (mode
);
20951 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20963 /* Perform a parallel unsigned saturating subtraction. */
20964 x
= gen_reg_rtx (mode
);
20965 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20966 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20969 cop1
= CONST0_RTX (mode
);
20975 gcc_unreachable ();
20980 /* Allow the comparison to be done in one mode, but the movcc to
20981 happen in another mode. */
20982 if (data_mode
== mode
)
20984 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20985 operands
[1+negate
], operands
[2-negate
]);
20989 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20990 x
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), code
, cop0
, cop1
,
20991 operands
[1+negate
], operands
[2-negate
]);
20992 x
= gen_lowpart (data_mode
, x
);
20995 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20996 operands
[2-negate
]);
21000 /* Expand a variable vector permutation. */
21003 ix86_expand_vec_perm (rtx operands
[])
21005 rtx target
= operands
[0];
21006 rtx op0
= operands
[1];
21007 rtx op1
= operands
[2];
21008 rtx mask
= operands
[3];
21009 rtx t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
, vt
, vt2
, vec
[32];
21010 enum machine_mode mode
= GET_MODE (op0
);
21011 enum machine_mode maskmode
= GET_MODE (mask
);
21013 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
21015 /* Number of elements in the vector. */
21016 w
= GET_MODE_NUNITS (mode
);
21017 e
= GET_MODE_UNIT_SIZE (mode
);
21018 gcc_assert (w
<= 32);
21022 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
21024 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21025 an constant shuffle operand. With a tiny bit of effort we can
21026 use VPERMD instead. A re-interpretation stall for V4DFmode is
21027 unfortunate but there's no avoiding it.
21028 Similarly for V16HImode we don't have instructions for variable
21029 shuffling, while for V32QImode we can use after preparing suitable
21030 masks vpshufb; vpshufb; vpermq; vpor. */
21032 if (mode
== V16HImode
)
21034 maskmode
= mode
= V32QImode
;
21040 maskmode
= mode
= V8SImode
;
21044 t1
= gen_reg_rtx (maskmode
);
21046 /* Replicate the low bits of the V4DImode mask into V8SImode:
21048 t1 = { A A B B C C D D }. */
21049 for (i
= 0; i
< w
/ 2; ++i
)
21050 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
21051 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21052 vt
= force_reg (maskmode
, vt
);
21053 mask
= gen_lowpart (maskmode
, mask
);
21054 if (maskmode
== V8SImode
)
21055 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
21057 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
21059 /* Multiply the shuffle indicies by two. */
21060 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
21063 /* Add one to the odd shuffle indicies:
21064 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21065 for (i
= 0; i
< w
/ 2; ++i
)
21067 vec
[i
* 2] = const0_rtx
;
21068 vec
[i
* 2 + 1] = const1_rtx
;
21070 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21071 vt
= validize_mem (force_const_mem (maskmode
, vt
));
21072 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
21075 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21076 operands
[3] = mask
= t1
;
21077 target
= gen_reg_rtx (mode
);
21078 op0
= gen_lowpart (mode
, op0
);
21079 op1
= gen_lowpart (mode
, op1
);
21085 /* The VPERMD and VPERMPS instructions already properly ignore
21086 the high bits of the shuffle elements. No need for us to
21087 perform an AND ourselves. */
21088 if (one_operand_shuffle
)
21090 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
21091 if (target
!= operands
[0])
21092 emit_move_insn (operands
[0],
21093 gen_lowpart (GET_MODE (operands
[0]), target
));
21097 t1
= gen_reg_rtx (V8SImode
);
21098 t2
= gen_reg_rtx (V8SImode
);
21099 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
21100 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
21106 mask
= gen_lowpart (V8SFmode
, mask
);
21107 if (one_operand_shuffle
)
21108 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
21111 t1
= gen_reg_rtx (V8SFmode
);
21112 t2
= gen_reg_rtx (V8SFmode
);
21113 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
21114 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
21120 /* By combining the two 128-bit input vectors into one 256-bit
21121 input vector, we can use VPERMD and VPERMPS for the full
21122 two-operand shuffle. */
21123 t1
= gen_reg_rtx (V8SImode
);
21124 t2
= gen_reg_rtx (V8SImode
);
21125 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
21126 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21127 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
21128 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
21132 t1
= gen_reg_rtx (V8SFmode
);
21133 t2
= gen_reg_rtx (V8SImode
);
21134 mask
= gen_lowpart (V4SImode
, mask
);
21135 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
21136 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21137 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
21138 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
21142 t1
= gen_reg_rtx (V32QImode
);
21143 t2
= gen_reg_rtx (V32QImode
);
21144 t3
= gen_reg_rtx (V32QImode
);
21145 vt2
= GEN_INT (128);
21146 for (i
= 0; i
< 32; i
++)
21148 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21149 vt
= force_reg (V32QImode
, vt
);
21150 for (i
= 0; i
< 32; i
++)
21151 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
21152 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21153 vt2
= force_reg (V32QImode
, vt2
);
21154 /* From mask create two adjusted masks, which contain the same
21155 bits as mask in the low 7 bits of each vector element.
21156 The first mask will have the most significant bit clear
21157 if it requests element from the same 128-bit lane
21158 and MSB set if it requests element from the other 128-bit lane.
21159 The second mask will have the opposite values of the MSB,
21160 and additionally will have its 128-bit lanes swapped.
21161 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21162 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21163 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21164 stands for other 12 bytes. */
21165 /* The bit whether element is from the same lane or the other
21166 lane is bit 4, so shift it up by 3 to the MSB position. */
21167 t5
= gen_reg_rtx (V4DImode
);
21168 emit_insn (gen_ashlv4di3 (t5
, gen_lowpart (V4DImode
, mask
),
21170 /* Clear MSB bits from the mask just in case it had them set. */
21171 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
21172 /* After this t1 will have MSB set for elements from other lane. */
21173 emit_insn (gen_xorv32qi3 (t1
, gen_lowpart (V32QImode
, t5
), vt2
));
21174 /* Clear bits other than MSB. */
21175 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
21176 /* Or in the lower bits from mask into t3. */
21177 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
21178 /* And invert MSB bits in t1, so MSB is set for elements from the same
21180 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
21181 /* Swap 128-bit lanes in t3. */
21182 t6
= gen_reg_rtx (V4DImode
);
21183 emit_insn (gen_avx2_permv4di_1 (t6
, gen_lowpart (V4DImode
, t3
),
21184 const2_rtx
, GEN_INT (3),
21185 const0_rtx
, const1_rtx
));
21186 /* And or in the lower bits from mask into t1. */
21187 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
21188 if (one_operand_shuffle
)
21190 /* Each of these shuffles will put 0s in places where
21191 element from the other 128-bit lane is needed, otherwise
21192 will shuffle in the requested value. */
21193 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
,
21194 gen_lowpart (V32QImode
, t6
)));
21195 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
21196 /* For t3 the 128-bit lanes are swapped again. */
21197 t7
= gen_reg_rtx (V4DImode
);
21198 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t3
),
21199 const2_rtx
, GEN_INT (3),
21200 const0_rtx
, const1_rtx
));
21201 /* And oring both together leads to the result. */
21202 emit_insn (gen_iorv32qi3 (target
, t1
,
21203 gen_lowpart (V32QImode
, t7
)));
21204 if (target
!= operands
[0])
21205 emit_move_insn (operands
[0],
21206 gen_lowpart (GET_MODE (operands
[0]), target
));
21210 t4
= gen_reg_rtx (V32QImode
);
21211 /* Similarly to the above one_operand_shuffle code,
21212 just for repeated twice for each operand. merge_two:
21213 code will merge the two results together. */
21214 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
,
21215 gen_lowpart (V32QImode
, t6
)));
21216 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
,
21217 gen_lowpart (V32QImode
, t6
)));
21218 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
21219 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
21220 t7
= gen_reg_rtx (V4DImode
);
21221 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t4
),
21222 const2_rtx
, GEN_INT (3),
21223 const0_rtx
, const1_rtx
));
21224 t8
= gen_reg_rtx (V4DImode
);
21225 emit_insn (gen_avx2_permv4di_1 (t8
, gen_lowpart (V4DImode
, t3
),
21226 const2_rtx
, GEN_INT (3),
21227 const0_rtx
, const1_rtx
));
21228 emit_insn (gen_iorv32qi3 (t4
, t2
, gen_lowpart (V32QImode
, t7
)));
21229 emit_insn (gen_iorv32qi3 (t3
, t1
, gen_lowpart (V32QImode
, t8
)));
21235 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
21242 /* The XOP VPPERM insn supports three inputs. By ignoring the
21243 one_operand_shuffle special case, we avoid creating another
21244 set of constant vectors in memory. */
21245 one_operand_shuffle
= false;
21247 /* mask = mask & {2*w-1, ...} */
21248 vt
= GEN_INT (2*w
- 1);
21252 /* mask = mask & {w-1, ...} */
21253 vt
= GEN_INT (w
- 1);
21256 for (i
= 0; i
< w
; i
++)
21258 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21259 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21260 NULL_RTX
, 0, OPTAB_DIRECT
);
21262 /* For non-QImode operations, convert the word permutation control
21263 into a byte permutation control. */
21264 if (mode
!= V16QImode
)
21266 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
21267 GEN_INT (exact_log2 (e
)),
21268 NULL_RTX
, 0, OPTAB_DIRECT
);
21270 /* Convert mask to vector of chars. */
21271 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
21273 /* Replicate each of the input bytes into byte positions:
21274 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21275 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21276 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21277 for (i
= 0; i
< 16; ++i
)
21278 vec
[i
] = GEN_INT (i
/e
* e
);
21279 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21280 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21282 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
21284 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
21286 /* Convert it into the byte positions by doing
21287 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21288 for (i
= 0; i
< 16; ++i
)
21289 vec
[i
] = GEN_INT (i
% e
);
21290 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21291 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21292 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
21295 /* The actual shuffle operations all operate on V16QImode. */
21296 op0
= gen_lowpart (V16QImode
, op0
);
21297 op1
= gen_lowpart (V16QImode
, op1
);
21301 if (GET_MODE (target
) != V16QImode
)
21302 target
= gen_reg_rtx (V16QImode
);
21303 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
21304 if (target
!= operands
[0])
21305 emit_move_insn (operands
[0],
21306 gen_lowpart (GET_MODE (operands
[0]), target
));
21308 else if (one_operand_shuffle
)
21310 if (GET_MODE (target
) != V16QImode
)
21311 target
= gen_reg_rtx (V16QImode
);
21312 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
21313 if (target
!= operands
[0])
21314 emit_move_insn (operands
[0],
21315 gen_lowpart (GET_MODE (operands
[0]), target
));
21322 /* Shuffle the two input vectors independently. */
21323 t1
= gen_reg_rtx (V16QImode
);
21324 t2
= gen_reg_rtx (V16QImode
);
21325 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
21326 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21329 /* Then merge them together. The key is whether any given control
21330 element contained a bit set that indicates the second word. */
21331 mask
= operands
[3];
21333 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21335 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21336 more shuffle to convert the V2DI input mask into a V4SI
21337 input mask. At which point the masking that expand_int_vcond
21338 will work as desired. */
21339 rtx t3
= gen_reg_rtx (V4SImode
);
21340 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21341 const0_rtx
, const0_rtx
,
21342 const2_rtx
, const2_rtx
));
21344 maskmode
= V4SImode
;
21348 for (i
= 0; i
< w
; i
++)
21350 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21351 vt
= force_reg (maskmode
, vt
);
21352 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21353 NULL_RTX
, 0, OPTAB_DIRECT
);
21355 if (GET_MODE (target
) != mode
)
21356 target
= gen_reg_rtx (mode
);
21358 xops
[1] = gen_lowpart (mode
, t2
);
21359 xops
[2] = gen_lowpart (mode
, t1
);
21360 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21363 ok
= ix86_expand_int_vcond (xops
);
21365 if (target
!= operands
[0])
21366 emit_move_insn (operands
[0],
21367 gen_lowpart (GET_MODE (operands
[0]), target
));
21371 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21372 true if we should do zero extension, else sign extension. HIGH_P is
21373 true if we want the N/2 high elements, else the low elements. */
21376 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21378 enum machine_mode imode
= GET_MODE (src
);
21383 rtx (*unpack
)(rtx
, rtx
);
21384 rtx (*extract
)(rtx
, rtx
) = NULL
;
21385 enum machine_mode halfmode
= BLKmode
;
21391 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21393 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21394 halfmode
= V16QImode
;
21396 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21400 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21402 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21403 halfmode
= V8HImode
;
21405 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21409 unpack
= gen_avx2_zero_extendv4siv4di2
;
21411 unpack
= gen_avx2_sign_extendv4siv4di2
;
21412 halfmode
= V4SImode
;
21414 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21418 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21420 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21424 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21426 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21430 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21432 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21435 gcc_unreachable ();
21438 if (GET_MODE_SIZE (imode
) == 32)
21440 tmp
= gen_reg_rtx (halfmode
);
21441 emit_insn (extract (tmp
, src
));
21445 /* Shift higher 8 bytes to lower 8 bytes. */
21446 tmp
= gen_reg_rtx (V1TImode
);
21447 emit_insn (gen_sse2_lshrv1ti3 (tmp
, gen_lowpart (V1TImode
, src
),
21449 tmp
= gen_lowpart (imode
, tmp
);
21454 emit_insn (unpack (dest
, tmp
));
21458 rtx (*unpack
)(rtx
, rtx
, rtx
);
21464 unpack
= gen_vec_interleave_highv16qi
;
21466 unpack
= gen_vec_interleave_lowv16qi
;
21470 unpack
= gen_vec_interleave_highv8hi
;
21472 unpack
= gen_vec_interleave_lowv8hi
;
21476 unpack
= gen_vec_interleave_highv4si
;
21478 unpack
= gen_vec_interleave_lowv4si
;
21481 gcc_unreachable ();
21485 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21487 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21488 src
, pc_rtx
, pc_rtx
);
21490 rtx tmp2
= gen_reg_rtx (imode
);
21491 emit_insn (unpack (tmp2
, src
, tmp
));
21492 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), tmp2
));
21496 /* Expand conditional increment or decrement using adb/sbb instructions.
21497 The default case using setcc followed by the conditional move can be
21498 done by generic code. */
21500 ix86_expand_int_addcc (rtx operands
[])
21502 enum rtx_code code
= GET_CODE (operands
[1]);
21504 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21506 rtx val
= const0_rtx
;
21507 bool fpcmp
= false;
21508 enum machine_mode mode
;
21509 rtx op0
= XEXP (operands
[1], 0);
21510 rtx op1
= XEXP (operands
[1], 1);
21512 if (operands
[3] != const1_rtx
21513 && operands
[3] != constm1_rtx
)
21515 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21517 code
= GET_CODE (compare_op
);
21519 flags
= XEXP (compare_op
, 0);
21521 if (GET_MODE (flags
) == CCFPmode
21522 || GET_MODE (flags
) == CCFPUmode
)
21525 code
= ix86_fp_compare_code_to_integer (code
);
21532 PUT_CODE (compare_op
,
21533 reverse_condition_maybe_unordered
21534 (GET_CODE (compare_op
)));
21536 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21539 mode
= GET_MODE (operands
[0]);
21541 /* Construct either adc or sbb insn. */
21542 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21547 insn
= gen_subqi3_carry
;
21550 insn
= gen_subhi3_carry
;
21553 insn
= gen_subsi3_carry
;
21556 insn
= gen_subdi3_carry
;
21559 gcc_unreachable ();
21567 insn
= gen_addqi3_carry
;
21570 insn
= gen_addhi3_carry
;
21573 insn
= gen_addsi3_carry
;
21576 insn
= gen_adddi3_carry
;
21579 gcc_unreachable ();
21582 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21588 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21589 but works for floating pointer parameters and nonoffsetable memories.
21590 For pushes, it returns just stack offsets; the values will be saved
21591 in the right order. Maximally three parts are generated. */
21594 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21599 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21601 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21603 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21604 gcc_assert (size
>= 2 && size
<= 4);
21606 /* Optimize constant pool reference to immediates. This is used by fp
21607 moves, that force all constants to memory to allow combining. */
21608 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21610 rtx tmp
= maybe_get_pool_constant (operand
);
21615 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21617 /* The only non-offsetable memories we handle are pushes. */
21618 int ok
= push_operand (operand
, VOIDmode
);
21622 operand
= copy_rtx (operand
);
21623 PUT_MODE (operand
, word_mode
);
21624 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21628 if (GET_CODE (operand
) == CONST_VECTOR
)
21630 enum machine_mode imode
= int_mode_for_mode (mode
);
21631 /* Caution: if we looked through a constant pool memory above,
21632 the operand may actually have a different mode now. That's
21633 ok, since we want to pun this all the way back to an integer. */
21634 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21635 gcc_assert (operand
!= NULL
);
21641 if (mode
== DImode
)
21642 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21647 if (REG_P (operand
))
21649 gcc_assert (reload_completed
);
21650 for (i
= 0; i
< size
; i
++)
21651 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21653 else if (offsettable_memref_p (operand
))
21655 operand
= adjust_address (operand
, SImode
, 0);
21656 parts
[0] = operand
;
21657 for (i
= 1; i
< size
; i
++)
21658 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21660 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21665 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21669 real_to_target (l
, &r
, mode
);
21670 parts
[3] = gen_int_mode (l
[3], SImode
);
21671 parts
[2] = gen_int_mode (l
[2], SImode
);
21674 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21675 long double may not be 80-bit. */
21676 real_to_target (l
, &r
, mode
);
21677 parts
[2] = gen_int_mode (l
[2], SImode
);
21680 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21683 gcc_unreachable ();
21685 parts
[1] = gen_int_mode (l
[1], SImode
);
21686 parts
[0] = gen_int_mode (l
[0], SImode
);
21689 gcc_unreachable ();
21694 if (mode
== TImode
)
21695 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21696 if (mode
== XFmode
|| mode
== TFmode
)
21698 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21699 if (REG_P (operand
))
21701 gcc_assert (reload_completed
);
21702 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21703 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21705 else if (offsettable_memref_p (operand
))
21707 operand
= adjust_address (operand
, DImode
, 0);
21708 parts
[0] = operand
;
21709 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21711 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21716 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21717 real_to_target (l
, &r
, mode
);
21719 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21720 if (HOST_BITS_PER_WIDE_INT
>= 64)
21723 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21724 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21727 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21729 if (upper_mode
== SImode
)
21730 parts
[1] = gen_int_mode (l
[2], SImode
);
21731 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21734 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21735 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21738 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21741 gcc_unreachable ();
21748 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21749 Return false when normal moves are needed; true when all required
21750 insns have been emitted. Operands 2-4 contain the input values
21751 int the correct order; operands 5-7 contain the output values. */
21754 ix86_split_long_move (rtx operands
[])
21759 int collisions
= 0;
21760 enum machine_mode mode
= GET_MODE (operands
[0]);
21761 bool collisionparts
[4];
21763 /* The DFmode expanders may ask us to move double.
21764 For 64bit target this is single move. By hiding the fact
21765 here we simplify i386.md splitters. */
21766 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21768 /* Optimize constant pool reference to immediates. This is used by
21769 fp moves, that force all constants to memory to allow combining. */
21771 if (MEM_P (operands
[1])
21772 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21773 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21774 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21775 if (push_operand (operands
[0], VOIDmode
))
21777 operands
[0] = copy_rtx (operands
[0]);
21778 PUT_MODE (operands
[0], word_mode
);
21781 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21782 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21783 emit_move_insn (operands
[0], operands
[1]);
21787 /* The only non-offsettable memory we handle is push. */
21788 if (push_operand (operands
[0], VOIDmode
))
21791 gcc_assert (!MEM_P (operands
[0])
21792 || offsettable_memref_p (operands
[0]));
21794 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21795 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21797 /* When emitting push, take care for source operands on the stack. */
21798 if (push
&& MEM_P (operands
[1])
21799 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21801 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21803 /* Compensate for the stack decrement by 4. */
21804 if (!TARGET_64BIT
&& nparts
== 3
21805 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21806 src_base
= plus_constant (Pmode
, src_base
, 4);
21808 /* src_base refers to the stack pointer and is
21809 automatically decreased by emitted push. */
21810 for (i
= 0; i
< nparts
; i
++)
21811 part
[1][i
] = change_address (part
[1][i
],
21812 GET_MODE (part
[1][i
]), src_base
);
21815 /* We need to do copy in the right order in case an address register
21816 of the source overlaps the destination. */
21817 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21821 for (i
= 0; i
< nparts
; i
++)
21824 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21825 if (collisionparts
[i
])
21829 /* Collision in the middle part can be handled by reordering. */
21830 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21832 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21833 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21835 else if (collisions
== 1
21837 && (collisionparts
[1] || collisionparts
[2]))
21839 if (collisionparts
[1])
21841 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21842 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21846 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21847 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21851 /* If there are more collisions, we can't handle it by reordering.
21852 Do an lea to the last part and use only one colliding move. */
21853 else if (collisions
> 1)
21859 base
= part
[0][nparts
- 1];
21861 /* Handle the case when the last part isn't valid for lea.
21862 Happens in 64-bit mode storing the 12-byte XFmode. */
21863 if (GET_MODE (base
) != Pmode
)
21864 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21866 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21867 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21868 for (i
= 1; i
< nparts
; i
++)
21870 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21871 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21882 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21883 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21884 stack_pointer_rtx
, GEN_INT (-4)));
21885 emit_move_insn (part
[0][2], part
[1][2]);
21887 else if (nparts
== 4)
21889 emit_move_insn (part
[0][3], part
[1][3]);
21890 emit_move_insn (part
[0][2], part
[1][2]);
21895 /* In 64bit mode we don't have 32bit push available. In case this is
21896 register, it is OK - we will just use larger counterpart. We also
21897 retype memory - these comes from attempt to avoid REX prefix on
21898 moving of second half of TFmode value. */
21899 if (GET_MODE (part
[1][1]) == SImode
)
21901 switch (GET_CODE (part
[1][1]))
21904 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21908 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21912 gcc_unreachable ();
21915 if (GET_MODE (part
[1][0]) == SImode
)
21916 part
[1][0] = part
[1][1];
21919 emit_move_insn (part
[0][1], part
[1][1]);
21920 emit_move_insn (part
[0][0], part
[1][0]);
21924 /* Choose correct order to not overwrite the source before it is copied. */
21925 if ((REG_P (part
[0][0])
21926 && REG_P (part
[1][1])
21927 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21929 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21931 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21933 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21935 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21937 operands
[2 + i
] = part
[0][j
];
21938 operands
[6 + i
] = part
[1][j
];
21943 for (i
= 0; i
< nparts
; i
++)
21945 operands
[2 + i
] = part
[0][i
];
21946 operands
[6 + i
] = part
[1][i
];
21950 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21951 if (optimize_insn_for_size_p ())
21953 for (j
= 0; j
< nparts
- 1; j
++)
21954 if (CONST_INT_P (operands
[6 + j
])
21955 && operands
[6 + j
] != const0_rtx
21956 && REG_P (operands
[2 + j
]))
21957 for (i
= j
; i
< nparts
- 1; i
++)
21958 if (CONST_INT_P (operands
[7 + i
])
21959 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21960 operands
[7 + i
] = operands
[2 + j
];
21963 for (i
= 0; i
< nparts
; i
++)
21964 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21969 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21970 left shift by a constant, either using a single shift or
21971 a sequence of add instructions. */
21974 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21976 rtx (*insn
)(rtx
, rtx
, rtx
);
21979 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21980 && !optimize_insn_for_size_p ()))
21982 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21983 while (count
-- > 0)
21984 emit_insn (insn (operand
, operand
, operand
));
21988 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21989 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21994 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21996 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21997 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21998 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22000 rtx low
[2], high
[2];
22003 if (CONST_INT_P (operands
[2]))
22005 split_double_mode (mode
, operands
, 2, low
, high
);
22006 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22008 if (count
>= half_width
)
22010 emit_move_insn (high
[0], low
[1]);
22011 emit_move_insn (low
[0], const0_rtx
);
22013 if (count
> half_width
)
22014 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
22018 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22020 if (!rtx_equal_p (operands
[0], operands
[1]))
22021 emit_move_insn (operands
[0], operands
[1]);
22023 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
22024 ix86_expand_ashl_const (low
[0], count
, mode
);
22029 split_double_mode (mode
, operands
, 1, low
, high
);
22031 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
22033 if (operands
[1] == const1_rtx
)
22035 /* Assuming we've chosen a QImode capable registers, then 1 << N
22036 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22037 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
22039 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
22041 ix86_expand_clear (low
[0]);
22042 ix86_expand_clear (high
[0]);
22043 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
22045 d
= gen_lowpart (QImode
, low
[0]);
22046 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22047 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
22048 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22050 d
= gen_lowpart (QImode
, high
[0]);
22051 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22052 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
22053 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22056 /* Otherwise, we can get the same results by manually performing
22057 a bit extract operation on bit 5/6, and then performing the two
22058 shifts. The two methods of getting 0/1 into low/high are exactly
22059 the same size. Avoiding the shift in the bit extract case helps
22060 pentium4 a bit; no one else seems to care much either way. */
22063 enum machine_mode half_mode
;
22064 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
22065 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
22066 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
22067 HOST_WIDE_INT bits
;
22070 if (mode
== DImode
)
22072 half_mode
= SImode
;
22073 gen_lshr3
= gen_lshrsi3
;
22074 gen_and3
= gen_andsi3
;
22075 gen_xor3
= gen_xorsi3
;
22080 half_mode
= DImode
;
22081 gen_lshr3
= gen_lshrdi3
;
22082 gen_and3
= gen_anddi3
;
22083 gen_xor3
= gen_xordi3
;
22087 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
22088 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
22090 x
= gen_lowpart (half_mode
, operands
[2]);
22091 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
22093 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
22094 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
22095 emit_move_insn (low
[0], high
[0]);
22096 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
22099 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22100 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
22104 if (operands
[1] == constm1_rtx
)
22106 /* For -1 << N, we can avoid the shld instruction, because we
22107 know that we're shifting 0...31/63 ones into a -1. */
22108 emit_move_insn (low
[0], constm1_rtx
);
22109 if (optimize_insn_for_size_p ())
22110 emit_move_insn (high
[0], low
[0]);
22112 emit_move_insn (high
[0], constm1_rtx
);
22116 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22118 if (!rtx_equal_p (operands
[0], operands
[1]))
22119 emit_move_insn (operands
[0], operands
[1]);
22121 split_double_mode (mode
, operands
, 1, low
, high
);
22122 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
22125 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22127 if (TARGET_CMOVE
&& scratch
)
22129 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22130 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22132 ix86_expand_clear (scratch
);
22133 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
22137 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22138 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22140 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
22145 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22147 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
22148 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
22149 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22150 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22152 rtx low
[2], high
[2];
22155 if (CONST_INT_P (operands
[2]))
22157 split_double_mode (mode
, operands
, 2, low
, high
);
22158 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22160 if (count
== GET_MODE_BITSIZE (mode
) - 1)
22162 emit_move_insn (high
[0], high
[1]);
22163 emit_insn (gen_ashr3 (high
[0], high
[0],
22164 GEN_INT (half_width
- 1)));
22165 emit_move_insn (low
[0], high
[0]);
22168 else if (count
>= half_width
)
22170 emit_move_insn (low
[0], high
[1]);
22171 emit_move_insn (high
[0], low
[0]);
22172 emit_insn (gen_ashr3 (high
[0], high
[0],
22173 GEN_INT (half_width
- 1)));
22175 if (count
> half_width
)
22176 emit_insn (gen_ashr3 (low
[0], low
[0],
22177 GEN_INT (count
- half_width
)));
22181 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22183 if (!rtx_equal_p (operands
[0], operands
[1]))
22184 emit_move_insn (operands
[0], operands
[1]);
22186 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22187 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
22192 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22194 if (!rtx_equal_p (operands
[0], operands
[1]))
22195 emit_move_insn (operands
[0], operands
[1]);
22197 split_double_mode (mode
, operands
, 1, low
, high
);
22199 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22200 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
22202 if (TARGET_CMOVE
&& scratch
)
22204 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22205 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22207 emit_move_insn (scratch
, high
[0]);
22208 emit_insn (gen_ashr3 (scratch
, scratch
,
22209 GEN_INT (half_width
- 1)));
22210 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22215 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
22216 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
22218 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
22224 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22226 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
22227 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
22228 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22229 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22231 rtx low
[2], high
[2];
22234 if (CONST_INT_P (operands
[2]))
22236 split_double_mode (mode
, operands
, 2, low
, high
);
22237 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22239 if (count
>= half_width
)
22241 emit_move_insn (low
[0], high
[1]);
22242 ix86_expand_clear (high
[0]);
22244 if (count
> half_width
)
22245 emit_insn (gen_lshr3 (low
[0], low
[0],
22246 GEN_INT (count
- half_width
)));
22250 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22252 if (!rtx_equal_p (operands
[0], operands
[1]))
22253 emit_move_insn (operands
[0], operands
[1]);
22255 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22256 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
22261 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22263 if (!rtx_equal_p (operands
[0], operands
[1]))
22264 emit_move_insn (operands
[0], operands
[1]);
22266 split_double_mode (mode
, operands
, 1, low
, high
);
22268 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22269 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
22271 if (TARGET_CMOVE
&& scratch
)
22273 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22274 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22276 ix86_expand_clear (scratch
);
22277 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22282 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22283 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22285 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
22290 /* Predict just emitted jump instruction to be taken with probability PROB. */
22292 predict_jump (int prob
)
22294 rtx insn
= get_last_insn ();
22295 gcc_assert (JUMP_P (insn
));
22296 add_int_reg_note (insn
, REG_BR_PROB
, prob
);
22299 /* Helper function for the string operations below. Dest VARIABLE whether
22300 it is aligned to VALUE bytes. If true, jump to the label. */
22302 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
22304 rtx label
= gen_label_rtx ();
22305 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
22306 if (GET_MODE (variable
) == DImode
)
22307 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
22309 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
22310 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
22313 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
22315 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22319 /* Adjust COUNTER by the VALUE. */
22321 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
22323 rtx (*gen_add
)(rtx
, rtx
, rtx
)
22324 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
22326 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
22329 /* Zero extend possibly SImode EXP to Pmode register. */
22331 ix86_zero_extend_to_Pmode (rtx exp
)
22333 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22336 /* Divide COUNTREG by SCALE. */
22338 scale_counter (rtx countreg
, int scale
)
22344 if (CONST_INT_P (countreg
))
22345 return GEN_INT (INTVAL (countreg
) / scale
);
22346 gcc_assert (REG_P (countreg
));
22348 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22349 GEN_INT (exact_log2 (scale
)),
22350 NULL
, 1, OPTAB_DIRECT
);
22354 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22355 DImode for constant loop counts. */
22357 static enum machine_mode
22358 counter_mode (rtx count_exp
)
22360 if (GET_MODE (count_exp
) != VOIDmode
)
22361 return GET_MODE (count_exp
);
22362 if (!CONST_INT_P (count_exp
))
22364 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22369 /* Copy the address to a Pmode register. This is used for x32 to
22370 truncate DImode TLS address to a SImode register. */
22373 ix86_copy_addr_to_reg (rtx addr
)
22375 if (GET_MODE (addr
) == Pmode
)
22376 return copy_addr_to_reg (addr
);
22379 gcc_assert (GET_MODE (addr
) == DImode
&& Pmode
== SImode
);
22380 return gen_rtx_SUBREG (SImode
, copy_to_mode_reg (DImode
, addr
), 0);
22384 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
22385 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
22386 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
22387 memory by VALUE (supposed to be in MODE).
22389 The size is rounded down to whole number of chunk size moved at once.
22390 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22394 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22395 rtx destptr
, rtx srcptr
, rtx value
,
22396 rtx count
, enum machine_mode mode
, int unroll
,
22397 int expected_size
, bool issetmem
)
22399 rtx out_label
, top_label
, iter
, tmp
;
22400 enum machine_mode iter_mode
= counter_mode (count
);
22401 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22402 rtx piece_size
= GEN_INT (piece_size_n
);
22403 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22407 top_label
= gen_label_rtx ();
22408 out_label
= gen_label_rtx ();
22409 iter
= gen_reg_rtx (iter_mode
);
22411 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22412 NULL
, 1, OPTAB_DIRECT
);
22413 /* Those two should combine. */
22414 if (piece_size
== const1_rtx
)
22416 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22418 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22420 emit_move_insn (iter
, const0_rtx
);
22422 emit_label (top_label
);
22424 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22426 /* This assert could be relaxed - in this case we'll need to compute
22427 smallest power of two, containing in PIECE_SIZE_N and pass it to
22429 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22430 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22431 destmem
= adjust_address (destmem
, mode
, 0);
22435 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22436 srcmem
= adjust_address (srcmem
, mode
, 0);
22438 /* When unrolling for chips that reorder memory reads and writes,
22439 we can save registers by using single temporary.
22440 Also using 4 temporaries is overkill in 32bit mode. */
22441 if (!TARGET_64BIT
&& 0)
22443 for (i
= 0; i
< unroll
; i
++)
22448 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22450 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22452 emit_move_insn (destmem
, srcmem
);
22458 gcc_assert (unroll
<= 4);
22459 for (i
= 0; i
< unroll
; i
++)
22461 tmpreg
[i
] = gen_reg_rtx (mode
);
22465 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22467 emit_move_insn (tmpreg
[i
], srcmem
);
22469 for (i
= 0; i
< unroll
; i
++)
22474 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22476 emit_move_insn (destmem
, tmpreg
[i
]);
22481 for (i
= 0; i
< unroll
; i
++)
22485 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22486 emit_move_insn (destmem
, value
);
22489 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22490 true, OPTAB_LIB_WIDEN
);
22492 emit_move_insn (iter
, tmp
);
22494 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22496 if (expected_size
!= -1)
22498 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22499 if (expected_size
== 0)
22501 else if (expected_size
> REG_BR_PROB_BASE
)
22502 predict_jump (REG_BR_PROB_BASE
- 1);
22504 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22507 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22508 iter
= ix86_zero_extend_to_Pmode (iter
);
22509 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22510 true, OPTAB_LIB_WIDEN
);
22511 if (tmp
!= destptr
)
22512 emit_move_insn (destptr
, tmp
);
22515 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22516 true, OPTAB_LIB_WIDEN
);
22518 emit_move_insn (srcptr
, tmp
);
22520 emit_label (out_label
);
22523 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22524 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22525 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22526 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
22527 ORIG_VALUE is the original value passed to memset to fill the memory with.
22528 Other arguments have same meaning as for previous function. */
22531 expand_set_or_movmem_via_rep (rtx destmem
, rtx srcmem
,
22532 rtx destptr
, rtx srcptr
, rtx value
, rtx orig_value
,
22534 enum machine_mode mode
, bool issetmem
)
22539 HOST_WIDE_INT rounded_count
;
22541 /* If possible, it is shorter to use rep movs.
22542 TODO: Maybe it is better to move this logic to decide_alg. */
22543 if (mode
== QImode
&& CONST_INT_P (count
) && !(INTVAL (count
) & 3)
22544 && (!issetmem
|| orig_value
== const0_rtx
))
22547 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22548 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22550 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
,
22551 GET_MODE_SIZE (mode
)));
22552 if (mode
!= QImode
)
22554 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22555 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22556 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22559 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22560 if ((!issetmem
|| orig_value
== const0_rtx
) && CONST_INT_P (count
))
22562 rounded_count
= (INTVAL (count
)
22563 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22564 destmem
= shallow_copy_rtx (destmem
);
22565 set_mem_size (destmem
, rounded_count
);
22567 else if (MEM_SIZE_KNOWN_P (destmem
))
22568 clear_mem_size (destmem
);
22572 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22573 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22577 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22578 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22579 if (mode
!= QImode
)
22581 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22582 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22583 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22586 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22587 if (CONST_INT_P (count
))
22589 rounded_count
= (INTVAL (count
)
22590 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22591 srcmem
= shallow_copy_rtx (srcmem
);
22592 set_mem_size (srcmem
, rounded_count
);
22596 if (MEM_SIZE_KNOWN_P (srcmem
))
22597 clear_mem_size (srcmem
);
22599 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22604 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22606 SRC is passed by pointer to be updated on return.
22607 Return value is updated DST. */
22609 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22610 HOST_WIDE_INT size_to_move
)
22612 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22613 enum insn_code code
;
22614 enum machine_mode move_mode
;
22617 /* Find the widest mode in which we could perform moves.
22618 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22619 it until move of such size is supported. */
22620 piece_size
= 1 << floor_log2 (size_to_move
);
22621 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22622 code
= optab_handler (mov_optab
, move_mode
);
22623 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22626 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22627 code
= optab_handler (mov_optab
, move_mode
);
22630 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22631 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22632 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22634 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22635 move_mode
= mode_for_vector (word_mode
, nunits
);
22636 code
= optab_handler (mov_optab
, move_mode
);
22637 if (code
== CODE_FOR_nothing
)
22639 move_mode
= word_mode
;
22640 piece_size
= GET_MODE_SIZE (move_mode
);
22641 code
= optab_handler (mov_optab
, move_mode
);
22644 gcc_assert (code
!= CODE_FOR_nothing
);
22646 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22647 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22649 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22650 gcc_assert (size_to_move
% piece_size
== 0);
22651 adjust
= GEN_INT (piece_size
);
22652 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22654 /* We move from memory to memory, so we'll need to do it via
22655 a temporary register. */
22656 tempreg
= gen_reg_rtx (move_mode
);
22657 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22658 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22660 emit_move_insn (destptr
,
22661 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22662 emit_move_insn (srcptr
,
22663 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22665 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22667 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22671 /* Update DST and SRC rtx. */
22676 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22678 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22679 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22682 if (CONST_INT_P (count
))
22684 HOST_WIDE_INT countval
= INTVAL (count
);
22685 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22688 /* For now MAX_SIZE should be a power of 2. This assert could be
22689 relaxed, but it'll require a bit more complicated epilogue
22691 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22692 for (i
= max_size
; i
>= 1; i
>>= 1)
22694 if (epilogue_size
& i
)
22695 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22701 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22702 count
, 1, OPTAB_DIRECT
);
22703 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22704 count
, QImode
, 1, 4, false);
22708 /* When there are stringops, we can cheaply increase dest and src pointers.
22709 Otherwise we save code size by maintaining offset (zero is readily
22710 available from preceding rep operation) and using x86 addressing modes.
22712 if (TARGET_SINGLE_STRINGOP
)
22716 rtx label
= ix86_expand_aligntest (count
, 4, true);
22717 src
= change_address (srcmem
, SImode
, srcptr
);
22718 dest
= change_address (destmem
, SImode
, destptr
);
22719 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22720 emit_label (label
);
22721 LABEL_NUSES (label
) = 1;
22725 rtx label
= ix86_expand_aligntest (count
, 2, true);
22726 src
= change_address (srcmem
, HImode
, srcptr
);
22727 dest
= change_address (destmem
, HImode
, destptr
);
22728 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22729 emit_label (label
);
22730 LABEL_NUSES (label
) = 1;
22734 rtx label
= ix86_expand_aligntest (count
, 1, true);
22735 src
= change_address (srcmem
, QImode
, srcptr
);
22736 dest
= change_address (destmem
, QImode
, destptr
);
22737 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22738 emit_label (label
);
22739 LABEL_NUSES (label
) = 1;
22744 rtx offset
= force_reg (Pmode
, const0_rtx
);
22749 rtx label
= ix86_expand_aligntest (count
, 4, true);
22750 src
= change_address (srcmem
, SImode
, srcptr
);
22751 dest
= change_address (destmem
, SImode
, destptr
);
22752 emit_move_insn (dest
, src
);
22753 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22754 true, OPTAB_LIB_WIDEN
);
22756 emit_move_insn (offset
, tmp
);
22757 emit_label (label
);
22758 LABEL_NUSES (label
) = 1;
22762 rtx label
= ix86_expand_aligntest (count
, 2, true);
22763 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22764 src
= change_address (srcmem
, HImode
, tmp
);
22765 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22766 dest
= change_address (destmem
, HImode
, tmp
);
22767 emit_move_insn (dest
, src
);
22768 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22769 true, OPTAB_LIB_WIDEN
);
22771 emit_move_insn (offset
, tmp
);
22772 emit_label (label
);
22773 LABEL_NUSES (label
) = 1;
22777 rtx label
= ix86_expand_aligntest (count
, 1, true);
22778 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22779 src
= change_address (srcmem
, QImode
, tmp
);
22780 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22781 dest
= change_address (destmem
, QImode
, tmp
);
22782 emit_move_insn (dest
, src
);
22783 emit_label (label
);
22784 LABEL_NUSES (label
) = 1;
22789 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
22790 with value PROMOTED_VAL.
22791 SRC is passed by pointer to be updated on return.
22792 Return value is updated DST. */
22794 emit_memset (rtx destmem
, rtx destptr
, rtx promoted_val
,
22795 HOST_WIDE_INT size_to_move
)
22797 rtx dst
= destmem
, adjust
;
22798 enum insn_code code
;
22799 enum machine_mode move_mode
;
22802 /* Find the widest mode in which we could perform moves.
22803 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22804 it until move of such size is supported. */
22805 move_mode
= GET_MODE (promoted_val
);
22806 if (move_mode
== VOIDmode
)
22807 move_mode
= QImode
;
22808 if (size_to_move
< GET_MODE_SIZE (move_mode
))
22810 move_mode
= mode_for_size (size_to_move
* BITS_PER_UNIT
, MODE_INT
, 0);
22811 promoted_val
= gen_lowpart (move_mode
, promoted_val
);
22813 piece_size
= GET_MODE_SIZE (move_mode
);
22814 code
= optab_handler (mov_optab
, move_mode
);
22815 gcc_assert (code
!= CODE_FOR_nothing
&& promoted_val
!= NULL_RTX
);
22817 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22819 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22820 gcc_assert (size_to_move
% piece_size
== 0);
22821 adjust
= GEN_INT (piece_size
);
22822 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22824 if (piece_size
<= GET_MODE_SIZE (word_mode
))
22826 emit_insn (gen_strset (destptr
, dst
, promoted_val
));
22830 emit_insn (GEN_FCN (code
) (dst
, promoted_val
));
22832 emit_move_insn (destptr
,
22833 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22835 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22839 /* Update DST rtx. */
22842 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22844 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22845 rtx count
, int max_size
)
22848 expand_simple_binop (counter_mode (count
), AND
, count
,
22849 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22850 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22851 gen_lowpart (QImode
, value
), count
, QImode
,
22852 1, max_size
/ 2, true);
22855 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22857 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx vec_value
,
22858 rtx count
, int max_size
)
22862 if (CONST_INT_P (count
))
22864 HOST_WIDE_INT countval
= INTVAL (count
);
22865 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22868 /* For now MAX_SIZE should be a power of 2. This assert could be
22869 relaxed, but it'll require a bit more complicated epilogue
22871 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22872 for (i
= max_size
; i
>= 1; i
>>= 1)
22874 if (epilogue_size
& i
)
22876 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
22877 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
22879 destmem
= emit_memset (destmem
, destptr
, value
, i
);
22886 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22891 rtx label
= ix86_expand_aligntest (count
, 16, true);
22894 dest
= change_address (destmem
, DImode
, destptr
);
22895 emit_insn (gen_strset (destptr
, dest
, value
));
22896 emit_insn (gen_strset (destptr
, dest
, value
));
22900 dest
= change_address (destmem
, SImode
, destptr
);
22901 emit_insn (gen_strset (destptr
, dest
, value
));
22902 emit_insn (gen_strset (destptr
, dest
, value
));
22903 emit_insn (gen_strset (destptr
, dest
, value
));
22904 emit_insn (gen_strset (destptr
, dest
, value
));
22906 emit_label (label
);
22907 LABEL_NUSES (label
) = 1;
22911 rtx label
= ix86_expand_aligntest (count
, 8, true);
22914 dest
= change_address (destmem
, DImode
, destptr
);
22915 emit_insn (gen_strset (destptr
, dest
, value
));
22919 dest
= change_address (destmem
, SImode
, destptr
);
22920 emit_insn (gen_strset (destptr
, dest
, value
));
22921 emit_insn (gen_strset (destptr
, dest
, value
));
22923 emit_label (label
);
22924 LABEL_NUSES (label
) = 1;
22928 rtx label
= ix86_expand_aligntest (count
, 4, true);
22929 dest
= change_address (destmem
, SImode
, destptr
);
22930 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22931 emit_label (label
);
22932 LABEL_NUSES (label
) = 1;
22936 rtx label
= ix86_expand_aligntest (count
, 2, true);
22937 dest
= change_address (destmem
, HImode
, destptr
);
22938 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22939 emit_label (label
);
22940 LABEL_NUSES (label
) = 1;
22944 rtx label
= ix86_expand_aligntest (count
, 1, true);
22945 dest
= change_address (destmem
, QImode
, destptr
);
22946 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22947 emit_label (label
);
22948 LABEL_NUSES (label
) = 1;
22952 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
22953 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
22954 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
22956 Return value is updated DESTMEM. */
22958 expand_set_or_movmem_prologue (rtx destmem
, rtx srcmem
,
22959 rtx destptr
, rtx srcptr
, rtx value
,
22960 rtx vec_value
, rtx count
, int align
,
22961 int desired_alignment
, bool issetmem
)
22964 for (i
= 1; i
< desired_alignment
; i
<<= 1)
22968 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
22971 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
22972 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
22974 destmem
= emit_memset (destmem
, destptr
, value
, i
);
22977 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22978 ix86_adjust_counter (count
, i
);
22979 emit_label (label
);
22980 LABEL_NUSES (label
) = 1;
22981 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
22987 /* Test if COUNT&SIZE is nonzero and if so, expand movme
22988 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
22989 and jump to DONE_LABEL. */
22991 expand_small_movmem_or_setmem (rtx destmem
, rtx srcmem
,
22992 rtx destptr
, rtx srcptr
,
22993 rtx value
, rtx vec_value
,
22994 rtx count
, int size
,
22995 rtx done_label
, bool issetmem
)
22997 rtx label
= ix86_expand_aligntest (count
, size
, false);
22998 enum machine_mode mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 1);
23002 /* If we do not have vector value to copy, we must reduce size. */
23007 if (GET_MODE (value
) == VOIDmode
&& size
> 8)
23009 else if (GET_MODE_SIZE (mode
) > GET_MODE_SIZE (GET_MODE (value
)))
23010 mode
= GET_MODE (value
);
23013 mode
= GET_MODE (vec_value
), value
= vec_value
;
23017 /* Choose appropriate vector mode. */
23019 mode
= TARGET_AVX
? V32QImode
: TARGET_SSE
? V16QImode
: DImode
;
23020 else if (size
>= 16)
23021 mode
= TARGET_SSE
? V16QImode
: DImode
;
23022 srcmem
= change_address (srcmem
, mode
, srcptr
);
23024 destmem
= change_address (destmem
, mode
, destptr
);
23025 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23026 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23027 for (n
= 0; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23030 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23033 emit_move_insn (destmem
, srcmem
);
23034 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23036 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23039 destmem
= offset_address (destmem
, count
, 1);
23040 destmem
= offset_address (destmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23041 GET_MODE_SIZE (mode
));
23043 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23046 srcmem
= offset_address (srcmem
, count
, 1);
23047 srcmem
= offset_address (srcmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23048 GET_MODE_SIZE (mode
));
23049 emit_move_insn (destmem
, srcmem
);
23051 emit_jump_insn (gen_jump (done_label
));
23054 emit_label (label
);
23055 LABEL_NUSES (label
) = 1;
23058 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
23059 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
23060 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
23061 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
23062 DONE_LABEL is a label after the whole copying sequence. The label is created
23063 on demand if *DONE_LABEL is NULL.
23064 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23065 bounds after the initial copies.
23067 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
23068 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
23069 we will dispatch to a library call for large blocks.
23071 In pseudocode we do:
23075 Assume that SIZE is 4. Bigger sizes are handled analogously
23078 copy 4 bytes from SRCPTR to DESTPTR
23079 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23084 copy 1 byte from SRCPTR to DESTPTR
23087 copy 2 bytes from SRCPTR to DESTPTR
23088 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23093 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23094 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23096 OLD_DESPTR = DESTPTR;
23097 Align DESTPTR up to DESIRED_ALIGN
23098 SRCPTR += DESTPTR - OLD_DESTPTR
23099 COUNT -= DEST_PTR - OLD_DESTPTR
23101 Round COUNT down to multiple of SIZE
23102 << optional caller supplied zero size guard is here >>
23103 << optional caller suppplied dynamic check is here >>
23104 << caller supplied main copy loop is here >>
23109 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem
, rtx srcmem
,
23110 rtx
*destptr
, rtx
*srcptr
,
23111 enum machine_mode mode
,
23112 rtx value
, rtx vec_value
,
23118 unsigned HOST_WIDE_INT
*min_size
,
23119 bool dynamic_check
,
23122 rtx loop_label
= NULL
, label
;
23125 int prolog_size
= 0;
23128 /* Chose proper value to copy. */
23129 if (issetmem
&& VECTOR_MODE_P (mode
))
23130 mode_value
= vec_value
;
23132 mode_value
= value
;
23133 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23135 /* See if block is big or small, handle small blocks. */
23136 if (!CONST_INT_P (*count
) && *min_size
< (unsigned HOST_WIDE_INT
)size
)
23139 loop_label
= gen_label_rtx ();
23142 *done_label
= gen_label_rtx ();
23144 emit_cmp_and_jump_insns (*count
, GEN_INT (size2
), GE
, 0, GET_MODE (*count
),
23148 /* Handle sizes > 3. */
23149 for (;size2
> 2; size2
>>= 1)
23150 expand_small_movmem_or_setmem (destmem
, srcmem
,
23154 size2
, *done_label
, issetmem
);
23155 /* Nothing to copy? Jump to DONE_LABEL if so */
23156 emit_cmp_and_jump_insns (*count
, const0_rtx
, EQ
, 0, GET_MODE (*count
),
23159 /* Do a byte copy. */
23160 destmem
= change_address (destmem
, QImode
, *destptr
);
23162 emit_move_insn (destmem
, gen_lowpart (QImode
, value
));
23165 srcmem
= change_address (srcmem
, QImode
, *srcptr
);
23166 emit_move_insn (destmem
, srcmem
);
23169 /* Handle sizes 2 and 3. */
23170 label
= ix86_expand_aligntest (*count
, 2, false);
23171 destmem
= change_address (destmem
, HImode
, *destptr
);
23172 destmem
= offset_address (destmem
, *count
, 1);
23173 destmem
= offset_address (destmem
, GEN_INT (-2), 2);
23175 emit_move_insn (destmem
, gen_lowpart (HImode
, value
));
23178 srcmem
= change_address (srcmem
, HImode
, *srcptr
);
23179 srcmem
= offset_address (srcmem
, *count
, 1);
23180 srcmem
= offset_address (srcmem
, GEN_INT (-2), 2);
23181 emit_move_insn (destmem
, srcmem
);
23184 emit_label (label
);
23185 LABEL_NUSES (label
) = 1;
23186 emit_jump_insn (gen_jump (*done_label
));
23190 gcc_assert (*min_size
>= (unsigned HOST_WIDE_INT
)size
23191 || UINTVAL (*count
) >= (unsigned HOST_WIDE_INT
)size
);
23193 /* Start memcpy for COUNT >= SIZE. */
23196 emit_label (loop_label
);
23197 LABEL_NUSES (loop_label
) = 1;
23200 /* Copy first desired_align bytes. */
23202 srcmem
= change_address (srcmem
, mode
, *srcptr
);
23203 destmem
= change_address (destmem
, mode
, *destptr
);
23204 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23205 for (n
= 0; prolog_size
< desired_align
- align
; n
++)
23208 emit_move_insn (destmem
, mode_value
);
23211 emit_move_insn (destmem
, srcmem
);
23212 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23214 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23215 prolog_size
+= GET_MODE_SIZE (mode
);
23219 /* Copy last SIZE bytes. */
23220 destmem
= offset_address (destmem
, *count
, 1);
23221 destmem
= offset_address (destmem
,
23222 GEN_INT (-size
- prolog_size
),
23225 emit_move_insn (destmem
, mode_value
);
23228 srcmem
= offset_address (srcmem
, *count
, 1);
23229 srcmem
= offset_address (srcmem
,
23230 GEN_INT (-size
- prolog_size
),
23232 emit_move_insn (destmem
, srcmem
);
23234 for (n
= 1; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23236 destmem
= offset_address (destmem
, modesize
, 1);
23238 emit_move_insn (destmem
, mode_value
);
23241 srcmem
= offset_address (srcmem
, modesize
, 1);
23242 emit_move_insn (destmem
, srcmem
);
23246 /* Align destination. */
23247 if (desired_align
> 1 && desired_align
> align
)
23249 rtx saveddest
= *destptr
;
23251 gcc_assert (desired_align
<= size
);
23252 /* Align destptr up, place it to new register. */
23253 *destptr
= expand_simple_binop (GET_MODE (*destptr
), PLUS
, *destptr
,
23254 GEN_INT (prolog_size
),
23255 NULL_RTX
, 1, OPTAB_DIRECT
);
23256 *destptr
= expand_simple_binop (GET_MODE (*destptr
), AND
, *destptr
,
23257 GEN_INT (-desired_align
),
23258 *destptr
, 1, OPTAB_DIRECT
);
23259 /* See how many bytes we skipped. */
23260 saveddest
= expand_simple_binop (GET_MODE (*destptr
), MINUS
, saveddest
,
23262 saveddest
, 1, OPTAB_DIRECT
);
23263 /* Adjust srcptr and count. */
23265 *srcptr
= expand_simple_binop (GET_MODE (*srcptr
), MINUS
, *srcptr
, saveddest
,
23266 *srcptr
, 1, OPTAB_DIRECT
);
23267 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23268 saveddest
, *count
, 1, OPTAB_DIRECT
);
23269 /* We copied at most size + prolog_size. */
23270 if (*min_size
> (unsigned HOST_WIDE_INT
)(size
+ prolog_size
))
23271 *min_size
= (*min_size
- size
) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23275 /* Our loops always round down the bock size, but for dispatch to library
23276 we need precise value. */
23278 *count
= expand_simple_binop (GET_MODE (*count
), AND
, *count
,
23279 GEN_INT (-size
), *count
, 1, OPTAB_DIRECT
);
23283 gcc_assert (prolog_size
== 0);
23284 /* Decrease count, so we won't end up copying last word twice. */
23285 if (!CONST_INT_P (*count
))
23286 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23287 constm1_rtx
, *count
, 1, OPTAB_DIRECT
);
23289 *count
= GEN_INT ((UINTVAL (*count
) - 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1));
23291 *min_size
= (*min_size
- 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23296 /* This function is like the previous one, except here we know how many bytes
23297 need to be copied. That allows us to update alignment not only of DST, which
23298 is returned, but also of SRC, which is passed as a pointer for that
23301 expand_set_or_movmem_constant_prologue (rtx dst
, rtx
*srcp
, rtx destreg
,
23302 rtx srcreg
, rtx value
, rtx vec_value
,
23303 int desired_align
, int align_bytes
,
23307 rtx orig_dst
= dst
;
23308 rtx orig_src
= NULL
;
23309 int piece_size
= 1;
23310 int copied_bytes
= 0;
23314 gcc_assert (srcp
!= NULL
);
23319 for (piece_size
= 1;
23320 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
23323 if (align_bytes
& piece_size
)
23327 if (vec_value
&& piece_size
> GET_MODE_SIZE (GET_MODE (value
)))
23328 dst
= emit_memset (dst
, destreg
, vec_value
, piece_size
);
23330 dst
= emit_memset (dst
, destreg
, value
, piece_size
);
23333 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
23334 copied_bytes
+= piece_size
;
23337 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
23338 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23339 if (MEM_SIZE_KNOWN_P (orig_dst
))
23340 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
23344 int src_align_bytes
= get_mem_align_offset (src
, desired_align
23346 if (src_align_bytes
>= 0)
23347 src_align_bytes
= desired_align
- src_align_bytes
;
23348 if (src_align_bytes
>= 0)
23350 unsigned int src_align
;
23351 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
23353 if ((src_align_bytes
& (src_align
- 1))
23354 == (align_bytes
& (src_align
- 1)))
23357 if (src_align
> (unsigned int) desired_align
)
23358 src_align
= desired_align
;
23359 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
23360 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
23362 if (MEM_SIZE_KNOWN_P (orig_src
))
23363 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
23370 /* Return true if ALG can be used in current context.
23371 Assume we expand memset if MEMSET is true. */
23373 alg_usable_p (enum stringop_alg alg
, bool memset
)
23375 if (alg
== no_stringop
)
23377 if (alg
== vector_loop
)
23378 return TARGET_SSE
|| TARGET_AVX
;
23379 /* Algorithms using the rep prefix want at least edi and ecx;
23380 additionally, memset wants eax and memcpy wants esi. Don't
23381 consider such algorithms if the user has appropriated those
23382 registers for their own purposes. */
23383 if (alg
== rep_prefix_1_byte
23384 || alg
== rep_prefix_4_byte
23385 || alg
== rep_prefix_8_byte
)
23386 return !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
23387 || (memset
? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
23391 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23392 static enum stringop_alg
23393 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
,
23394 unsigned HOST_WIDE_INT min_size
, unsigned HOST_WIDE_INT max_size
,
23395 bool memset
, bool zero_memset
, int *dynamic_check
, bool *noalign
)
23397 const struct stringop_algs
* algs
;
23398 bool optimize_for_speed
;
23400 const struct processor_costs
*cost
;
23402 bool any_alg_usable_p
= false;
23405 *dynamic_check
= -1;
23407 /* Even if the string operation call is cold, we still might spend a lot
23408 of time processing large blocks. */
23409 if (optimize_function_for_size_p (cfun
)
23410 || (optimize_insn_for_size_p ()
23412 || (expected_size
!= -1 && expected_size
< 256))))
23413 optimize_for_speed
= false;
23415 optimize_for_speed
= true;
23417 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
23419 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
23421 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
23423 /* See maximal size for user defined algorithm. */
23424 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23426 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23427 bool usable
= alg_usable_p (candidate
, memset
);
23428 any_alg_usable_p
|= usable
;
23430 if (candidate
!= libcall
&& candidate
&& usable
)
23431 max
= algs
->size
[i
].max
;
23434 /* If expected size is not known but max size is small enough
23435 so inline version is a win, set expected size into
23437 if (max
> 1 && (unsigned HOST_WIDE_INT
)max
>= max_size
&& expected_size
== -1)
23438 expected_size
= min_size
/ 2 + max_size
/ 2;
23440 /* If user specified the algorithm, honnor it if possible. */
23441 if (ix86_stringop_alg
!= no_stringop
23442 && alg_usable_p (ix86_stringop_alg
, memset
))
23443 return ix86_stringop_alg
;
23444 /* rep; movq or rep; movl is the smallest variant. */
23445 else if (!optimize_for_speed
)
23448 if (!count
|| (count
& 3) || (memset
&& !zero_memset
))
23449 return alg_usable_p (rep_prefix_1_byte
, memset
)
23450 ? rep_prefix_1_byte
: loop_1_byte
;
23452 return alg_usable_p (rep_prefix_4_byte
, memset
)
23453 ? rep_prefix_4_byte
: loop
;
23455 /* Very tiny blocks are best handled via the loop, REP is expensive to
23457 else if (expected_size
!= -1 && expected_size
< 4)
23458 return loop_1_byte
;
23459 else if (expected_size
!= -1)
23461 enum stringop_alg alg
= libcall
;
23462 bool alg_noalign
= false;
23463 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23465 /* We get here if the algorithms that were not libcall-based
23466 were rep-prefix based and we are unable to use rep prefixes
23467 based on global register usage. Break out of the loop and
23468 use the heuristic below. */
23469 if (algs
->size
[i
].max
== 0)
23471 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
23473 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23475 if (candidate
!= libcall
&& alg_usable_p (candidate
, memset
))
23478 alg_noalign
= algs
->size
[i
].noalign
;
23480 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23481 last non-libcall inline algorithm. */
23482 if (TARGET_INLINE_ALL_STRINGOPS
)
23484 /* When the current size is best to be copied by a libcall,
23485 but we are still forced to inline, run the heuristic below
23486 that will pick code for medium sized blocks. */
23487 if (alg
!= libcall
)
23489 *noalign
= alg_noalign
;
23494 else if (alg_usable_p (candidate
, memset
))
23496 *noalign
= algs
->size
[i
].noalign
;
23502 /* When asked to inline the call anyway, try to pick meaningful choice.
23503 We look for maximal size of block that is faster to copy by hand and
23504 take blocks of at most of that size guessing that average size will
23505 be roughly half of the block.
23507 If this turns out to be bad, we might simply specify the preferred
23508 choice in ix86_costs. */
23509 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23510 && (algs
->unknown_size
== libcall
23511 || !alg_usable_p (algs
->unknown_size
, memset
)))
23513 enum stringop_alg alg
;
23515 /* If there aren't any usable algorithms, then recursing on
23516 smaller sizes isn't going to find anything. Just return the
23517 simple byte-at-a-time copy loop. */
23518 if (!any_alg_usable_p
)
23520 /* Pick something reasonable. */
23521 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23522 *dynamic_check
= 128;
23523 return loop_1_byte
;
23527 alg
= decide_alg (count
, max
/ 2, min_size
, max_size
, memset
,
23528 zero_memset
, dynamic_check
, noalign
);
23529 gcc_assert (*dynamic_check
== -1);
23530 gcc_assert (alg
!= libcall
);
23531 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23532 *dynamic_check
= max
;
23535 return (alg_usable_p (algs
->unknown_size
, memset
)
23536 ? algs
->unknown_size
: libcall
);
23539 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23540 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23542 decide_alignment (int align
,
23543 enum stringop_alg alg
,
23545 enum machine_mode move_mode
)
23547 int desired_align
= 0;
23549 gcc_assert (alg
!= no_stringop
);
23551 if (alg
== libcall
)
23553 if (move_mode
== VOIDmode
)
23556 desired_align
= GET_MODE_SIZE (move_mode
);
23557 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
23558 copying whole cacheline at once. */
23559 if (TARGET_PENTIUMPRO
23560 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
23565 if (desired_align
< align
)
23566 desired_align
= align
;
23567 if (expected_size
!= -1 && expected_size
< 4)
23568 desired_align
= align
;
23570 return desired_align
;
23574 /* Helper function for memcpy. For QImode value 0xXY produce
23575 0xXYXYXYXY of wide specified by MODE. This is essentially
23576 a * 0x10101010, but we can do slightly better than
23577 synth_mult by unwinding the sequence by hand on CPUs with
23580 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23582 enum machine_mode valmode
= GET_MODE (val
);
23584 int nops
= mode
== DImode
? 3 : 2;
23586 gcc_assert (mode
== SImode
|| mode
== DImode
|| val
== const0_rtx
);
23587 if (val
== const0_rtx
)
23588 return copy_to_mode_reg (mode
, CONST0_RTX (mode
));
23589 if (CONST_INT_P (val
))
23591 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23595 if (mode
== DImode
)
23596 v
|= (v
<< 16) << 16;
23597 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23600 if (valmode
== VOIDmode
)
23602 if (valmode
!= QImode
)
23603 val
= gen_lowpart (QImode
, val
);
23604 if (mode
== QImode
)
23606 if (!TARGET_PARTIAL_REG_STALL
)
23608 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23609 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23610 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23611 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23613 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23614 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23615 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23620 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23622 if (!TARGET_PARTIAL_REG_STALL
)
23623 if (mode
== SImode
)
23624 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23626 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23629 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23630 NULL
, 1, OPTAB_DIRECT
);
23632 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23634 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23635 NULL
, 1, OPTAB_DIRECT
);
23636 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23637 if (mode
== SImode
)
23639 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23640 NULL
, 1, OPTAB_DIRECT
);
23641 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23646 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23647 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23648 alignment from ALIGN to DESIRED_ALIGN. */
23650 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
,
23656 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23657 promoted_val
= promote_duplicated_reg (DImode
, val
);
23658 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23659 promoted_val
= promote_duplicated_reg (SImode
, val
);
23660 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23661 promoted_val
= promote_duplicated_reg (HImode
, val
);
23663 promoted_val
= val
;
23665 return promoted_val
;
23668 /* Expand string move (memcpy) ot store (memset) operation. Use i386 string
23669 operations when profitable. The code depends upon architecture, block size
23670 and alignment, but always has one of the following overall structures:
23672 Aligned move sequence:
23674 1) Prologue guard: Conditional that jumps up to epilogues for small
23675 blocks that can be handled by epilogue alone. This is faster
23676 but also needed for correctness, since prologue assume the block
23677 is larger than the desired alignment.
23679 Optional dynamic check for size and libcall for large
23680 blocks is emitted here too, with -minline-stringops-dynamically.
23682 2) Prologue: copy first few bytes in order to get destination
23683 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23684 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23685 copied. We emit either a jump tree on power of two sized
23686 blocks, or a byte loop.
23688 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23689 with specified algorithm.
23691 4) Epilogue: code copying tail of the block that is too small to be
23692 handled by main body (or up to size guarded by prologue guard).
23694 Misaligned move sequence
23696 1) missaligned move prologue/epilogue containing:
23697 a) Prologue handling small memory blocks and jumping to done_label
23698 (skipped if blocks are known to be large enough)
23699 b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
23700 needed by single possibly misaligned move
23701 (skipped if alignment is not needed)
23702 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
23704 2) Zero size guard dispatching to done_label, if needed
23706 3) dispatch to library call, if needed,
23708 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23709 with specified algorithm. */
23711 ix86_expand_set_or_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx val_exp
,
23712 rtx align_exp
, rtx expected_align_exp
,
23713 rtx expected_size_exp
, rtx min_size_exp
,
23714 rtx max_size_exp
, bool issetmem
)
23720 rtx jump_around_label
= NULL
;
23721 HOST_WIDE_INT align
= 1;
23722 unsigned HOST_WIDE_INT count
= 0;
23723 HOST_WIDE_INT expected_size
= -1;
23724 int size_needed
= 0, epilogue_size_needed
;
23725 int desired_align
= 0, align_bytes
= 0;
23726 enum stringop_alg alg
;
23727 rtx promoted_val
= NULL
;
23728 rtx vec_promoted_val
= NULL
;
23729 bool force_loopy_epilogue
= false;
23731 bool need_zero_guard
= false;
23733 enum machine_mode move_mode
= VOIDmode
;
23734 int unroll_factor
= 1;
23735 /* TODO: Once vlaue ranges are available, fill in proper data. */
23736 unsigned HOST_WIDE_INT min_size
= 0;
23737 unsigned HOST_WIDE_INT max_size
= -1;
23738 bool misaligned_prologue_used
= false;
23740 if (CONST_INT_P (align_exp
))
23741 align
= INTVAL (align_exp
);
23742 /* i386 can do misaligned access on reasonably increased cost. */
23743 if (CONST_INT_P (expected_align_exp
)
23744 && INTVAL (expected_align_exp
) > align
)
23745 align
= INTVAL (expected_align_exp
);
23746 /* ALIGN is the minimum of destination and source alignment, but we care here
23747 just about destination alignment. */
23749 && MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
23750 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
23752 if (CONST_INT_P (count_exp
))
23753 min_size
= max_size
= count
= expected_size
= INTVAL (count_exp
);
23755 min_size
= INTVAL (min_size_exp
);
23757 max_size
= INTVAL (max_size_exp
);
23758 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23759 expected_size
= INTVAL (expected_size_exp
);
23761 /* Make sure we don't need to care about overflow later on. */
23762 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23765 /* Step 0: Decide on preferred algorithm, desired alignment and
23766 size of chunks to be copied by main loop. */
23767 alg
= decide_alg (count
, expected_size
, min_size
, max_size
, issetmem
,
23768 issetmem
&& val_exp
== const0_rtx
,
23769 &dynamic_check
, &noalign
);
23770 if (alg
== libcall
)
23772 gcc_assert (alg
!= no_stringop
);
23774 /* For now vector-version of memset is generated only for memory zeroing, as
23775 creating of promoted vector value is very cheap in this case. */
23776 if (issetmem
&& alg
== vector_loop
&& val_exp
!= const0_rtx
)
23777 alg
= unrolled_loop
;
23780 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
23781 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
23783 srcreg
= ix86_copy_addr_to_reg (XEXP (src
, 0));
23786 move_mode
= word_mode
;
23792 gcc_unreachable ();
23794 need_zero_guard
= true;
23795 move_mode
= QImode
;
23798 need_zero_guard
= true;
23800 case unrolled_loop
:
23801 need_zero_guard
= true;
23802 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23805 need_zero_guard
= true;
23807 /* Find the widest supported mode. */
23808 move_mode
= word_mode
;
23809 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23810 != CODE_FOR_nothing
)
23811 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23813 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23814 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23815 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23817 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23818 move_mode
= mode_for_vector (word_mode
, nunits
);
23819 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23820 move_mode
= word_mode
;
23822 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23824 case rep_prefix_8_byte
:
23825 move_mode
= DImode
;
23827 case rep_prefix_4_byte
:
23828 move_mode
= SImode
;
23830 case rep_prefix_1_byte
:
23831 move_mode
= QImode
;
23834 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23835 epilogue_size_needed
= size_needed
;
23837 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23838 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23839 align
= desired_align
;
23841 /* Step 1: Prologue guard. */
23843 /* Alignment code needs count to be in register. */
23844 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23846 if (INTVAL (count_exp
) > desired_align
23847 && INTVAL (count_exp
) > size_needed
)
23850 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23851 if (align_bytes
<= 0)
23854 align_bytes
= desired_align
- align_bytes
;
23856 if (align_bytes
== 0)
23857 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23859 gcc_assert (desired_align
>= 1 && align
>= 1);
23861 /* Misaligned move sequences handle both prologue and epilogue at once.
23862 Default code generation results in a smaller code for large alignments
23863 and also avoids redundant job when sizes are known precisely. */
23864 misaligned_prologue_used
23865 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
23866 && MAX (desired_align
, epilogue_size_needed
) <= 32
23867 && desired_align
<= epilogue_size_needed
23868 && ((desired_align
> align
&& !align_bytes
)
23869 || (!count
&& epilogue_size_needed
> 1)));
23871 /* Do the cheap promotion to allow better CSE across the
23872 main loop and epilogue (ie one load of the big constant in the
23874 For now the misaligned move sequences do not have fast path
23875 without broadcasting. */
23876 if (issetmem
&& ((CONST_INT_P (val_exp
) || misaligned_prologue_used
)))
23878 if (alg
== vector_loop
)
23880 gcc_assert (val_exp
== const0_rtx
);
23881 vec_promoted_val
= promote_duplicated_reg (move_mode
, val_exp
);
23882 promoted_val
= promote_duplicated_reg_to_size (val_exp
,
23883 GET_MODE_SIZE (word_mode
),
23884 desired_align
, align
);
23888 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23889 desired_align
, align
);
23892 /* Misaligned move sequences handles both prologues and epilogues at once.
23893 Default code generation results in smaller code for large alignments and
23894 also avoids redundant job when sizes are known precisely. */
23895 if (misaligned_prologue_used
)
23897 /* Misaligned move prologue handled small blocks by itself. */
23898 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
23899 (dst
, src
, &destreg
, &srcreg
,
23900 move_mode
, promoted_val
, vec_promoted_val
,
23902 &jump_around_label
,
23903 desired_align
< align
23904 ? MAX (desired_align
, epilogue_size_needed
) : epilogue_size_needed
,
23905 desired_align
, align
, &min_size
, dynamic_check
, issetmem
);
23907 src
= change_address (src
, BLKmode
, srcreg
);
23908 dst
= change_address (dst
, BLKmode
, destreg
);
23909 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23910 epilogue_size_needed
= 0;
23911 if (need_zero_guard
&& !min_size
)
23913 /* It is possible that we copied enough so the main loop will not
23915 gcc_assert (size_needed
> 1);
23916 if (jump_around_label
== NULL_RTX
)
23917 jump_around_label
= gen_label_rtx ();
23918 emit_cmp_and_jump_insns (count_exp
,
23919 GEN_INT (size_needed
),
23920 LTU
, 0, counter_mode (count_exp
), 1, jump_around_label
);
23921 if (expected_size
== -1
23922 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23923 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23925 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23928 /* Ensure that alignment prologue won't copy past end of block. */
23929 else if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23931 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23932 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
23933 Make sure it is power of 2. */
23934 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23936 /* To improve performance of small blocks, we jump around the VAL
23937 promoting mode. This mean that if the promoted VAL is not constant,
23938 we might not use it in the epilogue and have to use byte
23940 if (issetmem
&& epilogue_size_needed
> 2 && !promoted_val
)
23941 force_loopy_epilogue
= true;
23944 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23946 /* If main algorithm works on QImode, no epilogue is needed.
23947 For small sizes just don't align anything. */
23948 if (size_needed
== 1)
23949 desired_align
= align
;
23954 else if (min_size
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23956 gcc_assert (max_size
>= (unsigned HOST_WIDE_INT
)epilogue_size_needed
);
23957 label
= gen_label_rtx ();
23958 emit_cmp_and_jump_insns (count_exp
,
23959 GEN_INT (epilogue_size_needed
),
23960 LTU
, 0, counter_mode (count_exp
), 1, label
);
23961 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
23962 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23964 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23968 /* Emit code to decide on runtime whether library call or inline should be
23970 if (dynamic_check
!= -1)
23972 if (!issetmem
&& CONST_INT_P (count_exp
))
23974 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
23976 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23977 count_exp
= const0_rtx
;
23983 rtx hot_label
= gen_label_rtx ();
23984 jump_around_label
= gen_label_rtx ();
23985 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23986 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
23987 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23989 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23991 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23992 emit_jump (jump_around_label
);
23993 emit_label (hot_label
);
23997 /* Step 2: Alignment prologue. */
23998 /* Do the expensive promotion once we branched off the small blocks. */
23999 if (issetmem
&& !promoted_val
)
24000 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
24001 desired_align
, align
);
24003 if (desired_align
> align
&& !misaligned_prologue_used
)
24005 if (align_bytes
== 0)
24007 /* Except for the first move in prologue, we no longer know
24008 constant offset in aliasing info. It don't seems to worth
24009 the pain to maintain it for the first move, so throw away
24011 dst
= change_address (dst
, BLKmode
, destreg
);
24013 src
= change_address (src
, BLKmode
, srcreg
);
24014 dst
= expand_set_or_movmem_prologue (dst
, src
, destreg
, srcreg
,
24015 promoted_val
, vec_promoted_val
,
24016 count_exp
, align
, desired_align
,
24018 /* At most desired_align - align bytes are copied. */
24019 if (min_size
< (unsigned)(desired_align
- align
))
24022 min_size
-= desired_align
- align
;
24026 /* If we know how many bytes need to be stored before dst is
24027 sufficiently aligned, maintain aliasing info accurately. */
24028 dst
= expand_set_or_movmem_constant_prologue (dst
, &src
, destreg
,
24036 count_exp
= plus_constant (counter_mode (count_exp
),
24037 count_exp
, -align_bytes
);
24038 count
-= align_bytes
;
24039 min_size
-= align_bytes
;
24040 max_size
-= align_bytes
;
24042 if (need_zero_guard
24044 && (count
< (unsigned HOST_WIDE_INT
) size_needed
24045 || (align_bytes
== 0
24046 && count
< ((unsigned HOST_WIDE_INT
) size_needed
24047 + desired_align
- align
))))
24049 /* It is possible that we copied enough so the main loop will not
24051 gcc_assert (size_needed
> 1);
24052 if (label
== NULL_RTX
)
24053 label
= gen_label_rtx ();
24054 emit_cmp_and_jump_insns (count_exp
,
24055 GEN_INT (size_needed
),
24056 LTU
, 0, counter_mode (count_exp
), 1, label
);
24057 if (expected_size
== -1
24058 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
24059 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
24061 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
24064 if (label
&& size_needed
== 1)
24066 emit_label (label
);
24067 LABEL_NUSES (label
) = 1;
24069 epilogue_size_needed
= 1;
24071 promoted_val
= val_exp
;
24073 else if (label
== NULL_RTX
&& !misaligned_prologue_used
)
24074 epilogue_size_needed
= size_needed
;
24076 /* Step 3: Main loop. */
24083 gcc_unreachable ();
24086 case unrolled_loop
:
24087 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, promoted_val
,
24088 count_exp
, move_mode
, unroll_factor
,
24089 expected_size
, issetmem
);
24092 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
,
24093 vec_promoted_val
, count_exp
, move_mode
,
24094 unroll_factor
, expected_size
, issetmem
);
24096 case rep_prefix_8_byte
:
24097 case rep_prefix_4_byte
:
24098 case rep_prefix_1_byte
:
24099 expand_set_or_movmem_via_rep (dst
, src
, destreg
, srcreg
, promoted_val
,
24100 val_exp
, count_exp
, move_mode
, issetmem
);
24103 /* Adjust properly the offset of src and dest memory for aliasing. */
24104 if (CONST_INT_P (count_exp
))
24107 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
24108 (count
/ size_needed
) * size_needed
);
24109 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
24110 (count
/ size_needed
) * size_needed
);
24115 src
= change_address (src
, BLKmode
, srcreg
);
24116 dst
= change_address (dst
, BLKmode
, destreg
);
24119 /* Step 4: Epilogue to copy the remaining bytes. */
24123 /* When the main loop is done, COUNT_EXP might hold original count,
24124 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24125 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24126 bytes. Compensate if needed. */
24128 if (size_needed
< epilogue_size_needed
)
24131 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
24132 GEN_INT (size_needed
- 1), count_exp
, 1,
24134 if (tmp
!= count_exp
)
24135 emit_move_insn (count_exp
, tmp
);
24137 emit_label (label
);
24138 LABEL_NUSES (label
) = 1;
24141 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
24143 if (force_loopy_epilogue
)
24144 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
24145 epilogue_size_needed
);
24149 expand_setmem_epilogue (dst
, destreg
, promoted_val
,
24150 vec_promoted_val
, count_exp
,
24151 epilogue_size_needed
);
24153 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
24154 epilogue_size_needed
);
24157 if (jump_around_label
)
24158 emit_label (jump_around_label
);
24163 /* Expand the appropriate insns for doing strlen if not just doing
24166 out = result, initialized with the start address
24167 align_rtx = alignment of the address.
24168 scratch = scratch register, initialized with the startaddress when
24169 not aligned, otherwise undefined
24171 This is just the body. It needs the initializations mentioned above and
24172 some address computing at the end. These things are done in i386.md. */
24175 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
24179 rtx align_2_label
= NULL_RTX
;
24180 rtx align_3_label
= NULL_RTX
;
24181 rtx align_4_label
= gen_label_rtx ();
24182 rtx end_0_label
= gen_label_rtx ();
24184 rtx tmpreg
= gen_reg_rtx (SImode
);
24185 rtx scratch
= gen_reg_rtx (SImode
);
24189 if (CONST_INT_P (align_rtx
))
24190 align
= INTVAL (align_rtx
);
24192 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24194 /* Is there a known alignment and is it less than 4? */
24197 rtx scratch1
= gen_reg_rtx (Pmode
);
24198 emit_move_insn (scratch1
, out
);
24199 /* Is there a known alignment and is it not 2? */
24202 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
24203 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
24205 /* Leave just the 3 lower bits. */
24206 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
24207 NULL_RTX
, 0, OPTAB_WIDEN
);
24209 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24210 Pmode
, 1, align_4_label
);
24211 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
24212 Pmode
, 1, align_2_label
);
24213 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
24214 Pmode
, 1, align_3_label
);
24218 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24219 check if is aligned to 4 - byte. */
24221 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
24222 NULL_RTX
, 0, OPTAB_WIDEN
);
24224 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24225 Pmode
, 1, align_4_label
);
24228 mem
= change_address (src
, QImode
, out
);
24230 /* Now compare the bytes. */
24232 /* Compare the first n unaligned byte on a byte per byte basis. */
24233 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
24234 QImode
, 1, end_0_label
);
24236 /* Increment the address. */
24237 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24239 /* Not needed with an alignment of 2 */
24242 emit_label (align_2_label
);
24244 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24247 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24249 emit_label (align_3_label
);
24252 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24255 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24258 /* Generate loop to check 4 bytes at a time. It is not a good idea to
24259 align this loop. It gives only huge programs, but does not help to
24261 emit_label (align_4_label
);
24263 mem
= change_address (src
, SImode
, out
);
24264 emit_move_insn (scratch
, mem
);
24265 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
24267 /* This formula yields a nonzero result iff one of the bytes is zero.
24268 This saves three branches inside loop and many cycles. */
24270 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
24271 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
24272 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
24273 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
24274 gen_int_mode (0x80808080, SImode
)));
24275 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
24280 rtx reg
= gen_reg_rtx (SImode
);
24281 rtx reg2
= gen_reg_rtx (Pmode
);
24282 emit_move_insn (reg
, tmpreg
);
24283 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
24285 /* If zero is not in the first two bytes, move two bytes forward. */
24286 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24287 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24288 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24289 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
24290 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
24293 /* Emit lea manually to avoid clobbering of flags. */
24294 emit_insn (gen_rtx_SET (SImode
, reg2
,
24295 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
24297 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24298 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24299 emit_insn (gen_rtx_SET (VOIDmode
, out
,
24300 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
24306 rtx end_2_label
= gen_label_rtx ();
24307 /* Is zero in the first two bytes? */
24309 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24310 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24311 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
24312 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24313 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
24315 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24316 JUMP_LABEL (tmp
) = end_2_label
;
24318 /* Not in the first two. Move two bytes forward. */
24319 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
24320 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
24322 emit_label (end_2_label
);
24326 /* Avoid branch in fixing the byte. */
24327 tmpreg
= gen_lowpart (QImode
, tmpreg
);
24328 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
24329 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24330 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
24331 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
24333 emit_label (end_0_label
);
24336 /* Expand strlen. */
24339 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
24341 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
24343 /* The generic case of strlen expander is long. Avoid it's
24344 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
24346 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24347 && !TARGET_INLINE_ALL_STRINGOPS
24348 && !optimize_insn_for_size_p ()
24349 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
24352 addr
= force_reg (Pmode
, XEXP (src
, 0));
24353 scratch1
= gen_reg_rtx (Pmode
);
24355 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24356 && !optimize_insn_for_size_p ())
24358 /* Well it seems that some optimizer does not combine a call like
24359 foo(strlen(bar), strlen(bar));
24360 when the move and the subtraction is done here. It does calculate
24361 the length just once when these instructions are done inside of
24362 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24363 often used and I use one fewer register for the lifetime of
24364 output_strlen_unroll() this is better. */
24366 emit_move_insn (out
, addr
);
24368 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
24370 /* strlensi_unroll_1 returns the address of the zero at the end of
24371 the string, like memchr(), so compute the length by subtracting
24372 the start address. */
24373 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
24379 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24380 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
24383 scratch2
= gen_reg_rtx (Pmode
);
24384 scratch3
= gen_reg_rtx (Pmode
);
24385 scratch4
= force_reg (Pmode
, constm1_rtx
);
24387 emit_move_insn (scratch3
, addr
);
24388 eoschar
= force_reg (QImode
, eoschar
);
24390 src
= replace_equiv_address_nv (src
, scratch3
);
24392 /* If .md starts supporting :P, this can be done in .md. */
24393 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
24394 scratch4
), UNSPEC_SCAS
);
24395 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
24396 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
24397 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
24402 /* For given symbol (function) construct code to compute address of it's PLT
24403 entry in large x86-64 PIC model. */
24405 construct_plt_address (rtx symbol
)
24409 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
24410 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
24411 gcc_assert (Pmode
== DImode
);
24413 tmp
= gen_reg_rtx (Pmode
);
24414 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
24416 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
24417 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
24422 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
24424 rtx pop
, bool sibcall
)
24426 unsigned int const cregs_size
24427 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
24428 rtx vec
[3 + cregs_size
];
24429 rtx use
= NULL
, call
;
24430 unsigned int vec_len
= 0;
24432 if (pop
== const0_rtx
)
24434 gcc_assert (!TARGET_64BIT
|| !pop
);
24436 if (TARGET_MACHO
&& !TARGET_64BIT
)
24439 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
24440 fnaddr
= machopic_indirect_call_target (fnaddr
);
24445 /* Static functions and indirect calls don't need the pic register. */
24448 || (ix86_cmodel
== CM_LARGE_PIC
24449 && DEFAULT_ABI
!= MS_ABI
))
24450 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24451 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
24452 use_reg (&use
, pic_offset_table_rtx
);
24455 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
24457 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
24458 emit_move_insn (al
, callarg2
);
24459 use_reg (&use
, al
);
24462 if (ix86_cmodel
== CM_LARGE_PIC
24465 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24466 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
24467 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
24469 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
24470 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
24472 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
24473 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
24476 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
24478 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
24479 vec
[vec_len
++] = call
;
24483 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
24484 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
24485 vec
[vec_len
++] = pop
;
24488 if (TARGET_64BIT_MS_ABI
24489 && (!callarg2
|| INTVAL (callarg2
) != -2))
24493 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
24494 UNSPEC_MS_TO_SYSV_CALL
);
24496 for (i
= 0; i
< cregs_size
; i
++)
24498 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
24499 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
24502 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
24507 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
24508 call
= emit_call_insn (call
);
24510 CALL_INSN_FUNCTION_USAGE (call
) = use
;
24515 /* Output the assembly for a call instruction. */
24518 ix86_output_call_insn (rtx insn
, rtx call_op
)
24520 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24521 bool seh_nop_p
= false;
24524 if (SIBLING_CALL_P (insn
))
24527 xasm
= "%!jmp\t%P0";
24528 /* SEH epilogue detection requires the indirect branch case
24529 to include REX.W. */
24530 else if (TARGET_SEH
)
24531 xasm
= "%!rex.W jmp %A0";
24533 xasm
= "%!jmp\t%A0";
24535 output_asm_insn (xasm
, &call_op
);
24539 /* SEH unwinding can require an extra nop to be emitted in several
24540 circumstances. Determine if we have one of those. */
24545 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24547 /* If we get to another real insn, we don't need the nop. */
24551 /* If we get to the epilogue note, prevent a catch region from
24552 being adjacent to the standard epilogue sequence. If non-
24553 call-exceptions, we'll have done this during epilogue emission. */
24554 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24555 && !flag_non_call_exceptions
24556 && !can_throw_internal (insn
))
24563 /* If we didn't find a real insn following the call, prevent the
24564 unwinder from looking into the next function. */
24570 xasm
= "%!call\t%P0";
24572 xasm
= "%!call\t%A0";
24574 output_asm_insn (xasm
, &call_op
);
24582 /* Clear stack slot assignments remembered from previous functions.
24583 This is called from INIT_EXPANDERS once before RTL is emitted for each
24586 static struct machine_function
*
24587 ix86_init_machine_status (void)
24589 struct machine_function
*f
;
24591 f
= ggc_alloc_cleared_machine_function ();
24592 f
->use_fast_prologue_epilogue_nregs
= -1;
24593 f
->call_abi
= ix86_abi
;
24598 /* Return a MEM corresponding to a stack slot with mode MODE.
24599 Allocate a new slot if necessary.
24601 The RTL for a function can have several slots available: N is
24602 which slot to use. */
24605 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24607 struct stack_local_entry
*s
;
24609 gcc_assert (n
< MAX_386_STACK_LOCALS
);
24611 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24612 if (s
->mode
== mode
&& s
->n
== n
)
24613 return validize_mem (copy_rtx (s
->rtl
));
24615 s
= ggc_alloc_stack_local_entry ();
24618 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24620 s
->next
= ix86_stack_locals
;
24621 ix86_stack_locals
= s
;
24622 return validize_mem (s
->rtl
);
24626 ix86_instantiate_decls (void)
24628 struct stack_local_entry
*s
;
24630 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24631 if (s
->rtl
!= NULL_RTX
)
24632 instantiate_decl_rtl (s
->rtl
);
24635 /* Check whether x86 address PARTS is a pc-relative address. */
24638 rip_relative_addr_p (struct ix86_address
*parts
)
24640 rtx base
, index
, disp
;
24642 base
= parts
->base
;
24643 index
= parts
->index
;
24644 disp
= parts
->disp
;
24646 if (disp
&& !base
&& !index
)
24652 if (GET_CODE (disp
) == CONST
)
24653 symbol
= XEXP (disp
, 0);
24654 if (GET_CODE (symbol
) == PLUS
24655 && CONST_INT_P (XEXP (symbol
, 1)))
24656 symbol
= XEXP (symbol
, 0);
24658 if (GET_CODE (symbol
) == LABEL_REF
24659 || (GET_CODE (symbol
) == SYMBOL_REF
24660 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
24661 || (GET_CODE (symbol
) == UNSPEC
24662 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
24663 || XINT (symbol
, 1) == UNSPEC_PCREL
24664 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
24671 /* Calculate the length of the memory address in the instruction encoding.
24672 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24673 or other prefixes. We never generate addr32 prefix for LEA insn. */
24676 memory_address_length (rtx addr
, bool lea
)
24678 struct ix86_address parts
;
24679 rtx base
, index
, disp
;
24683 if (GET_CODE (addr
) == PRE_DEC
24684 || GET_CODE (addr
) == POST_INC
24685 || GET_CODE (addr
) == PRE_MODIFY
24686 || GET_CODE (addr
) == POST_MODIFY
)
24689 ok
= ix86_decompose_address (addr
, &parts
);
24692 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24694 /* If this is not LEA instruction, add the length of addr32 prefix. */
24695 if (TARGET_64BIT
&& !lea
24696 && (SImode_address_operand (addr
, VOIDmode
)
24697 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24698 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24702 index
= parts
.index
;
24705 if (base
&& GET_CODE (base
) == SUBREG
)
24706 base
= SUBREG_REG (base
);
24707 if (index
&& GET_CODE (index
) == SUBREG
)
24708 index
= SUBREG_REG (index
);
24710 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24711 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24714 - esp as the base always wants an index,
24715 - ebp as the base always wants a displacement,
24716 - r12 as the base always wants an index,
24717 - r13 as the base always wants a displacement. */
24719 /* Register Indirect. */
24720 if (base
&& !index
&& !disp
)
24722 /* esp (for its index) and ebp (for its displacement) need
24723 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24725 if (base
== arg_pointer_rtx
24726 || base
== frame_pointer_rtx
24727 || REGNO (base
) == SP_REG
24728 || REGNO (base
) == BP_REG
24729 || REGNO (base
) == R12_REG
24730 || REGNO (base
) == R13_REG
)
24734 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24735 is not disp32, but disp32(%rip), so for disp32
24736 SIB byte is needed, unless print_operand_address
24737 optimizes it into disp32(%rip) or (%rip) is implied
24739 else if (disp
&& !base
&& !index
)
24742 if (rip_relative_addr_p (&parts
))
24747 /* Find the length of the displacement constant. */
24750 if (base
&& satisfies_constraint_K (disp
))
24755 /* ebp always wants a displacement. Similarly r13. */
24756 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24759 /* An index requires the two-byte modrm form.... */
24761 /* ...like esp (or r12), which always wants an index. */
24762 || base
== arg_pointer_rtx
24763 || base
== frame_pointer_rtx
24764 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24771 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24772 is set, expect that insn have 8bit immediate alternative. */
24774 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24778 extract_insn_cached (insn
);
24779 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24780 if (CONSTANT_P (recog_data
.operand
[i
]))
24782 enum attr_mode mode
= get_attr_mode (insn
);
24785 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24787 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
24794 ival
= trunc_int_for_mode (ival
, HImode
);
24797 ival
= trunc_int_for_mode (ival
, SImode
);
24802 if (IN_RANGE (ival
, -128, 127))
24819 /* Immediates for DImode instructions are encoded
24820 as 32bit sign extended values. */
24825 fatal_insn ("unknown insn mode", insn
);
24831 /* Compute default value for "length_address" attribute. */
24833 ix86_attr_length_address_default (rtx insn
)
24837 if (get_attr_type (insn
) == TYPE_LEA
)
24839 rtx set
= PATTERN (insn
), addr
;
24841 if (GET_CODE (set
) == PARALLEL
)
24842 set
= XVECEXP (set
, 0, 0);
24844 gcc_assert (GET_CODE (set
) == SET
);
24846 addr
= SET_SRC (set
);
24848 return memory_address_length (addr
, true);
24851 extract_insn_cached (insn
);
24852 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24853 if (MEM_P (recog_data
.operand
[i
]))
24855 constrain_operands_cached (reload_completed
);
24856 if (which_alternative
!= -1)
24858 const char *constraints
= recog_data
.constraints
[i
];
24859 int alt
= which_alternative
;
24861 while (*constraints
== '=' || *constraints
== '+')
24864 while (*constraints
++ != ',')
24866 /* Skip ignored operands. */
24867 if (*constraints
== 'X')
24870 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24875 /* Compute default value for "length_vex" attribute. It includes
24876 2 or 3 byte VEX prefix and 1 opcode byte. */
24879 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24883 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24884 byte VEX prefix. */
24885 if (!has_0f_opcode
|| has_vex_w
)
24888 /* We can always use 2 byte VEX prefix in 32bit. */
24892 extract_insn_cached (insn
);
24894 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24895 if (REG_P (recog_data
.operand
[i
]))
24897 /* REX.W bit uses 3 byte VEX prefix. */
24898 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24899 && GENERAL_REG_P (recog_data
.operand
[i
]))
24904 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24905 if (MEM_P (recog_data
.operand
[i
])
24906 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24913 /* Return the maximum number of instructions a cpu can issue. */
24916 ix86_issue_rate (void)
24920 case PROCESSOR_PENTIUM
:
24921 case PROCESSOR_ATOM
:
24922 case PROCESSOR_SLM
:
24924 case PROCESSOR_BTVER2
:
24925 case PROCESSOR_PENTIUM4
:
24926 case PROCESSOR_NOCONA
:
24929 case PROCESSOR_PENTIUMPRO
:
24930 case PROCESSOR_ATHLON
:
24932 case PROCESSOR_AMDFAM10
:
24933 case PROCESSOR_GENERIC
:
24934 case PROCESSOR_BTVER1
:
24937 case PROCESSOR_BDVER1
:
24938 case PROCESSOR_BDVER2
:
24939 case PROCESSOR_BDVER3
:
24940 case PROCESSOR_BDVER4
:
24941 case PROCESSOR_CORE2
:
24942 case PROCESSOR_COREI7
:
24943 case PROCESSOR_COREI7_AVX
:
24944 case PROCESSOR_HASWELL
:
24952 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24953 by DEP_INSN and nothing set by DEP_INSN. */
24956 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24960 /* Simplify the test for uninteresting insns. */
24961 if (insn_type
!= TYPE_SETCC
24962 && insn_type
!= TYPE_ICMOV
24963 && insn_type
!= TYPE_FCMOV
24964 && insn_type
!= TYPE_IBR
)
24967 if ((set
= single_set (dep_insn
)) != 0)
24969 set
= SET_DEST (set
);
24972 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24973 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24974 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24975 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24977 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24978 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24983 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24986 /* This test is true if the dependent insn reads the flags but
24987 not any other potentially set register. */
24988 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24991 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24997 /* Return true iff USE_INSN has a memory address with operands set by
25001 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
25004 extract_insn_cached (use_insn
);
25005 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
25006 if (MEM_P (recog_data
.operand
[i
]))
25008 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
25009 return modified_in_p (addr
, set_insn
) != 0;
25014 /* Helper function for exact_store_load_dependency.
25015 Return true if addr is found in insn. */
25017 exact_dependency_1 (rtx addr
, rtx insn
)
25019 enum rtx_code code
;
25020 const char *format_ptr
;
25023 code
= GET_CODE (insn
);
25027 if (rtx_equal_p (addr
, insn
))
25042 format_ptr
= GET_RTX_FORMAT (code
);
25043 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
25045 switch (*format_ptr
++)
25048 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
25052 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
25053 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
25061 /* Return true if there exists exact dependency for store & load, i.e.
25062 the same memory address is used in them. */
25064 exact_store_load_dependency (rtx store
, rtx load
)
25068 set1
= single_set (store
);
25071 if (!MEM_P (SET_DEST (set1
)))
25073 set2
= single_set (load
);
25076 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
25082 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
25084 enum attr_type insn_type
, dep_insn_type
;
25085 enum attr_memory memory
;
25087 int dep_insn_code_number
;
25089 /* Anti and output dependencies have zero cost on all CPUs. */
25090 if (REG_NOTE_KIND (link
) != 0)
25093 dep_insn_code_number
= recog_memoized (dep_insn
);
25095 /* If we can't recognize the insns, we can't really do anything. */
25096 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
25099 insn_type
= get_attr_type (insn
);
25100 dep_insn_type
= get_attr_type (dep_insn
);
25104 case PROCESSOR_PENTIUM
:
25105 /* Address Generation Interlock adds a cycle of latency. */
25106 if (insn_type
== TYPE_LEA
)
25108 rtx addr
= PATTERN (insn
);
25110 if (GET_CODE (addr
) == PARALLEL
)
25111 addr
= XVECEXP (addr
, 0, 0);
25113 gcc_assert (GET_CODE (addr
) == SET
);
25115 addr
= SET_SRC (addr
);
25116 if (modified_in_p (addr
, dep_insn
))
25119 else if (ix86_agi_dependent (dep_insn
, insn
))
25122 /* ??? Compares pair with jump/setcc. */
25123 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
25126 /* Floating point stores require value to be ready one cycle earlier. */
25127 if (insn_type
== TYPE_FMOV
25128 && get_attr_memory (insn
) == MEMORY_STORE
25129 && !ix86_agi_dependent (dep_insn
, insn
))
25133 case PROCESSOR_PENTIUMPRO
:
25134 memory
= get_attr_memory (insn
);
25136 /* INT->FP conversion is expensive. */
25137 if (get_attr_fp_int_src (dep_insn
))
25140 /* There is one cycle extra latency between an FP op and a store. */
25141 if (insn_type
== TYPE_FMOV
25142 && (set
= single_set (dep_insn
)) != NULL_RTX
25143 && (set2
= single_set (insn
)) != NULL_RTX
25144 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
25145 && MEM_P (SET_DEST (set2
)))
25148 /* Show ability of reorder buffer to hide latency of load by executing
25149 in parallel with previous instruction in case
25150 previous instruction is not needed to compute the address. */
25151 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25152 && !ix86_agi_dependent (dep_insn
, insn
))
25154 /* Claim moves to take one cycle, as core can issue one load
25155 at time and the next load can start cycle later. */
25156 if (dep_insn_type
== TYPE_IMOV
25157 || dep_insn_type
== TYPE_FMOV
)
25165 memory
= get_attr_memory (insn
);
25167 /* The esp dependency is resolved before the instruction is really
25169 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25170 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25173 /* INT->FP conversion is expensive. */
25174 if (get_attr_fp_int_src (dep_insn
))
25177 /* Show ability of reorder buffer to hide latency of load by executing
25178 in parallel with previous instruction in case
25179 previous instruction is not needed to compute the address. */
25180 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25181 && !ix86_agi_dependent (dep_insn
, insn
))
25183 /* Claim moves to take one cycle, as core can issue one load
25184 at time and the next load can start cycle later. */
25185 if (dep_insn_type
== TYPE_IMOV
25186 || dep_insn_type
== TYPE_FMOV
)
25195 case PROCESSOR_ATHLON
:
25197 case PROCESSOR_AMDFAM10
:
25198 case PROCESSOR_BDVER1
:
25199 case PROCESSOR_BDVER2
:
25200 case PROCESSOR_BDVER3
:
25201 case PROCESSOR_BDVER4
:
25202 case PROCESSOR_BTVER1
:
25203 case PROCESSOR_BTVER2
:
25204 case PROCESSOR_GENERIC
:
25205 memory
= get_attr_memory (insn
);
25207 /* Stack engine allows to execute push&pop instructions in parall. */
25208 if (((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25209 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25210 && (ix86_tune
!= PROCESSOR_ATHLON
&& ix86_tune
!= PROCESSOR_K8
))
25213 /* Show ability of reorder buffer to hide latency of load by executing
25214 in parallel with previous instruction in case
25215 previous instruction is not needed to compute the address. */
25216 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25217 && !ix86_agi_dependent (dep_insn
, insn
))
25219 enum attr_unit unit
= get_attr_unit (insn
);
25222 /* Because of the difference between the length of integer and
25223 floating unit pipeline preparation stages, the memory operands
25224 for floating point are cheaper.
25226 ??? For Athlon it the difference is most probably 2. */
25227 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
25230 loadcost
= TARGET_ATHLON
? 2 : 0;
25232 if (cost
>= loadcost
)
25239 case PROCESSOR_CORE2
:
25240 case PROCESSOR_COREI7
:
25241 case PROCESSOR_COREI7_AVX
:
25242 case PROCESSOR_HASWELL
:
25243 memory
= get_attr_memory (insn
);
25245 /* Stack engine allows to execute push&pop instructions in parall. */
25246 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25247 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25250 /* Show ability of reorder buffer to hide latency of load by executing
25251 in parallel with previous instruction in case
25252 previous instruction is not needed to compute the address. */
25253 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25254 && !ix86_agi_dependent (dep_insn
, insn
))
25263 case PROCESSOR_SLM
:
25264 if (!reload_completed
)
25267 /* Increase cost of integer loads. */
25268 memory
= get_attr_memory (dep_insn
);
25269 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25271 enum attr_unit unit
= get_attr_unit (dep_insn
);
25272 if (unit
== UNIT_INTEGER
&& cost
== 1)
25274 if (memory
== MEMORY_LOAD
)
25278 /* Increase cost of ld/st for short int types only
25279 because of store forwarding issue. */
25280 rtx set
= single_set (dep_insn
);
25281 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
25282 || GET_MODE (SET_DEST (set
)) == HImode
))
25284 /* Increase cost of store/load insn if exact
25285 dependence exists and it is load insn. */
25286 enum attr_memory insn_memory
= get_attr_memory (insn
);
25287 if (insn_memory
== MEMORY_LOAD
25288 && exact_store_load_dependency (dep_insn
, insn
))
25302 /* How many alternative schedules to try. This should be as wide as the
25303 scheduling freedom in the DFA, but no wider. Making this value too
25304 large results extra work for the scheduler. */
25307 ia32_multipass_dfa_lookahead (void)
25311 case PROCESSOR_PENTIUM
:
25314 case PROCESSOR_PENTIUMPRO
:
25318 case PROCESSOR_BDVER1
:
25319 case PROCESSOR_BDVER2
:
25320 case PROCESSOR_BDVER3
:
25321 case PROCESSOR_BDVER4
:
25322 /* We use lookahead value 4 for BD both before and after reload
25323 schedules. Plan is to have value 8 included for O3. */
25326 case PROCESSOR_CORE2
:
25327 case PROCESSOR_COREI7
:
25328 case PROCESSOR_COREI7_AVX
:
25329 case PROCESSOR_HASWELL
:
25330 case PROCESSOR_ATOM
:
25331 case PROCESSOR_SLM
:
25332 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25333 as many instructions can be executed on a cycle, i.e.,
25334 issue_rate. I wonder why tuning for many CPUs does not do this. */
25335 if (reload_completed
)
25336 return ix86_issue_rate ();
25337 /* Don't use lookahead for pre-reload schedule to save compile time. */
25345 /* Return true if target platform supports macro-fusion. */
25348 ix86_macro_fusion_p ()
25350 return TARGET_FUSE_CMP_AND_BRANCH
;
25353 /* Check whether current microarchitecture support macro fusion
25354 for insn pair "CONDGEN + CONDJMP". Refer to
25355 "Intel Architectures Optimization Reference Manual". */
25358 ix86_macro_fusion_pair_p (rtx condgen
, rtx condjmp
)
25361 rtx single_set
= single_set (condgen
);
25362 enum rtx_code ccode
;
25363 rtx compare_set
= NULL_RTX
, test_if
, cond
;
25364 rtx alu_set
= NULL_RTX
, addr
= NULL_RTX
;
25366 if (get_attr_type (condgen
) != TYPE_TEST
25367 && get_attr_type (condgen
) != TYPE_ICMP
25368 && get_attr_type (condgen
) != TYPE_INCDEC
25369 && get_attr_type (condgen
) != TYPE_ALU
)
25372 if (single_set
== NULL_RTX
25373 && !TARGET_FUSE_ALU_AND_BRANCH
)
25376 if (single_set
!= NULL_RTX
)
25377 compare_set
= single_set
;
25381 rtx pat
= PATTERN (condgen
);
25382 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
25383 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
25385 rtx set_src
= SET_SRC (XVECEXP (pat
, 0, i
));
25386 if (GET_CODE (set_src
) == COMPARE
)
25387 compare_set
= XVECEXP (pat
, 0, i
);
25389 alu_set
= XVECEXP (pat
, 0, i
);
25392 if (compare_set
== NULL_RTX
)
25394 src
= SET_SRC (compare_set
);
25395 if (GET_CODE (src
) != COMPARE
)
25398 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25400 if ((MEM_P (XEXP (src
, 0))
25401 && CONST_INT_P (XEXP (src
, 1)))
25402 || (MEM_P (XEXP (src
, 1))
25403 && CONST_INT_P (XEXP (src
, 0))))
25406 /* No fusion for RIP-relative address. */
25407 if (MEM_P (XEXP (src
, 0)))
25408 addr
= XEXP (XEXP (src
, 0), 0);
25409 else if (MEM_P (XEXP (src
, 1)))
25410 addr
= XEXP (XEXP (src
, 1), 0);
25413 ix86_address parts
;
25414 int ok
= ix86_decompose_address (addr
, &parts
);
25417 if (rip_relative_addr_p (&parts
))
25421 test_if
= SET_SRC (pc_set (condjmp
));
25422 cond
= XEXP (test_if
, 0);
25423 ccode
= GET_CODE (cond
);
25424 /* Check whether conditional jump use Sign or Overflow Flags. */
25425 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25432 /* Return true for TYPE_TEST and TYPE_ICMP. */
25433 if (get_attr_type (condgen
) == TYPE_TEST
25434 || get_attr_type (condgen
) == TYPE_ICMP
)
25437 /* The following is the case that macro-fusion for alu + jmp. */
25438 if (!TARGET_FUSE_ALU_AND_BRANCH
|| !alu_set
)
25441 /* No fusion for alu op with memory destination operand. */
25442 dest
= SET_DEST (alu_set
);
25446 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25448 if (get_attr_type (condgen
) == TYPE_INCDEC
25458 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25459 execution. It is applied if
25460 (1) IMUL instruction is on the top of list;
25461 (2) There exists the only producer of independent IMUL instruction in
25463 Return index of IMUL producer if it was found and -1 otherwise. */
25465 do_reorder_for_imul (rtx
*ready
, int n_ready
)
25467 rtx insn
, set
, insn1
, insn2
;
25468 sd_iterator_def sd_it
;
25473 if (ix86_tune
!= PROCESSOR_ATOM
)
25476 /* Check that IMUL instruction is on the top of ready list. */
25477 insn
= ready
[n_ready
- 1];
25478 set
= single_set (insn
);
25481 if (!(GET_CODE (SET_SRC (set
)) == MULT
25482 && GET_MODE (SET_SRC (set
)) == SImode
))
25485 /* Search for producer of independent IMUL instruction. */
25486 for (i
= n_ready
- 2; i
>= 0; i
--)
25489 if (!NONDEBUG_INSN_P (insn
))
25491 /* Skip IMUL instruction. */
25492 insn2
= PATTERN (insn
);
25493 if (GET_CODE (insn2
) == PARALLEL
)
25494 insn2
= XVECEXP (insn2
, 0, 0);
25495 if (GET_CODE (insn2
) == SET
25496 && GET_CODE (SET_SRC (insn2
)) == MULT
25497 && GET_MODE (SET_SRC (insn2
)) == SImode
)
25500 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
25503 con
= DEP_CON (dep
);
25504 if (!NONDEBUG_INSN_P (con
))
25506 insn1
= PATTERN (con
);
25507 if (GET_CODE (insn1
) == PARALLEL
)
25508 insn1
= XVECEXP (insn1
, 0, 0);
25510 if (GET_CODE (insn1
) == SET
25511 && GET_CODE (SET_SRC (insn1
)) == MULT
25512 && GET_MODE (SET_SRC (insn1
)) == SImode
)
25514 sd_iterator_def sd_it1
;
25516 /* Check if there is no other dependee for IMUL. */
25518 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
25521 pro
= DEP_PRO (dep1
);
25522 if (!NONDEBUG_INSN_P (pro
))
25537 /* Try to find the best candidate on the top of ready list if two insns
25538 have the same priority - candidate is best if its dependees were
25539 scheduled earlier. Applied for Silvermont only.
25540 Return true if top 2 insns must be interchanged. */
25542 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
25544 rtx top
= ready
[n_ready
- 1];
25545 rtx next
= ready
[n_ready
- 2];
25547 sd_iterator_def sd_it
;
25551 #define INSN_TICK(INSN) (HID (INSN)->tick)
25553 if (ix86_tune
!= PROCESSOR_SLM
)
25556 if (!NONDEBUG_INSN_P (top
))
25558 if (!NONJUMP_INSN_P (top
))
25560 if (!NONDEBUG_INSN_P (next
))
25562 if (!NONJUMP_INSN_P (next
))
25564 set
= single_set (top
);
25567 set
= single_set (next
);
25571 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
25573 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
25575 /* Determine winner more precise. */
25576 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
25579 pro
= DEP_PRO (dep
);
25580 if (!NONDEBUG_INSN_P (pro
))
25582 if (INSN_TICK (pro
) > clock1
)
25583 clock1
= INSN_TICK (pro
);
25585 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
25588 pro
= DEP_PRO (dep
);
25589 if (!NONDEBUG_INSN_P (pro
))
25591 if (INSN_TICK (pro
) > clock2
)
25592 clock2
= INSN_TICK (pro
);
25595 if (clock1
== clock2
)
25597 /* Determine winner - load must win. */
25598 enum attr_memory memory1
, memory2
;
25599 memory1
= get_attr_memory (top
);
25600 memory2
= get_attr_memory (next
);
25601 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
25604 return (bool) (clock2
< clock1
);
25610 /* Perform possible reodering of ready list for Atom/Silvermont only.
25611 Return issue rate. */
25613 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
25616 int issue_rate
= -1;
25617 int n_ready
= *pn_ready
;
25622 /* Set up issue rate. */
25623 issue_rate
= ix86_issue_rate ();
25625 /* Do reodering for Atom/SLM only. */
25626 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
25629 /* Nothing to do if ready list contains only 1 instruction. */
25633 /* Do reodering for post-reload scheduler only. */
25634 if (!reload_completed
)
25637 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
25639 if (sched_verbose
> 1)
25640 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
25641 INSN_UID (ready
[index
]));
25643 /* Put IMUL producer (ready[index]) at the top of ready list. */
25644 insn
= ready
[index
];
25645 for (i
= index
; i
< n_ready
- 1; i
++)
25646 ready
[i
] = ready
[i
+ 1];
25647 ready
[n_ready
- 1] = insn
;
25650 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
25652 if (sched_verbose
> 1)
25653 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
25654 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
25655 /* Swap 2 top elements of ready list. */
25656 insn
= ready
[n_ready
- 1];
25657 ready
[n_ready
- 1] = ready
[n_ready
- 2];
25658 ready
[n_ready
- 2] = insn
;
25664 ix86_class_likely_spilled_p (reg_class_t
);
25666 /* Returns true if lhs of insn is HW function argument register and set up
25667 is_spilled to true if it is likely spilled HW register. */
25669 insn_is_function_arg (rtx insn
, bool* is_spilled
)
25673 if (!NONDEBUG_INSN_P (insn
))
25675 /* Call instructions are not movable, ignore it. */
25678 insn
= PATTERN (insn
);
25679 if (GET_CODE (insn
) == PARALLEL
)
25680 insn
= XVECEXP (insn
, 0, 0);
25681 if (GET_CODE (insn
) != SET
)
25683 dst
= SET_DEST (insn
);
25684 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
25685 && ix86_function_arg_regno_p (REGNO (dst
)))
25687 /* Is it likely spilled HW register? */
25688 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
25689 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
25690 *is_spilled
= true;
25696 /* Add output dependencies for chain of function adjacent arguments if only
25697 there is a move to likely spilled HW register. Return first argument
25698 if at least one dependence was added or NULL otherwise. */
25700 add_parameter_dependencies (rtx call
, rtx head
)
25704 rtx first_arg
= NULL
;
25705 bool is_spilled
= false;
25707 head
= PREV_INSN (head
);
25709 /* Find nearest to call argument passing instruction. */
25712 last
= PREV_INSN (last
);
25715 if (!NONDEBUG_INSN_P (last
))
25717 if (insn_is_function_arg (last
, &is_spilled
))
25725 insn
= PREV_INSN (last
);
25726 if (!INSN_P (insn
))
25730 if (!NONDEBUG_INSN_P (insn
))
25735 if (insn_is_function_arg (insn
, &is_spilled
))
25737 /* Add output depdendence between two function arguments if chain
25738 of output arguments contains likely spilled HW registers. */
25740 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25741 first_arg
= last
= insn
;
25751 /* Add output or anti dependency from insn to first_arg to restrict its code
25754 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25759 set
= single_set (insn
);
25762 tmp
= SET_DEST (set
);
25765 /* Add output dependency to the first function argument. */
25766 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25769 /* Add anti dependency. */
25770 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25773 /* Avoid cross block motion of function argument through adding dependency
25774 from the first non-jump instruction in bb. */
25776 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25778 rtx insn
= BB_END (bb
);
25782 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25784 rtx set
= single_set (insn
);
25787 avoid_func_arg_motion (arg
, insn
);
25791 if (insn
== BB_HEAD (bb
))
25793 insn
= PREV_INSN (insn
);
25797 /* Hook for pre-reload schedule - avoid motion of function arguments
25798 passed in likely spilled HW registers. */
25800 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25803 rtx first_arg
= NULL
;
25804 if (reload_completed
)
25806 while (head
!= tail
&& DEBUG_INSN_P (head
))
25807 head
= NEXT_INSN (head
);
25808 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25809 if (INSN_P (insn
) && CALL_P (insn
))
25811 first_arg
= add_parameter_dependencies (insn
, head
);
25814 /* Add dependee for first argument to predecessors if only
25815 region contains more than one block. */
25816 basic_block bb
= BLOCK_FOR_INSN (insn
);
25817 int rgn
= CONTAINING_RGN (bb
->index
);
25818 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25819 /* Skip trivial regions and region head blocks that can have
25820 predecessors outside of region. */
25821 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25825 /* Assume that region is SCC, i.e. all immediate predecessors
25826 of non-head block are in the same region. */
25827 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25829 /* Avoid creating of loop-carried dependencies through
25830 using topological odering in region. */
25831 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25832 add_dependee_for_func_arg (first_arg
, e
->src
);
25840 else if (first_arg
)
25841 avoid_func_arg_motion (first_arg
, insn
);
25844 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25845 HW registers to maximum, to schedule them at soon as possible. These are
25846 moves from function argument registers at the top of the function entry
25847 and moves from function return value registers after call. */
25849 ix86_adjust_priority (rtx insn
, int priority
)
25853 if (reload_completed
)
25856 if (!NONDEBUG_INSN_P (insn
))
25859 set
= single_set (insn
);
25862 rtx tmp
= SET_SRC (set
);
25864 && HARD_REGISTER_P (tmp
)
25865 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25866 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25867 return current_sched_info
->sched_max_insns_priority
;
25873 /* Model decoder of Core 2/i7.
25874 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25875 track the instruction fetch block boundaries and make sure that long
25876 (9+ bytes) instructions are assigned to D0. */
25878 /* Maximum length of an insn that can be handled by
25879 a secondary decoder unit. '8' for Core 2/i7. */
25880 static int core2i7_secondary_decoder_max_insn_size
;
25882 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25883 '16' for Core 2/i7. */
25884 static int core2i7_ifetch_block_size
;
25886 /* Maximum number of instructions decoder can handle per cycle.
25887 '6' for Core 2/i7. */
25888 static int core2i7_ifetch_block_max_insns
;
25890 typedef struct ix86_first_cycle_multipass_data_
*
25891 ix86_first_cycle_multipass_data_t
;
25892 typedef const struct ix86_first_cycle_multipass_data_
*
25893 const_ix86_first_cycle_multipass_data_t
;
25895 /* A variable to store target state across calls to max_issue within
25897 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25898 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25900 /* Initialize DATA. */
25902 core2i7_first_cycle_multipass_init (void *_data
)
25904 ix86_first_cycle_multipass_data_t data
25905 = (ix86_first_cycle_multipass_data_t
) _data
;
25907 data
->ifetch_block_len
= 0;
25908 data
->ifetch_block_n_insns
= 0;
25909 data
->ready_try_change
= NULL
;
25910 data
->ready_try_change_size
= 0;
25913 /* Advancing the cycle; reset ifetch block counts. */
25915 core2i7_dfa_post_advance_cycle (void)
25917 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
25919 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25921 data
->ifetch_block_len
= 0;
25922 data
->ifetch_block_n_insns
= 0;
25925 static int min_insn_size (rtx
);
25927 /* Filter out insns from ready_try that the core will not be able to issue
25928 on current cycle due to decoder. */
25930 core2i7_first_cycle_multipass_filter_ready_try
25931 (const_ix86_first_cycle_multipass_data_t data
,
25932 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
25939 if (ready_try
[n_ready
])
25942 insn
= get_ready_element (n_ready
);
25943 insn_size
= min_insn_size (insn
);
25945 if (/* If this is a too long an insn for a secondary decoder ... */
25946 (!first_cycle_insn_p
25947 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
25948 /* ... or it would not fit into the ifetch block ... */
25949 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
25950 /* ... or the decoder is full already ... */
25951 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
25952 /* ... mask the insn out. */
25954 ready_try
[n_ready
] = 1;
25956 if (data
->ready_try_change
)
25957 bitmap_set_bit (data
->ready_try_change
, n_ready
);
25962 /* Prepare for a new round of multipass lookahead scheduling. */
25964 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25965 bool first_cycle_insn_p
)
25967 ix86_first_cycle_multipass_data_t data
25968 = (ix86_first_cycle_multipass_data_t
) _data
;
25969 const_ix86_first_cycle_multipass_data_t prev_data
25970 = ix86_first_cycle_multipass_data
;
25972 /* Restore the state from the end of the previous round. */
25973 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25974 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25976 /* Filter instructions that cannot be issued on current cycle due to
25977 decoder restrictions. */
25978 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25979 first_cycle_insn_p
);
25982 /* INSN is being issued in current solution. Account for its impact on
25983 the decoder model. */
25985 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
25986 rtx insn
, const void *_prev_data
)
25988 ix86_first_cycle_multipass_data_t data
25989 = (ix86_first_cycle_multipass_data_t
) _data
;
25990 const_ix86_first_cycle_multipass_data_t prev_data
25991 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
25993 int insn_size
= min_insn_size (insn
);
25995 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
25996 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
25997 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
25998 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
26000 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26001 if (!data
->ready_try_change
)
26003 data
->ready_try_change
= sbitmap_alloc (n_ready
);
26004 data
->ready_try_change_size
= n_ready
;
26006 else if (data
->ready_try_change_size
< n_ready
)
26008 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
26010 data
->ready_try_change_size
= n_ready
;
26012 bitmap_clear (data
->ready_try_change
);
26014 /* Filter out insns from ready_try that the core will not be able to issue
26015 on current cycle due to decoder. */
26016 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
26020 /* Revert the effect on ready_try. */
26022 core2i7_first_cycle_multipass_backtrack (const void *_data
,
26024 int n_ready ATTRIBUTE_UNUSED
)
26026 const_ix86_first_cycle_multipass_data_t data
26027 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26028 unsigned int i
= 0;
26029 sbitmap_iterator sbi
;
26031 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
26032 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
26038 /* Save the result of multipass lookahead scheduling for the next round. */
26040 core2i7_first_cycle_multipass_end (const void *_data
)
26042 const_ix86_first_cycle_multipass_data_t data
26043 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26044 ix86_first_cycle_multipass_data_t next_data
26045 = ix86_first_cycle_multipass_data
;
26049 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
26050 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
26054 /* Deallocate target data. */
26056 core2i7_first_cycle_multipass_fini (void *_data
)
26058 ix86_first_cycle_multipass_data_t data
26059 = (ix86_first_cycle_multipass_data_t
) _data
;
26061 if (data
->ready_try_change
)
26063 sbitmap_free (data
->ready_try_change
);
26064 data
->ready_try_change
= NULL
;
26065 data
->ready_try_change_size
= 0;
26069 /* Prepare for scheduling pass. */
26071 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
26072 int verbose ATTRIBUTE_UNUSED
,
26073 int max_uid ATTRIBUTE_UNUSED
)
26075 /* Install scheduling hooks for current CPU. Some of these hooks are used
26076 in time-critical parts of the scheduler, so we only set them up when
26077 they are actually used. */
26080 case PROCESSOR_CORE2
:
26081 case PROCESSOR_COREI7
:
26082 case PROCESSOR_COREI7_AVX
:
26083 case PROCESSOR_HASWELL
:
26084 /* Do not perform multipass scheduling for pre-reload schedule
26085 to save compile time. */
26086 if (reload_completed
)
26088 targetm
.sched
.dfa_post_advance_cycle
26089 = core2i7_dfa_post_advance_cycle
;
26090 targetm
.sched
.first_cycle_multipass_init
26091 = core2i7_first_cycle_multipass_init
;
26092 targetm
.sched
.first_cycle_multipass_begin
26093 = core2i7_first_cycle_multipass_begin
;
26094 targetm
.sched
.first_cycle_multipass_issue
26095 = core2i7_first_cycle_multipass_issue
;
26096 targetm
.sched
.first_cycle_multipass_backtrack
26097 = core2i7_first_cycle_multipass_backtrack
;
26098 targetm
.sched
.first_cycle_multipass_end
26099 = core2i7_first_cycle_multipass_end
;
26100 targetm
.sched
.first_cycle_multipass_fini
26101 = core2i7_first_cycle_multipass_fini
;
26103 /* Set decoder parameters. */
26104 core2i7_secondary_decoder_max_insn_size
= 8;
26105 core2i7_ifetch_block_size
= 16;
26106 core2i7_ifetch_block_max_insns
= 6;
26109 /* ... Fall through ... */
26111 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
26112 targetm
.sched
.first_cycle_multipass_init
= NULL
;
26113 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
26114 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
26115 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
26116 targetm
.sched
.first_cycle_multipass_end
= NULL
;
26117 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
26123 /* Compute the alignment given to a constant that is being placed in memory.
26124 EXP is the constant and ALIGN is the alignment that the object would
26126 The value of this function is used instead of that alignment to align
26130 ix86_constant_alignment (tree exp
, int align
)
26132 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
26133 || TREE_CODE (exp
) == INTEGER_CST
)
26135 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
26137 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
26140 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
26141 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
26142 return BITS_PER_WORD
;
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and the
   `return' lines between conditions are missing (fused numbering jumps,
   e.g. 26183 -> 26185).  Recover from a pristine i386.c before editing.  */
26147 /* Compute the alignment for a static variable.
26148 TYPE is the data type, and ALIGN is the alignment that
26149 the object would ordinarily have. The value of this function is used
26150 instead of that alignment to align the object. */
26153 ix86_data_alignment (tree type
, int align
, bool opt
)
/* Alignment is capped at BITS_PER_WORD when optimizing for size, else at
   min(256, MAX_OFILE_ALIGNMENT).  */
26155 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
/* NOTE(review): the line that opened this condition (original line 26157,
   presumably an `if (...' with a target test) is missing from the
   extraction; only its trailing clauses survive below.  */
26158 && AGGREGATE_TYPE_P (type
)
26159 && TYPE_SIZE (type
)
26160 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26161 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
26162 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
26163 && align
< max_align
)
26166 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26167 to 16byte boundary. */
26170 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
26171 && TYPE_SIZE (type
)
26172 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26173 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
26174 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Per-TREE_CODE refinement: arrays by element mode, complex by own mode,
   record/union by first field's mode, scalars by own mode.  The returned
   alignments for each branch are lost in extraction.  */
26181 if (TREE_CODE (type
) == ARRAY_TYPE
)
26183 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26185 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26188 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26191 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26193 if ((TYPE_MODE (type
) == XCmode
26194 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26197 else if ((TREE_CODE (type
) == RECORD_TYPE
26198 || TREE_CODE (type
) == UNION_TYPE
26199 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26200 && TYPE_FIELDS (type
))
26202 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26204 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26207 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26208 || TREE_CODE (type
) == INTEGER_TYPE
)
26210 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26212 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and the
   `return' lines / enclosing braces are missing where the fused numbering
   jumps.  Recover from a pristine i386.c before editing.  */
26219 /* Compute the alignment for a local variable or a stack slot. EXP is
26220 the data type or decl itself, MODE is the widest mode available and
26221 ALIGN is the alignment that the object would ordinarily have. The
26222 value of this macro is used instead of that alignment to align the
26226 ix86_local_alignment (tree exp
, enum machine_mode mode
,
26227 unsigned int align
)
/* If EXP is a decl, work from its type; `decl' and `type' locals are
   declared on lines lost in extraction.  */
26231 if (exp
&& DECL_P (exp
))
26233 type
= TREE_TYPE (exp
);
26242 /* Don't do dynamic stack realignment for long long objects with
26243 -mpreferred-stack-boundary=2. */
/* NOTE(review): the opening of this condition (original lines 26244-26245,
   presumably `if (!TARGET_64BIT ...') is missing; only trailing clauses
   survive.  */
26246 && ix86_preferred_stack_boundary
< 64
26247 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26248 && (!type
|| !TYPE_USER_ALIGN (type
))
26249 && (!decl
|| !DECL_USER_ALIGN (decl
)))
26252 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26253 register in MODE. We will return the largest alignment of XF
26257 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
26258 align
= GET_MODE_ALIGNMENT (DFmode
);
26262 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26263 to 16byte boundary. Exact wording is:
26265 An array uses the same alignment as its elements, except that a local or
26266 global array variable of length at least 16 bytes or
26267 a C99 variable-length array variable always has alignment of at least 16 bytes.
26269 This was added to allow use of aligned SSE instructions at arrays. This
26270 rule is meant for static storage (where compiler can not do the analysis
26271 by itself). We follow it for automatic variables only when convenient.
26272 We fully control everything in the function compiled and functions from
26273 other unit can not rely on the alignment.
26275 Exclude va_list type. It is the common case of local array where
26276 we can not benefit from the alignment.
26278 TODO: Probably one should optimize for size only when var is not escaping. */
26279 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
/* Aggregates of >= 16 bytes (excluding the va_list type) are promoted to
   128-bit alignment on 64-bit targets; return value lost in extraction.  */
26282 if (AGGREGATE_TYPE_P (type
)
26283 && (va_list_type_node
== NULL_TREE
26284 || (TYPE_MAIN_VARIANT (type
)
26285 != TYPE_MAIN_VARIANT (va_list_type_node
)))
26286 && TYPE_SIZE (type
)
26287 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26288 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
26289 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
/* Same per-TREE_CODE refinement as ix86_data_alignment above.  */
26292 if (TREE_CODE (type
) == ARRAY_TYPE
)
26294 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26296 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26299 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26301 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26303 if ((TYPE_MODE (type
) == XCmode
26304 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26307 else if ((TREE_CODE (type
) == RECORD_TYPE
26308 || TREE_CODE (type
) == UNION_TYPE
26309 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26310 && TYPE_FIELDS (type
))
26312 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26314 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26317 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26318 || TREE_CODE (type
) == INTEGER_TYPE
)
26321 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26323 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and the
   `return' lines are missing where the fused numbering jumps.  Recover
   from a pristine i386.c before editing.  */
26329 /* Compute the minimum required alignment for dynamic stack realignment
26330 purposes for a local variable, parameter or a stack slot. EXP is
26331 the data type or decl itself, MODE is its mode and ALIGN is the
26332 alignment that the object would ordinarily have. */
26335 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
26336 unsigned int align
)
26340 if (exp
&& DECL_P (exp
))
26342 type
= TREE_TYPE (exp
);
/* Early exit: the DImode demotion below only matters for 32-bit targets
   with align == 64 and a preferred stack boundary under 64 bits (the
   return value on this path is lost in extraction).  */
26351 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
26354 /* Don't do dynamic stack realignment for long long objects with
26355 -mpreferred-stack-boundary=2. */
26356 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26357 && (!type
|| !TYPE_USER_ALIGN (type
))
26358 && (!decl
|| !DECL_USER_ALIGN (decl
)))
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and
   several lines (register-number assignments, returns, braces) are missing
   where the fused numbering jumps.  Recover from a pristine i386.c before
   editing.  */
26364 /* Find a location for the static chain incoming to a nested function.
26365 This is a register, unless all free registers are used by arguments. */
26368 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
/* No static chain needed for this function; the returned value on this
   path is lost in extraction.  */
26372 if (!DECL_STATIC_CHAIN (fndecl
))
26377 /* We always use R10 in 64-bit mode. */
26385 /* By default in 32-bit mode we use ECX to pass the static chain. */
/* Calling convention overrides: fastcall/thiscall and regparm-3 functions
   have their argument registers occupied, so pick a different location.  */
26388 fntype
= TREE_TYPE (fndecl
);
26389 ccvt
= ix86_get_callcvt (fntype
);
26390 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
26392 /* Fastcall functions use ecx/edx for arguments, which leaves
26393 us with EAX for the static chain.
26394 Thiscall functions use ecx for arguments, which also
26395 leaves us with EAX for the static chain. */
26398 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
26400 /* Thiscall functions use ecx for arguments, which leaves
26401 us with EAX and EDX for the static chain.
26402 We are using for abi-compatibility EAX. */
26405 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
26407 /* For regparm 3, we have no free call-clobbered registers in
26408 which to store the static chain. In order to implement this,
26409 we have the trampoline push the static chain to the stack.
26410 However, we can't push a value below the return address when
26411 we call the nested function directly, so we have to use an
26412 alternate entry point. For this we use ESI, and have the
26413 alternate entry point push ESI, so that things appear the
26414 same once we're executing the nested function. */
26417 if (fndecl
== current_function_decl
)
26418 ix86_static_chain_on_stack
= true;
/* Stack-based chain: a frame MEM at arg_pointer - 8.  */
26419 return gen_frame_mem (SImode
,
26420 plus_constant (Pmode
,
26421 arg_pointer_rtx
, -8));
/* Register-based chain (regno assignments are lost in extraction).  */
26427 return gen_rtx_REG (Pmode
, regno
);
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and many
   lines (local declarations, `offset' updates, braces, the TARGET_64BIT
   split) are missing where the fused numbering jumps.  Recover from a
   pristine i386.c before editing.  */
26430 /* Emit RTL insns to initialize the variable parts of a trampoline.
26431 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26432 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26433 to be passed to the target function. */
26436 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
26442 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
26448 /* Load the function address to r11. Try to load address using
26449 the shorter movl instead of movabs. We may want to support
26450 movq for kernel mode, but kernel does not use trampolines at
26451 the moment. FNADDR is a 32bit address and may not be in
26452 DImode when ptr_mode == SImode. Always use movl in this
26454 if (ptr_mode
== SImode
26455 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
/* NOTE(review): the branch writing the movl form and its opcode bytes
   (original lines 26456-26458 area) is partially lost; 0xbb41 below is the
   little-endian prefix+opcode pair for `movl $imm, %r11d' -- confirm
   against pristine source.  */
26457 fnaddr
= copy_addr_to_reg (fnaddr
);
26459 mem
= adjust_address (m_tramp
, HImode
, offset
);
26460 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
26462 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
26463 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
/* 64-bit immediate form: 0xbb49 is the prefix+opcode pair for
   `movabs $imm64, %r11'.  */
26468 mem
= adjust_address (m_tramp
, HImode
, offset
);
26469 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
26471 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
26472 emit_move_insn (mem
, fnaddr
);
26476 /* Load static chain using movabs to r10. Use the shorter movl
26477 instead of movabs when ptr_mode == SImode. */
26478 if (ptr_mode
== SImode
)
/* NOTE(review): the opcode selection for the r10 load (original lines
   26479-26488) is missing from the extraction.  */
26489 mem
= adjust_address (m_tramp
, HImode
, offset
);
26490 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
26492 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
26493 emit_move_insn (mem
, chain_value
);
26496 /* Jump to r11; the last (unused) byte is a nop, only there to
26497 pad the write out to a single 32-bit store. */
26498 mem
= adjust_address (m_tramp
, SImode
, offset
);
26499 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
26506 /* Depending on the static chain location, either load a register
26507 with a constant, or push the constant to the stack. All of the
26508 instructions are the same size. */
26509 chain
= ix86_static_chain (fndecl
, true);
/* 32-bit path: choose the mov-immediate opcode from the chain register
   (the case labels themselves are lost in extraction; 0xb8/0xb9 are
   `mov $imm32' into eax/ecx respectively).  */
26512 switch (REGNO (chain
))
26515 opcode
= 0xb8; break;
26517 opcode
= 0xb9; break;
26519 gcc_unreachable ();
26525 mem
= adjust_address (m_tramp
, QImode
, offset
);
26526 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
26528 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26529 emit_move_insn (mem
, chain_value
);
/* Emit the trailing `jmp rel32' (opcode 0xe9) to the nested function.  */
26532 mem
= adjust_address (m_tramp
, QImode
, offset
);
26533 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
26535 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26537 /* Compute offset from the end of the jmp to the target function.
26538 In the case in which the trampoline stores the static chain on
26539 the stack, we need to skip the first insn which pushes the
26540 (call-saved) register static chain; this push is 1 byte. */
26542 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
26543 plus_constant (Pmode
, XEXP (m_tramp
, 0),
26544 offset
- (MEM_P (chain
) ? 1 : 0)),
26545 NULL_RTX
, 1, OPTAB_DIRECT
);
26546 emit_move_insn (mem
, disp
);
26549 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
/* Optionally mark the stack executable via __enable_execute_stack.  */
26551 #ifdef HAVE_ENABLE_EXECUTE_STACK
26552 #ifdef CHECK_EXECUTE_STACK_ENABLED
26553 if (CHECK_EXECUTE_STACK_ENABLED
)
26555 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
26556 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and
   lines (the function's return type, early-return on cache hit, `else',
   closing braces, final `return type;') are missing where the fused
   numbering jumps.  Recover from a pristine i386.c before editing.  */
26560 /* The following file contains several enumerations and data structures
26561 built from the definitions in i386-builtin-types.def. */
26563 #include "i386-builtin-types.inc"
26565 /* Table for the ix86 builtin non-function types. */
26566 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
26568 /* Retrieve an element from the above table, building some of
26569 the types lazily. */
26572 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
26574 unsigned int index
;
26577 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
/* Check the memoization table first.  */
26579 type
= ix86_builtin_type_tab
[(int) tcode
];
26583 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
/* Vector-type range: build a vector of the recorded base type and mode.  */
26584 if (tcode
<= IX86_BT_LAST_VECT
)
26586 enum machine_mode mode
;
26588 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
26589 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
26590 mode
= ix86_builtin_type_vect_mode
[index
];
26592 type
= build_vector_type_for_mode (itype
, mode
);
/* Pointer-type range: plain pointers up to IX86_BT_LAST_PTR, const-qualified
   pointees beyond it.  */
26598 index
= tcode
- IX86_BT_LAST_VECT
- 1;
26599 if (tcode
<= IX86_BT_LAST_PTR
)
26600 quals
= TYPE_UNQUALIFIED
;
26602 quals
= TYPE_QUAL_CONST
;
26604 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
26605 if (quals
!= TYPE_UNQUALIFIED
)
26606 itype
= build_qualified_type (itype
, quals
);
26608 type
= build_pointer_type (itype
);
/* Memoize the built type for subsequent lookups.  */
26611 ix86_builtin_type_tab
[(int) tcode
] = type
;
/* NOTE(review): damaged mechanical extraction of i386.c -- statements are
   split mid-token, original line numbers are fused into the text, and
   lines (the function's return type, early-return on cache hit, loop
   braces, final `return type;') are missing where the fused numbering
   jumps.  Recover from a pristine i386.c before editing.  */
26615 /* Table for the ix86 builtin function types. */
26616 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
26618 /* Retrieve an element from the above table, building some of
26619 the types lazily. */
26622 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
26626 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
/* Check the memoization table first.  */
26628 type
= ix86_builtin_func_type_tab
[(int) tcode
];
/* Non-alias range: build the function type from the flattened argument
   array; entry `start' is the return type, the rest are the arguments,
   consed onto void_list_node in reverse order.  */
26632 if (tcode
<= IX86_BT_LAST_FUNC
)
26634 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
26635 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
26636 tree rtype
, atype
, args
= void_list_node
;
26639 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
26640 for (i
= after
- 1; i
> start
; --i
)
26642 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
26643 args
= tree_cons (NULL
, atype
, args
);
26646 type
= build_function_type (rtype
, args
);
/* Alias range: resolve to the base code's function type.  */
26650 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
26651 enum ix86_builtin_func_type icode
;
26653 icode
= ix86_builtin_func_alias_base
[index
];
26654 type
= ix86_get_builtin_func_type (icode
);
/* Memoize the built type for subsequent lookups.  */
26657 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
26662 /* Codes for all the SSE/MMX builtins. */
26665 IX86_BUILTIN_ADDPS
,
26666 IX86_BUILTIN_ADDSS
,
26667 IX86_BUILTIN_DIVPS
,
26668 IX86_BUILTIN_DIVSS
,
26669 IX86_BUILTIN_MULPS
,
26670 IX86_BUILTIN_MULSS
,
26671 IX86_BUILTIN_SUBPS
,
26672 IX86_BUILTIN_SUBSS
,
26674 IX86_BUILTIN_CMPEQPS
,
26675 IX86_BUILTIN_CMPLTPS
,
26676 IX86_BUILTIN_CMPLEPS
,
26677 IX86_BUILTIN_CMPGTPS
,
26678 IX86_BUILTIN_CMPGEPS
,
26679 IX86_BUILTIN_CMPNEQPS
,
26680 IX86_BUILTIN_CMPNLTPS
,
26681 IX86_BUILTIN_CMPNLEPS
,
26682 IX86_BUILTIN_CMPNGTPS
,
26683 IX86_BUILTIN_CMPNGEPS
,
26684 IX86_BUILTIN_CMPORDPS
,
26685 IX86_BUILTIN_CMPUNORDPS
,
26686 IX86_BUILTIN_CMPEQSS
,
26687 IX86_BUILTIN_CMPLTSS
,
26688 IX86_BUILTIN_CMPLESS
,
26689 IX86_BUILTIN_CMPNEQSS
,
26690 IX86_BUILTIN_CMPNLTSS
,
26691 IX86_BUILTIN_CMPNLESS
,
26692 IX86_BUILTIN_CMPORDSS
,
26693 IX86_BUILTIN_CMPUNORDSS
,
26695 IX86_BUILTIN_COMIEQSS
,
26696 IX86_BUILTIN_COMILTSS
,
26697 IX86_BUILTIN_COMILESS
,
26698 IX86_BUILTIN_COMIGTSS
,
26699 IX86_BUILTIN_COMIGESS
,
26700 IX86_BUILTIN_COMINEQSS
,
26701 IX86_BUILTIN_UCOMIEQSS
,
26702 IX86_BUILTIN_UCOMILTSS
,
26703 IX86_BUILTIN_UCOMILESS
,
26704 IX86_BUILTIN_UCOMIGTSS
,
26705 IX86_BUILTIN_UCOMIGESS
,
26706 IX86_BUILTIN_UCOMINEQSS
,
26708 IX86_BUILTIN_CVTPI2PS
,
26709 IX86_BUILTIN_CVTPS2PI
,
26710 IX86_BUILTIN_CVTSI2SS
,
26711 IX86_BUILTIN_CVTSI642SS
,
26712 IX86_BUILTIN_CVTSS2SI
,
26713 IX86_BUILTIN_CVTSS2SI64
,
26714 IX86_BUILTIN_CVTTPS2PI
,
26715 IX86_BUILTIN_CVTTSS2SI
,
26716 IX86_BUILTIN_CVTTSS2SI64
,
26718 IX86_BUILTIN_MAXPS
,
26719 IX86_BUILTIN_MAXSS
,
26720 IX86_BUILTIN_MINPS
,
26721 IX86_BUILTIN_MINSS
,
26723 IX86_BUILTIN_LOADUPS
,
26724 IX86_BUILTIN_STOREUPS
,
26725 IX86_BUILTIN_MOVSS
,
26727 IX86_BUILTIN_MOVHLPS
,
26728 IX86_BUILTIN_MOVLHPS
,
26729 IX86_BUILTIN_LOADHPS
,
26730 IX86_BUILTIN_LOADLPS
,
26731 IX86_BUILTIN_STOREHPS
,
26732 IX86_BUILTIN_STORELPS
,
26734 IX86_BUILTIN_MASKMOVQ
,
26735 IX86_BUILTIN_MOVMSKPS
,
26736 IX86_BUILTIN_PMOVMSKB
,
26738 IX86_BUILTIN_MOVNTPS
,
26739 IX86_BUILTIN_MOVNTQ
,
26741 IX86_BUILTIN_LOADDQU
,
26742 IX86_BUILTIN_STOREDQU
,
26744 IX86_BUILTIN_PACKSSWB
,
26745 IX86_BUILTIN_PACKSSDW
,
26746 IX86_BUILTIN_PACKUSWB
,
26748 IX86_BUILTIN_PADDB
,
26749 IX86_BUILTIN_PADDW
,
26750 IX86_BUILTIN_PADDD
,
26751 IX86_BUILTIN_PADDQ
,
26752 IX86_BUILTIN_PADDSB
,
26753 IX86_BUILTIN_PADDSW
,
26754 IX86_BUILTIN_PADDUSB
,
26755 IX86_BUILTIN_PADDUSW
,
26756 IX86_BUILTIN_PSUBB
,
26757 IX86_BUILTIN_PSUBW
,
26758 IX86_BUILTIN_PSUBD
,
26759 IX86_BUILTIN_PSUBQ
,
26760 IX86_BUILTIN_PSUBSB
,
26761 IX86_BUILTIN_PSUBSW
,
26762 IX86_BUILTIN_PSUBUSB
,
26763 IX86_BUILTIN_PSUBUSW
,
26766 IX86_BUILTIN_PANDN
,
26770 IX86_BUILTIN_PAVGB
,
26771 IX86_BUILTIN_PAVGW
,
26773 IX86_BUILTIN_PCMPEQB
,
26774 IX86_BUILTIN_PCMPEQW
,
26775 IX86_BUILTIN_PCMPEQD
,
26776 IX86_BUILTIN_PCMPGTB
,
26777 IX86_BUILTIN_PCMPGTW
,
26778 IX86_BUILTIN_PCMPGTD
,
26780 IX86_BUILTIN_PMADDWD
,
26782 IX86_BUILTIN_PMAXSW
,
26783 IX86_BUILTIN_PMAXUB
,
26784 IX86_BUILTIN_PMINSW
,
26785 IX86_BUILTIN_PMINUB
,
26787 IX86_BUILTIN_PMULHUW
,
26788 IX86_BUILTIN_PMULHW
,
26789 IX86_BUILTIN_PMULLW
,
26791 IX86_BUILTIN_PSADBW
,
26792 IX86_BUILTIN_PSHUFW
,
26794 IX86_BUILTIN_PSLLW
,
26795 IX86_BUILTIN_PSLLD
,
26796 IX86_BUILTIN_PSLLQ
,
26797 IX86_BUILTIN_PSRAW
,
26798 IX86_BUILTIN_PSRAD
,
26799 IX86_BUILTIN_PSRLW
,
26800 IX86_BUILTIN_PSRLD
,
26801 IX86_BUILTIN_PSRLQ
,
26802 IX86_BUILTIN_PSLLWI
,
26803 IX86_BUILTIN_PSLLDI
,
26804 IX86_BUILTIN_PSLLQI
,
26805 IX86_BUILTIN_PSRAWI
,
26806 IX86_BUILTIN_PSRADI
,
26807 IX86_BUILTIN_PSRLWI
,
26808 IX86_BUILTIN_PSRLDI
,
26809 IX86_BUILTIN_PSRLQI
,
26811 IX86_BUILTIN_PUNPCKHBW
,
26812 IX86_BUILTIN_PUNPCKHWD
,
26813 IX86_BUILTIN_PUNPCKHDQ
,
26814 IX86_BUILTIN_PUNPCKLBW
,
26815 IX86_BUILTIN_PUNPCKLWD
,
26816 IX86_BUILTIN_PUNPCKLDQ
,
26818 IX86_BUILTIN_SHUFPS
,
26820 IX86_BUILTIN_RCPPS
,
26821 IX86_BUILTIN_RCPSS
,
26822 IX86_BUILTIN_RSQRTPS
,
26823 IX86_BUILTIN_RSQRTPS_NR
,
26824 IX86_BUILTIN_RSQRTSS
,
26825 IX86_BUILTIN_RSQRTF
,
26826 IX86_BUILTIN_SQRTPS
,
26827 IX86_BUILTIN_SQRTPS_NR
,
26828 IX86_BUILTIN_SQRTSS
,
26830 IX86_BUILTIN_UNPCKHPS
,
26831 IX86_BUILTIN_UNPCKLPS
,
26833 IX86_BUILTIN_ANDPS
,
26834 IX86_BUILTIN_ANDNPS
,
26836 IX86_BUILTIN_XORPS
,
26839 IX86_BUILTIN_LDMXCSR
,
26840 IX86_BUILTIN_STMXCSR
,
26841 IX86_BUILTIN_SFENCE
,
26843 IX86_BUILTIN_FXSAVE
,
26844 IX86_BUILTIN_FXRSTOR
,
26845 IX86_BUILTIN_FXSAVE64
,
26846 IX86_BUILTIN_FXRSTOR64
,
26848 IX86_BUILTIN_XSAVE
,
26849 IX86_BUILTIN_XRSTOR
,
26850 IX86_BUILTIN_XSAVE64
,
26851 IX86_BUILTIN_XRSTOR64
,
26853 IX86_BUILTIN_XSAVEOPT
,
26854 IX86_BUILTIN_XSAVEOPT64
,
26856 /* 3DNow! Original */
26857 IX86_BUILTIN_FEMMS
,
26858 IX86_BUILTIN_PAVGUSB
,
26859 IX86_BUILTIN_PF2ID
,
26860 IX86_BUILTIN_PFACC
,
26861 IX86_BUILTIN_PFADD
,
26862 IX86_BUILTIN_PFCMPEQ
,
26863 IX86_BUILTIN_PFCMPGE
,
26864 IX86_BUILTIN_PFCMPGT
,
26865 IX86_BUILTIN_PFMAX
,
26866 IX86_BUILTIN_PFMIN
,
26867 IX86_BUILTIN_PFMUL
,
26868 IX86_BUILTIN_PFRCP
,
26869 IX86_BUILTIN_PFRCPIT1
,
26870 IX86_BUILTIN_PFRCPIT2
,
26871 IX86_BUILTIN_PFRSQIT1
,
26872 IX86_BUILTIN_PFRSQRT
,
26873 IX86_BUILTIN_PFSUB
,
26874 IX86_BUILTIN_PFSUBR
,
26875 IX86_BUILTIN_PI2FD
,
26876 IX86_BUILTIN_PMULHRW
,
26878 /* 3DNow! Athlon Extensions */
26879 IX86_BUILTIN_PF2IW
,
26880 IX86_BUILTIN_PFNACC
,
26881 IX86_BUILTIN_PFPNACC
,
26882 IX86_BUILTIN_PI2FW
,
26883 IX86_BUILTIN_PSWAPDSI
,
26884 IX86_BUILTIN_PSWAPDSF
,
26887 IX86_BUILTIN_ADDPD
,
26888 IX86_BUILTIN_ADDSD
,
26889 IX86_BUILTIN_DIVPD
,
26890 IX86_BUILTIN_DIVSD
,
26891 IX86_BUILTIN_MULPD
,
26892 IX86_BUILTIN_MULSD
,
26893 IX86_BUILTIN_SUBPD
,
26894 IX86_BUILTIN_SUBSD
,
26896 IX86_BUILTIN_CMPEQPD
,
26897 IX86_BUILTIN_CMPLTPD
,
26898 IX86_BUILTIN_CMPLEPD
,
26899 IX86_BUILTIN_CMPGTPD
,
26900 IX86_BUILTIN_CMPGEPD
,
26901 IX86_BUILTIN_CMPNEQPD
,
26902 IX86_BUILTIN_CMPNLTPD
,
26903 IX86_BUILTIN_CMPNLEPD
,
26904 IX86_BUILTIN_CMPNGTPD
,
26905 IX86_BUILTIN_CMPNGEPD
,
26906 IX86_BUILTIN_CMPORDPD
,
26907 IX86_BUILTIN_CMPUNORDPD
,
26908 IX86_BUILTIN_CMPEQSD
,
26909 IX86_BUILTIN_CMPLTSD
,
26910 IX86_BUILTIN_CMPLESD
,
26911 IX86_BUILTIN_CMPNEQSD
,
26912 IX86_BUILTIN_CMPNLTSD
,
26913 IX86_BUILTIN_CMPNLESD
,
26914 IX86_BUILTIN_CMPORDSD
,
26915 IX86_BUILTIN_CMPUNORDSD
,
26917 IX86_BUILTIN_COMIEQSD
,
26918 IX86_BUILTIN_COMILTSD
,
26919 IX86_BUILTIN_COMILESD
,
26920 IX86_BUILTIN_COMIGTSD
,
26921 IX86_BUILTIN_COMIGESD
,
26922 IX86_BUILTIN_COMINEQSD
,
26923 IX86_BUILTIN_UCOMIEQSD
,
26924 IX86_BUILTIN_UCOMILTSD
,
26925 IX86_BUILTIN_UCOMILESD
,
26926 IX86_BUILTIN_UCOMIGTSD
,
26927 IX86_BUILTIN_UCOMIGESD
,
26928 IX86_BUILTIN_UCOMINEQSD
,
26930 IX86_BUILTIN_MAXPD
,
26931 IX86_BUILTIN_MAXSD
,
26932 IX86_BUILTIN_MINPD
,
26933 IX86_BUILTIN_MINSD
,
26935 IX86_BUILTIN_ANDPD
,
26936 IX86_BUILTIN_ANDNPD
,
26938 IX86_BUILTIN_XORPD
,
26940 IX86_BUILTIN_SQRTPD
,
26941 IX86_BUILTIN_SQRTSD
,
26943 IX86_BUILTIN_UNPCKHPD
,
26944 IX86_BUILTIN_UNPCKLPD
,
26946 IX86_BUILTIN_SHUFPD
,
26948 IX86_BUILTIN_LOADUPD
,
26949 IX86_BUILTIN_STOREUPD
,
26950 IX86_BUILTIN_MOVSD
,
26952 IX86_BUILTIN_LOADHPD
,
26953 IX86_BUILTIN_LOADLPD
,
26955 IX86_BUILTIN_CVTDQ2PD
,
26956 IX86_BUILTIN_CVTDQ2PS
,
26958 IX86_BUILTIN_CVTPD2DQ
,
26959 IX86_BUILTIN_CVTPD2PI
,
26960 IX86_BUILTIN_CVTPD2PS
,
26961 IX86_BUILTIN_CVTTPD2DQ
,
26962 IX86_BUILTIN_CVTTPD2PI
,
26964 IX86_BUILTIN_CVTPI2PD
,
26965 IX86_BUILTIN_CVTSI2SD
,
26966 IX86_BUILTIN_CVTSI642SD
,
26968 IX86_BUILTIN_CVTSD2SI
,
26969 IX86_BUILTIN_CVTSD2SI64
,
26970 IX86_BUILTIN_CVTSD2SS
,
26971 IX86_BUILTIN_CVTSS2SD
,
26972 IX86_BUILTIN_CVTTSD2SI
,
26973 IX86_BUILTIN_CVTTSD2SI64
,
26975 IX86_BUILTIN_CVTPS2DQ
,
26976 IX86_BUILTIN_CVTPS2PD
,
26977 IX86_BUILTIN_CVTTPS2DQ
,
26979 IX86_BUILTIN_MOVNTI
,
26980 IX86_BUILTIN_MOVNTI64
,
26981 IX86_BUILTIN_MOVNTPD
,
26982 IX86_BUILTIN_MOVNTDQ
,
26984 IX86_BUILTIN_MOVQ128
,
26987 IX86_BUILTIN_MASKMOVDQU
,
26988 IX86_BUILTIN_MOVMSKPD
,
26989 IX86_BUILTIN_PMOVMSKB128
,
26991 IX86_BUILTIN_PACKSSWB128
,
26992 IX86_BUILTIN_PACKSSDW128
,
26993 IX86_BUILTIN_PACKUSWB128
,
26995 IX86_BUILTIN_PADDB128
,
26996 IX86_BUILTIN_PADDW128
,
26997 IX86_BUILTIN_PADDD128
,
26998 IX86_BUILTIN_PADDQ128
,
26999 IX86_BUILTIN_PADDSB128
,
27000 IX86_BUILTIN_PADDSW128
,
27001 IX86_BUILTIN_PADDUSB128
,
27002 IX86_BUILTIN_PADDUSW128
,
27003 IX86_BUILTIN_PSUBB128
,
27004 IX86_BUILTIN_PSUBW128
,
27005 IX86_BUILTIN_PSUBD128
,
27006 IX86_BUILTIN_PSUBQ128
,
27007 IX86_BUILTIN_PSUBSB128
,
27008 IX86_BUILTIN_PSUBSW128
,
27009 IX86_BUILTIN_PSUBUSB128
,
27010 IX86_BUILTIN_PSUBUSW128
,
27012 IX86_BUILTIN_PAND128
,
27013 IX86_BUILTIN_PANDN128
,
27014 IX86_BUILTIN_POR128
,
27015 IX86_BUILTIN_PXOR128
,
27017 IX86_BUILTIN_PAVGB128
,
27018 IX86_BUILTIN_PAVGW128
,
27020 IX86_BUILTIN_PCMPEQB128
,
27021 IX86_BUILTIN_PCMPEQW128
,
27022 IX86_BUILTIN_PCMPEQD128
,
27023 IX86_BUILTIN_PCMPGTB128
,
27024 IX86_BUILTIN_PCMPGTW128
,
27025 IX86_BUILTIN_PCMPGTD128
,
27027 IX86_BUILTIN_PMADDWD128
,
27029 IX86_BUILTIN_PMAXSW128
,
27030 IX86_BUILTIN_PMAXUB128
,
27031 IX86_BUILTIN_PMINSW128
,
27032 IX86_BUILTIN_PMINUB128
,
27034 IX86_BUILTIN_PMULUDQ
,
27035 IX86_BUILTIN_PMULUDQ128
,
27036 IX86_BUILTIN_PMULHUW128
,
27037 IX86_BUILTIN_PMULHW128
,
27038 IX86_BUILTIN_PMULLW128
,
27040 IX86_BUILTIN_PSADBW128
,
27041 IX86_BUILTIN_PSHUFHW
,
27042 IX86_BUILTIN_PSHUFLW
,
27043 IX86_BUILTIN_PSHUFD
,
27045 IX86_BUILTIN_PSLLDQI128
,
27046 IX86_BUILTIN_PSLLWI128
,
27047 IX86_BUILTIN_PSLLDI128
,
27048 IX86_BUILTIN_PSLLQI128
,
27049 IX86_BUILTIN_PSRAWI128
,
27050 IX86_BUILTIN_PSRADI128
,
27051 IX86_BUILTIN_PSRLDQI128
,
27052 IX86_BUILTIN_PSRLWI128
,
27053 IX86_BUILTIN_PSRLDI128
,
27054 IX86_BUILTIN_PSRLQI128
,
27056 IX86_BUILTIN_PSLLDQ128
,
27057 IX86_BUILTIN_PSLLW128
,
27058 IX86_BUILTIN_PSLLD128
,
27059 IX86_BUILTIN_PSLLQ128
,
27060 IX86_BUILTIN_PSRAW128
,
27061 IX86_BUILTIN_PSRAD128
,
27062 IX86_BUILTIN_PSRLW128
,
27063 IX86_BUILTIN_PSRLD128
,
27064 IX86_BUILTIN_PSRLQ128
,
27066 IX86_BUILTIN_PUNPCKHBW128
,
27067 IX86_BUILTIN_PUNPCKHWD128
,
27068 IX86_BUILTIN_PUNPCKHDQ128
,
27069 IX86_BUILTIN_PUNPCKHQDQ128
,
27070 IX86_BUILTIN_PUNPCKLBW128
,
27071 IX86_BUILTIN_PUNPCKLWD128
,
27072 IX86_BUILTIN_PUNPCKLDQ128
,
27073 IX86_BUILTIN_PUNPCKLQDQ128
,
27075 IX86_BUILTIN_CLFLUSH
,
27076 IX86_BUILTIN_MFENCE
,
27077 IX86_BUILTIN_LFENCE
,
27078 IX86_BUILTIN_PAUSE
,
27080 IX86_BUILTIN_FNSTENV
,
27081 IX86_BUILTIN_FLDENV
,
27082 IX86_BUILTIN_FNSTSW
,
27083 IX86_BUILTIN_FNCLEX
,
27085 IX86_BUILTIN_BSRSI
,
27086 IX86_BUILTIN_BSRDI
,
27087 IX86_BUILTIN_RDPMC
,
27088 IX86_BUILTIN_RDTSC
,
27089 IX86_BUILTIN_RDTSCP
,
27090 IX86_BUILTIN_ROLQI
,
27091 IX86_BUILTIN_ROLHI
,
27092 IX86_BUILTIN_RORQI
,
27093 IX86_BUILTIN_RORHI
,
27096 IX86_BUILTIN_ADDSUBPS
,
27097 IX86_BUILTIN_HADDPS
,
27098 IX86_BUILTIN_HSUBPS
,
27099 IX86_BUILTIN_MOVSHDUP
,
27100 IX86_BUILTIN_MOVSLDUP
,
27101 IX86_BUILTIN_ADDSUBPD
,
27102 IX86_BUILTIN_HADDPD
,
27103 IX86_BUILTIN_HSUBPD
,
27104 IX86_BUILTIN_LDDQU
,
27106 IX86_BUILTIN_MONITOR
,
27107 IX86_BUILTIN_MWAIT
,
27110 IX86_BUILTIN_PHADDW
,
27111 IX86_BUILTIN_PHADDD
,
27112 IX86_BUILTIN_PHADDSW
,
27113 IX86_BUILTIN_PHSUBW
,
27114 IX86_BUILTIN_PHSUBD
,
27115 IX86_BUILTIN_PHSUBSW
,
27116 IX86_BUILTIN_PMADDUBSW
,
27117 IX86_BUILTIN_PMULHRSW
,
27118 IX86_BUILTIN_PSHUFB
,
27119 IX86_BUILTIN_PSIGNB
,
27120 IX86_BUILTIN_PSIGNW
,
27121 IX86_BUILTIN_PSIGND
,
27122 IX86_BUILTIN_PALIGNR
,
27123 IX86_BUILTIN_PABSB
,
27124 IX86_BUILTIN_PABSW
,
27125 IX86_BUILTIN_PABSD
,
27127 IX86_BUILTIN_PHADDW128
,
27128 IX86_BUILTIN_PHADDD128
,
27129 IX86_BUILTIN_PHADDSW128
,
27130 IX86_BUILTIN_PHSUBW128
,
27131 IX86_BUILTIN_PHSUBD128
,
27132 IX86_BUILTIN_PHSUBSW128
,
27133 IX86_BUILTIN_PMADDUBSW128
,
27134 IX86_BUILTIN_PMULHRSW128
,
27135 IX86_BUILTIN_PSHUFB128
,
27136 IX86_BUILTIN_PSIGNB128
,
27137 IX86_BUILTIN_PSIGNW128
,
27138 IX86_BUILTIN_PSIGND128
,
27139 IX86_BUILTIN_PALIGNR128
,
27140 IX86_BUILTIN_PABSB128
,
27141 IX86_BUILTIN_PABSW128
,
27142 IX86_BUILTIN_PABSD128
,
27144 /* AMDFAM10 - SSE4A New Instructions. */
27145 IX86_BUILTIN_MOVNTSD
,
27146 IX86_BUILTIN_MOVNTSS
,
27147 IX86_BUILTIN_EXTRQI
,
27148 IX86_BUILTIN_EXTRQ
,
27149 IX86_BUILTIN_INSERTQI
,
27150 IX86_BUILTIN_INSERTQ
,
27153 IX86_BUILTIN_BLENDPD
,
27154 IX86_BUILTIN_BLENDPS
,
27155 IX86_BUILTIN_BLENDVPD
,
27156 IX86_BUILTIN_BLENDVPS
,
27157 IX86_BUILTIN_PBLENDVB128
,
27158 IX86_BUILTIN_PBLENDW128
,
27163 IX86_BUILTIN_INSERTPS128
,
27165 IX86_BUILTIN_MOVNTDQA
,
27166 IX86_BUILTIN_MPSADBW128
,
27167 IX86_BUILTIN_PACKUSDW128
,
27168 IX86_BUILTIN_PCMPEQQ
,
27169 IX86_BUILTIN_PHMINPOSUW128
,
27171 IX86_BUILTIN_PMAXSB128
,
27172 IX86_BUILTIN_PMAXSD128
,
27173 IX86_BUILTIN_PMAXUD128
,
27174 IX86_BUILTIN_PMAXUW128
,
27176 IX86_BUILTIN_PMINSB128
,
27177 IX86_BUILTIN_PMINSD128
,
27178 IX86_BUILTIN_PMINUD128
,
27179 IX86_BUILTIN_PMINUW128
,
27181 IX86_BUILTIN_PMOVSXBW128
,
27182 IX86_BUILTIN_PMOVSXBD128
,
27183 IX86_BUILTIN_PMOVSXBQ128
,
27184 IX86_BUILTIN_PMOVSXWD128
,
27185 IX86_BUILTIN_PMOVSXWQ128
,
27186 IX86_BUILTIN_PMOVSXDQ128
,
27188 IX86_BUILTIN_PMOVZXBW128
,
27189 IX86_BUILTIN_PMOVZXBD128
,
27190 IX86_BUILTIN_PMOVZXBQ128
,
27191 IX86_BUILTIN_PMOVZXWD128
,
27192 IX86_BUILTIN_PMOVZXWQ128
,
27193 IX86_BUILTIN_PMOVZXDQ128
,
27195 IX86_BUILTIN_PMULDQ128
,
27196 IX86_BUILTIN_PMULLD128
,
27198 IX86_BUILTIN_ROUNDSD
,
27199 IX86_BUILTIN_ROUNDSS
,
27201 IX86_BUILTIN_ROUNDPD
,
27202 IX86_BUILTIN_ROUNDPS
,
27204 IX86_BUILTIN_FLOORPD
,
27205 IX86_BUILTIN_CEILPD
,
27206 IX86_BUILTIN_TRUNCPD
,
27207 IX86_BUILTIN_RINTPD
,
27208 IX86_BUILTIN_ROUNDPD_AZ
,
27210 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
27211 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
27212 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
27214 IX86_BUILTIN_FLOORPS
,
27215 IX86_BUILTIN_CEILPS
,
27216 IX86_BUILTIN_TRUNCPS
,
27217 IX86_BUILTIN_RINTPS
,
27218 IX86_BUILTIN_ROUNDPS_AZ
,
27220 IX86_BUILTIN_FLOORPS_SFIX
,
27221 IX86_BUILTIN_CEILPS_SFIX
,
27222 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
27224 IX86_BUILTIN_PTESTZ
,
27225 IX86_BUILTIN_PTESTC
,
27226 IX86_BUILTIN_PTESTNZC
,
27228 IX86_BUILTIN_VEC_INIT_V2SI
,
27229 IX86_BUILTIN_VEC_INIT_V4HI
,
27230 IX86_BUILTIN_VEC_INIT_V8QI
,
27231 IX86_BUILTIN_VEC_EXT_V2DF
,
27232 IX86_BUILTIN_VEC_EXT_V2DI
,
27233 IX86_BUILTIN_VEC_EXT_V4SF
,
27234 IX86_BUILTIN_VEC_EXT_V4SI
,
27235 IX86_BUILTIN_VEC_EXT_V8HI
,
27236 IX86_BUILTIN_VEC_EXT_V2SI
,
27237 IX86_BUILTIN_VEC_EXT_V4HI
,
27238 IX86_BUILTIN_VEC_EXT_V16QI
,
27239 IX86_BUILTIN_VEC_SET_V2DI
,
27240 IX86_BUILTIN_VEC_SET_V4SF
,
27241 IX86_BUILTIN_VEC_SET_V4SI
,
27242 IX86_BUILTIN_VEC_SET_V8HI
,
27243 IX86_BUILTIN_VEC_SET_V4HI
,
27244 IX86_BUILTIN_VEC_SET_V16QI
,
27246 IX86_BUILTIN_VEC_PACK_SFIX
,
27247 IX86_BUILTIN_VEC_PACK_SFIX256
,
27250 IX86_BUILTIN_CRC32QI
,
27251 IX86_BUILTIN_CRC32HI
,
27252 IX86_BUILTIN_CRC32SI
,
27253 IX86_BUILTIN_CRC32DI
,
27255 IX86_BUILTIN_PCMPESTRI128
,
27256 IX86_BUILTIN_PCMPESTRM128
,
27257 IX86_BUILTIN_PCMPESTRA128
,
27258 IX86_BUILTIN_PCMPESTRC128
,
27259 IX86_BUILTIN_PCMPESTRO128
,
27260 IX86_BUILTIN_PCMPESTRS128
,
27261 IX86_BUILTIN_PCMPESTRZ128
,
27262 IX86_BUILTIN_PCMPISTRI128
,
27263 IX86_BUILTIN_PCMPISTRM128
,
27264 IX86_BUILTIN_PCMPISTRA128
,
27265 IX86_BUILTIN_PCMPISTRC128
,
27266 IX86_BUILTIN_PCMPISTRO128
,
27267 IX86_BUILTIN_PCMPISTRS128
,
27268 IX86_BUILTIN_PCMPISTRZ128
,
27270 IX86_BUILTIN_PCMPGTQ
,
27272 /* AES instructions */
27273 IX86_BUILTIN_AESENC128
,
27274 IX86_BUILTIN_AESENCLAST128
,
27275 IX86_BUILTIN_AESDEC128
,
27276 IX86_BUILTIN_AESDECLAST128
,
27277 IX86_BUILTIN_AESIMC128
,
27278 IX86_BUILTIN_AESKEYGENASSIST128
,
27280 /* PCLMUL instruction */
27281 IX86_BUILTIN_PCLMULQDQ128
,
27284 IX86_BUILTIN_ADDPD256
,
27285 IX86_BUILTIN_ADDPS256
,
27286 IX86_BUILTIN_ADDSUBPD256
,
27287 IX86_BUILTIN_ADDSUBPS256
,
27288 IX86_BUILTIN_ANDPD256
,
27289 IX86_BUILTIN_ANDPS256
,
27290 IX86_BUILTIN_ANDNPD256
,
27291 IX86_BUILTIN_ANDNPS256
,
27292 IX86_BUILTIN_BLENDPD256
,
27293 IX86_BUILTIN_BLENDPS256
,
27294 IX86_BUILTIN_BLENDVPD256
,
27295 IX86_BUILTIN_BLENDVPS256
,
27296 IX86_BUILTIN_DIVPD256
,
27297 IX86_BUILTIN_DIVPS256
,
27298 IX86_BUILTIN_DPPS256
,
27299 IX86_BUILTIN_HADDPD256
,
27300 IX86_BUILTIN_HADDPS256
,
27301 IX86_BUILTIN_HSUBPD256
,
27302 IX86_BUILTIN_HSUBPS256
,
27303 IX86_BUILTIN_MAXPD256
,
27304 IX86_BUILTIN_MAXPS256
,
27305 IX86_BUILTIN_MINPD256
,
27306 IX86_BUILTIN_MINPS256
,
27307 IX86_BUILTIN_MULPD256
,
27308 IX86_BUILTIN_MULPS256
,
27309 IX86_BUILTIN_ORPD256
,
27310 IX86_BUILTIN_ORPS256
,
27311 IX86_BUILTIN_SHUFPD256
,
27312 IX86_BUILTIN_SHUFPS256
,
27313 IX86_BUILTIN_SUBPD256
,
27314 IX86_BUILTIN_SUBPS256
,
27315 IX86_BUILTIN_XORPD256
,
27316 IX86_BUILTIN_XORPS256
,
27317 IX86_BUILTIN_CMPSD
,
27318 IX86_BUILTIN_CMPSS
,
27319 IX86_BUILTIN_CMPPD
,
27320 IX86_BUILTIN_CMPPS
,
27321 IX86_BUILTIN_CMPPD256
,
27322 IX86_BUILTIN_CMPPS256
,
27323 IX86_BUILTIN_CVTDQ2PD256
,
27324 IX86_BUILTIN_CVTDQ2PS256
,
27325 IX86_BUILTIN_CVTPD2PS256
,
27326 IX86_BUILTIN_CVTPS2DQ256
,
27327 IX86_BUILTIN_CVTPS2PD256
,
27328 IX86_BUILTIN_CVTTPD2DQ256
,
27329 IX86_BUILTIN_CVTPD2DQ256
,
27330 IX86_BUILTIN_CVTTPS2DQ256
,
27331 IX86_BUILTIN_EXTRACTF128PD256
,
27332 IX86_BUILTIN_EXTRACTF128PS256
,
27333 IX86_BUILTIN_EXTRACTF128SI256
,
27334 IX86_BUILTIN_VZEROALL
,
27335 IX86_BUILTIN_VZEROUPPER
,
27336 IX86_BUILTIN_VPERMILVARPD
,
27337 IX86_BUILTIN_VPERMILVARPS
,
27338 IX86_BUILTIN_VPERMILVARPD256
,
27339 IX86_BUILTIN_VPERMILVARPS256
,
27340 IX86_BUILTIN_VPERMILPD
,
27341 IX86_BUILTIN_VPERMILPS
,
27342 IX86_BUILTIN_VPERMILPD256
,
27343 IX86_BUILTIN_VPERMILPS256
,
27344 IX86_BUILTIN_VPERMIL2PD
,
27345 IX86_BUILTIN_VPERMIL2PS
,
27346 IX86_BUILTIN_VPERMIL2PD256
,
27347 IX86_BUILTIN_VPERMIL2PS256
,
27348 IX86_BUILTIN_VPERM2F128PD256
,
27349 IX86_BUILTIN_VPERM2F128PS256
,
27350 IX86_BUILTIN_VPERM2F128SI256
,
27351 IX86_BUILTIN_VBROADCASTSS
,
27352 IX86_BUILTIN_VBROADCASTSD256
,
27353 IX86_BUILTIN_VBROADCASTSS256
,
27354 IX86_BUILTIN_VBROADCASTPD256
,
27355 IX86_BUILTIN_VBROADCASTPS256
,
27356 IX86_BUILTIN_VINSERTF128PD256
,
27357 IX86_BUILTIN_VINSERTF128PS256
,
27358 IX86_BUILTIN_VINSERTF128SI256
,
27359 IX86_BUILTIN_LOADUPD256
,
27360 IX86_BUILTIN_LOADUPS256
,
27361 IX86_BUILTIN_STOREUPD256
,
27362 IX86_BUILTIN_STOREUPS256
,
27363 IX86_BUILTIN_LDDQU256
,
27364 IX86_BUILTIN_MOVNTDQ256
,
27365 IX86_BUILTIN_MOVNTPD256
,
27366 IX86_BUILTIN_MOVNTPS256
,
27367 IX86_BUILTIN_LOADDQU256
,
27368 IX86_BUILTIN_STOREDQU256
,
27369 IX86_BUILTIN_MASKLOADPD
,
27370 IX86_BUILTIN_MASKLOADPS
,
27371 IX86_BUILTIN_MASKSTOREPD
,
27372 IX86_BUILTIN_MASKSTOREPS
,
27373 IX86_BUILTIN_MASKLOADPD256
,
27374 IX86_BUILTIN_MASKLOADPS256
,
27375 IX86_BUILTIN_MASKSTOREPD256
,
27376 IX86_BUILTIN_MASKSTOREPS256
,
27377 IX86_BUILTIN_MOVSHDUP256
,
27378 IX86_BUILTIN_MOVSLDUP256
,
27379 IX86_BUILTIN_MOVDDUP256
,
27381 IX86_BUILTIN_SQRTPD256
,
27382 IX86_BUILTIN_SQRTPS256
,
27383 IX86_BUILTIN_SQRTPS_NR256
,
27384 IX86_BUILTIN_RSQRTPS256
,
27385 IX86_BUILTIN_RSQRTPS_NR256
,
27387 IX86_BUILTIN_RCPPS256
,
27389 IX86_BUILTIN_ROUNDPD256
,
27390 IX86_BUILTIN_ROUNDPS256
,
27392 IX86_BUILTIN_FLOORPD256
,
27393 IX86_BUILTIN_CEILPD256
,
27394 IX86_BUILTIN_TRUNCPD256
,
27395 IX86_BUILTIN_RINTPD256
,
27396 IX86_BUILTIN_ROUNDPD_AZ256
,
27398 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
27399 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
27400 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
27402 IX86_BUILTIN_FLOORPS256
,
27403 IX86_BUILTIN_CEILPS256
,
27404 IX86_BUILTIN_TRUNCPS256
,
27405 IX86_BUILTIN_RINTPS256
,
27406 IX86_BUILTIN_ROUNDPS_AZ256
,
27408 IX86_BUILTIN_FLOORPS_SFIX256
,
27409 IX86_BUILTIN_CEILPS_SFIX256
,
27410 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
27412 IX86_BUILTIN_UNPCKHPD256
,
27413 IX86_BUILTIN_UNPCKLPD256
,
27414 IX86_BUILTIN_UNPCKHPS256
,
27415 IX86_BUILTIN_UNPCKLPS256
,
27417 IX86_BUILTIN_SI256_SI
,
27418 IX86_BUILTIN_PS256_PS
,
27419 IX86_BUILTIN_PD256_PD
,
27420 IX86_BUILTIN_SI_SI256
,
27421 IX86_BUILTIN_PS_PS256
,
27422 IX86_BUILTIN_PD_PD256
,
27424 IX86_BUILTIN_VTESTZPD
,
27425 IX86_BUILTIN_VTESTCPD
,
27426 IX86_BUILTIN_VTESTNZCPD
,
27427 IX86_BUILTIN_VTESTZPS
,
27428 IX86_BUILTIN_VTESTCPS
,
27429 IX86_BUILTIN_VTESTNZCPS
,
27430 IX86_BUILTIN_VTESTZPD256
,
27431 IX86_BUILTIN_VTESTCPD256
,
27432 IX86_BUILTIN_VTESTNZCPD256
,
27433 IX86_BUILTIN_VTESTZPS256
,
27434 IX86_BUILTIN_VTESTCPS256
,
27435 IX86_BUILTIN_VTESTNZCPS256
,
27436 IX86_BUILTIN_PTESTZ256
,
27437 IX86_BUILTIN_PTESTC256
,
27438 IX86_BUILTIN_PTESTNZC256
,
27440 IX86_BUILTIN_MOVMSKPD256
,
27441 IX86_BUILTIN_MOVMSKPS256
,
27444 IX86_BUILTIN_MPSADBW256
,
27445 IX86_BUILTIN_PABSB256
,
27446 IX86_BUILTIN_PABSW256
,
27447 IX86_BUILTIN_PABSD256
,
27448 IX86_BUILTIN_PACKSSDW256
,
27449 IX86_BUILTIN_PACKSSWB256
,
27450 IX86_BUILTIN_PACKUSDW256
,
27451 IX86_BUILTIN_PACKUSWB256
,
27452 IX86_BUILTIN_PADDB256
,
27453 IX86_BUILTIN_PADDW256
,
27454 IX86_BUILTIN_PADDD256
,
27455 IX86_BUILTIN_PADDQ256
,
27456 IX86_BUILTIN_PADDSB256
,
27457 IX86_BUILTIN_PADDSW256
,
27458 IX86_BUILTIN_PADDUSB256
,
27459 IX86_BUILTIN_PADDUSW256
,
27460 IX86_BUILTIN_PALIGNR256
,
27461 IX86_BUILTIN_AND256I
,
27462 IX86_BUILTIN_ANDNOT256I
,
27463 IX86_BUILTIN_PAVGB256
,
27464 IX86_BUILTIN_PAVGW256
,
27465 IX86_BUILTIN_PBLENDVB256
,
27466 IX86_BUILTIN_PBLENDVW256
,
27467 IX86_BUILTIN_PCMPEQB256
,
27468 IX86_BUILTIN_PCMPEQW256
,
27469 IX86_BUILTIN_PCMPEQD256
,
27470 IX86_BUILTIN_PCMPEQQ256
,
27471 IX86_BUILTIN_PCMPGTB256
,
27472 IX86_BUILTIN_PCMPGTW256
,
27473 IX86_BUILTIN_PCMPGTD256
,
27474 IX86_BUILTIN_PCMPGTQ256
,
27475 IX86_BUILTIN_PHADDW256
,
27476 IX86_BUILTIN_PHADDD256
,
27477 IX86_BUILTIN_PHADDSW256
,
27478 IX86_BUILTIN_PHSUBW256
,
27479 IX86_BUILTIN_PHSUBD256
,
27480 IX86_BUILTIN_PHSUBSW256
,
27481 IX86_BUILTIN_PMADDUBSW256
,
27482 IX86_BUILTIN_PMADDWD256
,
27483 IX86_BUILTIN_PMAXSB256
,
27484 IX86_BUILTIN_PMAXSW256
,
27485 IX86_BUILTIN_PMAXSD256
,
27486 IX86_BUILTIN_PMAXUB256
,
27487 IX86_BUILTIN_PMAXUW256
,
27488 IX86_BUILTIN_PMAXUD256
,
27489 IX86_BUILTIN_PMINSB256
,
27490 IX86_BUILTIN_PMINSW256
,
27491 IX86_BUILTIN_PMINSD256
,
27492 IX86_BUILTIN_PMINUB256
,
27493 IX86_BUILTIN_PMINUW256
,
27494 IX86_BUILTIN_PMINUD256
,
27495 IX86_BUILTIN_PMOVMSKB256
,
27496 IX86_BUILTIN_PMOVSXBW256
,
27497 IX86_BUILTIN_PMOVSXBD256
,
27498 IX86_BUILTIN_PMOVSXBQ256
,
27499 IX86_BUILTIN_PMOVSXWD256
,
27500 IX86_BUILTIN_PMOVSXWQ256
,
27501 IX86_BUILTIN_PMOVSXDQ256
,
27502 IX86_BUILTIN_PMOVZXBW256
,
27503 IX86_BUILTIN_PMOVZXBD256
,
27504 IX86_BUILTIN_PMOVZXBQ256
,
27505 IX86_BUILTIN_PMOVZXWD256
,
27506 IX86_BUILTIN_PMOVZXWQ256
,
27507 IX86_BUILTIN_PMOVZXDQ256
,
27508 IX86_BUILTIN_PMULDQ256
,
27509 IX86_BUILTIN_PMULHRSW256
,
27510 IX86_BUILTIN_PMULHUW256
,
27511 IX86_BUILTIN_PMULHW256
,
27512 IX86_BUILTIN_PMULLW256
,
27513 IX86_BUILTIN_PMULLD256
,
27514 IX86_BUILTIN_PMULUDQ256
,
27515 IX86_BUILTIN_POR256
,
27516 IX86_BUILTIN_PSADBW256
,
27517 IX86_BUILTIN_PSHUFB256
,
27518 IX86_BUILTIN_PSHUFD256
,
27519 IX86_BUILTIN_PSHUFHW256
,
27520 IX86_BUILTIN_PSHUFLW256
,
27521 IX86_BUILTIN_PSIGNB256
,
27522 IX86_BUILTIN_PSIGNW256
,
27523 IX86_BUILTIN_PSIGND256
,
27524 IX86_BUILTIN_PSLLDQI256
,
27525 IX86_BUILTIN_PSLLWI256
,
27526 IX86_BUILTIN_PSLLW256
,
27527 IX86_BUILTIN_PSLLDI256
,
27528 IX86_BUILTIN_PSLLD256
,
27529 IX86_BUILTIN_PSLLQI256
,
27530 IX86_BUILTIN_PSLLQ256
,
27531 IX86_BUILTIN_PSRAWI256
,
27532 IX86_BUILTIN_PSRAW256
,
27533 IX86_BUILTIN_PSRADI256
,
27534 IX86_BUILTIN_PSRAD256
,
27535 IX86_BUILTIN_PSRLDQI256
,
27536 IX86_BUILTIN_PSRLWI256
,
27537 IX86_BUILTIN_PSRLW256
,
27538 IX86_BUILTIN_PSRLDI256
,
27539 IX86_BUILTIN_PSRLD256
,
27540 IX86_BUILTIN_PSRLQI256
,
27541 IX86_BUILTIN_PSRLQ256
,
27542 IX86_BUILTIN_PSUBB256
,
27543 IX86_BUILTIN_PSUBW256
,
27544 IX86_BUILTIN_PSUBD256
,
27545 IX86_BUILTIN_PSUBQ256
,
27546 IX86_BUILTIN_PSUBSB256
,
27547 IX86_BUILTIN_PSUBSW256
,
27548 IX86_BUILTIN_PSUBUSB256
,
27549 IX86_BUILTIN_PSUBUSW256
,
27550 IX86_BUILTIN_PUNPCKHBW256
,
27551 IX86_BUILTIN_PUNPCKHWD256
,
27552 IX86_BUILTIN_PUNPCKHDQ256
,
27553 IX86_BUILTIN_PUNPCKHQDQ256
,
27554 IX86_BUILTIN_PUNPCKLBW256
,
27555 IX86_BUILTIN_PUNPCKLWD256
,
27556 IX86_BUILTIN_PUNPCKLDQ256
,
27557 IX86_BUILTIN_PUNPCKLQDQ256
,
27558 IX86_BUILTIN_PXOR256
,
27559 IX86_BUILTIN_MOVNTDQA256
,
27560 IX86_BUILTIN_VBROADCASTSS_PS
,
27561 IX86_BUILTIN_VBROADCASTSS_PS256
,
27562 IX86_BUILTIN_VBROADCASTSD_PD256
,
27563 IX86_BUILTIN_VBROADCASTSI256
,
27564 IX86_BUILTIN_PBLENDD256
,
27565 IX86_BUILTIN_PBLENDD128
,
27566 IX86_BUILTIN_PBROADCASTB256
,
27567 IX86_BUILTIN_PBROADCASTW256
,
27568 IX86_BUILTIN_PBROADCASTD256
,
27569 IX86_BUILTIN_PBROADCASTQ256
,
27570 IX86_BUILTIN_PBROADCASTB128
,
27571 IX86_BUILTIN_PBROADCASTW128
,
27572 IX86_BUILTIN_PBROADCASTD128
,
27573 IX86_BUILTIN_PBROADCASTQ128
,
27574 IX86_BUILTIN_VPERMVARSI256
,
27575 IX86_BUILTIN_VPERMDF256
,
27576 IX86_BUILTIN_VPERMVARSF256
,
27577 IX86_BUILTIN_VPERMDI256
,
27578 IX86_BUILTIN_VPERMTI256
,
27579 IX86_BUILTIN_VEXTRACT128I256
,
27580 IX86_BUILTIN_VINSERT128I256
,
27581 IX86_BUILTIN_MASKLOADD
,
27582 IX86_BUILTIN_MASKLOADQ
,
27583 IX86_BUILTIN_MASKLOADD256
,
27584 IX86_BUILTIN_MASKLOADQ256
,
27585 IX86_BUILTIN_MASKSTORED
,
27586 IX86_BUILTIN_MASKSTOREQ
,
27587 IX86_BUILTIN_MASKSTORED256
,
27588 IX86_BUILTIN_MASKSTOREQ256
,
27589 IX86_BUILTIN_PSLLVV4DI
,
27590 IX86_BUILTIN_PSLLVV2DI
,
27591 IX86_BUILTIN_PSLLVV8SI
,
27592 IX86_BUILTIN_PSLLVV4SI
,
27593 IX86_BUILTIN_PSRAVV8SI
,
27594 IX86_BUILTIN_PSRAVV4SI
,
27595 IX86_BUILTIN_PSRLVV4DI
,
27596 IX86_BUILTIN_PSRLVV2DI
,
27597 IX86_BUILTIN_PSRLVV8SI
,
27598 IX86_BUILTIN_PSRLVV4SI
,
27600 IX86_BUILTIN_GATHERSIV2DF
,
27601 IX86_BUILTIN_GATHERSIV4DF
,
27602 IX86_BUILTIN_GATHERDIV2DF
,
27603 IX86_BUILTIN_GATHERDIV4DF
,
27604 IX86_BUILTIN_GATHERSIV4SF
,
27605 IX86_BUILTIN_GATHERSIV8SF
,
27606 IX86_BUILTIN_GATHERDIV4SF
,
27607 IX86_BUILTIN_GATHERDIV8SF
,
27608 IX86_BUILTIN_GATHERSIV2DI
,
27609 IX86_BUILTIN_GATHERSIV4DI
,
27610 IX86_BUILTIN_GATHERDIV2DI
,
27611 IX86_BUILTIN_GATHERDIV4DI
,
27612 IX86_BUILTIN_GATHERSIV4SI
,
27613 IX86_BUILTIN_GATHERSIV8SI
,
27614 IX86_BUILTIN_GATHERDIV4SI
,
27615 IX86_BUILTIN_GATHERDIV8SI
,
27617 /* Alternate 4 element gather for the vectorizer where
27618 all operands are 32-byte wide. */
27619 IX86_BUILTIN_GATHERALTSIV4DF
,
27620 IX86_BUILTIN_GATHERALTDIV8SF
,
27621 IX86_BUILTIN_GATHERALTSIV4DI
,
27622 IX86_BUILTIN_GATHERALTDIV8SI
,
27624 /* TFmode support builtins. */
27626 IX86_BUILTIN_HUGE_VALQ
,
27627 IX86_BUILTIN_FABSQ
,
27628 IX86_BUILTIN_COPYSIGNQ
,
27630 /* Vectorizer support builtins. */
27631 IX86_BUILTIN_CPYSGNPS
,
27632 IX86_BUILTIN_CPYSGNPD
,
27633 IX86_BUILTIN_CPYSGNPS256
,
27634 IX86_BUILTIN_CPYSGNPD256
,
27636 /* FMA4 instructions. */
27637 IX86_BUILTIN_VFMADDSS
,
27638 IX86_BUILTIN_VFMADDSD
,
27639 IX86_BUILTIN_VFMADDPS
,
27640 IX86_BUILTIN_VFMADDPD
,
27641 IX86_BUILTIN_VFMADDPS256
,
27642 IX86_BUILTIN_VFMADDPD256
,
27643 IX86_BUILTIN_VFMADDSUBPS
,
27644 IX86_BUILTIN_VFMADDSUBPD
,
27645 IX86_BUILTIN_VFMADDSUBPS256
,
27646 IX86_BUILTIN_VFMADDSUBPD256
,
27648 /* FMA3 instructions. */
27649 IX86_BUILTIN_VFMADDSS3
,
27650 IX86_BUILTIN_VFMADDSD3
,
27652 /* XOP instructions. */
27653 IX86_BUILTIN_VPCMOV
,
27654 IX86_BUILTIN_VPCMOV_V2DI
,
27655 IX86_BUILTIN_VPCMOV_V4SI
,
27656 IX86_BUILTIN_VPCMOV_V8HI
,
27657 IX86_BUILTIN_VPCMOV_V16QI
,
27658 IX86_BUILTIN_VPCMOV_V4SF
,
27659 IX86_BUILTIN_VPCMOV_V2DF
,
27660 IX86_BUILTIN_VPCMOV256
,
27661 IX86_BUILTIN_VPCMOV_V4DI256
,
27662 IX86_BUILTIN_VPCMOV_V8SI256
,
27663 IX86_BUILTIN_VPCMOV_V16HI256
,
27664 IX86_BUILTIN_VPCMOV_V32QI256
,
27665 IX86_BUILTIN_VPCMOV_V8SF256
,
27666 IX86_BUILTIN_VPCMOV_V4DF256
,
27668 IX86_BUILTIN_VPPERM
,
27670 IX86_BUILTIN_VPMACSSWW
,
27671 IX86_BUILTIN_VPMACSWW
,
27672 IX86_BUILTIN_VPMACSSWD
,
27673 IX86_BUILTIN_VPMACSWD
,
27674 IX86_BUILTIN_VPMACSSDD
,
27675 IX86_BUILTIN_VPMACSDD
,
27676 IX86_BUILTIN_VPMACSSDQL
,
27677 IX86_BUILTIN_VPMACSSDQH
,
27678 IX86_BUILTIN_VPMACSDQL
,
27679 IX86_BUILTIN_VPMACSDQH
,
27680 IX86_BUILTIN_VPMADCSSWD
,
27681 IX86_BUILTIN_VPMADCSWD
,
27683 IX86_BUILTIN_VPHADDBW
,
27684 IX86_BUILTIN_VPHADDBD
,
27685 IX86_BUILTIN_VPHADDBQ
,
27686 IX86_BUILTIN_VPHADDWD
,
27687 IX86_BUILTIN_VPHADDWQ
,
27688 IX86_BUILTIN_VPHADDDQ
,
27689 IX86_BUILTIN_VPHADDUBW
,
27690 IX86_BUILTIN_VPHADDUBD
,
27691 IX86_BUILTIN_VPHADDUBQ
,
27692 IX86_BUILTIN_VPHADDUWD
,
27693 IX86_BUILTIN_VPHADDUWQ
,
27694 IX86_BUILTIN_VPHADDUDQ
,
27695 IX86_BUILTIN_VPHSUBBW
,
27696 IX86_BUILTIN_VPHSUBWD
,
27697 IX86_BUILTIN_VPHSUBDQ
,
27699 IX86_BUILTIN_VPROTB
,
27700 IX86_BUILTIN_VPROTW
,
27701 IX86_BUILTIN_VPROTD
,
27702 IX86_BUILTIN_VPROTQ
,
27703 IX86_BUILTIN_VPROTB_IMM
,
27704 IX86_BUILTIN_VPROTW_IMM
,
27705 IX86_BUILTIN_VPROTD_IMM
,
27706 IX86_BUILTIN_VPROTQ_IMM
,
27708 IX86_BUILTIN_VPSHLB
,
27709 IX86_BUILTIN_VPSHLW
,
27710 IX86_BUILTIN_VPSHLD
,
27711 IX86_BUILTIN_VPSHLQ
,
27712 IX86_BUILTIN_VPSHAB
,
27713 IX86_BUILTIN_VPSHAW
,
27714 IX86_BUILTIN_VPSHAD
,
27715 IX86_BUILTIN_VPSHAQ
,
27717 IX86_BUILTIN_VFRCZSS
,
27718 IX86_BUILTIN_VFRCZSD
,
27719 IX86_BUILTIN_VFRCZPS
,
27720 IX86_BUILTIN_VFRCZPD
,
27721 IX86_BUILTIN_VFRCZPS256
,
27722 IX86_BUILTIN_VFRCZPD256
,
27724 IX86_BUILTIN_VPCOMEQUB
,
27725 IX86_BUILTIN_VPCOMNEUB
,
27726 IX86_BUILTIN_VPCOMLTUB
,
27727 IX86_BUILTIN_VPCOMLEUB
,
27728 IX86_BUILTIN_VPCOMGTUB
,
27729 IX86_BUILTIN_VPCOMGEUB
,
27730 IX86_BUILTIN_VPCOMFALSEUB
,
27731 IX86_BUILTIN_VPCOMTRUEUB
,
27733 IX86_BUILTIN_VPCOMEQUW
,
27734 IX86_BUILTIN_VPCOMNEUW
,
27735 IX86_BUILTIN_VPCOMLTUW
,
27736 IX86_BUILTIN_VPCOMLEUW
,
27737 IX86_BUILTIN_VPCOMGTUW
,
27738 IX86_BUILTIN_VPCOMGEUW
,
27739 IX86_BUILTIN_VPCOMFALSEUW
,
27740 IX86_BUILTIN_VPCOMTRUEUW
,
27742 IX86_BUILTIN_VPCOMEQUD
,
27743 IX86_BUILTIN_VPCOMNEUD
,
27744 IX86_BUILTIN_VPCOMLTUD
,
27745 IX86_BUILTIN_VPCOMLEUD
,
27746 IX86_BUILTIN_VPCOMGTUD
,
27747 IX86_BUILTIN_VPCOMGEUD
,
27748 IX86_BUILTIN_VPCOMFALSEUD
,
27749 IX86_BUILTIN_VPCOMTRUEUD
,
27751 IX86_BUILTIN_VPCOMEQUQ
,
27752 IX86_BUILTIN_VPCOMNEUQ
,
27753 IX86_BUILTIN_VPCOMLTUQ
,
27754 IX86_BUILTIN_VPCOMLEUQ
,
27755 IX86_BUILTIN_VPCOMGTUQ
,
27756 IX86_BUILTIN_VPCOMGEUQ
,
27757 IX86_BUILTIN_VPCOMFALSEUQ
,
27758 IX86_BUILTIN_VPCOMTRUEUQ
,
27760 IX86_BUILTIN_VPCOMEQB
,
27761 IX86_BUILTIN_VPCOMNEB
,
27762 IX86_BUILTIN_VPCOMLTB
,
27763 IX86_BUILTIN_VPCOMLEB
,
27764 IX86_BUILTIN_VPCOMGTB
,
27765 IX86_BUILTIN_VPCOMGEB
,
27766 IX86_BUILTIN_VPCOMFALSEB
,
27767 IX86_BUILTIN_VPCOMTRUEB
,
27769 IX86_BUILTIN_VPCOMEQW
,
27770 IX86_BUILTIN_VPCOMNEW
,
27771 IX86_BUILTIN_VPCOMLTW
,
27772 IX86_BUILTIN_VPCOMLEW
,
27773 IX86_BUILTIN_VPCOMGTW
,
27774 IX86_BUILTIN_VPCOMGEW
,
27775 IX86_BUILTIN_VPCOMFALSEW
,
27776 IX86_BUILTIN_VPCOMTRUEW
,
27778 IX86_BUILTIN_VPCOMEQD
,
27779 IX86_BUILTIN_VPCOMNED
,
27780 IX86_BUILTIN_VPCOMLTD
,
27781 IX86_BUILTIN_VPCOMLED
,
27782 IX86_BUILTIN_VPCOMGTD
,
27783 IX86_BUILTIN_VPCOMGED
,
27784 IX86_BUILTIN_VPCOMFALSED
,
27785 IX86_BUILTIN_VPCOMTRUED
,
27787 IX86_BUILTIN_VPCOMEQQ
,
27788 IX86_BUILTIN_VPCOMNEQ
,
27789 IX86_BUILTIN_VPCOMLTQ
,
27790 IX86_BUILTIN_VPCOMLEQ
,
27791 IX86_BUILTIN_VPCOMGTQ
,
27792 IX86_BUILTIN_VPCOMGEQ
,
27793 IX86_BUILTIN_VPCOMFALSEQ
,
27794 IX86_BUILTIN_VPCOMTRUEQ
,
27796 /* LWP instructions. */
27797 IX86_BUILTIN_LLWPCB
,
27798 IX86_BUILTIN_SLWPCB
,
27799 IX86_BUILTIN_LWPVAL32
,
27800 IX86_BUILTIN_LWPVAL64
,
27801 IX86_BUILTIN_LWPINS32
,
27802 IX86_BUILTIN_LWPINS64
,
27807 IX86_BUILTIN_XBEGIN
,
27809 IX86_BUILTIN_XABORT
,
27810 IX86_BUILTIN_XTEST
,
27812 /* BMI instructions. */
27813 IX86_BUILTIN_BEXTR32
,
27814 IX86_BUILTIN_BEXTR64
,
27817 /* TBM instructions. */
27818 IX86_BUILTIN_BEXTRI32
,
27819 IX86_BUILTIN_BEXTRI64
,
27821 /* BMI2 instructions. */
27822 IX86_BUILTIN_BZHI32
,
27823 IX86_BUILTIN_BZHI64
,
27824 IX86_BUILTIN_PDEP32
,
27825 IX86_BUILTIN_PDEP64
,
27826 IX86_BUILTIN_PEXT32
,
27827 IX86_BUILTIN_PEXT64
,
27829 /* ADX instructions. */
27830 IX86_BUILTIN_ADDCARRYX32
,
27831 IX86_BUILTIN_ADDCARRYX64
,
27833 /* FSGSBASE instructions. */
27834 IX86_BUILTIN_RDFSBASE32
,
27835 IX86_BUILTIN_RDFSBASE64
,
27836 IX86_BUILTIN_RDGSBASE32
,
27837 IX86_BUILTIN_RDGSBASE64
,
27838 IX86_BUILTIN_WRFSBASE32
,
27839 IX86_BUILTIN_WRFSBASE64
,
27840 IX86_BUILTIN_WRGSBASE32
,
27841 IX86_BUILTIN_WRGSBASE64
,
27843 /* RDRND instructions. */
27844 IX86_BUILTIN_RDRAND16_STEP
,
27845 IX86_BUILTIN_RDRAND32_STEP
,
27846 IX86_BUILTIN_RDRAND64_STEP
,
27848 /* RDSEED instructions. */
27849 IX86_BUILTIN_RDSEED16_STEP
,
27850 IX86_BUILTIN_RDSEED32_STEP
,
27851 IX86_BUILTIN_RDSEED64_STEP
,
27853 /* F16C instructions. */
27854 IX86_BUILTIN_CVTPH2PS
,
27855 IX86_BUILTIN_CVTPH2PS256
,
27856 IX86_BUILTIN_CVTPS2PH
,
27857 IX86_BUILTIN_CVTPS2PH256
,
27859 /* CFString built-in for darwin */
27860 IX86_BUILTIN_CFSTRING
,
27862 /* Builtins to get CPU type and supported features. */
27863 IX86_BUILTIN_CPU_INIT
,
27864 IX86_BUILTIN_CPU_IS
,
27865 IX86_BUILTIN_CPU_SUPPORTS
,
27870 /* Table for the ix86 builtin decls. */
27871 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27873 /* Table of all of the builtin functions that are possible with different ISA's
27874 but are waiting to be built until a function is declared to use that
27876 struct builtin_isa
{
27877 const char *name
; /* function name */
27878 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27879 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27880 bool const_p
; /* true if the declaration is constant */
27881 bool set_and_not_built_p
;
27884 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27887 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27888 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27889 function decl in the ix86_builtins array. Returns the function decl or
27890 NULL_TREE, if the builtin was not added.
27892 If the front end has a special hook for builtin functions, delay adding
27893 builtin functions that aren't in the current ISA until the ISA is changed
27894 with function specific optimization. Doing so, can save about 300K for the
27895 default compiler. When the builtin is expanded, check at that time whether
27898 If the front end doesn't have a special hook, record all builtins, even if
27899 it isn't an instruction set in the current ISA in case the user uses
27900 function specific options for a different ISA, so that we don't get scope
27901 errors if a builtin is added in the middle of a function scope. */
27904 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27905 enum ix86_builtin_func_type tcode
,
27906 enum ix86_builtins code
)
27908 tree decl
= NULL_TREE
;
27910 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
27912 ix86_builtins_isa
[(int) code
].isa
= mask
;
27914 mask
&= ~OPTION_MASK_ISA_64BIT
;
27916 || (mask
& ix86_isa_flags
) != 0
27917 || (lang_hooks
.builtin_function
27918 == lang_hooks
.builtin_function_ext_scope
))
27921 tree type
= ix86_get_builtin_func_type (tcode
);
27922 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
27924 ix86_builtins
[(int) code
] = decl
;
27925 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
27929 ix86_builtins
[(int) code
] = NULL_TREE
;
27930 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
27931 ix86_builtins_isa
[(int) code
].name
= name
;
27932 ix86_builtins_isa
[(int) code
].const_p
= false;
27933 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
27940 /* Like def_builtin, but also marks the function decl "const". */
27943 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
27944 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
27946 tree decl
= def_builtin (mask
, name
, tcode
, code
);
27948 TREE_READONLY (decl
) = 1;
27950 ix86_builtins_isa
[(int) code
].const_p
= true;
27955 /* Add any new builtin functions for a given ISA that may not have been
27956 declared. This saves a bit of space compared to adding all of the
27957 declarations to the tree, even if we didn't use them. */
27960 ix86_add_new_builtins (HOST_WIDE_INT isa
)
27964 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
27966 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
27967 && ix86_builtins_isa
[i
].set_and_not_built_p
)
27971 /* Don't define the builtin again. */
27972 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
27974 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
27975 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
27976 type
, i
, BUILT_IN_MD
, NULL
,
27979 ix86_builtins
[i
] = decl
;
27980 if (ix86_builtins_isa
[i
].const_p
)
27981 TREE_READONLY (decl
) = 1;
27986 /* Bits for builtin_description.flag. */
27988 /* Set when we don't support the comparison natively, and should
27989 swap_comparison in order to support it. */
27990 #define BUILTIN_DESC_SWAP_OPERANDS 1
27992 struct builtin_description
27994 const HOST_WIDE_INT mask
;
27995 const enum insn_code icode
;
27996 const char *const name
;
27997 const enum ix86_builtins code
;
27998 const enum rtx_code comparison
;
28002 static const struct builtin_description bdesc_comi
[] =
28004 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
28005 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
28006 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
28007 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
28008 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
28009 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
28010 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
28011 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
28012 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
28013 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
28014 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
28015 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
28016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
28017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
28018 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
28019 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
28020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
28021 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
28022 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
28023 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
28024 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
28025 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
28026 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
28027 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
28030 static const struct builtin_description bdesc_pcmpestr
[] =
28033 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
28034 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
28035 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
28036 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
28037 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
28038 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
28039 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
28042 static const struct builtin_description bdesc_pcmpistr
[] =
28045 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
28046 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
28047 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
28048 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
28049 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
28050 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
28051 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
28054 /* Special builtins with variable number of arguments. */
28055 static const struct builtin_description bdesc_special_args
[] =
28057 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28058 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
28059 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28061 /* 80387 (for use internally for atomic compound assignment). */
28062 { 0, CODE_FOR_fnstenv
, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28063 { 0, CODE_FOR_fldenv
, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV
, UNKNOWN
, (int) VOID_FTYPE_PCVOID
},
28064 { 0, CODE_FOR_fnstsw
, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW
, UNKNOWN
, (int) VOID_FTYPE_PUSHORT
},
28065 { 0, CODE_FOR_fnclex
, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28068 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28071 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28073 /* FXSR, XSAVE and XSAVEOPT */
28074 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28075 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28076 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28077 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28078 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28080 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28081 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28082 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28083 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28084 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28087 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28088 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28089 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28091 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28092 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28094 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28096 /* SSE or 3DNow!A */
28097 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28098 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
28101 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28102 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28103 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28104 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedquv16qi
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
28105 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28106 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
28107 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
28108 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
28109 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
28110 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddquv16qi
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28112 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28113 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28116 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28119 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
28122 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28123 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28126 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28129 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28130 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28131 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28132 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
28133 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
28135 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28136 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28137 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28138 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28139 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddquv32qi
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28140 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedquv32qi
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
28141 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28143 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
28144 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28145 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28147 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
28148 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
28149 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
28150 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
28151 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
28152 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
28153 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
28154 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
28157 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
28158 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
28159 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
28160 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
28161 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
28162 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
28163 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
28164 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
28165 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
28167 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28168 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
28169 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
28170 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
28171 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
28172 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
28175 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28176 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28177 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28178 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28179 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28180 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28181 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28182 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28185 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28186 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28187 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
/* Builtins with variable number of arguments.  */
28191 static const struct builtin_description bdesc_args
[] =
28193 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
28194 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
28195 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
28196 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28197 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28198 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28199 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28202 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28203 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28204 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28205 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28206 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28207 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28209 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28210 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28211 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28212 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28213 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28214 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28215 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28216 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28218 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28219 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28221 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28222 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28223 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28224 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28226 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28227 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28228 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28229 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28230 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28231 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28233 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28234 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28235 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28236 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28237 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28238 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28240 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28241 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
28242 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28244 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
28246 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28247 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28248 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28249 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28250 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28251 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28253 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28254 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28255 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28256 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28257 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28258 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28260 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28261 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28262 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28263 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28266 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28267 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28268 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28269 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28271 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28272 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28273 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28274 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28275 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28276 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28277 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28278 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28279 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28280 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28281 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28282 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28283 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28284 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28285 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28288 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28289 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28290 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28291 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28292 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28293 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28296 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28297 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28298 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28299 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28300 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28301 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28302 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28303 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28304 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28305 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28306 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28307 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28309 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28311 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28312 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28313 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28314 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28315 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28316 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28317 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28318 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28320 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28321 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28322 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28323 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28324 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28325 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28326 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28327 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28328 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28329 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28330 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28331 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28332 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28333 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28334 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28335 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28336 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28337 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28338 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28339 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28341 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28342 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28343 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28344 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28346 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28347 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28348 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28349 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28351 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28353 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28354 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28355 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28356 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28357 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28359 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
28360 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
28361 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
28363 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
28365 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28366 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28367 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28369 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
28370 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
28372 /* SSE MMX or 3Dnow!A */
28373 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28374 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28375 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28377 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28378 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28379 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28380 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28382 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
28383 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
28385 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
28388 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28390 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28391 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
28392 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28393 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
28394 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
28396 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28397 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
28399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
28404 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28405 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28406 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28407 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_fix_notruncv4sfv4si
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
28411 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28413 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28414 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28415 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28416 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28417 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28418 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28419 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28420 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28422 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28423 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28424 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28425 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28426 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28427 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28428 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28429 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28430 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28431 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28432 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28433 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28434 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28435 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28436 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28437 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28438 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28439 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28440 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28441 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28443 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28444 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28445 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28446 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28450 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28455 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28456 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28459 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28461 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28462 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28463 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28464 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28465 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28466 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28467 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28468 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28470 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28471 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28472 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28473 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28474 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28475 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28476 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28479 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28483 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28484 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28490 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28492 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28497 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28515 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28516 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
28518 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
28519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
28523 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
28524 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
28525 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
28526 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
28528 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28529 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28531 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28532 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28533 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28534 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28536 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28537 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28538 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28539 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28540 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28541 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28542 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28544 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28545 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28546 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28547 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28549 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
28550 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28551 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28553 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
28555 { OPTION_MASK_ISA_SSE2
/* MOVQ xmm, xmm (sse2_movq128) is an SSE2 instruction; gate the
   builtin on SSE2 so it cannot be enabled under -msse -mno-sse2
   where no insn pattern exists to expand it.  */
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28558 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28559 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28562 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28563 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28565 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28566 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28567 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28568 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28569 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28570 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28573 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28574 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
28575 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28576 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
28577 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28578 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28580 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28581 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28582 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28583 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28584 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28585 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28586 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28587 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28588 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28589 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28590 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28591 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28592 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
28593 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
28594 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28595 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28596 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28597 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28598 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28599 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28600 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28601 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28602 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28603 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28606 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
28607 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
28610 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28611 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28612 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
28613 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
28614 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28615 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28616 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28617 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
28618 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
28619 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
28621 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28622 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28623 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28624 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28625 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28626 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28627 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28628 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28629 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28630 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28631 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28632 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28633 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28635 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28636 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28637 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28638 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28639 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28640 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28641 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28642 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28643 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28644 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28645 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28646 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28649 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28650 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28651 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28652 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28654 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28655 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
28656 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
28657 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28659 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28660 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28662 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28663 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28665 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28666 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
28667 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
28668 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28670 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
28671 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
28673 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28674 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28676 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28677 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28678 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28681 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28682 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
28683 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
28684 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28685 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28688 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
28689 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
28690 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
28691 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28694 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
28695 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28697 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28698 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28699 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28700 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28703 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28706 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28707 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28708 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28709 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28710 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28711 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28712 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28713 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28714 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28715 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28716 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28717 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28718 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28719 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28720 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28721 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28722 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28723 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28724 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28725 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28726 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28727 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28728 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28729 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28730 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28731 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28733 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28734 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28735 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28736 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28738 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28739 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28740 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28741 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28742 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28743 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28744 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28745 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28746 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28747 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28748 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28749 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28750 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28751 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28752 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28753 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28754 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28755 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28756 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28757 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_fix_notruncv8sfv8si
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28758 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28759 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28760 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28761 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28762 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28763 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28764 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28765 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28766 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28767 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28768 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28769 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28770 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28771 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28773 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28774 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28775 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28777 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28778 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28779 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28780 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28781 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28783 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28785 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28786 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28788 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28789 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28790 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28791 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28793 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28794 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28796 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28797 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28799 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28800 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28801 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28802 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28804 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28805 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28807 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28808 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28810 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28811 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28812 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28813 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28815 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28816 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28817 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28818 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28819 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28820 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28822 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28823 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28824 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28825 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28826 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28827 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28828 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28829 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28830 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28831 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28832 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28833 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28834 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28835 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28836 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28838 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28839 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28841 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28842 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28844 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28847 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28848 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28849 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28850 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28851 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28852 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28853 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28854 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28855 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28856 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28857 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28858 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28859 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28860 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28861 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28862 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28863 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28864 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28865 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28866 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28867 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28868 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28869 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28870 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28871 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28872 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28873 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28874 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28875 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28876 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28877 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28878 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28879 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28880 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28881 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28882 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28883 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28884 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28885 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28886 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28887 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28888 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28889 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28890 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28891 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28892 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28893 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28894 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28895 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28896 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28897 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28898 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28899 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28900 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28901 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28902 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28903 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28904 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28905 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28906 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28907 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28908 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28911 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28912 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28913 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28914 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28915 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28916 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28917 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28918 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28919 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28920 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28921 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
28922 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28923 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28924 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28925 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28926 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28927 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28928 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28929 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28930 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28931 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28932 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28933 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28934 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28935 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28936 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28937 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28938 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28939 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28940 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28941 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28942 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28943 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28944 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28950 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28951 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28952 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28953 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28954 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28955 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28956 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28957 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28958 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28959 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28960 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28961 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28962 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28963 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28964 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28965 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28966 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
28967 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28968 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
28969 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
28970 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28971 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28972 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28973 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28974 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28975 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28976 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28977 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28978 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28979 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
28980 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
28981 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
28982 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
28983 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28984 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28985 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28986 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28987 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28988 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28989 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28990 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28991 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28992 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28994 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28997 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28998 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28999 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
29002 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29003 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29006 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
29007 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
29008 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
29009 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
29012 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29013 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29014 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29015 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29016 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29017 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand aliases used by the bdesc_multi_arg table below: each
   MULTI_ARG_* name maps an operand-count/element-type combination onto
   the corresponding ix86_builtin_func_type enumerator (stored in the
   table's flag field as an int).  The suffix encodes the vector element
   type (SF/DF/SI/DI/HI/QI), a trailing "2" marks the 256-bit variant,
   and _IMM/_CMP/_TF mark immediate-operand, comparison, and
   table-driven-comparison forms respectively.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
29074 static const struct builtin_description bdesc_multi_arg
[] =
29076 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
29077 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
29078 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29079 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
29080 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
29081 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29083 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
29084 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
29085 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29086 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
29087 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
29088 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29090 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
29091 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
29092 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29093 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
29094 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
29095 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29096 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
29097 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
29098 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29099 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
29100 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
29101 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29103 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
29104 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
29105 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29106 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
29107 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
29108 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29109 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
29110 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
29111 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29112 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
29113 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
29114 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29116 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29117 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29118 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29119 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29120 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
29121 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
29122 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
29124 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29125 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29126 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
29127 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
29128 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
29129 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29130 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29132 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
29134 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29135 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29136 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29137 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29138 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29139 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29140 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29141 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29142 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29143 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29144 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29145 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29147 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29148 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29149 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29150 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29151 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
29152 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
29153 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
29154 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
29155 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29156 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29157 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29158 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29159 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29160 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29161 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29162 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29164 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
29165 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
29166 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
29167 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
29168 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
29169 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
29171 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29172 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29173 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29174 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29175 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29176 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29177 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29178 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29179 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29180 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29181 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29182 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29183 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29184 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29185 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29187 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29188 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29189 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29190 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
29191 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
29192 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
29193 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
29195 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29196 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29197 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29198 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
29199 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
29200 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
29201 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
29203 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29204 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29205 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29206 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
29207 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
29208 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
29209 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
29211 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29212 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29213 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29214 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
29215 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
29216 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
29217 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
29219 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29220 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29221 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29222 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
29223 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
29224 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
29225 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
29227 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29228 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29229 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29230 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
29231 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
29232 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
29233 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
29235 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29236 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29237 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29238 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
29239 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
29240 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
29241 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
29243 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29244 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29245 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29246 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
29247 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
29248 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
29249 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
29251 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29252 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29253 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29254 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29255 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29256 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29257 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29258 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29260 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29261 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29262 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29263 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29264 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29265 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29266 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29267 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29269 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
29270 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
29271 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
29272 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' cause
   we're lazy.  Add casts to make them fit.  Each entry maps an
   __ITM_* entry point onto a generic BUILT_IN_TM_* code; the insn
   field is CODE_FOR_nothing because these expand to calls, not insns.  */
static const struct builtin_description bdesc_tm[] =
{
  /* 64-bit (MMX, V2SI) transactional stores: plain, write-after-read
     and write-after-write variants.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  /* 64-bit transactional loads: plain, read-after-read, read-after-write
     and read-for-write variants.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  /* 128-bit (SSE, V4SF) transactional stores and loads.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  /* 256-bit (AVX, V8SF) transactional stores and loads.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  /* Transaction logging entry points, one per vector width; all take a
     const void pointer to the logged location.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
29311 /* TM callbacks. */
/* Return the builtin decl needed to load a vector of TYPE.
   Returns NULL_TREE when TYPE is not a vector type, or its total size
   is not one of the widths (64/128/256 bits) that has a TM builtin.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      /* Dispatch on the total bit size of the vector.  */
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
29333 /* Return the builtin decl needed to store a vector of TYPE. */
29336 ix86_builtin_tm_store (tree type
)
29338 if (TREE_CODE (type
) == VECTOR_TYPE
)
29340 switch (tree_low_cst (TYPE_SIZE (type
), 1))
29343 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
29345 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
29347 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
/* Initialize the transactional memory vector load/store builtins.

   For every entry in bdesc_tm whose ISA is enabled (or when the frontend
   registers builtins in the external scope), create the decl with
   add_builtin_function, copy the DECL/TYPE attributes from the
   corresponding generic TM builtin, and record it via set_builtin_decl.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      /* Register the builtin when its ISA is available, or
	 unconditionally when the language registers ext-scope builtins.  */
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  /* Pick the attribute set matching the kind of TM operation.  */
	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  PCMPESTRM returns the mask in a vector,
     the others return it in an integer.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  Same split as above for the implicit-length
     variants.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  SSE2 entries compare doubles, the rest floats.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE: MXCSR access.  */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  /* The mfence decl is cached so expanders can emit a fence directly.  */
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);

  /* AVX2 gather builtins: {si,di} index variants for each element type.  */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  /* NOTE(review): the four "gatheralt*" names below end with a trailing
     space, which makes them unspellable by user code; presumably they are
     meant to be reachable only via the vectorizer's gather hook — confirm
     before normalizing the names.  */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);

  /* RTM.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* RDSEED */
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);

  /* ADCX: no ISA mask — the 32-bit form is always available.  */
  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);

  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple return_stmt;
  tree convert_expr, result_var;
  gimple convert_stmt;
  gimple call_cond_stmt;
  gimple if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  /* Build "return (void *) &version_decl;" — the value the dispatcher
     yields when this version is selected.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node, NULL);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* No predicates: this is the default version, returned unconditionally.  */
  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Emit one predicate call per chain element and AND the integer
     results together.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node, NULL);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* Branch on the combined predicate being non-zero.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split NEW_BB into: bb1 (predicate + cond), bb2 (return of this
     version), bb3 (fallthrough for the next condition).  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* bb2 returns, so it flows to the exit block, not to bb3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR, 0);

  pop_cfun ();

  return bb3;
}
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.   This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority
  {
    P_ZERO = 0,
    P_MMX,
    P_SSE,
    P_SSE2,
    P_SSE3,
    P_SSSE3,
    P_PROC_SSSE3,
    P_SSE4_a,
    P_PROC_SSE4_a,
    P_SSE4_1,
    P_SSE4_2,
    P_PROC_SSE4_2,
    P_POPCNT,
    P_AVX,
    P_AVX2,
    P_FMA,
    P_PROC_FMA
  };

  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.

     NOTE(review): only the ssse3/sse4.1/sse4.2/popcnt rows were legible in
     the damaged source; the remaining rows are reconstructed — verify the
     full set against fold_builtin_cpu.  */
  static struct _feature_list
  {
    const char *const name;
    const enum feature_priority priority;
  }
  const feature_list[] =
  {
    {"mmx", P_MMX},
    {"sse", P_SSE},
    {"sse2", P_SSE2},
    {"sse3", P_SSE3},
    {"sse4a", P_SSE4_a},
    {"ssse3", P_SSSE3},
    {"sse4.1", P_SSE4_1},
    {"sse4.2", P_SSE4_2},
    {"popcnt", P_POPCNT},
    {"avx", P_AVX},
    {"avx2", P_AVX2}
  };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version. */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	{
	  switch (new_target->arch)
	    {
	    case PROCESSOR_CORE2:
	      arg_str = "core2";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_COREI7:
	      arg_str = "corei7";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_COREI7_AVX:
	      arg_str = "corei7-avx";
	      priority = P_PROC_SSE4_2;
	      break;
	    case PROCESSOR_ATOM:
	      arg_str = "atom";
	      priority = P_PROC_SSSE3;
	      break;
	    case PROCESSOR_AMDFAM10:
	      arg_str = "amdfam10h";
	      priority = P_PROC_SSE4_a;
	      break;
	    case PROCESSOR_BDVER1:
	      arg_str = "bdver1";
	      priority = P_PROC_FMA;
	      break;
	    case PROCESSOR_BDVER2:
	      arg_str = "bdver2";
	      priority = P_PROC_FMA;
	      break;
	    }
	}

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  Tokenize a private copy since strtok mutates
     its argument.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      /* Predicates were consed on in reverse; restore source order.  */
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
30046 /* This compares the priority of target features in function DECL1
30047 and DECL2. It returns positive value if DECL1 is higher priority,
30048 negative value if DECL2 is higher priority and 0 if they are the
30052 ix86_compare_version_priority (tree decl1
, tree decl2
)
30054 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
30055 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
30057 return (int)priority1
- (int)priority2
;
30060 /* V1 and V2 point to function versions with different priorities
30061 based on the target ISA. This function compares their priorities. */
30064 feature_compare (const void *v1
, const void *v2
)
30066 typedef struct _function_version_info
30069 tree predicate_chain
;
30070 unsigned int dispatch_priority
;
30071 } function_version_info
;
30073 const function_version_info c1
= *(const function_version_info
*)v1
;
30074 const function_version_info c2
= *(const function_version_info
*)v2
;
30075 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
30078 /* This function generates the dispatch function for
30079 multi-versioned functions. DISPATCH_DECL is the function which will
30080 contain the dispatch logic. FNDECLS are the function choices for
30081 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
30082 in DISPATCH_DECL in which the dispatch code is generated. */
30085 dispatch_function_versions (tree dispatch_decl
,
30087 basic_block
*empty_bb
)
30090 gimple ifunc_cpu_init_stmt
;
30094 vec
<tree
> *fndecls
;
30095 unsigned int num_versions
= 0;
30096 unsigned int actual_versions
= 0;
30099 struct _function_version_info
30102 tree predicate_chain
;
30103 unsigned int dispatch_priority
;
30104 }*function_version_info
;
30106 gcc_assert (dispatch_decl
!= NULL
30107 && fndecls_p
!= NULL
30108 && empty_bb
!= NULL
);
30110 /*fndecls_p is actually a vector. */
30111 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
30113 /* At least one more version other than the default. */
30114 num_versions
= fndecls
->length ();
30115 gcc_assert (num_versions
>= 2);
30117 function_version_info
= (struct _function_version_info
*)
30118 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
30120 /* The first version in the vector is the default decl. */
30121 default_decl
= (*fndecls
)[0];
30123 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
30125 gseq
= bb_seq (*empty_bb
);
30126 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
30127 constructors, so explicity call __builtin_cpu_init here. */
30128 ifunc_cpu_init_stmt
= gimple_build_call_vec (
30129 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
30130 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
30131 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
30132 set_bb_seq (*empty_bb
, gseq
);
30137 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
30139 tree version_decl
= ele
;
30140 tree predicate_chain
= NULL_TREE
;
30141 unsigned int priority
;
30142 /* Get attribute string, parse it and find the right predicate decl.
30143 The predicate function could be a lengthy combination of many
30144 features, like arch-type and various isa-variants. */
30145 priority
= get_builtin_code_for_version (version_decl
,
30148 if (predicate_chain
== NULL_TREE
)
30151 function_version_info
[actual_versions
].version_decl
= version_decl
;
30152 function_version_info
[actual_versions
].predicate_chain
30154 function_version_info
[actual_versions
].dispatch_priority
= priority
;
30158 /* Sort the versions according to descending order of dispatch priority. The
30159 priority is based on the ISA. This is not a perfect solution. There
30160 could still be ambiguity. If more than one function version is suitable
30161 to execute, which one should be dispatched? In future, allow the user
30162 to specify a dispatch priority next to the version. */
30163 qsort (function_version_info
, actual_versions
,
30164 sizeof (struct _function_version_info
), feature_compare
);
30166 for (i
= 0; i
< actual_versions
; ++i
)
30167 *empty_bb
= add_condition_to_bb (dispatch_decl
,
30168 function_version_info
[i
].version_decl
,
30169 function_version_info
[i
].predicate_chain
,
30172 /* dispatch default version at the end. */
30173 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
30176 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  V1 and V2 point to char* elements;
   compares the pointed-to strings with strcmp.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
30191 /* ARGLIST is the argument to target attribute. This function tokenizes
30192 the comma separated arguments, sorts them and returns a string which
30193 is a unique identifier for the comma separated arguments. It also
30194 replaces non-identifier characters "=,-" with "_". */
30197 sorted_attr_string (tree arglist
)
30200 size_t str_len_sum
= 0;
30201 char **args
= NULL
;
30202 char *attr_str
, *ret_str
;
30204 unsigned int argnum
= 1;
30207 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30209 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30210 size_t len
= strlen (str
);
30211 str_len_sum
+= len
+ 1;
30212 if (arg
!= arglist
)
30214 for (i
= 0; i
< strlen (str
); i
++)
30219 attr_str
= XNEWVEC (char, str_len_sum
);
30221 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30223 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30224 size_t len
= strlen (str
);
30225 memcpy (attr_str
+ str_len_sum
, str
, len
);
30226 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
30227 str_len_sum
+= len
+ 1;
30230 /* Replace "=,-" with "_". */
30231 for (i
= 0; i
< strlen (attr_str
); i
++)
30232 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
30238 args
= XNEWVEC (char *, argnum
);
30241 attr
= strtok (attr_str
, ",");
30242 while (attr
!= NULL
)
30246 attr
= strtok (NULL
, ",");
30249 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
30251 ret_str
= XNEWVEC (char, str_len_sum
);
30253 for (i
= 0; i
< argnum
; i
++)
30255 size_t len
= strlen (args
[i
]);
30256 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
30257 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
30258 str_len_sum
+= len
+ 1;
30262 XDELETEVEC (attr_str
);
30266 /* This function changes the assembler name for functions that are
30267 versions. If DECL is a function version and has a "target"
30268 attribute, it appends the attribute string to its assembler name. */
30271 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
30274 const char *orig_name
, *version_string
;
30275 char *attr_str
, *assembler_name
;
30277 if (DECL_DECLARED_INLINE_P (decl
)
30278 && lookup_attribute ("gnu_inline",
30279 DECL_ATTRIBUTES (decl
)))
30280 error_at (DECL_SOURCE_LOCATION (decl
),
30281 "Function versions cannot be marked as gnu_inline,"
30282 " bodies have to be generated");
30284 if (DECL_VIRTUAL_P (decl
)
30285 || DECL_VINDEX (decl
))
30286 sorry ("Virtual function multiversioning not supported");
30288 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30290 /* target attribute string cannot be NULL. */
30291 gcc_assert (version_attr
!= NULL_TREE
);
30293 orig_name
= IDENTIFIER_POINTER (id
);
30295 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
30297 if (strcmp (version_string
, "default") == 0)
30300 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
30301 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
30303 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
30305 /* Allow assembler name to be modified if already set. */
30306 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
30307 SET_DECL_RTL (decl
, NULL
);
30309 tree ret
= get_identifier (assembler_name
);
30310 XDELETEVEC (attr_str
);
30311 XDELETEVEC (assembler_name
);
30315 /* This function returns true if FN1 and FN2 are versions of the same function,
30316 that is, the target strings of the function decls are different. This assumes
30317 that FN1 and FN2 have the same signature. */
30320 ix86_function_versions (tree fn1
, tree fn2
)
30323 char *target1
, *target2
;
30326 if (TREE_CODE (fn1
) != FUNCTION_DECL
30327 || TREE_CODE (fn2
) != FUNCTION_DECL
)
30330 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
30331 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
30333 /* At least one function decl should have the target attribute specified. */
30334 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
30337 /* Diagnose missing target attribute if one of the decls is already
30338 multi-versioned. */
30339 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
30341 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
30343 if (attr2
!= NULL_TREE
)
30350 error_at (DECL_SOURCE_LOCATION (fn2
),
30351 "missing %<target%> attribute for multi-versioned %D",
30353 inform (DECL_SOURCE_LOCATION (fn1
),
30354 "previous declaration of %D", fn1
);
30355 /* Prevent diagnosing of the same error multiple times. */
30356 DECL_ATTRIBUTES (fn2
)
30357 = tree_cons (get_identifier ("target"),
30358 copy_node (TREE_VALUE (attr1
)),
30359 DECL_ATTRIBUTES (fn2
));
30364 target1
= sorted_attr_string (TREE_VALUE (attr1
));
30365 target2
= sorted_attr_string (TREE_VALUE (attr2
));
30367 /* The sorted target strings must be different for fn1 and fn2
30369 if (strcmp (target1
, target2
) == 0)
30374 XDELETEVEC (target1
);
30375 XDELETEVEC (target2
);
30381 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
30383 /* For function version, add the target suffix to the assembler name. */
30384 if (TREE_CODE (decl
) == FUNCTION_DECL
30385 && DECL_FUNCTION_VERSIONED (decl
))
30386 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
30387 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
30388 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
30394 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
30395 is true, append the full path name of the source file. */
30398 make_name (tree decl
, const char *suffix
, bool make_unique
)
30400 char *global_var_name
;
30403 const char *unique_name
= NULL
;
30405 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
30407 /* Get a unique name that can be used globally without any chances
30408 of collision at link time. */
30410 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
30412 name_len
= strlen (name
) + strlen (suffix
) + 2;
30415 name_len
+= strlen (unique_name
) + 1;
30416 global_var_name
= XNEWVEC (char, name_len
);
30418 /* Use '.' to concatenate names as it is demangler friendly. */
30420 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
30423 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
30425 return global_var_name
;
30428 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30430 /* Make a dispatcher declaration for the multi-versioned function DECL.
30431 Calls to DECL function will be replaced with calls to the dispatcher
30432 by the front-end. Return the decl created. */
30435 make_dispatcher_decl (const tree decl
)
30439 tree fn_type
, func_type
;
30440 bool is_uniq
= false;
30442 if (TREE_PUBLIC (decl
) == 0)
30445 func_name
= make_name (decl
, "ifunc", is_uniq
);
30447 fn_type
= TREE_TYPE (decl
);
30448 func_type
= build_function_type (TREE_TYPE (fn_type
),
30449 TYPE_ARG_TYPES (fn_type
));
30451 func_decl
= build_fn_decl (func_name
, func_type
);
30452 XDELETEVEC (func_name
);
30453 TREE_USED (func_decl
) = 1;
30454 DECL_CONTEXT (func_decl
) = NULL_TREE
;
30455 DECL_INITIAL (func_decl
) = error_mark_node
;
30456 DECL_ARTIFICIAL (func_decl
) = 1;
30457 /* Mark this func as external, the resolver will flip it again if
30458 it gets generated. */
30459 DECL_EXTERNAL (func_decl
) = 1;
30460 /* This will be of type IFUNCs have to be externally visible. */
30461 TREE_PUBLIC (func_decl
) = 1;
30468 /* Returns true if decl is multi-versioned and DECL is the default function,
30469 that is it is not tagged with target specific optimization. */
30472 is_function_default_version (const tree decl
)
30474 if (TREE_CODE (decl
) != FUNCTION_DECL
30475 || !DECL_FUNCTION_VERSIONED (decl
))
30477 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30479 attr
= TREE_VALUE (TREE_VALUE (attr
));
30480 return (TREE_CODE (attr
) == STRING_CST
30481 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
30484 /* Make a dispatcher declaration for the multi-versioned function DECL.
30485 Calls to DECL function will be replaced with calls to the dispatcher
30486 by the front-end. Returns the decl of the dispatcher function. */
30489 ix86_get_function_versions_dispatcher (void *decl
)
30491 tree fn
= (tree
) decl
;
30492 struct cgraph_node
*node
= NULL
;
30493 struct cgraph_node
*default_node
= NULL
;
30494 struct cgraph_function_version_info
*node_v
= NULL
;
30495 struct cgraph_function_version_info
*first_v
= NULL
;
30497 tree dispatch_decl
= NULL
;
30499 struct cgraph_function_version_info
*default_version_info
= NULL
;
30501 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
30503 node
= cgraph_get_node (fn
);
30504 gcc_assert (node
!= NULL
);
30506 node_v
= get_cgraph_node_version (node
);
30507 gcc_assert (node_v
!= NULL
);
30509 if (node_v
->dispatcher_resolver
!= NULL
)
30510 return node_v
->dispatcher_resolver
;
30512 /* Find the default version and make it the first node. */
30514 /* Go to the beginning of the chain. */
30515 while (first_v
->prev
!= NULL
)
30516 first_v
= first_v
->prev
;
30517 default_version_info
= first_v
;
30518 while (default_version_info
!= NULL
)
30520 if (is_function_default_version
30521 (default_version_info
->this_node
->decl
))
30523 default_version_info
= default_version_info
->next
;
30526 /* If there is no default node, just return NULL. */
30527 if (default_version_info
== NULL
)
30530 /* Make default info the first node. */
30531 if (first_v
!= default_version_info
)
30533 default_version_info
->prev
->next
= default_version_info
->next
;
30534 if (default_version_info
->next
)
30535 default_version_info
->next
->prev
= default_version_info
->prev
;
30536 first_v
->prev
= default_version_info
;
30537 default_version_info
->next
= first_v
;
30538 default_version_info
->prev
= NULL
;
30541 default_node
= default_version_info
->this_node
;
30543 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30544 if (targetm
.has_ifunc_p ())
30546 struct cgraph_function_version_info
*it_v
= NULL
;
30547 struct cgraph_node
*dispatcher_node
= NULL
;
30548 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
30550 /* Right now, the dispatching is done via ifunc. */
30551 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
30553 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
30554 gcc_assert (dispatcher_node
!= NULL
);
30555 dispatcher_node
->dispatcher_function
= 1;
30556 dispatcher_version_info
30557 = insert_new_cgraph_node_version (dispatcher_node
);
30558 dispatcher_version_info
->next
= default_version_info
;
30559 dispatcher_node
->definition
= 1;
30561 /* Set the dispatcher for all the versions. */
30562 it_v
= default_version_info
;
30563 while (it_v
!= NULL
)
30565 it_v
->dispatcher_resolver
= dispatch_decl
;
30572 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
30573 "multiversioning needs ifunc which is not supported "
30577 return dispatch_decl
;
30580 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
30584 make_attribute (const char *name
, const char *arg_name
, tree chain
)
30587 tree attr_arg_name
;
30591 attr_name
= get_identifier (name
);
30592 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
30593 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
30594 attr
= tree_cons (attr_name
, attr_args
, chain
);
30598 /* Make the resolver function decl to dispatch the versions of
30599 a multi-versioned function, DEFAULT_DECL. Create an
30600 empty basic block in the resolver and store the pointer in
30601 EMPTY_BB. Return the decl of the resolver function. */
30604 make_resolver_func (const tree default_decl
,
30605 const tree dispatch_decl
,
30606 basic_block
*empty_bb
)
30608 char *resolver_name
;
30609 tree decl
, type
, decl_name
, t
;
30610 bool is_uniq
= false;
30612 /* IFUNC's have to be globally visible. So, if the default_decl is
30613 not, then the name of the IFUNC should be made unique. */
30614 if (TREE_PUBLIC (default_decl
) == 0)
30617 /* Append the filename to the resolver function if the versions are
30618 not externally visible. This is because the resolver function has
30619 to be externally visible for the loader to find it. So, appending
30620 the filename will prevent conflicts with a resolver function from
30621 another module which is based on the same version name. */
30622 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
30624 /* The resolver function should return a (void *). */
30625 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
30627 decl
= build_fn_decl (resolver_name
, type
);
30628 decl_name
= get_identifier (resolver_name
);
30629 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
30631 DECL_NAME (decl
) = decl_name
;
30632 TREE_USED (decl
) = 1;
30633 DECL_ARTIFICIAL (decl
) = 1;
30634 DECL_IGNORED_P (decl
) = 0;
30635 /* IFUNC resolvers have to be externally visible. */
30636 TREE_PUBLIC (decl
) = 1;
30637 DECL_UNINLINABLE (decl
) = 1;
30639 /* Resolver is not external, body is generated. */
30640 DECL_EXTERNAL (decl
) = 0;
30641 DECL_EXTERNAL (dispatch_decl
) = 0;
30643 DECL_CONTEXT (decl
) = NULL_TREE
;
30644 DECL_INITIAL (decl
) = make_node (BLOCK
);
30645 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
30647 if (DECL_COMDAT_GROUP (default_decl
)
30648 || TREE_PUBLIC (default_decl
))
30650 /* In this case, each translation unit with a call to this
30651 versioned function will put out a resolver. Ensure it
30652 is comdat to keep just one copy. */
30653 DECL_COMDAT (decl
) = 1;
30654 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
30656 /* Build result decl and add to function_decl. */
30657 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
30658 DECL_ARTIFICIAL (t
) = 1;
30659 DECL_IGNORED_P (t
) = 1;
30660 DECL_RESULT (decl
) = t
;
30662 gimplify_function_tree (decl
);
30663 push_cfun (DECL_STRUCT_FUNCTION (decl
));
30664 *empty_bb
= init_lowered_empty_function (decl
, false);
30666 cgraph_add_new_function (decl
, true);
30667 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
30671 gcc_assert (dispatch_decl
!= NULL
);
30672 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
30673 DECL_ATTRIBUTES (dispatch_decl
)
30674 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
30676 /* Create the alias for dispatch to resolver here. */
30677 /*cgraph_create_function_alias (dispatch_decl, decl);*/
30678 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
30679 XDELETEVEC (resolver_name
);
30683 /* Generate the dispatching code body to dispatch multi-versioned function
30684 DECL. The target hook is called to process the "target" attributes and
30685 provide the code to dispatch the right function at run-time. NODE points
30686 to the dispatcher decl whose body will be created. */
30689 ix86_generate_version_dispatcher_body (void *node_p
)
30691 tree resolver_decl
;
30692 basic_block empty_bb
;
30693 vec
<tree
> fn_ver_vec
= vNULL
;
30694 tree default_ver_decl
;
30695 struct cgraph_node
*versn
;
30696 struct cgraph_node
*node
;
30698 struct cgraph_function_version_info
*node_version_info
= NULL
;
30699 struct cgraph_function_version_info
*versn_info
= NULL
;
30701 node
= (cgraph_node
*)node_p
;
30703 node_version_info
= get_cgraph_node_version (node
);
30704 gcc_assert (node
->dispatcher_function
30705 && node_version_info
!= NULL
);
30707 if (node_version_info
->dispatcher_resolver
)
30708 return node_version_info
->dispatcher_resolver
;
30710 /* The first version in the chain corresponds to the default version. */
30711 default_ver_decl
= node_version_info
->next
->this_node
->decl
;
30713 /* node is going to be an alias, so remove the finalized bit. */
30714 node
->definition
= false;
30716 resolver_decl
= make_resolver_func (default_ver_decl
,
30717 node
->decl
, &empty_bb
);
30719 node_version_info
->dispatcher_resolver
= resolver_decl
;
30721 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30723 fn_ver_vec
.create (2);
30725 for (versn_info
= node_version_info
->next
; versn_info
;
30726 versn_info
= versn_info
->next
)
30728 versn
= versn_info
->this_node
;
30729 /* Check for virtual functions here again, as by this time it should
30730 have been determined if this function needs a vtable index or
30731 not. This happens for methods in derived classes that override
30732 virtual methods in base classes but are not explicitly marked as
30734 if (DECL_VINDEX (versn
->decl
))
30735 sorry ("Virtual function multiversioning not supported");
30737 fn_ver_vec
.safe_push (versn
->decl
);
30740 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30741 fn_ver_vec
.release ();
30742 rebuild_cgraph_edges ();
30744 return resolver_decl
;
30746 /* This builds the processor_model struct type defined in
30747 libgcc/config/i386/cpuinfo.c */
30750 build_processor_model_struct (void)
30752 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30754 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30756 tree type
= make_node (RECORD_TYPE
);
30758 /* The first 3 fields are unsigned int. */
30759 for (i
= 0; i
< 3; ++i
)
30761 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30762 get_identifier (field_name
[i
]), unsigned_type_node
);
30763 if (field_chain
!= NULL_TREE
)
30764 DECL_CHAIN (field
) = field_chain
;
30765 field_chain
= field
;
30768 /* The last field is an array of unsigned integers of size one. */
30769 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30770 get_identifier (field_name
[3]),
30771 build_array_type (unsigned_type_node
,
30772 build_index_type (size_one_node
)));
30773 if (field_chain
!= NULL_TREE
)
30774 DECL_CHAIN (field
) = field_chain
;
30775 field_chain
= field
;
30777 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
30781 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30784 make_var_decl (tree type
, const char *name
)
30788 new_decl
= build_decl (UNKNOWN_LOCATION
,
30790 get_identifier(name
),
30793 DECL_EXTERNAL (new_decl
) = 1;
30794 TREE_STATIC (new_decl
) = 1;
30795 TREE_PUBLIC (new_decl
) = 1;
30796 DECL_INITIAL (new_decl
) = 0;
30797 DECL_ARTIFICIAL (new_decl
) = 0;
30798 DECL_PRESERVE_P (new_decl
) = 1;
30800 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30801 assemble_variable (new_decl
, 0, 0, 0);
30806 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30807 into an integer defined in libgcc/config/i386/cpuinfo.c */
30810 fold_builtin_cpu (tree fndecl
, tree
*args
)
30813 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30814 DECL_FUNCTION_CODE (fndecl
);
30815 tree param_string_cst
= NULL
;
30817 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30818 enum processor_features
30834 /* These are the values for vendor types and cpu types and subtypes
30835 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30836 the corresponding start value. */
30837 enum processor_model
30848 M_CPU_SUBTYPE_START
,
30849 M_INTEL_COREI7_NEHALEM
,
30850 M_INTEL_COREI7_WESTMERE
,
30851 M_INTEL_COREI7_SANDYBRIDGE
,
30852 M_AMDFAM10H_BARCELONA
,
30853 M_AMDFAM10H_SHANGHAI
,
30854 M_AMDFAM10H_ISTANBUL
,
30855 M_AMDFAM15H_BDVER1
,
30856 M_AMDFAM15H_BDVER2
,
30857 M_AMDFAM15H_BDVER3
,
30861 static struct _arch_names_table
30863 const char *const name
;
30864 const enum processor_model model
;
30866 const arch_names_table
[] =
30869 {"intel", M_INTEL
},
30870 {"atom", M_INTEL_ATOM
},
30871 {"slm", M_INTEL_SLM
},
30872 {"core2", M_INTEL_CORE2
},
30873 {"corei7", M_INTEL_COREI7
},
30874 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30875 {"westmere", M_INTEL_COREI7_WESTMERE
},
30876 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30877 {"amdfam10h", M_AMDFAM10H
},
30878 {"barcelona", M_AMDFAM10H_BARCELONA
},
30879 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30880 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30881 {"amdfam15h", M_AMDFAM15H
},
30882 {"bdver1", M_AMDFAM15H_BDVER1
},
30883 {"bdver2", M_AMDFAM15H_BDVER2
},
30884 {"bdver3", M_AMDFAM15H_BDVER3
},
30885 {"bdver4", M_AMDFAM15H_BDVER4
},
30888 static struct _isa_names_table
30890 const char *const name
;
30891 const enum processor_features feature
;
30893 const isa_names_table
[] =
30897 {"popcnt", F_POPCNT
},
30901 {"ssse3", F_SSSE3
},
30902 {"sse4.1", F_SSE4_1
},
30903 {"sse4.2", F_SSE4_2
},
30908 tree __processor_model_type
= build_processor_model_struct ();
30909 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
30913 varpool_add_new_variable (__cpu_model_var
);
30915 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
30917 param_string_cst
= *args
;
30918 while (param_string_cst
30919 && TREE_CODE (param_string_cst
) != STRING_CST
)
30921 /* *args must be a expr that can contain other EXPRS leading to a
30923 if (!EXPR_P (param_string_cst
))
30925 error ("Parameter to builtin must be a string constant or literal");
30926 return integer_zero_node
;
30928 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
30931 gcc_assert (param_string_cst
);
30933 if (fn_code
== IX86_BUILTIN_CPU_IS
)
30939 unsigned int field_val
= 0;
30940 unsigned int NUM_ARCH_NAMES
30941 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
30943 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
30944 if (strcmp (arch_names_table
[i
].name
,
30945 TREE_STRING_POINTER (param_string_cst
)) == 0)
30948 if (i
== NUM_ARCH_NAMES
)
30950 error ("Parameter to builtin not valid: %s",
30951 TREE_STRING_POINTER (param_string_cst
));
30952 return integer_zero_node
;
30955 field
= TYPE_FIELDS (__processor_model_type
);
30956 field_val
= arch_names_table
[i
].model
;
30958 /* CPU types are stored in the next field. */
30959 if (field_val
> M_CPU_TYPE_START
30960 && field_val
< M_CPU_SUBTYPE_START
)
30962 field
= DECL_CHAIN (field
);
30963 field_val
-= M_CPU_TYPE_START
;
30966 /* CPU subtypes are stored in the next field. */
30967 if (field_val
> M_CPU_SUBTYPE_START
)
30969 field
= DECL_CHAIN ( DECL_CHAIN (field
));
30970 field_val
-= M_CPU_SUBTYPE_START
;
30973 /* Get the appropriate field in __cpu_model. */
30974 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30977 /* Check the value. */
30978 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
30979 build_int_cstu (unsigned_type_node
, field_val
));
30980 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30982 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30989 unsigned int field_val
= 0;
30990 unsigned int NUM_ISA_NAMES
30991 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
30993 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
30994 if (strcmp (isa_names_table
[i
].name
,
30995 TREE_STRING_POINTER (param_string_cst
)) == 0)
30998 if (i
== NUM_ISA_NAMES
)
31000 error ("Parameter to builtin not valid: %s",
31001 TREE_STRING_POINTER (param_string_cst
));
31002 return integer_zero_node
;
31005 field
= TYPE_FIELDS (__processor_model_type
);
31006 /* Get the last field, which is __cpu_features. */
31007 while (DECL_CHAIN (field
))
31008 field
= DECL_CHAIN (field
);
31010 /* Get the appropriate field: __cpu_model.__cpu_features */
31011 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
31014 /* Access the 0th element of __cpu_features array. */
31015 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
31016 integer_zero_node
, NULL_TREE
, NULL_TREE
);
31018 field_val
= (1 << isa_names_table
[i
].feature
);
31019 /* Return __cpu_model.__cpu_features[0] & field_val */
31020 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
31021 build_int_cstu (unsigned_type_node
, field_val
));
31022 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
31024 gcc_unreachable ();
31028 ix86_fold_builtin (tree fndecl
, int n_args
,
31029 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
31031 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
31033 enum ix86_builtins fn_code
= (enum ix86_builtins
)
31034 DECL_FUNCTION_CODE (fndecl
);
31035 if (fn_code
== IX86_BUILTIN_CPU_IS
31036 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
31038 gcc_assert (n_args
== 1);
31039 return fold_builtin_cpu (fndecl
, args
);
31043 #ifdef SUBTARGET_FOLD_BUILTIN
31044 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
31050 /* Make builtins to detect cpu type and features supported. NAME is
31051 the builtin name, CODE is the builtin code, and FTYPE is the function
31052 type of the builtin. */
31055 make_cpu_type_builtin (const char* name
, int code
,
31056 enum ix86_builtin_func_type ftype
, bool is_const
)
31061 type
= ix86_get_builtin_func_type (ftype
);
31062 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
31064 gcc_assert (decl
!= NULL_TREE
);
31065 ix86_builtins
[(int) code
] = decl
;
31066 TREE_READONLY (decl
) = is_const
;
31069 /* Make builtins to get CPU type and features supported. The created
31072 __builtin_cpu_init (), to detect cpu type and features,
31073 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
31074 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
31078 ix86_init_platform_type_builtins (void)
31080 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
31081 INT_FTYPE_VOID
, false);
31082 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
31083 INT_FTYPE_PCCHAR
, true);
31084 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
31085 INT_FTYPE_PCCHAR
, true);
31088 /* Internal method for ix86_init_builtins. */
31091 ix86_init_builtins_va_builtins_abi (void)
31093 tree ms_va_ref
, sysv_va_ref
;
31094 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
31095 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
31096 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
31097 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
31101 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
31102 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
31103 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
31105 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
31108 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31109 fnvoid_va_start_ms
=
31110 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31111 fnvoid_va_end_sysv
=
31112 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
31113 fnvoid_va_start_sysv
=
31114 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
31116 fnvoid_va_copy_ms
=
31117 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
31119 fnvoid_va_copy_sysv
=
31120 build_function_type_list (void_type_node
, sysv_va_ref
,
31121 sysv_va_ref
, NULL_TREE
);
31123 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
31124 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31125 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
31126 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31127 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
31128 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31129 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
31130 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31131 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
31132 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31133 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
31134 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31138 ix86_init_builtin_types (void)
31140 tree float128_type_node
, float80_type_node
;
31142 /* The __float80 type. */
31143 float80_type_node
= long_double_type_node
;
31144 if (TYPE_MODE (float80_type_node
) != XFmode
)
31146 /* The __float80 type. */
31147 float80_type_node
= make_node (REAL_TYPE
);
31149 TYPE_PRECISION (float80_type_node
) = 80;
31150 layout_type (float80_type_node
);
31152 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
31154 /* The __float128 type. */
31155 float128_type_node
= make_node (REAL_TYPE
);
31156 TYPE_PRECISION (float128_type_node
) = 128;
31157 layout_type (float128_type_node
);
31158 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
31160 /* This macro is built by i386-builtin-types.awk. */
31161 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
31165 ix86_init_builtins (void)
31169 ix86_init_builtin_types ();
31171 /* Builtins to get CPU type and features. */
31172 ix86_init_platform_type_builtins ();
31174 /* TFmode support builtins. */
31175 def_builtin_const (0, "__builtin_infq",
31176 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
31177 def_builtin_const (0, "__builtin_huge_valq",
31178 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
31180 /* We will expand them to normal call if SSE isn't available since
31181 they are used by libgcc. */
31182 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
31183 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
31184 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
31185 TREE_READONLY (t
) = 1;
31186 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
31188 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
31189 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
31190 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
31191 TREE_READONLY (t
) = 1;
31192 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
31194 ix86_init_tm_builtins ();
31195 ix86_init_mmx_sse_builtins ();
31198 ix86_init_builtins_va_builtins_abi ();
31200 #ifdef SUBTARGET_INIT_BUILTINS
31201 SUBTARGET_INIT_BUILTINS
;
31205 /* Return the ix86 builtin for CODE. */
31208 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
31210 if (code
>= IX86_BUILTIN_MAX
)
31211 return error_mark_node
;
31213 return ix86_builtins
[code
];
31216 /* Errors in the source file can cause expand_expr to return const0_rtx
31217 where we expect a vector. To avoid crashing, use one of the vector
31218 clear instructions. */
31220 safe_vector_operand (rtx x
, enum machine_mode mode
)
31222 if (x
== const0_rtx
)
31223 x
= CONST0_RTX (mode
);
31227 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
31230 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
31233 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31234 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31235 rtx op0
= expand_normal (arg0
);
31236 rtx op1
= expand_normal (arg1
);
31237 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31238 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31239 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
31241 if (VECTOR_MODE_P (mode0
))
31242 op0
= safe_vector_operand (op0
, mode0
);
31243 if (VECTOR_MODE_P (mode1
))
31244 op1
= safe_vector_operand (op1
, mode1
);
31246 if (optimize
|| !target
31247 || GET_MODE (target
) != tmode
31248 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31249 target
= gen_reg_rtx (tmode
);
31251 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
31253 rtx x
= gen_reg_rtx (V4SImode
);
31254 emit_insn (gen_sse2_loadd (x
, op1
));
31255 op1
= gen_lowpart (TImode
, x
);
31258 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31259 op0
= copy_to_mode_reg (mode0
, op0
);
31260 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
31261 op1
= copy_to_mode_reg (mode1
, op1
);
31263 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31272 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
31275 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
31276 enum ix86_builtin_func_type m_type
,
31277 enum rtx_code sub_code
)
31282 bool comparison_p
= false;
31284 bool last_arg_constant
= false;
31285 int num_memory
= 0;
31288 enum machine_mode mode
;
31291 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31295 case MULTI_ARG_4_DF2_DI_I
:
31296 case MULTI_ARG_4_DF2_DI_I1
:
31297 case MULTI_ARG_4_SF2_SI_I
:
31298 case MULTI_ARG_4_SF2_SI_I1
:
31300 last_arg_constant
= true;
31303 case MULTI_ARG_3_SF
:
31304 case MULTI_ARG_3_DF
:
31305 case MULTI_ARG_3_SF2
:
31306 case MULTI_ARG_3_DF2
:
31307 case MULTI_ARG_3_DI
:
31308 case MULTI_ARG_3_SI
:
31309 case MULTI_ARG_3_SI_DI
:
31310 case MULTI_ARG_3_HI
:
31311 case MULTI_ARG_3_HI_SI
:
31312 case MULTI_ARG_3_QI
:
31313 case MULTI_ARG_3_DI2
:
31314 case MULTI_ARG_3_SI2
:
31315 case MULTI_ARG_3_HI2
:
31316 case MULTI_ARG_3_QI2
:
31320 case MULTI_ARG_2_SF
:
31321 case MULTI_ARG_2_DF
:
31322 case MULTI_ARG_2_DI
:
31323 case MULTI_ARG_2_SI
:
31324 case MULTI_ARG_2_HI
:
31325 case MULTI_ARG_2_QI
:
31329 case MULTI_ARG_2_DI_IMM
:
31330 case MULTI_ARG_2_SI_IMM
:
31331 case MULTI_ARG_2_HI_IMM
:
31332 case MULTI_ARG_2_QI_IMM
:
31334 last_arg_constant
= true;
31337 case MULTI_ARG_1_SF
:
31338 case MULTI_ARG_1_DF
:
31339 case MULTI_ARG_1_SF2
:
31340 case MULTI_ARG_1_DF2
:
31341 case MULTI_ARG_1_DI
:
31342 case MULTI_ARG_1_SI
:
31343 case MULTI_ARG_1_HI
:
31344 case MULTI_ARG_1_QI
:
31345 case MULTI_ARG_1_SI_DI
:
31346 case MULTI_ARG_1_HI_DI
:
31347 case MULTI_ARG_1_HI_SI
:
31348 case MULTI_ARG_1_QI_DI
:
31349 case MULTI_ARG_1_QI_SI
:
31350 case MULTI_ARG_1_QI_HI
:
31354 case MULTI_ARG_2_DI_CMP
:
31355 case MULTI_ARG_2_SI_CMP
:
31356 case MULTI_ARG_2_HI_CMP
:
31357 case MULTI_ARG_2_QI_CMP
:
31359 comparison_p
= true;
31362 case MULTI_ARG_2_SF_TF
:
31363 case MULTI_ARG_2_DF_TF
:
31364 case MULTI_ARG_2_DI_TF
:
31365 case MULTI_ARG_2_SI_TF
:
31366 case MULTI_ARG_2_HI_TF
:
31367 case MULTI_ARG_2_QI_TF
:
31373 gcc_unreachable ();
31376 if (optimize
|| !target
31377 || GET_MODE (target
) != tmode
31378 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31379 target
= gen_reg_rtx (tmode
);
31381 gcc_assert (nargs
<= 4);
31383 for (i
= 0; i
< nargs
; i
++)
31385 tree arg
= CALL_EXPR_ARG (exp
, i
);
31386 rtx op
= expand_normal (arg
);
31387 int adjust
= (comparison_p
) ? 1 : 0;
31388 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
31390 if (last_arg_constant
&& i
== nargs
- 1)
31392 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
31394 enum insn_code new_icode
= icode
;
31397 case CODE_FOR_xop_vpermil2v2df3
:
31398 case CODE_FOR_xop_vpermil2v4sf3
:
31399 case CODE_FOR_xop_vpermil2v4df3
:
31400 case CODE_FOR_xop_vpermil2v8sf3
:
31401 error ("the last argument must be a 2-bit immediate");
31402 return gen_reg_rtx (tmode
);
31403 case CODE_FOR_xop_rotlv2di3
:
31404 new_icode
= CODE_FOR_rotlv2di3
;
31406 case CODE_FOR_xop_rotlv4si3
:
31407 new_icode
= CODE_FOR_rotlv4si3
;
31409 case CODE_FOR_xop_rotlv8hi3
:
31410 new_icode
= CODE_FOR_rotlv8hi3
;
31412 case CODE_FOR_xop_rotlv16qi3
:
31413 new_icode
= CODE_FOR_rotlv16qi3
;
31415 if (CONST_INT_P (op
))
31417 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
31418 op
= GEN_INT (INTVAL (op
) & mask
);
31419 gcc_checking_assert
31420 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
31424 gcc_checking_assert
31426 && insn_data
[new_icode
].operand
[0].mode
== tmode
31427 && insn_data
[new_icode
].operand
[1].mode
== tmode
31428 && insn_data
[new_icode
].operand
[2].mode
== mode
31429 && insn_data
[new_icode
].operand
[0].predicate
31430 == insn_data
[icode
].operand
[0].predicate
31431 && insn_data
[new_icode
].operand
[1].predicate
31432 == insn_data
[icode
].operand
[1].predicate
);
31438 gcc_unreachable ();
31445 if (VECTOR_MODE_P (mode
))
31446 op
= safe_vector_operand (op
, mode
);
31448 /* If we aren't optimizing, only allow one memory operand to be
31450 if (memory_operand (op
, mode
))
31453 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
31456 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
31458 op
= force_reg (mode
, op
);
31462 args
[i
].mode
= mode
;
31468 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31473 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
31474 GEN_INT ((int)sub_code
));
31475 else if (! comparison_p
)
31476 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31479 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
31483 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
31488 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31492 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
31496 gcc_unreachable ();
31506 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
31507 insns with vec_merge. */
31510 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
31514 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31515 rtx op1
, op0
= expand_normal (arg0
);
31516 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31517 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31519 if (optimize
|| !target
31520 || GET_MODE (target
) != tmode
31521 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31522 target
= gen_reg_rtx (tmode
);
31524 if (VECTOR_MODE_P (mode0
))
31525 op0
= safe_vector_operand (op0
, mode0
);
31527 if ((optimize
&& !register_operand (op0
, mode0
))
31528 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31529 op0
= copy_to_mode_reg (mode0
, op0
);
31532 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
31533 op1
= copy_to_mode_reg (mode0
, op1
);
31535 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31542 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
31545 ix86_expand_sse_compare (const struct builtin_description
*d
,
31546 tree exp
, rtx target
, bool swap
)
31549 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31550 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31551 rtx op0
= expand_normal (arg0
);
31552 rtx op1
= expand_normal (arg1
);
31554 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31555 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31556 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31557 enum rtx_code comparison
= d
->comparison
;
31559 if (VECTOR_MODE_P (mode0
))
31560 op0
= safe_vector_operand (op0
, mode0
);
31561 if (VECTOR_MODE_P (mode1
))
31562 op1
= safe_vector_operand (op1
, mode1
);
31564 /* Swap operands if we have a comparison that isn't available in
31568 rtx tmp
= gen_reg_rtx (mode1
);
31569 emit_move_insn (tmp
, op1
);
31574 if (optimize
|| !target
31575 || GET_MODE (target
) != tmode
31576 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31577 target
= gen_reg_rtx (tmode
);
31579 if ((optimize
&& !register_operand (op0
, mode0
))
31580 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
31581 op0
= copy_to_mode_reg (mode0
, op0
);
31582 if ((optimize
&& !register_operand (op1
, mode1
))
31583 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
31584 op1
= copy_to_mode_reg (mode1
, op1
);
31586 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
31587 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31594 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
31597 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
31601 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31602 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31603 rtx op0
= expand_normal (arg0
);
31604 rtx op1
= expand_normal (arg1
);
31605 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31606 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31607 enum rtx_code comparison
= d
->comparison
;
31609 if (VECTOR_MODE_P (mode0
))
31610 op0
= safe_vector_operand (op0
, mode0
);
31611 if (VECTOR_MODE_P (mode1
))
31612 op1
= safe_vector_operand (op1
, mode1
);
31614 /* Swap operands if we have a comparison that isn't available in
31616 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
31623 target
= gen_reg_rtx (SImode
);
31624 emit_move_insn (target
, const0_rtx
);
31625 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31627 if ((optimize
&& !register_operand (op0
, mode0
))
31628 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31629 op0
= copy_to_mode_reg (mode0
, op0
);
31630 if ((optimize
&& !register_operand (op1
, mode1
))
31631 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31632 op1
= copy_to_mode_reg (mode1
, op1
);
31634 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31638 emit_insn (gen_rtx_SET (VOIDmode
,
31639 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31640 gen_rtx_fmt_ee (comparison
, QImode
,
31644 return SUBREG_REG (target
);
31647 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
31650 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
31654 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31655 rtx op1
, op0
= expand_normal (arg0
);
31656 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31657 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31659 if (optimize
|| target
== 0
31660 || GET_MODE (target
) != tmode
31661 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31662 target
= gen_reg_rtx (tmode
);
31664 if (VECTOR_MODE_P (mode0
))
31665 op0
= safe_vector_operand (op0
, mode0
);
31667 if ((optimize
&& !register_operand (op0
, mode0
))
31668 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31669 op0
= copy_to_mode_reg (mode0
, op0
);
31671 op1
= GEN_INT (d
->comparison
);
31673 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
31681 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
31682 tree exp
, rtx target
)
31685 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31686 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31687 rtx op0
= expand_normal (arg0
);
31688 rtx op1
= expand_normal (arg1
);
31690 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31691 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31692 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31694 if (optimize
|| target
== 0
31695 || GET_MODE (target
) != tmode
31696 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31697 target
= gen_reg_rtx (tmode
);
31699 op0
= safe_vector_operand (op0
, mode0
);
31700 op1
= safe_vector_operand (op1
, mode1
);
31702 if ((optimize
&& !register_operand (op0
, mode0
))
31703 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31704 op0
= copy_to_mode_reg (mode0
, op0
);
31705 if ((optimize
&& !register_operand (op1
, mode1
))
31706 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31707 op1
= copy_to_mode_reg (mode1
, op1
);
31709 op2
= GEN_INT (d
->comparison
);
31711 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31718 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31721 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31725 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31726 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31727 rtx op0
= expand_normal (arg0
);
31728 rtx op1
= expand_normal (arg1
);
31729 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31730 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31731 enum rtx_code comparison
= d
->comparison
;
31733 if (VECTOR_MODE_P (mode0
))
31734 op0
= safe_vector_operand (op0
, mode0
);
31735 if (VECTOR_MODE_P (mode1
))
31736 op1
= safe_vector_operand (op1
, mode1
);
31738 target
= gen_reg_rtx (SImode
);
31739 emit_move_insn (target
, const0_rtx
);
31740 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31742 if ((optimize
&& !register_operand (op0
, mode0
))
31743 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31744 op0
= copy_to_mode_reg (mode0
, op0
);
31745 if ((optimize
&& !register_operand (op1
, mode1
))
31746 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31747 op1
= copy_to_mode_reg (mode1
, op1
);
31749 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31753 emit_insn (gen_rtx_SET (VOIDmode
,
31754 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31755 gen_rtx_fmt_ee (comparison
, QImode
,
31759 return SUBREG_REG (target
);
31762 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31765 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31766 tree exp
, rtx target
)
31769 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31770 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31771 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31772 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31773 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31774 rtx scratch0
, scratch1
;
31775 rtx op0
= expand_normal (arg0
);
31776 rtx op1
= expand_normal (arg1
);
31777 rtx op2
= expand_normal (arg2
);
31778 rtx op3
= expand_normal (arg3
);
31779 rtx op4
= expand_normal (arg4
);
31780 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31782 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31783 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31784 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31785 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31786 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31787 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31788 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31790 if (VECTOR_MODE_P (modev2
))
31791 op0
= safe_vector_operand (op0
, modev2
);
31792 if (VECTOR_MODE_P (modev4
))
31793 op2
= safe_vector_operand (op2
, modev4
);
31795 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31796 op0
= copy_to_mode_reg (modev2
, op0
);
31797 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31798 op1
= copy_to_mode_reg (modei3
, op1
);
31799 if ((optimize
&& !register_operand (op2
, modev4
))
31800 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31801 op2
= copy_to_mode_reg (modev4
, op2
);
31802 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31803 op3
= copy_to_mode_reg (modei5
, op3
);
31805 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31807 error ("the fifth argument must be an 8-bit immediate");
31811 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31813 if (optimize
|| !target
31814 || GET_MODE (target
) != tmode0
31815 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31816 target
= gen_reg_rtx (tmode0
);
31818 scratch1
= gen_reg_rtx (tmode1
);
31820 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31822 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31824 if (optimize
|| !target
31825 || GET_MODE (target
) != tmode1
31826 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31827 target
= gen_reg_rtx (tmode1
);
31829 scratch0
= gen_reg_rtx (tmode0
);
31831 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31835 gcc_assert (d
->flag
);
31837 scratch0
= gen_reg_rtx (tmode0
);
31838 scratch1
= gen_reg_rtx (tmode1
);
31840 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31850 target
= gen_reg_rtx (SImode
);
31851 emit_move_insn (target
, const0_rtx
);
31852 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31855 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31856 gen_rtx_fmt_ee (EQ
, QImode
,
31857 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31860 return SUBREG_REG (target
);
31867 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31870 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31871 tree exp
, rtx target
)
31874 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31875 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31876 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31877 rtx scratch0
, scratch1
;
31878 rtx op0
= expand_normal (arg0
);
31879 rtx op1
= expand_normal (arg1
);
31880 rtx op2
= expand_normal (arg2
);
31881 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31883 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31884 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31885 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31886 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31887 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31889 if (VECTOR_MODE_P (modev2
))
31890 op0
= safe_vector_operand (op0
, modev2
);
31891 if (VECTOR_MODE_P (modev3
))
31892 op1
= safe_vector_operand (op1
, modev3
);
31894 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31895 op0
= copy_to_mode_reg (modev2
, op0
);
31896 if ((optimize
&& !register_operand (op1
, modev3
))
31897 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31898 op1
= copy_to_mode_reg (modev3
, op1
);
31900 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
31902 error ("the third argument must be an 8-bit immediate");
31906 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
31908 if (optimize
|| !target
31909 || GET_MODE (target
) != tmode0
31910 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31911 target
= gen_reg_rtx (tmode0
);
31913 scratch1
= gen_reg_rtx (tmode1
);
31915 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
31917 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
31919 if (optimize
|| !target
31920 || GET_MODE (target
) != tmode1
31921 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31922 target
= gen_reg_rtx (tmode1
);
31924 scratch0
= gen_reg_rtx (tmode0
);
31926 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
31930 gcc_assert (d
->flag
);
31932 scratch0
= gen_reg_rtx (tmode0
);
31933 scratch1
= gen_reg_rtx (tmode1
);
31935 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
31945 target
= gen_reg_rtx (SImode
);
31946 emit_move_insn (target
, const0_rtx
);
31947 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31950 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31951 gen_rtx_fmt_ee (EQ
, QImode
,
31952 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31955 return SUBREG_REG (target
);
31961 /* Subroutine of ix86_expand_builtin to take care of insns with
31962 variable number of operands. */
31965 ix86_expand_args_builtin (const struct builtin_description
*d
,
31966 tree exp
, rtx target
)
31968 rtx pat
, real_target
;
31969 unsigned int i
, nargs
;
31970 unsigned int nargs_constant
= 0;
31971 int num_memory
= 0;
31975 enum machine_mode mode
;
31977 bool last_arg_count
= false;
31978 enum insn_code icode
= d
->icode
;
31979 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31980 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31981 enum machine_mode rmode
= VOIDmode
;
31983 enum rtx_code comparison
= d
->comparison
;
31985 switch ((enum ix86_builtin_func_type
) d
->flag
)
31987 case V2DF_FTYPE_V2DF_ROUND
:
31988 case V4DF_FTYPE_V4DF_ROUND
:
31989 case V4SF_FTYPE_V4SF_ROUND
:
31990 case V8SF_FTYPE_V8SF_ROUND
:
31991 case V4SI_FTYPE_V4SF_ROUND
:
31992 case V8SI_FTYPE_V8SF_ROUND
:
31993 return ix86_expand_sse_round (d
, exp
, target
);
31994 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31995 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31996 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31997 case INT_FTYPE_V8SF_V8SF_PTEST
:
31998 case INT_FTYPE_V4DI_V4DI_PTEST
:
31999 case INT_FTYPE_V4DF_V4DF_PTEST
:
32000 case INT_FTYPE_V4SF_V4SF_PTEST
:
32001 case INT_FTYPE_V2DI_V2DI_PTEST
:
32002 case INT_FTYPE_V2DF_V2DF_PTEST
:
32003 return ix86_expand_sse_ptest (d
, exp
, target
);
32004 case FLOAT128_FTYPE_FLOAT128
:
32005 case FLOAT_FTYPE_FLOAT
:
32006 case INT_FTYPE_INT
:
32007 case UINT64_FTYPE_INT
:
32008 case UINT16_FTYPE_UINT16
:
32009 case INT64_FTYPE_INT64
:
32010 case INT64_FTYPE_V4SF
:
32011 case INT64_FTYPE_V2DF
:
32012 case INT_FTYPE_V16QI
:
32013 case INT_FTYPE_V8QI
:
32014 case INT_FTYPE_V8SF
:
32015 case INT_FTYPE_V4DF
:
32016 case INT_FTYPE_V4SF
:
32017 case INT_FTYPE_V2DF
:
32018 case INT_FTYPE_V32QI
:
32019 case V16QI_FTYPE_V16QI
:
32020 case V8SI_FTYPE_V8SF
:
32021 case V8SI_FTYPE_V4SI
:
32022 case V8HI_FTYPE_V8HI
:
32023 case V8HI_FTYPE_V16QI
:
32024 case V8QI_FTYPE_V8QI
:
32025 case V8SF_FTYPE_V8SF
:
32026 case V8SF_FTYPE_V8SI
:
32027 case V8SF_FTYPE_V4SF
:
32028 case V8SF_FTYPE_V8HI
:
32029 case V4SI_FTYPE_V4SI
:
32030 case V4SI_FTYPE_V16QI
:
32031 case V4SI_FTYPE_V4SF
:
32032 case V4SI_FTYPE_V8SI
:
32033 case V4SI_FTYPE_V8HI
:
32034 case V4SI_FTYPE_V4DF
:
32035 case V4SI_FTYPE_V2DF
:
32036 case V4HI_FTYPE_V4HI
:
32037 case V4DF_FTYPE_V4DF
:
32038 case V4DF_FTYPE_V4SI
:
32039 case V4DF_FTYPE_V4SF
:
32040 case V4DF_FTYPE_V2DF
:
32041 case V4SF_FTYPE_V4SF
:
32042 case V4SF_FTYPE_V4SI
:
32043 case V4SF_FTYPE_V8SF
:
32044 case V4SF_FTYPE_V4DF
:
32045 case V4SF_FTYPE_V8HI
:
32046 case V4SF_FTYPE_V2DF
:
32047 case V2DI_FTYPE_V2DI
:
32048 case V2DI_FTYPE_V16QI
:
32049 case V2DI_FTYPE_V8HI
:
32050 case V2DI_FTYPE_V4SI
:
32051 case V2DF_FTYPE_V2DF
:
32052 case V2DF_FTYPE_V4SI
:
32053 case V2DF_FTYPE_V4DF
:
32054 case V2DF_FTYPE_V4SF
:
32055 case V2DF_FTYPE_V2SI
:
32056 case V2SI_FTYPE_V2SI
:
32057 case V2SI_FTYPE_V4SF
:
32058 case V2SI_FTYPE_V2SF
:
32059 case V2SI_FTYPE_V2DF
:
32060 case V2SF_FTYPE_V2SF
:
32061 case V2SF_FTYPE_V2SI
:
32062 case V32QI_FTYPE_V32QI
:
32063 case V32QI_FTYPE_V16QI
:
32064 case V16HI_FTYPE_V16HI
:
32065 case V16HI_FTYPE_V8HI
:
32066 case V8SI_FTYPE_V8SI
:
32067 case V16HI_FTYPE_V16QI
:
32068 case V8SI_FTYPE_V16QI
:
32069 case V4DI_FTYPE_V16QI
:
32070 case V8SI_FTYPE_V8HI
:
32071 case V4DI_FTYPE_V8HI
:
32072 case V4DI_FTYPE_V4SI
:
32073 case V4DI_FTYPE_V2DI
:
32076 case V4SF_FTYPE_V4SF_VEC_MERGE
:
32077 case V2DF_FTYPE_V2DF_VEC_MERGE
:
32078 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
32079 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
32080 case V16QI_FTYPE_V16QI_V16QI
:
32081 case V16QI_FTYPE_V8HI_V8HI
:
32082 case V8QI_FTYPE_V8QI_V8QI
:
32083 case V8QI_FTYPE_V4HI_V4HI
:
32084 case V8HI_FTYPE_V8HI_V8HI
:
32085 case V8HI_FTYPE_V16QI_V16QI
:
32086 case V8HI_FTYPE_V4SI_V4SI
:
32087 case V8SF_FTYPE_V8SF_V8SF
:
32088 case V8SF_FTYPE_V8SF_V8SI
:
32089 case V4SI_FTYPE_V4SI_V4SI
:
32090 case V4SI_FTYPE_V8HI_V8HI
:
32091 case V4SI_FTYPE_V4SF_V4SF
:
32092 case V4SI_FTYPE_V2DF_V2DF
:
32093 case V4HI_FTYPE_V4HI_V4HI
:
32094 case V4HI_FTYPE_V8QI_V8QI
:
32095 case V4HI_FTYPE_V2SI_V2SI
:
32096 case V4DF_FTYPE_V4DF_V4DF
:
32097 case V4DF_FTYPE_V4DF_V4DI
:
32098 case V4SF_FTYPE_V4SF_V4SF
:
32099 case V4SF_FTYPE_V4SF_V4SI
:
32100 case V4SF_FTYPE_V4SF_V2SI
:
32101 case V4SF_FTYPE_V4SF_V2DF
:
32102 case V4SF_FTYPE_V4SF_DI
:
32103 case V4SF_FTYPE_V4SF_SI
:
32104 case V2DI_FTYPE_V2DI_V2DI
:
32105 case V2DI_FTYPE_V16QI_V16QI
:
32106 case V2DI_FTYPE_V4SI_V4SI
:
32107 case V2UDI_FTYPE_V4USI_V4USI
:
32108 case V2DI_FTYPE_V2DI_V16QI
:
32109 case V2DI_FTYPE_V2DF_V2DF
:
32110 case V2SI_FTYPE_V2SI_V2SI
:
32111 case V2SI_FTYPE_V4HI_V4HI
:
32112 case V2SI_FTYPE_V2SF_V2SF
:
32113 case V2DF_FTYPE_V2DF_V2DF
:
32114 case V2DF_FTYPE_V2DF_V4SF
:
32115 case V2DF_FTYPE_V2DF_V2DI
:
32116 case V2DF_FTYPE_V2DF_DI
:
32117 case V2DF_FTYPE_V2DF_SI
:
32118 case V2SF_FTYPE_V2SF_V2SF
:
32119 case V1DI_FTYPE_V1DI_V1DI
:
32120 case V1DI_FTYPE_V8QI_V8QI
:
32121 case V1DI_FTYPE_V2SI_V2SI
:
32122 case V32QI_FTYPE_V16HI_V16HI
:
32123 case V16HI_FTYPE_V8SI_V8SI
:
32124 case V32QI_FTYPE_V32QI_V32QI
:
32125 case V16HI_FTYPE_V32QI_V32QI
:
32126 case V16HI_FTYPE_V16HI_V16HI
:
32127 case V8SI_FTYPE_V4DF_V4DF
:
32128 case V8SI_FTYPE_V8SI_V8SI
:
32129 case V8SI_FTYPE_V16HI_V16HI
:
32130 case V4DI_FTYPE_V4DI_V4DI
:
32131 case V4DI_FTYPE_V8SI_V8SI
:
32132 case V4UDI_FTYPE_V8USI_V8USI
:
32133 if (comparison
== UNKNOWN
)
32134 return ix86_expand_binop_builtin (icode
, exp
, target
);
32137 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
32138 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
32139 gcc_assert (comparison
!= UNKNOWN
);
32143 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
32144 case V16HI_FTYPE_V16HI_SI_COUNT
:
32145 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
32146 case V8SI_FTYPE_V8SI_SI_COUNT
:
32147 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
32148 case V4DI_FTYPE_V4DI_INT_COUNT
:
32149 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
32150 case V8HI_FTYPE_V8HI_SI_COUNT
:
32151 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
32152 case V4SI_FTYPE_V4SI_SI_COUNT
:
32153 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
32154 case V4HI_FTYPE_V4HI_SI_COUNT
:
32155 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
32156 case V2DI_FTYPE_V2DI_SI_COUNT
:
32157 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
32158 case V2SI_FTYPE_V2SI_SI_COUNT
:
32159 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
32160 case V1DI_FTYPE_V1DI_SI_COUNT
:
32162 last_arg_count
= true;
32164 case UINT64_FTYPE_UINT64_UINT64
:
32165 case UINT_FTYPE_UINT_UINT
:
32166 case UINT_FTYPE_UINT_USHORT
:
32167 case UINT_FTYPE_UINT_UCHAR
:
32168 case UINT16_FTYPE_UINT16_INT
:
32169 case UINT8_FTYPE_UINT8_INT
:
32172 case V2DI_FTYPE_V2DI_INT_CONVERT
:
32175 nargs_constant
= 1;
32177 case V4DI_FTYPE_V4DI_INT_CONVERT
:
32180 nargs_constant
= 1;
32182 case V8HI_FTYPE_V8HI_INT
:
32183 case V8HI_FTYPE_V8SF_INT
:
32184 case V8HI_FTYPE_V4SF_INT
:
32185 case V8SF_FTYPE_V8SF_INT
:
32186 case V4SI_FTYPE_V4SI_INT
:
32187 case V4SI_FTYPE_V8SI_INT
:
32188 case V4HI_FTYPE_V4HI_INT
:
32189 case V4DF_FTYPE_V4DF_INT
:
32190 case V4SF_FTYPE_V4SF_INT
:
32191 case V4SF_FTYPE_V8SF_INT
:
32192 case V2DI_FTYPE_V2DI_INT
:
32193 case V2DF_FTYPE_V2DF_INT
:
32194 case V2DF_FTYPE_V4DF_INT
:
32195 case V16HI_FTYPE_V16HI_INT
:
32196 case V8SI_FTYPE_V8SI_INT
:
32197 case V4DI_FTYPE_V4DI_INT
:
32198 case V2DI_FTYPE_V4DI_INT
:
32200 nargs_constant
= 1;
32202 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
32203 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
32204 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
32205 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
32206 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
32207 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
32210 case V32QI_FTYPE_V32QI_V32QI_INT
:
32211 case V16HI_FTYPE_V16HI_V16HI_INT
:
32212 case V16QI_FTYPE_V16QI_V16QI_INT
:
32213 case V4DI_FTYPE_V4DI_V4DI_INT
:
32214 case V8HI_FTYPE_V8HI_V8HI_INT
:
32215 case V8SI_FTYPE_V8SI_V8SI_INT
:
32216 case V8SI_FTYPE_V8SI_V4SI_INT
:
32217 case V8SF_FTYPE_V8SF_V8SF_INT
:
32218 case V8SF_FTYPE_V8SF_V4SF_INT
:
32219 case V4SI_FTYPE_V4SI_V4SI_INT
:
32220 case V4DF_FTYPE_V4DF_V4DF_INT
:
32221 case V4DF_FTYPE_V4DF_V2DF_INT
:
32222 case V4SF_FTYPE_V4SF_V4SF_INT
:
32223 case V2DI_FTYPE_V2DI_V2DI_INT
:
32224 case V4DI_FTYPE_V4DI_V2DI_INT
:
32225 case V2DF_FTYPE_V2DF_V2DF_INT
:
32227 nargs_constant
= 1;
32229 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
32232 nargs_constant
= 1;
32234 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
32237 nargs_constant
= 1;
32239 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
32242 nargs_constant
= 1;
32244 case V2DI_FTYPE_V2DI_UINT_UINT
:
32246 nargs_constant
= 2;
32248 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
32249 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
32250 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
32251 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
32253 nargs_constant
= 1;
32255 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
32257 nargs_constant
= 2;
32259 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
32260 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
32264 gcc_unreachable ();
32267 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32269 if (comparison
!= UNKNOWN
)
32271 gcc_assert (nargs
== 2);
32272 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
32275 if (rmode
== VOIDmode
|| rmode
== tmode
)
32279 || GET_MODE (target
) != tmode
32280 || !insn_p
->operand
[0].predicate (target
, tmode
))
32281 target
= gen_reg_rtx (tmode
);
32282 real_target
= target
;
32286 real_target
= gen_reg_rtx (tmode
);
32287 target
= simplify_gen_subreg (rmode
, real_target
, tmode
, 0);
32290 for (i
= 0; i
< nargs
; i
++)
32292 tree arg
= CALL_EXPR_ARG (exp
, i
);
32293 rtx op
= expand_normal (arg
);
32294 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32295 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32297 if (last_arg_count
&& (i
+ 1) == nargs
)
32299 /* SIMD shift insns take either an 8-bit immediate or
32300 register as count. But builtin functions take int as
32301 count. If count doesn't match, we put it in register. */
32304 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
32305 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
32306 op
= copy_to_reg (op
);
32309 else if ((nargs
- i
) <= nargs_constant
)
32314 case CODE_FOR_avx2_inserti128
:
32315 case CODE_FOR_avx2_extracti128
:
32316 error ("the last argument must be an 1-bit immediate");
32319 case CODE_FOR_sse4_1_roundsd
:
32320 case CODE_FOR_sse4_1_roundss
:
32322 case CODE_FOR_sse4_1_roundpd
:
32323 case CODE_FOR_sse4_1_roundps
:
32324 case CODE_FOR_avx_roundpd256
:
32325 case CODE_FOR_avx_roundps256
:
32327 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
32328 case CODE_FOR_sse4_1_roundps_sfix
:
32329 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
32330 case CODE_FOR_avx_roundps_sfix256
:
32332 case CODE_FOR_sse4_1_blendps
:
32333 case CODE_FOR_avx_blendpd256
:
32334 case CODE_FOR_avx_vpermilv4df
:
32335 error ("the last argument must be a 4-bit immediate");
32338 case CODE_FOR_sse4_1_blendpd
:
32339 case CODE_FOR_avx_vpermilv2df
:
32340 case CODE_FOR_xop_vpermil2v2df3
:
32341 case CODE_FOR_xop_vpermil2v4sf3
:
32342 case CODE_FOR_xop_vpermil2v4df3
:
32343 case CODE_FOR_xop_vpermil2v8sf3
:
32344 error ("the last argument must be a 2-bit immediate");
32347 case CODE_FOR_avx_vextractf128v4df
:
32348 case CODE_FOR_avx_vextractf128v8sf
:
32349 case CODE_FOR_avx_vextractf128v8si
:
32350 case CODE_FOR_avx_vinsertf128v4df
:
32351 case CODE_FOR_avx_vinsertf128v8sf
:
32352 case CODE_FOR_avx_vinsertf128v8si
:
32353 error ("the last argument must be a 1-bit immediate");
32356 case CODE_FOR_avx_vmcmpv2df3
:
32357 case CODE_FOR_avx_vmcmpv4sf3
:
32358 case CODE_FOR_avx_cmpv2df3
:
32359 case CODE_FOR_avx_cmpv4sf3
:
32360 case CODE_FOR_avx_cmpv4df3
:
32361 case CODE_FOR_avx_cmpv8sf3
:
32362 error ("the last argument must be a 5-bit immediate");
32366 switch (nargs_constant
)
32369 if ((nargs
- i
) == nargs_constant
)
32371 error ("the next to last argument must be an 8-bit immediate");
32375 error ("the last argument must be an 8-bit immediate");
32378 gcc_unreachable ();
32385 if (VECTOR_MODE_P (mode
))
32386 op
= safe_vector_operand (op
, mode
);
32388 /* If we aren't optimizing, only allow one memory operand to
32390 if (memory_operand (op
, mode
))
32393 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
32395 if (optimize
|| !match
|| num_memory
> 1)
32396 op
= copy_to_mode_reg (mode
, op
);
32400 op
= copy_to_reg (op
);
32401 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
32406 args
[i
].mode
= mode
;
32412 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
32415 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
32418 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32422 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32423 args
[2].op
, args
[3].op
);
32426 gcc_unreachable ();
32436 /* Subroutine of ix86_expand_builtin to take care of special insns
32437 with variable number of operands. */
32440 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
32441 tree exp
, rtx target
)
32445 unsigned int i
, nargs
, arg_adjust
, memory
;
32449 enum machine_mode mode
;
32451 enum insn_code icode
= d
->icode
;
32452 bool last_arg_constant
= false;
32453 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
32454 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
32455 enum { load
, store
} klass
;
32457 switch ((enum ix86_builtin_func_type
) d
->flag
)
32459 case VOID_FTYPE_VOID
:
32460 emit_insn (GEN_FCN (icode
) (target
));
32462 case VOID_FTYPE_UINT64
:
32463 case VOID_FTYPE_UNSIGNED
:
32469 case INT_FTYPE_VOID
:
32470 case UINT64_FTYPE_VOID
:
32471 case UNSIGNED_FTYPE_VOID
:
32476 case UINT64_FTYPE_PUNSIGNED
:
32477 case V2DI_FTYPE_PV2DI
:
32478 case V4DI_FTYPE_PV4DI
:
32479 case V32QI_FTYPE_PCCHAR
:
32480 case V16QI_FTYPE_PCCHAR
:
32481 case V8SF_FTYPE_PCV4SF
:
32482 case V8SF_FTYPE_PCFLOAT
:
32483 case V4SF_FTYPE_PCFLOAT
:
32484 case V4DF_FTYPE_PCV2DF
:
32485 case V4DF_FTYPE_PCDOUBLE
:
32486 case V2DF_FTYPE_PCDOUBLE
:
32487 case VOID_FTYPE_PVOID
:
32492 case VOID_FTYPE_PV2SF_V4SF
:
32493 case VOID_FTYPE_PV4DI_V4DI
:
32494 case VOID_FTYPE_PV2DI_V2DI
:
32495 case VOID_FTYPE_PCHAR_V32QI
:
32496 case VOID_FTYPE_PCHAR_V16QI
:
32497 case VOID_FTYPE_PFLOAT_V8SF
:
32498 case VOID_FTYPE_PFLOAT_V4SF
:
32499 case VOID_FTYPE_PDOUBLE_V4DF
:
32500 case VOID_FTYPE_PDOUBLE_V2DF
:
32501 case VOID_FTYPE_PLONGLONG_LONGLONG
:
32502 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
32503 case VOID_FTYPE_PINT_INT
:
32506 /* Reserve memory operand for target. */
32507 memory
= ARRAY_SIZE (args
);
32509 case V4SF_FTYPE_V4SF_PCV2SF
:
32510 case V2DF_FTYPE_V2DF_PCDOUBLE
:
32515 case V8SF_FTYPE_PCV8SF_V8SI
:
32516 case V4DF_FTYPE_PCV4DF_V4DI
:
32517 case V4SF_FTYPE_PCV4SF_V4SI
:
32518 case V2DF_FTYPE_PCV2DF_V2DI
:
32519 case V8SI_FTYPE_PCV8SI_V8SI
:
32520 case V4DI_FTYPE_PCV4DI_V4DI
:
32521 case V4SI_FTYPE_PCV4SI_V4SI
:
32522 case V2DI_FTYPE_PCV2DI_V2DI
:
32527 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
32528 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
32529 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
32530 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
32531 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
32532 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
32533 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
32534 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
32537 /* Reserve memory operand for target. */
32538 memory
= ARRAY_SIZE (args
);
32540 case VOID_FTYPE_UINT_UINT_UINT
:
32541 case VOID_FTYPE_UINT64_UINT_UINT
:
32542 case UCHAR_FTYPE_UINT_UINT_UINT
:
32543 case UCHAR_FTYPE_UINT64_UINT_UINT
:
32546 memory
= ARRAY_SIZE (args
);
32547 last_arg_constant
= true;
32550 gcc_unreachable ();
32553 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32555 if (klass
== store
)
32557 arg
= CALL_EXPR_ARG (exp
, 0);
32558 op
= expand_normal (arg
);
32559 gcc_assert (target
== 0);
32562 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32563 target
= gen_rtx_MEM (tmode
, op
);
32566 target
= force_reg (tmode
, op
);
32574 || !register_operand (target
, tmode
)
32575 || GET_MODE (target
) != tmode
)
32576 target
= gen_reg_rtx (tmode
);
32579 for (i
= 0; i
< nargs
; i
++)
32581 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32584 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
32585 op
= expand_normal (arg
);
32586 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32588 if (last_arg_constant
&& (i
+ 1) == nargs
)
32592 if (icode
== CODE_FOR_lwp_lwpvalsi3
32593 || icode
== CODE_FOR_lwp_lwpinssi3
32594 || icode
== CODE_FOR_lwp_lwpvaldi3
32595 || icode
== CODE_FOR_lwp_lwpinsdi3
)
32596 error ("the last argument must be a 32-bit immediate");
32598 error ("the last argument must be an 8-bit immediate");
32606 /* This must be the memory operand. */
32607 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32608 op
= gen_rtx_MEM (mode
, op
);
32609 gcc_assert (GET_MODE (op
) == mode
32610 || GET_MODE (op
) == VOIDmode
);
32614 /* This must be register. */
32615 if (VECTOR_MODE_P (mode
))
32616 op
= safe_vector_operand (op
, mode
);
32618 gcc_assert (GET_MODE (op
) == mode
32619 || GET_MODE (op
) == VOIDmode
);
32620 op
= copy_to_mode_reg (mode
, op
);
32625 args
[i
].mode
= mode
;
32631 pat
= GEN_FCN (icode
) (target
);
32634 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
32637 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
32640 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
32643 gcc_unreachable ();
32649 return klass
== store
? 0 : target
;
32652 /* Return the integer constant in ARG. Constrain it to be in the range
32653 of the subparts of VEC_TYPE; issue an error if not. */
32656 get_element_number (tree vec_type
, tree arg
)
32658 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
32660 if (!host_integerp (arg
, 1)
32661 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
32663 error ("selector must be an integer constant in the range 0..%wi", max
);
32670 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32671 ix86_expand_vector_init. We DO have language-level syntax for this, in
32672 the form of (type){ init-list }. Except that since we can't place emms
32673 instructions from inside the compiler, we can't allow the use of MMX
32674 registers unless the user explicitly asks for it. So we do *not* define
32675 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
32676 we have builtins invoked by mmintrin.h that gives us license to emit
32677 these sorts of instructions. */
32680 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
32682 enum machine_mode tmode
= TYPE_MODE (type
);
32683 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
32684 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
32685 rtvec v
= rtvec_alloc (n_elt
);
32687 gcc_assert (VECTOR_MODE_P (tmode
));
32688 gcc_assert (call_expr_nargs (exp
) == n_elt
);
32690 for (i
= 0; i
< n_elt
; ++i
)
32692 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
32693 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
32696 if (!target
|| !register_operand (target
, tmode
))
32697 target
= gen_reg_rtx (tmode
);
32699 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
32703 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32704 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
32705 had a language-level syntax for referencing vector elements. */
32708 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
32710 enum machine_mode tmode
, mode0
;
32715 arg0
= CALL_EXPR_ARG (exp
, 0);
32716 arg1
= CALL_EXPR_ARG (exp
, 1);
32718 op0
= expand_normal (arg0
);
32719 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
32721 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32722 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32723 gcc_assert (VECTOR_MODE_P (mode0
));
32725 op0
= force_reg (mode0
, op0
);
32727 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32728 target
= gen_reg_rtx (tmode
);
32730 ix86_expand_vector_extract (true, target
, op0
, elt
);
32735 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32736 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32737 a language-level syntax for referencing vector elements. */
32740 ix86_expand_vec_set_builtin (tree exp
)
32742 enum machine_mode tmode
, mode1
;
32743 tree arg0
, arg1
, arg2
;
32745 rtx op0
, op1
, target
;
32747 arg0
= CALL_EXPR_ARG (exp
, 0);
32748 arg1
= CALL_EXPR_ARG (exp
, 1);
32749 arg2
= CALL_EXPR_ARG (exp
, 2);
32751 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32752 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32753 gcc_assert (VECTOR_MODE_P (tmode
));
32755 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32756 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
32757 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
32759 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32760 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
32762 op0
= force_reg (tmode
, op0
);
32763 op1
= force_reg (mode1
, op1
);
32765 /* OP0 is the source of these builtin functions and shouldn't be
32766 modified. Create a copy, use it and return it as target. */
32767 target
= gen_reg_rtx (tmode
);
32768 emit_move_insn (target
, op0
);
32769 ix86_expand_vector_set (true, target
, op1
, elt
);
32774 /* Expand an expression EXP that calls a built-in function,
32775 with result going to TARGET if that's convenient
32776 (and in mode MODE if that's convenient).
32777 SUBTARGET may be used as the target for computing one of EXP's operands.
32778 IGNORE is nonzero if the value is to be ignored. */
32781 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget
,
32782 enum machine_mode mode
, int ignore
)
32784 const struct builtin_description
*d
;
32786 enum insn_code icode
;
32787 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32788 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32789 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32790 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32791 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32793 /* For CPU builtins that can be folded, fold first and expand the fold. */
32796 case IX86_BUILTIN_CPU_INIT
:
32798 /* Make it call __cpu_indicator_init in libgcc. */
32799 tree call_expr
, fndecl
, type
;
32800 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32801 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32802 call_expr
= build_call_expr (fndecl
, 0);
32803 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32805 case IX86_BUILTIN_CPU_IS
:
32806 case IX86_BUILTIN_CPU_SUPPORTS
:
32808 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32809 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32810 gcc_assert (fold_expr
!= NULL_TREE
);
32811 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32815 /* Determine whether the builtin function is available under the current ISA.
32816 Originally the builtin was not created if it wasn't applicable to the
32817 current ISA based on the command line switches. With function specific
32818 options, we need to check in the context of the function making the call
32819 whether it is supported. */
32820 if (ix86_builtins_isa
[fcode
].isa
32821 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32823 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32824 NULL
, (enum fpmath_unit
) 0, false);
32827 error ("%qE needs unknown isa option", fndecl
);
32830 gcc_assert (opts
!= NULL
);
32831 error ("%qE needs isa option %s", fndecl
, opts
);
32839 case IX86_BUILTIN_MASKMOVQ
:
32840 case IX86_BUILTIN_MASKMOVDQU
:
32841 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32842 ? CODE_FOR_mmx_maskmovq
32843 : CODE_FOR_sse2_maskmovdqu
);
32844 /* Note the arg order is different from the operand order. */
32845 arg1
= CALL_EXPR_ARG (exp
, 0);
32846 arg2
= CALL_EXPR_ARG (exp
, 1);
32847 arg0
= CALL_EXPR_ARG (exp
, 2);
32848 op0
= expand_normal (arg0
);
32849 op1
= expand_normal (arg1
);
32850 op2
= expand_normal (arg2
);
32851 mode0
= insn_data
[icode
].operand
[0].mode
;
32852 mode1
= insn_data
[icode
].operand
[1].mode
;
32853 mode2
= insn_data
[icode
].operand
[2].mode
;
32855 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32856 op0
= gen_rtx_MEM (mode1
, op0
);
32858 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32859 op0
= copy_to_mode_reg (mode0
, op0
);
32860 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32861 op1
= copy_to_mode_reg (mode1
, op1
);
32862 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32863 op2
= copy_to_mode_reg (mode2
, op2
);
32864 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32870 case IX86_BUILTIN_LDMXCSR
:
32871 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
32872 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32873 emit_move_insn (target
, op0
);
32874 emit_insn (gen_sse_ldmxcsr (target
));
32877 case IX86_BUILTIN_STMXCSR
:
32878 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32879 emit_insn (gen_sse_stmxcsr (target
));
32880 return copy_to_mode_reg (SImode
, target
);
32882 case IX86_BUILTIN_CLFLUSH
:
32883 arg0
= CALL_EXPR_ARG (exp
, 0);
32884 op0
= expand_normal (arg0
);
32885 icode
= CODE_FOR_sse2_clflush
;
32886 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32887 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32889 emit_insn (gen_sse2_clflush (op0
));
32892 case IX86_BUILTIN_MONITOR
:
32893 arg0
= CALL_EXPR_ARG (exp
, 0);
32894 arg1
= CALL_EXPR_ARG (exp
, 1);
32895 arg2
= CALL_EXPR_ARG (exp
, 2);
32896 op0
= expand_normal (arg0
);
32897 op1
= expand_normal (arg1
);
32898 op2
= expand_normal (arg2
);
32900 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32902 op1
= copy_to_mode_reg (SImode
, op1
);
32904 op2
= copy_to_mode_reg (SImode
, op2
);
32905 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
32908 case IX86_BUILTIN_MWAIT
:
32909 arg0
= CALL_EXPR_ARG (exp
, 0);
32910 arg1
= CALL_EXPR_ARG (exp
, 1);
32911 op0
= expand_normal (arg0
);
32912 op1
= expand_normal (arg1
);
32914 op0
= copy_to_mode_reg (SImode
, op0
);
32916 op1
= copy_to_mode_reg (SImode
, op1
);
32917 emit_insn (gen_sse3_mwait (op0
, op1
));
32920 case IX86_BUILTIN_VEC_INIT_V2SI
:
32921 case IX86_BUILTIN_VEC_INIT_V4HI
:
32922 case IX86_BUILTIN_VEC_INIT_V8QI
:
32923 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
32925 case IX86_BUILTIN_VEC_EXT_V2DF
:
32926 case IX86_BUILTIN_VEC_EXT_V2DI
:
32927 case IX86_BUILTIN_VEC_EXT_V4SF
:
32928 case IX86_BUILTIN_VEC_EXT_V4SI
:
32929 case IX86_BUILTIN_VEC_EXT_V8HI
:
32930 case IX86_BUILTIN_VEC_EXT_V2SI
:
32931 case IX86_BUILTIN_VEC_EXT_V4HI
:
32932 case IX86_BUILTIN_VEC_EXT_V16QI
:
32933 return ix86_expand_vec_ext_builtin (exp
, target
);
32935 case IX86_BUILTIN_VEC_SET_V2DI
:
32936 case IX86_BUILTIN_VEC_SET_V4SF
:
32937 case IX86_BUILTIN_VEC_SET_V4SI
:
32938 case IX86_BUILTIN_VEC_SET_V8HI
:
32939 case IX86_BUILTIN_VEC_SET_V4HI
:
32940 case IX86_BUILTIN_VEC_SET_V16QI
:
32941 return ix86_expand_vec_set_builtin (exp
);
32943 case IX86_BUILTIN_INFQ
:
32944 case IX86_BUILTIN_HUGE_VALQ
:
32946 REAL_VALUE_TYPE inf
;
32950 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
32952 tmp
= validize_mem (force_const_mem (mode
, tmp
));
32955 target
= gen_reg_rtx (mode
);
32957 emit_move_insn (target
, tmp
);
32961 case IX86_BUILTIN_RDPMC
:
32962 case IX86_BUILTIN_RDTSC
:
32963 case IX86_BUILTIN_RDTSCP
:
32965 op0
= gen_reg_rtx (DImode
);
32966 op1
= gen_reg_rtx (DImode
);
32968 if (fcode
== IX86_BUILTIN_RDPMC
)
32970 arg0
= CALL_EXPR_ARG (exp
, 0);
32971 op2
= expand_normal (arg0
);
32972 if (!register_operand (op2
, SImode
))
32973 op2
= copy_to_mode_reg (SImode
, op2
);
32975 insn
= (TARGET_64BIT
32976 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
32977 : gen_rdpmc (op0
, op2
));
32980 else if (fcode
== IX86_BUILTIN_RDTSC
)
32982 insn
= (TARGET_64BIT
32983 ? gen_rdtsc_rex64 (op0
, op1
)
32984 : gen_rdtsc (op0
));
32989 op2
= gen_reg_rtx (SImode
);
32991 insn
= (TARGET_64BIT
32992 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
32993 : gen_rdtscp (op0
, op2
));
32996 arg0
= CALL_EXPR_ARG (exp
, 0);
32997 op4
= expand_normal (arg0
);
32998 if (!address_operand (op4
, VOIDmode
))
33000 op4
= convert_memory_address (Pmode
, op4
);
33001 op4
= copy_addr_to_reg (op4
);
33003 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
33008 /* mode is VOIDmode if __builtin_rd* has been called
33010 if (mode
== VOIDmode
)
33012 target
= gen_reg_rtx (mode
);
33017 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
33018 op1
, 1, OPTAB_DIRECT
);
33019 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
33020 op0
, 1, OPTAB_DIRECT
);
33023 emit_move_insn (target
, op0
);
33026 case IX86_BUILTIN_FXSAVE
:
33027 case IX86_BUILTIN_FXRSTOR
:
33028 case IX86_BUILTIN_FXSAVE64
:
33029 case IX86_BUILTIN_FXRSTOR64
:
33030 case IX86_BUILTIN_FNSTENV
:
33031 case IX86_BUILTIN_FLDENV
:
33032 case IX86_BUILTIN_FNSTSW
:
33036 case IX86_BUILTIN_FXSAVE
:
33037 icode
= CODE_FOR_fxsave
;
33039 case IX86_BUILTIN_FXRSTOR
:
33040 icode
= CODE_FOR_fxrstor
;
33042 case IX86_BUILTIN_FXSAVE64
:
33043 icode
= CODE_FOR_fxsave64
;
33045 case IX86_BUILTIN_FXRSTOR64
:
33046 icode
= CODE_FOR_fxrstor64
;
33048 case IX86_BUILTIN_FNSTENV
:
33049 icode
= CODE_FOR_fnstenv
;
33051 case IX86_BUILTIN_FLDENV
:
33052 icode
= CODE_FOR_fldenv
;
33054 case IX86_BUILTIN_FNSTSW
:
33055 icode
= CODE_FOR_fnstsw
;
33059 gcc_unreachable ();
33062 arg0
= CALL_EXPR_ARG (exp
, 0);
33063 op0
= expand_normal (arg0
);
33065 if (!address_operand (op0
, VOIDmode
))
33067 op0
= convert_memory_address (Pmode
, op0
);
33068 op0
= copy_addr_to_reg (op0
);
33070 op0
= gen_rtx_MEM (mode0
, op0
);
33072 pat
= GEN_FCN (icode
) (op0
);
33077 case IX86_BUILTIN_XSAVE
:
33078 case IX86_BUILTIN_XRSTOR
:
33079 case IX86_BUILTIN_XSAVE64
:
33080 case IX86_BUILTIN_XRSTOR64
:
33081 case IX86_BUILTIN_XSAVEOPT
:
33082 case IX86_BUILTIN_XSAVEOPT64
:
33083 arg0
= CALL_EXPR_ARG (exp
, 0);
33084 arg1
= CALL_EXPR_ARG (exp
, 1);
33085 op0
= expand_normal (arg0
);
33086 op1
= expand_normal (arg1
);
33088 if (!address_operand (op0
, VOIDmode
))
33090 op0
= convert_memory_address (Pmode
, op0
);
33091 op0
= copy_addr_to_reg (op0
);
33093 op0
= gen_rtx_MEM (BLKmode
, op0
);
33095 op1
= force_reg (DImode
, op1
);
33099 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
33100 NULL
, 1, OPTAB_DIRECT
);
33103 case IX86_BUILTIN_XSAVE
:
33104 icode
= CODE_FOR_xsave_rex64
;
33106 case IX86_BUILTIN_XRSTOR
:
33107 icode
= CODE_FOR_xrstor_rex64
;
33109 case IX86_BUILTIN_XSAVE64
:
33110 icode
= CODE_FOR_xsave64
;
33112 case IX86_BUILTIN_XRSTOR64
:
33113 icode
= CODE_FOR_xrstor64
;
33115 case IX86_BUILTIN_XSAVEOPT
:
33116 icode
= CODE_FOR_xsaveopt_rex64
;
33118 case IX86_BUILTIN_XSAVEOPT64
:
33119 icode
= CODE_FOR_xsaveopt64
;
33122 gcc_unreachable ();
33125 op2
= gen_lowpart (SImode
, op2
);
33126 op1
= gen_lowpart (SImode
, op1
);
33127 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
33133 case IX86_BUILTIN_XSAVE
:
33134 icode
= CODE_FOR_xsave
;
33136 case IX86_BUILTIN_XRSTOR
:
33137 icode
= CODE_FOR_xrstor
;
33139 case IX86_BUILTIN_XSAVEOPT
:
33140 icode
= CODE_FOR_xsaveopt
;
33143 gcc_unreachable ();
33145 pat
= GEN_FCN (icode
) (op0
, op1
);
33152 case IX86_BUILTIN_LLWPCB
:
33153 arg0
= CALL_EXPR_ARG (exp
, 0);
33154 op0
= expand_normal (arg0
);
33155 icode
= CODE_FOR_lwp_llwpcb
;
33156 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
33157 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
33158 emit_insn (gen_lwp_llwpcb (op0
));
33161 case IX86_BUILTIN_SLWPCB
:
33162 icode
= CODE_FOR_lwp_slwpcb
;
33164 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
33165 target
= gen_reg_rtx (Pmode
);
33166 emit_insn (gen_lwp_slwpcb (target
));
33169 case IX86_BUILTIN_BEXTRI32
:
33170 case IX86_BUILTIN_BEXTRI64
:
33171 arg0
= CALL_EXPR_ARG (exp
, 0);
33172 arg1
= CALL_EXPR_ARG (exp
, 1);
33173 op0
= expand_normal (arg0
);
33174 op1
= expand_normal (arg1
);
33175 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
33176 ? CODE_FOR_tbm_bextri_si
33177 : CODE_FOR_tbm_bextri_di
);
33178 if (!CONST_INT_P (op1
))
33180 error ("last argument must be an immediate");
33185 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
33186 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
33187 op1
= GEN_INT (length
);
33188 op2
= GEN_INT (lsb_index
);
33189 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
33195 case IX86_BUILTIN_RDRAND16_STEP
:
33196 icode
= CODE_FOR_rdrandhi_1
;
33200 case IX86_BUILTIN_RDRAND32_STEP
:
33201 icode
= CODE_FOR_rdrandsi_1
;
33205 case IX86_BUILTIN_RDRAND64_STEP
:
33206 icode
= CODE_FOR_rdranddi_1
;
33210 op0
= gen_reg_rtx (mode0
);
33211 emit_insn (GEN_FCN (icode
) (op0
));
33213 arg0
= CALL_EXPR_ARG (exp
, 0);
33214 op1
= expand_normal (arg0
);
33215 if (!address_operand (op1
, VOIDmode
))
33217 op1
= convert_memory_address (Pmode
, op1
);
33218 op1
= copy_addr_to_reg (op1
);
33220 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33222 op1
= gen_reg_rtx (SImode
);
33223 emit_move_insn (op1
, CONST1_RTX (SImode
));
33225 /* Emit SImode conditional move. */
33226 if (mode0
== HImode
)
33228 op2
= gen_reg_rtx (SImode
);
33229 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
33231 else if (mode0
== SImode
)
33234 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
33237 target
= gen_reg_rtx (SImode
);
33239 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33241 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33242 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
33245 case IX86_BUILTIN_RDSEED16_STEP
:
33246 icode
= CODE_FOR_rdseedhi_1
;
33250 case IX86_BUILTIN_RDSEED32_STEP
:
33251 icode
= CODE_FOR_rdseedsi_1
;
33255 case IX86_BUILTIN_RDSEED64_STEP
:
33256 icode
= CODE_FOR_rdseeddi_1
;
33260 op0
= gen_reg_rtx (mode0
);
33261 emit_insn (GEN_FCN (icode
) (op0
));
33263 arg0
= CALL_EXPR_ARG (exp
, 0);
33264 op1
= expand_normal (arg0
);
33265 if (!address_operand (op1
, VOIDmode
))
33267 op1
= convert_memory_address (Pmode
, op1
);
33268 op1
= copy_addr_to_reg (op1
);
33270 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33272 op2
= gen_reg_rtx (QImode
);
33274 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33276 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
33279 target
= gen_reg_rtx (SImode
);
33281 emit_insn (gen_zero_extendqisi2 (target
, op2
));
33284 case IX86_BUILTIN_ADDCARRYX32
:
33285 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
33289 case IX86_BUILTIN_ADDCARRYX64
:
33290 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
33294 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
33295 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
33296 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
33297 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
33299 op0
= gen_reg_rtx (QImode
);
33301 /* Generate CF from input operand. */
33302 op1
= expand_normal (arg0
);
33303 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
33304 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
33306 /* Gen ADCX instruction to compute X+Y+CF. */
33307 op2
= expand_normal (arg1
);
33308 op3
= expand_normal (arg2
);
33311 op2
= copy_to_mode_reg (mode0
, op2
);
33313 op3
= copy_to_mode_reg (mode0
, op3
);
33315 op0
= gen_reg_rtx (mode0
);
33317 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
33318 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
33319 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
33321 /* Store the result. */
33322 op4
= expand_normal (arg3
);
33323 if (!address_operand (op4
, VOIDmode
))
33325 op4
= convert_memory_address (Pmode
, op4
);
33326 op4
= copy_addr_to_reg (op4
);
33328 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
33330 /* Return current CF value. */
33332 target
= gen_reg_rtx (QImode
);
33334 PUT_MODE (pat
, QImode
);
33335 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
33338 case IX86_BUILTIN_GATHERSIV2DF
:
33339 icode
= CODE_FOR_avx2_gathersiv2df
;
33341 case IX86_BUILTIN_GATHERSIV4DF
:
33342 icode
= CODE_FOR_avx2_gathersiv4df
;
33344 case IX86_BUILTIN_GATHERDIV2DF
:
33345 icode
= CODE_FOR_avx2_gatherdiv2df
;
33347 case IX86_BUILTIN_GATHERDIV4DF
:
33348 icode
= CODE_FOR_avx2_gatherdiv4df
;
33350 case IX86_BUILTIN_GATHERSIV4SF
:
33351 icode
= CODE_FOR_avx2_gathersiv4sf
;
33353 case IX86_BUILTIN_GATHERSIV8SF
:
33354 icode
= CODE_FOR_avx2_gathersiv8sf
;
33356 case IX86_BUILTIN_GATHERDIV4SF
:
33357 icode
= CODE_FOR_avx2_gatherdiv4sf
;
33359 case IX86_BUILTIN_GATHERDIV8SF
:
33360 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33362 case IX86_BUILTIN_GATHERSIV2DI
:
33363 icode
= CODE_FOR_avx2_gathersiv2di
;
33365 case IX86_BUILTIN_GATHERSIV4DI
:
33366 icode
= CODE_FOR_avx2_gathersiv4di
;
33368 case IX86_BUILTIN_GATHERDIV2DI
:
33369 icode
= CODE_FOR_avx2_gatherdiv2di
;
33371 case IX86_BUILTIN_GATHERDIV4DI
:
33372 icode
= CODE_FOR_avx2_gatherdiv4di
;
33374 case IX86_BUILTIN_GATHERSIV4SI
:
33375 icode
= CODE_FOR_avx2_gathersiv4si
;
33377 case IX86_BUILTIN_GATHERSIV8SI
:
33378 icode
= CODE_FOR_avx2_gathersiv8si
;
33380 case IX86_BUILTIN_GATHERDIV4SI
:
33381 icode
= CODE_FOR_avx2_gatherdiv4si
;
33383 case IX86_BUILTIN_GATHERDIV8SI
:
33384 icode
= CODE_FOR_avx2_gatherdiv8si
;
33386 case IX86_BUILTIN_GATHERALTSIV4DF
:
33387 icode
= CODE_FOR_avx2_gathersiv4df
;
33389 case IX86_BUILTIN_GATHERALTDIV8SF
:
33390 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33392 case IX86_BUILTIN_GATHERALTSIV4DI
:
33393 icode
= CODE_FOR_avx2_gathersiv4di
;
33395 case IX86_BUILTIN_GATHERALTDIV8SI
:
33396 icode
= CODE_FOR_avx2_gatherdiv8si
;
33400 arg0
= CALL_EXPR_ARG (exp
, 0);
33401 arg1
= CALL_EXPR_ARG (exp
, 1);
33402 arg2
= CALL_EXPR_ARG (exp
, 2);
33403 arg3
= CALL_EXPR_ARG (exp
, 3);
33404 arg4
= CALL_EXPR_ARG (exp
, 4);
33405 op0
= expand_normal (arg0
);
33406 op1
= expand_normal (arg1
);
33407 op2
= expand_normal (arg2
);
33408 op3
= expand_normal (arg3
);
33409 op4
= expand_normal (arg4
);
33410 /* Note the arg order is different from the operand order. */
33411 mode0
= insn_data
[icode
].operand
[1].mode
;
33412 mode2
= insn_data
[icode
].operand
[3].mode
;
33413 mode3
= insn_data
[icode
].operand
[4].mode
;
33414 mode4
= insn_data
[icode
].operand
[5].mode
;
33416 if (target
== NULL_RTX
33417 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
33418 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
33420 subtarget
= target
;
33422 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
33423 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
33425 rtx half
= gen_reg_rtx (V4SImode
);
33426 if (!nonimmediate_operand (op2
, V8SImode
))
33427 op2
= copy_to_mode_reg (V8SImode
, op2
);
33428 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
33431 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
33432 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
33434 rtx (*gen
) (rtx
, rtx
);
33435 rtx half
= gen_reg_rtx (mode0
);
33436 if (mode0
== V4SFmode
)
33437 gen
= gen_vec_extract_lo_v8sf
;
33439 gen
= gen_vec_extract_lo_v8si
;
33440 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
33441 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
33442 emit_insn (gen (half
, op0
));
33444 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
33445 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
33446 emit_insn (gen (half
, op3
));
33450 /* Force memory operand only with base register here. But we
33451 don't want to do it on memory operand for other builtin
33453 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
33455 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
33456 op0
= copy_to_mode_reg (mode0
, op0
);
33457 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
33458 op1
= copy_to_mode_reg (Pmode
, op1
);
33459 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
33460 op2
= copy_to_mode_reg (mode2
, op2
);
33461 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
33462 op3
= copy_to_mode_reg (mode3
, op3
);
33463 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
33465 error ("last argument must be scale 1, 2, 4, 8");
33469 /* Optimize. If mask is known to have all high bits set,
33470 replace op0 with pc_rtx to signal that the instruction
33471 overwrites the whole destination and doesn't use its
33472 previous contents. */
33475 if (TREE_CODE (arg3
) == VECTOR_CST
)
33477 unsigned int negative
= 0;
33478 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
33480 tree cst
= VECTOR_CST_ELT (arg3
, i
);
33481 if (TREE_CODE (cst
) == INTEGER_CST
33482 && tree_int_cst_sign_bit (cst
))
33484 else if (TREE_CODE (cst
) == REAL_CST
33485 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
33488 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
33491 else if (TREE_CODE (arg3
) == SSA_NAME
)
33493 /* Recognize also when mask is like:
33494 __v2df src = _mm_setzero_pd ();
33495 __v2df mask = _mm_cmpeq_pd (src, src);
33497 __v8sf src = _mm256_setzero_ps ();
33498 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
33499 as that is a cheaper way to load all ones into
33500 a register than having to load a constant from
33502 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
33503 if (is_gimple_call (def_stmt
))
33505 tree fndecl
= gimple_call_fndecl (def_stmt
);
33507 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
33508 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
33510 case IX86_BUILTIN_CMPPD
:
33511 case IX86_BUILTIN_CMPPS
:
33512 case IX86_BUILTIN_CMPPD256
:
33513 case IX86_BUILTIN_CMPPS256
:
33514 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
33517 case IX86_BUILTIN_CMPEQPD
:
33518 case IX86_BUILTIN_CMPEQPS
:
33519 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
33520 && initializer_zerop (gimple_call_arg (def_stmt
,
33531 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
33536 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
33537 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
33539 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
33540 ? V4SFmode
: V4SImode
;
33541 if (target
== NULL_RTX
)
33542 target
= gen_reg_rtx (tmode
);
33543 if (tmode
== V4SFmode
)
33544 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
33546 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
33549 target
= subtarget
;
33553 case IX86_BUILTIN_XABORT
:
33554 icode
= CODE_FOR_xabort
;
33555 arg0
= CALL_EXPR_ARG (exp
, 0);
33556 op0
= expand_normal (arg0
);
33557 mode0
= insn_data
[icode
].operand
[0].mode
;
33558 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
33560 error ("the xabort's argument must be an 8-bit immediate");
33563 emit_insn (gen_xabort (op0
));
33570 for (i
= 0, d
= bdesc_special_args
;
33571 i
< ARRAY_SIZE (bdesc_special_args
);
33573 if (d
->code
== fcode
)
33574 return ix86_expand_special_args_builtin (d
, exp
, target
);
33576 for (i
= 0, d
= bdesc_args
;
33577 i
< ARRAY_SIZE (bdesc_args
);
33579 if (d
->code
== fcode
)
33582 case IX86_BUILTIN_FABSQ
:
33583 case IX86_BUILTIN_COPYSIGNQ
:
33585 /* Emit a normal call if SSE isn't available. */
33586 return expand_call (exp
, target
, ignore
);
33588 return ix86_expand_args_builtin (d
, exp
, target
);
33591 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
33592 if (d
->code
== fcode
)
33593 return ix86_expand_sse_comi (d
, exp
, target
);
33595 for (i
= 0, d
= bdesc_pcmpestr
;
33596 i
< ARRAY_SIZE (bdesc_pcmpestr
);
33598 if (d
->code
== fcode
)
33599 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
33601 for (i
= 0, d
= bdesc_pcmpistr
;
33602 i
< ARRAY_SIZE (bdesc_pcmpistr
);
33604 if (d
->code
== fcode
)
33605 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
33607 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
33608 if (d
->code
== fcode
)
33609 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
33610 (enum ix86_builtin_func_type
)
33611 d
->flag
, d
->comparison
);
33613 gcc_unreachable ();
33616 /* Returns a function decl for a vectorized version of the builtin function
33617 with builtin function code FN and the result vector type TYPE, or NULL_TREE
33618 if it is not available. */
33621 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
33624 enum machine_mode in_mode
, out_mode
;
33626 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
33628 if (TREE_CODE (type_out
) != VECTOR_TYPE
33629 || TREE_CODE (type_in
) != VECTOR_TYPE
33630 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
33633 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33634 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
33635 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33636 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33640 case BUILT_IN_SQRT
:
33641 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33643 if (out_n
== 2 && in_n
== 2)
33644 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
33645 else if (out_n
== 4 && in_n
== 4)
33646 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
33650 case BUILT_IN_SQRTF
:
33651 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33653 if (out_n
== 4 && in_n
== 4)
33654 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
33655 else if (out_n
== 8 && in_n
== 8)
33656 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
33660 case BUILT_IN_IFLOOR
:
33661 case BUILT_IN_LFLOOR
:
33662 case BUILT_IN_LLFLOOR
:
33663 /* The round insn does not trap on denormals. */
33664 if (flag_trapping_math
|| !TARGET_ROUND
)
33667 if (out_mode
== SImode
&& in_mode
== DFmode
)
33669 if (out_n
== 4 && in_n
== 2)
33670 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
33671 else if (out_n
== 8 && in_n
== 4)
33672 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
33676 case BUILT_IN_IFLOORF
:
33677 case BUILT_IN_LFLOORF
:
33678 case BUILT_IN_LLFLOORF
:
33679 /* The round insn does not trap on denormals. */
33680 if (flag_trapping_math
|| !TARGET_ROUND
)
33683 if (out_mode
== SImode
&& in_mode
== SFmode
)
33685 if (out_n
== 4 && in_n
== 4)
33686 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
33687 else if (out_n
== 8 && in_n
== 8)
33688 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
33692 case BUILT_IN_ICEIL
:
33693 case BUILT_IN_LCEIL
:
33694 case BUILT_IN_LLCEIL
:
33695 /* The round insn does not trap on denormals. */
33696 if (flag_trapping_math
|| !TARGET_ROUND
)
33699 if (out_mode
== SImode
&& in_mode
== DFmode
)
33701 if (out_n
== 4 && in_n
== 2)
33702 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
33703 else if (out_n
== 8 && in_n
== 4)
33704 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
33708 case BUILT_IN_ICEILF
:
33709 case BUILT_IN_LCEILF
:
33710 case BUILT_IN_LLCEILF
:
33711 /* The round insn does not trap on denormals. */
33712 if (flag_trapping_math
|| !TARGET_ROUND
)
33715 if (out_mode
== SImode
&& in_mode
== SFmode
)
33717 if (out_n
== 4 && in_n
== 4)
33718 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
33719 else if (out_n
== 8 && in_n
== 8)
33720 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
33724 case BUILT_IN_IRINT
:
33725 case BUILT_IN_LRINT
:
33726 case BUILT_IN_LLRINT
:
33727 if (out_mode
== SImode
&& in_mode
== DFmode
)
33729 if (out_n
== 4 && in_n
== 2)
33730 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
33731 else if (out_n
== 8 && in_n
== 4)
33732 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
33736 case BUILT_IN_IRINTF
:
33737 case BUILT_IN_LRINTF
:
33738 case BUILT_IN_LLRINTF
:
33739 if (out_mode
== SImode
&& in_mode
== SFmode
)
33741 if (out_n
== 4 && in_n
== 4)
33742 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
33743 else if (out_n
== 8 && in_n
== 8)
33744 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
33748 case BUILT_IN_IROUND
:
33749 case BUILT_IN_LROUND
:
33750 case BUILT_IN_LLROUND
:
33751 /* The round insn does not trap on denormals. */
33752 if (flag_trapping_math
|| !TARGET_ROUND
)
33755 if (out_mode
== SImode
&& in_mode
== DFmode
)
33757 if (out_n
== 4 && in_n
== 2)
33758 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
33759 else if (out_n
== 8 && in_n
== 4)
33760 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
33764 case BUILT_IN_IROUNDF
:
33765 case BUILT_IN_LROUNDF
:
33766 case BUILT_IN_LLROUNDF
:
33767 /* The round insn does not trap on denormals. */
33768 if (flag_trapping_math
|| !TARGET_ROUND
)
33771 if (out_mode
== SImode
&& in_mode
== SFmode
)
33773 if (out_n
== 4 && in_n
== 4)
33774 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
33775 else if (out_n
== 8 && in_n
== 8)
33776 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
33780 case BUILT_IN_COPYSIGN
:
33781 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33783 if (out_n
== 2 && in_n
== 2)
33784 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
33785 else if (out_n
== 4 && in_n
== 4)
33786 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
33790 case BUILT_IN_COPYSIGNF
:
33791 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33793 if (out_n
== 4 && in_n
== 4)
33794 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
33795 else if (out_n
== 8 && in_n
== 8)
33796 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
33800 case BUILT_IN_FLOOR
:
33801 /* The round insn does not trap on denormals. */
33802 if (flag_trapping_math
|| !TARGET_ROUND
)
33805 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33807 if (out_n
== 2 && in_n
== 2)
33808 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
33809 else if (out_n
== 4 && in_n
== 4)
33810 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
33814 case BUILT_IN_FLOORF
:
33815 /* The round insn does not trap on denormals. */
33816 if (flag_trapping_math
|| !TARGET_ROUND
)
33819 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33821 if (out_n
== 4 && in_n
== 4)
33822 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
33823 else if (out_n
== 8 && in_n
== 8)
33824 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
33828 case BUILT_IN_CEIL
:
33829 /* The round insn does not trap on denormals. */
33830 if (flag_trapping_math
|| !TARGET_ROUND
)
33833 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33835 if (out_n
== 2 && in_n
== 2)
33836 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
33837 else if (out_n
== 4 && in_n
== 4)
33838 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
33842 case BUILT_IN_CEILF
:
33843 /* The round insn does not trap on denormals. */
33844 if (flag_trapping_math
|| !TARGET_ROUND
)
33847 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33849 if (out_n
== 4 && in_n
== 4)
33850 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
33851 else if (out_n
== 8 && in_n
== 8)
33852 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
33856 case BUILT_IN_TRUNC
:
33857 /* The round insn does not trap on denormals. */
33858 if (flag_trapping_math
|| !TARGET_ROUND
)
33861 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33863 if (out_n
== 2 && in_n
== 2)
33864 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
33865 else if (out_n
== 4 && in_n
== 4)
33866 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
33870 case BUILT_IN_TRUNCF
:
33871 /* The round insn does not trap on denormals. */
33872 if (flag_trapping_math
|| !TARGET_ROUND
)
33875 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33877 if (out_n
== 4 && in_n
== 4)
33878 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
33879 else if (out_n
== 8 && in_n
== 8)
33880 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
33884 case BUILT_IN_RINT
:
33885 /* The round insn does not trap on denormals. */
33886 if (flag_trapping_math
|| !TARGET_ROUND
)
33889 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33891 if (out_n
== 2 && in_n
== 2)
33892 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
33893 else if (out_n
== 4 && in_n
== 4)
33894 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
33898 case BUILT_IN_RINTF
:
33899 /* The round insn does not trap on denormals. */
33900 if (flag_trapping_math
|| !TARGET_ROUND
)
33903 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33905 if (out_n
== 4 && in_n
== 4)
33906 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
33907 else if (out_n
== 8 && in_n
== 8)
33908 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
33912 case BUILT_IN_ROUND
:
33913 /* The round insn does not trap on denormals. */
33914 if (flag_trapping_math
|| !TARGET_ROUND
)
33917 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33919 if (out_n
== 2 && in_n
== 2)
33920 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
33921 else if (out_n
== 4 && in_n
== 4)
33922 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
33926 case BUILT_IN_ROUNDF
:
33927 /* The round insn does not trap on denormals. */
33928 if (flag_trapping_math
|| !TARGET_ROUND
)
33931 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33933 if (out_n
== 4 && in_n
== 4)
33934 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
33935 else if (out_n
== 8 && in_n
== 8)
33936 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
33941 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33943 if (out_n
== 2 && in_n
== 2)
33944 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
33945 if (out_n
== 4 && in_n
== 4)
33946 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
33950 case BUILT_IN_FMAF
:
33951 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33953 if (out_n
== 4 && in_n
== 4)
33954 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
33955 if (out_n
== 8 && in_n
== 8)
33956 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
33964 /* Dispatch to a handler for a vectorization library. */
33965 if (ix86_veclib_handler
)
33966 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
33972 /* Handler for an SVML-style interface to
33973 a library with vectorized intrinsics. */
33976 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
33979 tree fntype
, new_fndecl
, args
;
33982 enum machine_mode el_mode
, in_mode
;
33985 /* The SVML is suitable for unsafe math only. */
33986 if (!flag_unsafe_math_optimizations
)
33989 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33990 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33991 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33992 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33993 if (el_mode
!= in_mode
34001 case BUILT_IN_LOG10
:
34003 case BUILT_IN_TANH
:
34005 case BUILT_IN_ATAN
:
34006 case BUILT_IN_ATAN2
:
34007 case BUILT_IN_ATANH
:
34008 case BUILT_IN_CBRT
:
34009 case BUILT_IN_SINH
:
34011 case BUILT_IN_ASINH
:
34012 case BUILT_IN_ASIN
:
34013 case BUILT_IN_COSH
:
34015 case BUILT_IN_ACOSH
:
34016 case BUILT_IN_ACOS
:
34017 if (el_mode
!= DFmode
|| n
!= 2)
34021 case BUILT_IN_EXPF
:
34022 case BUILT_IN_LOGF
:
34023 case BUILT_IN_LOG10F
:
34024 case BUILT_IN_POWF
:
34025 case BUILT_IN_TANHF
:
34026 case BUILT_IN_TANF
:
34027 case BUILT_IN_ATANF
:
34028 case BUILT_IN_ATAN2F
:
34029 case BUILT_IN_ATANHF
:
34030 case BUILT_IN_CBRTF
:
34031 case BUILT_IN_SINHF
:
34032 case BUILT_IN_SINF
:
34033 case BUILT_IN_ASINHF
:
34034 case BUILT_IN_ASINF
:
34035 case BUILT_IN_COSHF
:
34036 case BUILT_IN_COSF
:
34037 case BUILT_IN_ACOSHF
:
34038 case BUILT_IN_ACOSF
:
34039 if (el_mode
!= SFmode
|| n
!= 4)
34047 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34049 if (fn
== BUILT_IN_LOGF
)
34050 strcpy (name
, "vmlsLn4");
34051 else if (fn
== BUILT_IN_LOG
)
34052 strcpy (name
, "vmldLn2");
34055 sprintf (name
, "vmls%s", bname
+10);
34056 name
[strlen (name
)-1] = '4';
34059 sprintf (name
, "vmld%s2", bname
+10);
34061 /* Convert to uppercase. */
34065 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34067 args
= TREE_CHAIN (args
))
34071 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34073 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34075 /* Build a function declaration for the vectorized function. */
34076 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34077 FUNCTION_DECL
, get_identifier (name
), fntype
);
34078 TREE_PUBLIC (new_fndecl
) = 1;
34079 DECL_EXTERNAL (new_fndecl
) = 1;
34080 DECL_IS_NOVOPS (new_fndecl
) = 1;
34081 TREE_READONLY (new_fndecl
) = 1;
34086 /* Handler for an ACML-style interface to
34087 a library with vectorized intrinsics. */
34090 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
34092 char name
[20] = "__vr.._";
34093 tree fntype
, new_fndecl
, args
;
34096 enum machine_mode el_mode
, in_mode
;
34099 /* The ACML is 64bits only and suitable for unsafe math only as
34100 it does not correctly support parts of IEEE with the required
34101 precision such as denormals. */
34103 || !flag_unsafe_math_optimizations
)
34106 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
34107 n
= TYPE_VECTOR_SUBPARTS (type_out
);
34108 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
34109 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
34110 if (el_mode
!= in_mode
34120 case BUILT_IN_LOG2
:
34121 case BUILT_IN_LOG10
:
34124 if (el_mode
!= DFmode
34129 case BUILT_IN_SINF
:
34130 case BUILT_IN_COSF
:
34131 case BUILT_IN_EXPF
:
34132 case BUILT_IN_POWF
:
34133 case BUILT_IN_LOGF
:
34134 case BUILT_IN_LOG2F
:
34135 case BUILT_IN_LOG10F
:
34138 if (el_mode
!= SFmode
34147 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34148 sprintf (name
+ 7, "%s", bname
+10);
34151 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34153 args
= TREE_CHAIN (args
))
34157 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34159 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34161 /* Build a function declaration for the vectorized function. */
34162 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34163 FUNCTION_DECL
, get_identifier (name
), fntype
);
34164 TREE_PUBLIC (new_fndecl
) = 1;
34165 DECL_EXTERNAL (new_fndecl
) = 1;
34166 DECL_IS_NOVOPS (new_fndecl
) = 1;
34167 TREE_READONLY (new_fndecl
) = 1;
34172 /* Returns a decl of a function that implements gather load with
34173 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
34174 Return NULL_TREE if it is not available. */
34177 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
34178 const_tree index_type
, int scale
)
34181 enum ix86_builtins code
;
34186 if ((TREE_CODE (index_type
) != INTEGER_TYPE
34187 && !POINTER_TYPE_P (index_type
))
34188 || (TYPE_MODE (index_type
) != SImode
34189 && TYPE_MODE (index_type
) != DImode
))
34192 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
34195 /* v*gather* insn sign extends index to pointer mode. */
34196 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
34197 && TYPE_UNSIGNED (index_type
))
34202 || (scale
& (scale
- 1)) != 0)
34205 si
= TYPE_MODE (index_type
) == SImode
;
34206 switch (TYPE_MODE (mem_vectype
))
34209 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
34212 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
34215 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
34218 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
34221 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
34224 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
34227 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
34230 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
34236 return ix86_builtins
[code
];
34239 /* Returns a code for a target-specific builtin that implements
34240 reciprocal of the function, or NULL_TREE if not available. */
34243 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
34244 bool sqrt ATTRIBUTE_UNUSED
)
34246 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
34247 && flag_finite_math_only
&& !flag_trapping_math
34248 && flag_unsafe_math_optimizations
))
34252 /* Machine dependent builtins. */
34255 /* Vectorized version of sqrt to rsqrt conversion. */
34256 case IX86_BUILTIN_SQRTPS_NR
:
34257 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
34259 case IX86_BUILTIN_SQRTPS_NR256
:
34260 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
34266 /* Normal builtins. */
34269 /* Sqrt to rsqrt conversion. */
34270 case BUILT_IN_SQRTF
:
34271 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
34278 /* Helper for avx_vpermilps256_operand et al. This is also used by
34279 the expansion functions to turn the parallel back into a mask.
34280 The return value is 0 for no match and the imm8+1 for a match. */
34283 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
34285 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
34287 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34289 if (XVECLEN (par
, 0) != (int) nelt
)
34292 /* Validate that all of the elements are constants, and not totally
34293 out of range. Copy the data into an integral array to make the
34294 subsequent checks easier. */
34295 for (i
= 0; i
< nelt
; ++i
)
34297 rtx er
= XVECEXP (par
, 0, i
);
34298 unsigned HOST_WIDE_INT ei
;
34300 if (!CONST_INT_P (er
))
34311 /* In the 256-bit DFmode case, we can only move elements within
34313 for (i
= 0; i
< 2; ++i
)
34317 mask
|= ipar
[i
] << i
;
34319 for (i
= 2; i
< 4; ++i
)
34323 mask
|= (ipar
[i
] - 2) << i
;
34328 /* In the 256-bit SFmode case, we have full freedom of movement
34329 within the low 128-bit lane, but the high 128-bit lane must
34330 mirror the exact same pattern. */
34331 for (i
= 0; i
< 4; ++i
)
34332 if (ipar
[i
] + 4 != ipar
[i
+ 4])
34339 /* In the 128-bit case, we've full freedom in the placement of
34340 the elements from the source operand. */
34341 for (i
= 0; i
< nelt
; ++i
)
34342 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
34346 gcc_unreachable ();
34349 /* Make sure success has a non-zero value by adding one. */
34353 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
34354 the expansion functions to turn the parallel back into a mask.
34355 The return value is 0 for no match and the imm8+1 for a match. */
34358 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
34360 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
34362 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34364 if (XVECLEN (par
, 0) != (int) nelt
)
34367 /* Validate that all of the elements are constants, and not totally
34368 out of range. Copy the data into an integral array to make the
34369 subsequent checks easier. */
34370 for (i
= 0; i
< nelt
; ++i
)
34372 rtx er
= XVECEXP (par
, 0, i
);
34373 unsigned HOST_WIDE_INT ei
;
34375 if (!CONST_INT_P (er
))
34378 if (ei
>= 2 * nelt
)
34383 /* Validate that the halves of the permute are halves. */
34384 for (i
= 0; i
< nelt2
- 1; ++i
)
34385 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34387 for (i
= nelt2
; i
< nelt
- 1; ++i
)
34388 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34391 /* Reconstruct the mask. */
34392 for (i
= 0; i
< 2; ++i
)
34394 unsigned e
= ipar
[i
* nelt2
];
34398 mask
|= e
<< (i
* 4);
34401 /* Make sure success has a non-zero value by adding one. */
34405 /* Store OPERAND to the memory after reload is completed. This means
34406 that we can't easily use assign_stack_local. */
34408 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
34412 gcc_assert (reload_completed
);
34413 if (ix86_using_red_zone ())
34415 result
= gen_rtx_MEM (mode
,
34416 gen_rtx_PLUS (Pmode
,
34418 GEN_INT (-RED_ZONE_SIZE
)));
34419 emit_move_insn (result
, operand
);
34421 else if (TARGET_64BIT
)
34427 operand
= gen_lowpart (DImode
, operand
);
34431 gen_rtx_SET (VOIDmode
,
34432 gen_rtx_MEM (DImode
,
34433 gen_rtx_PRE_DEC (DImode
,
34434 stack_pointer_rtx
)),
34438 gcc_unreachable ();
34440 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34449 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
34451 gen_rtx_SET (VOIDmode
,
34452 gen_rtx_MEM (SImode
,
34453 gen_rtx_PRE_DEC (Pmode
,
34454 stack_pointer_rtx
)),
34457 gen_rtx_SET (VOIDmode
,
34458 gen_rtx_MEM (SImode
,
34459 gen_rtx_PRE_DEC (Pmode
,
34460 stack_pointer_rtx
)),
34465 /* Store HImodes as SImodes. */
34466 operand
= gen_lowpart (SImode
, operand
);
34470 gen_rtx_SET (VOIDmode
,
34471 gen_rtx_MEM (GET_MODE (operand
),
34472 gen_rtx_PRE_DEC (SImode
,
34473 stack_pointer_rtx
)),
34477 gcc_unreachable ();
34479 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34484 /* Free operand from the memory. */
34486 ix86_free_from_memory (enum machine_mode mode
)
34488 if (!ix86_using_red_zone ())
34492 if (mode
== DImode
|| TARGET_64BIT
)
34496 /* Use LEA to deallocate stack space. In peephole2 it will be converted
34497 to pop or add instruction if registers are available. */
34498 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
34499 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
34504 /* Return a register priority for hard reg REGNO. */
34506 ix86_register_priority (int hard_regno
)
34508 /* ebp and r13 as the base always wants a displacement, r12 as the
34509 base always wants an index. So discourage their usage in an
34511 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
34513 if (hard_regno
== BP_REG
)
34515 /* New x86-64 int registers result in bigger code size. Discourage
34517 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
34519 /* New x86-64 SSE registers result in bigger code size. Discourage
34521 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
34523 /* Usage of AX register results in smaller code. Prefer it. */
34524 if (hard_regno
== 0)
34529 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
34531 Put float CONST_DOUBLE in the constant pool instead of fp regs.
34532 QImode must go into class Q_REGS.
34533 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
34534 movdf to do mem-to-mem moves through integer regs. */
34537 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
34539 enum machine_mode mode
= GET_MODE (x
);
34541 /* We're only allowed to return a subclass of CLASS. Many of the
34542 following checks fail for NO_REGS, so eliminate that early. */
34543 if (regclass
== NO_REGS
)
34546 /* All classes can load zeros. */
34547 if (x
== CONST0_RTX (mode
))
34550 /* Force constants into memory if we are loading a (nonzero) constant into
34551 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
34552 instructions to load from a constant. */
34554 && (MAYBE_MMX_CLASS_P (regclass
)
34555 || MAYBE_SSE_CLASS_P (regclass
)
34556 || MAYBE_MASK_CLASS_P (regclass
)))
34559 /* Prefer SSE regs only, if we can use them for math. */
34560 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
34561 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34563 /* Floating-point constants need more complex checks. */
34564 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
34566 /* General regs can load everything. */
34567 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
34570 /* Floats can load 0 and 1 plus some others. Note that we eliminated
34571 zero above. We only want to wind up preferring 80387 registers if
34572 we plan on doing computation with them. */
34574 && standard_80387_constant_p (x
) > 0)
34576 /* Limit class to non-sse. */
34577 if (regclass
== FLOAT_SSE_REGS
)
34579 if (regclass
== FP_TOP_SSE_REGS
)
34581 if (regclass
== FP_SECOND_SSE_REGS
)
34582 return FP_SECOND_REG
;
34583 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
34590 /* Generally when we see PLUS here, it's the function invariant
34591 (plus soft-fp const_int). Which can only be computed into general
34593 if (GET_CODE (x
) == PLUS
)
34594 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
34596 /* QImode constants are easy to load, but non-constant QImode data
34597 must go into Q_REGS. */
34598 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
34600 if (reg_class_subset_p (regclass
, Q_REGS
))
34602 if (reg_class_subset_p (Q_REGS
, regclass
))
34610 /* Discourage putting floating-point values in SSE registers unless
34611 SSE math is being used, and likewise for the 387 registers. */
34613 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
34615 enum machine_mode mode
= GET_MODE (x
);
34617 /* Restrict the output reload class to the register bank that we are doing
34618 math on. If we would like not to return a subset of CLASS, reject this
34619 alternative: if reload cannot do this, it will still use its choice. */
34620 mode
= GET_MODE (x
);
34621 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
34622 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
34624 if (X87_FLOAT_MODE_P (mode
))
34626 if (regclass
== FP_TOP_SSE_REGS
)
34628 else if (regclass
== FP_SECOND_SSE_REGS
)
34629 return FP_SECOND_REG
;
34631 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34638 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
34639 enum machine_mode mode
, secondary_reload_info
*sri
)
34641 /* Double-word spills from general registers to non-offsettable memory
34642 references (zero-extended addresses) require special handling. */
34645 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
34646 && INTEGER_CLASS_P (rclass
)
34647 && !offsettable_memref_p (x
))
34650 ? CODE_FOR_reload_noff_load
34651 : CODE_FOR_reload_noff_store
);
34652 /* Add the cost of moving address to a temporary. */
34653 sri
->extra_cost
= 1;
34658 /* QImode spills from non-QI registers require
34659 intermediate register on 32bit targets. */
34661 && (MAYBE_MASK_CLASS_P (rclass
)
34662 || (!TARGET_64BIT
&& !in_p
34663 && INTEGER_CLASS_P (rclass
)
34664 && MAYBE_NON_Q_CLASS_P (rclass
))))
34673 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
34674 regno
= true_regnum (x
);
34676 /* Return Q_REGS if the operand is in memory. */
34681 /* This condition handles corner case where an expression involving
34682 pointers gets vectorized. We're trying to use the address of a
34683 stack slot as a vector initializer.
34685 (set (reg:V2DI 74 [ vect_cst_.2 ])
34686 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
34688 Eventually frame gets turned into sp+offset like this:
34690 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34691 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34692 (const_int 392 [0x188]))))
34694 That later gets turned into:
34696 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34697 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34698 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
34700 We'll have the following reload recorded:
34702 Reload 0: reload_in (DI) =
34703 (plus:DI (reg/f:DI 7 sp)
34704 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
34705 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34706 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
34707 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
34708 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34709 reload_reg_rtx: (reg:V2DI 22 xmm1)
34711 Which isn't going to work since SSE instructions can't handle scalar
34712 additions. Returning GENERAL_REGS forces the addition into integer
34713 register and reload can handle subsequent reloads without problems. */
34715 if (in_p
&& GET_CODE (x
) == PLUS
34716 && SSE_CLASS_P (rclass
)
34717 && SCALAR_INT_MODE_P (mode
))
34718 return GENERAL_REGS
;
34723 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
34726 ix86_class_likely_spilled_p (reg_class_t rclass
)
34737 case SSE_FIRST_REG
:
34739 case FP_SECOND_REG
:
34750 /* If we are copying between general and FP registers, we need a memory
34751 location. The same is true for SSE and MMX registers.
34753 To optimize register_move_cost performance, allow inline variant.
34755 The macro can't work reliably when one of the CLASSES is class containing
34756 registers from multiple units (SSE, MMX, integer). We avoid this by never
34757 combining those units in single alternative in the machine description.
34758 Ensure that this constraint holds to avoid unexpected surprises.
34760 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34761 enforce these sanity checks. */
34764 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34765 enum machine_mode mode
, int strict
)
34767 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
34769 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34770 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34771 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34772 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34773 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34774 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34776 gcc_assert (!strict
|| lra_in_progress
);
34780 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34783 /* ??? This is a lie. We do have moves between mmx/general, and for
34784 mmx/sse2. But by saying we need secondary memory we discourage the
34785 register allocator from using the mmx registers unless needed. */
34786 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34789 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34791 /* SSE1 doesn't have any direct moves from other classes. */
34795 /* If the target says that inter-unit moves are more expensive
34796 than moving through memory, then don't generate them. */
34797 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34798 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34801 /* Between SSE and general, we have moves no larger than word size. */
34802 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34810 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34811 enum machine_mode mode
, int strict
)
34813 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
34816 /* Implement the TARGET_CLASS_MAX_NREGS hook.
34818 On the 80386, this is the size of MODE in words,
34819 except in the FP regs, where a single reg is always enough. */
34821 static unsigned char
34822 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
34824 if (MAYBE_INTEGER_CLASS_P (rclass
))
34826 if (mode
== XFmode
)
34827 return (TARGET_64BIT
? 2 : 3);
34828 else if (mode
== XCmode
)
34829 return (TARGET_64BIT
? 4 : 6);
34831 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
34835 if (COMPLEX_MODE_P (mode
))
34842 /* Return true if the registers in CLASS cannot represent the change from
34843 modes FROM to TO. */
34846 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
34847 enum reg_class regclass
)
34852 /* x87 registers can't do subreg at all, as all values are reformatted
34853 to extended precision. */
34854 if (MAYBE_FLOAT_CLASS_P (regclass
))
34857 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
34859 /* Vector registers do not support QI or HImode loads. If we don't
34860 disallow a change to these modes, reload will assume it's ok to
34861 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
34862 the vec_dupv4hi pattern. */
34863 if (GET_MODE_SIZE (from
) < 4)
34866 /* Vector registers do not support subreg with nonzero offsets, which
34867 are otherwise valid for integer registers. Since we can't see
34868 whether we have a nonzero offset from here, prohibit all
34869 nonparadoxical subregs changing size. */
34870 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
34877 /* Return the cost of moving data of mode M between a
34878 register and memory. A value of 2 is the default; this cost is
34879 relative to those in `REGISTER_MOVE_COST'.
34881 This function is used extensively by register_move_cost that is used to
34882 build tables at startup. Make it inline in this case.
34883 When IN is 2, return maximum of in and out move cost.
34885 If moving between registers and memory is more expensive than
34886 between two registers, you should define this macro to express the
34889 Model also increased moving costs of QImode registers in non
34893 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
34897 if (FLOAT_CLASS_P (regclass
))
34915 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
34916 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
34918 if (SSE_CLASS_P (regclass
))
34921 switch (GET_MODE_SIZE (mode
))
34936 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
34937 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
34939 if (MMX_CLASS_P (regclass
))
34942 switch (GET_MODE_SIZE (mode
))
34954 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
34955 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
34957 switch (GET_MODE_SIZE (mode
))
34960 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
34963 return ix86_cost
->int_store
[0];
34964 if (TARGET_PARTIAL_REG_DEPENDENCY
34965 && optimize_function_for_speed_p (cfun
))
34966 cost
= ix86_cost
->movzbl_load
;
34968 cost
= ix86_cost
->int_load
[0];
34970 return MAX (cost
, ix86_cost
->int_store
[0]);
34976 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
34978 return ix86_cost
->movzbl_load
;
34980 return ix86_cost
->int_store
[0] + 4;
34985 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
34986 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
34988 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
34989 if (mode
== TFmode
)
34992 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
34994 cost
= ix86_cost
->int_load
[2];
34996 cost
= ix86_cost
->int_store
[2];
34997 return (cost
* (((int) GET_MODE_SIZE (mode
)
34998 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
35003 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
35006 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
35010 /* Return the cost of moving data from a register in class CLASS1 to
35011 one in class CLASS2.
35013 It is not required that the cost always equal 2 when FROM is the same as TO;
35014 on some machines it is expensive to move between registers if they are not
35015 general registers. */
35018 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
35019 reg_class_t class2_i
)
35021 enum reg_class class1
= (enum reg_class
) class1_i
;
35022 enum reg_class class2
= (enum reg_class
) class2_i
;
35024 /* In case we require secondary memory, compute cost of the store followed
35025 by load. In order to avoid bad register allocation choices, we need
35026 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
35028 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
35032 cost
+= inline_memory_move_cost (mode
, class1
, 2);
35033 cost
+= inline_memory_move_cost (mode
, class2
, 2);
35035 /* In case of copying from general_purpose_register we may emit multiple
35036 stores followed by single load causing memory size mismatch stall.
35037 Count this as arbitrarily high cost of 20. */
35038 if (targetm
.class_max_nregs (class1
, mode
)
35039 > targetm
.class_max_nregs (class2
, mode
))
35042 /* In the case of FP/MMX moves, the registers actually overlap, and we
35043 have to switch modes in order to treat them differently. */
35044 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
35045 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
35051 /* Moves between SSE/MMX and integer unit are expensive. */
35052 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
35053 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
35055 /* ??? By keeping returned value relatively high, we limit the number
35056 of moves between integer and MMX/SSE registers for all targets.
35057 Additionally, high value prevents problem with x86_modes_tieable_p(),
35058 where integer modes in MMX/SSE registers are not tieable
35059 because of missing QImode and HImode moves to, from or between
35060 MMX/SSE registers. */
35061 return MAX (8, ix86_cost
->mmxsse_to_integer
);
35063 if (MAYBE_FLOAT_CLASS_P (class1
))
35064 return ix86_cost
->fp_move
;
35065 if (MAYBE_SSE_CLASS_P (class1
))
35066 return ix86_cost
->sse_move
;
35067 if (MAYBE_MMX_CLASS_P (class1
))
35068 return ix86_cost
->mmx_move
;
35072 /* Return TRUE if hard register REGNO can hold a value of machine-mode
35076 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
35078 /* Flags and only flags can only hold CCmode values. */
35079 if (CC_REGNO_P (regno
))
35080 return GET_MODE_CLASS (mode
) == MODE_CC
;
35081 if (GET_MODE_CLASS (mode
) == MODE_CC
35082 || GET_MODE_CLASS (mode
) == MODE_RANDOM
35083 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
35085 if (STACK_REGNO_P (regno
))
35086 return VALID_FP_MODE_P (mode
);
35087 if (MASK_REGNO_P (regno
))
35088 return VALID_MASK_REG_MODE (mode
);
35089 if (BND_REGNO_P (regno
))
35090 return VALID_BND_REG_MODE (mode
);
35091 if (SSE_REGNO_P (regno
))
35093 /* We implement the move patterns for all vector modes into and
35094 out of SSE registers, even when no operation instructions
35097 /* For AVX-512 we allow, regardless of regno:
35099 - any of 512-bit wide vector mode
35100 - any scalar mode. */
35103 || VALID_AVX512F_REG_MODE (mode
)
35104 || VALID_AVX512F_SCALAR_MODE (mode
)))
35107 /* xmm16-xmm31 are only available for AVX-512. */
35108 if (EXT_REX_SSE_REGNO_P (regno
))
35111 /* OImode move is available only when AVX is enabled. */
35112 return ((TARGET_AVX
&& mode
== OImode
)
35113 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35114 || VALID_SSE_REG_MODE (mode
)
35115 || VALID_SSE2_REG_MODE (mode
)
35116 || VALID_MMX_REG_MODE (mode
)
35117 || VALID_MMX_REG_MODE_3DNOW (mode
));
35119 if (MMX_REGNO_P (regno
))
35121 /* We implement the move patterns for 3DNOW modes even in MMX mode,
35122 so if the register is available at all, then we can move data of
35123 the given mode into or out of it. */
35124 return (VALID_MMX_REG_MODE (mode
)
35125 || VALID_MMX_REG_MODE_3DNOW (mode
));
35128 if (mode
== QImode
)
35130 /* Take care for QImode values - they can be in non-QI regs,
35131 but then they do cause partial register stalls. */
35132 if (ANY_QI_REGNO_P (regno
))
35134 if (!TARGET_PARTIAL_REG_STALL
)
35136 /* LRA checks if the hard register is OK for the given mode.
35137 QImode values can live in non-QI regs, so we allow all
35139 if (lra_in_progress
)
35141 return !can_create_pseudo_p ();
35143 /* We handle both integer and floats in the general purpose registers. */
35144 else if (VALID_INT_MODE_P (mode
))
35146 else if (VALID_FP_MODE_P (mode
))
35148 else if (VALID_DFP_MODE_P (mode
))
35150 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
35151 on to use that value in smaller contexts, this can easily force a
35152 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
35153 supporting DImode, allow it. */
35154 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
35160 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
35161 tieable integer mode. */
35164 ix86_tieable_integer_mode_p (enum machine_mode mode
)
35173 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
35176 return TARGET_64BIT
;
35183 /* Return true if MODE1 is accessible in a register that can hold MODE2
35184 without copying. That is, all register classes that can hold MODE2
35185 can also hold MODE1. */
35188 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
35190 if (mode1
== mode2
)
35193 if (ix86_tieable_integer_mode_p (mode1
)
35194 && ix86_tieable_integer_mode_p (mode2
))
35197 /* MODE2 being XFmode implies fp stack or general regs, which means we
35198 can tie any smaller floating point modes to it. Note that we do not
35199 tie this with TFmode. */
35200 if (mode2
== XFmode
)
35201 return mode1
== SFmode
|| mode1
== DFmode
;
35203 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
35204 that we can tie it with SFmode. */
35205 if (mode2
== DFmode
)
35206 return mode1
== SFmode
;
35208 /* If MODE2 is only appropriate for an SSE register, then tie with
35209 any other mode acceptable to SSE registers. */
35210 if (GET_MODE_SIZE (mode2
) == 32
35211 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35212 return (GET_MODE_SIZE (mode1
) == 32
35213 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35214 if (GET_MODE_SIZE (mode2
) == 16
35215 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35216 return (GET_MODE_SIZE (mode1
) == 16
35217 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35219 /* If MODE2 is appropriate for an MMX register, then tie
35220 with any other mode acceptable to MMX registers. */
35221 if (GET_MODE_SIZE (mode2
) == 8
35222 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
35223 return (GET_MODE_SIZE (mode1
) == 8
35224 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
35229 /* Return the cost of moving between two registers of mode MODE. */
35232 ix86_set_reg_reg_cost (enum machine_mode mode
)
35234 unsigned int units
= UNITS_PER_WORD
;
35236 switch (GET_MODE_CLASS (mode
))
35242 units
= GET_MODE_SIZE (CCmode
);
35246 if ((TARGET_SSE
&& mode
== TFmode
)
35247 || (TARGET_80387
&& mode
== XFmode
)
35248 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
35249 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
35250 units
= GET_MODE_SIZE (mode
);
35253 case MODE_COMPLEX_FLOAT
:
35254 if ((TARGET_SSE
&& mode
== TCmode
)
35255 || (TARGET_80387
&& mode
== XCmode
)
35256 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
35257 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
35258 units
= GET_MODE_SIZE (mode
);
35261 case MODE_VECTOR_INT
:
35262 case MODE_VECTOR_FLOAT
:
35263 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
35264 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35265 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
35266 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
35267 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
35268 units
= GET_MODE_SIZE (mode
);
35271 /* Return the cost of moving between two registers of mode MODE,
35272 assuming that the move will be in pieces of at most UNITS bytes. */
35273 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
35276 /* Compute a (partial) cost for rtx X. Return true if the complete
35277 cost has been computed, and false if subexpressions should be
35278 scanned. In either case, *TOTAL contains the cost result. */
35281 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
35284 enum rtx_code code
= (enum rtx_code
) code_i
;
35285 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
35286 enum machine_mode mode
= GET_MODE (x
);
35287 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
35292 if (register_operand (SET_DEST (x
), VOIDmode
)
35293 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
35295 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
35304 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
35306 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
35308 else if (flag_pic
&& SYMBOLIC_CONST (x
)
35310 || (!GET_CODE (x
) != LABEL_REF
35311 && (GET_CODE (x
) != SYMBOL_REF
35312 || !SYMBOL_REF_LOCAL_P (x
)))))
35319 if (mode
== VOIDmode
)
35324 switch (standard_80387_constant_p (x
))
35329 default: /* Other constants */
35336 if (SSE_FLOAT_MODE_P (mode
))
35339 switch (standard_sse_constant_p (x
))
35343 case 1: /* 0: xor eliminates false dependency */
35346 default: /* -1: cmp contains false dependency */
35351 /* Fall back to (MEM (SYMBOL_REF)), since that's where
35352 it'll probably end up. Add a penalty for size. */
35353 *total
= (COSTS_N_INSNS (1)
35354 + (flag_pic
!= 0 && !TARGET_64BIT
)
35355 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
35359 /* The zero extensions is often completely free on x86_64, so make
35360 it as cheap as possible. */
35361 if (TARGET_64BIT
&& mode
== DImode
35362 && GET_MODE (XEXP (x
, 0)) == SImode
)
35364 else if (TARGET_ZERO_EXTEND_WITH_AND
)
35365 *total
= cost
->add
;
35367 *total
= cost
->movzx
;
35371 *total
= cost
->movsx
;
35375 if (SCALAR_INT_MODE_P (mode
)
35376 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
35377 && CONST_INT_P (XEXP (x
, 1)))
35379 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35382 *total
= cost
->add
;
35385 if ((value
== 2 || value
== 3)
35386 && cost
->lea
<= cost
->shift_const
)
35388 *total
= cost
->lea
;
35398 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35400 /* ??? Should be SSE vector operation cost. */
35401 /* At least for published AMD latencies, this really is the same
35402 as the latency for a simple fpu operation like fabs. */
35403 /* V*QImode is emulated with 1-11 insns. */
35404 if (mode
== V16QImode
|| mode
== V32QImode
)
35407 if (TARGET_XOP
&& mode
== V16QImode
)
35409 /* For XOP we use vpshab, which requires a broadcast of the
35410 value to the variable shift insn. For constants this
35411 means a V16Q const in mem; even when we can perform the
35412 shift with one insn set the cost to prefer paddb. */
35413 if (CONSTANT_P (XEXP (x
, 1)))
35415 *total
= (cost
->fabs
35416 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
35417 + (speed
? 2 : COSTS_N_BYTES (16)));
35422 else if (TARGET_SSSE3
)
35424 *total
= cost
->fabs
* count
;
35427 *total
= cost
->fabs
;
35429 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35431 if (CONST_INT_P (XEXP (x
, 1)))
35433 if (INTVAL (XEXP (x
, 1)) > 32)
35434 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
35436 *total
= cost
->shift_const
* 2;
35440 if (GET_CODE (XEXP (x
, 1)) == AND
)
35441 *total
= cost
->shift_var
* 2;
35443 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
35448 if (CONST_INT_P (XEXP (x
, 1)))
35449 *total
= cost
->shift_const
;
35450 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
35451 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
35453 /* Return the cost after shift-and truncation. */
35454 *total
= cost
->shift_var
;
35458 *total
= cost
->shift_var
;
35466 gcc_assert (FLOAT_MODE_P (mode
));
35467 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
35469 /* ??? SSE scalar/vector cost should be used here. */
35470 /* ??? Bald assumption that fma has the same cost as fmul. */
35471 *total
= cost
->fmul
;
35472 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
35474 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
35476 if (GET_CODE (sub
) == NEG
)
35477 sub
= XEXP (sub
, 0);
35478 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
35481 if (GET_CODE (sub
) == NEG
)
35482 sub
= XEXP (sub
, 0);
35483 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
35488 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35490 /* ??? SSE scalar cost should be used here. */
35491 *total
= cost
->fmul
;
35494 else if (X87_FLOAT_MODE_P (mode
))
35496 *total
= cost
->fmul
;
35499 else if (FLOAT_MODE_P (mode
))
35501 /* ??? SSE vector cost should be used here. */
35502 *total
= cost
->fmul
;
35505 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35507 /* V*QImode is emulated with 7-13 insns. */
35508 if (mode
== V16QImode
|| mode
== V32QImode
)
35511 if (TARGET_XOP
&& mode
== V16QImode
)
35513 else if (TARGET_SSSE3
)
35515 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
35517 /* V*DImode is emulated with 5-8 insns. */
35518 else if (mode
== V2DImode
|| mode
== V4DImode
)
35520 if (TARGET_XOP
&& mode
== V2DImode
)
35521 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
35523 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
35525 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
35526 insns, including two PMULUDQ. */
35527 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
35528 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
35530 *total
= cost
->fmul
;
35535 rtx op0
= XEXP (x
, 0);
35536 rtx op1
= XEXP (x
, 1);
35538 if (CONST_INT_P (XEXP (x
, 1)))
35540 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35541 for (nbits
= 0; value
!= 0; value
&= value
- 1)
35545 /* This is arbitrary. */
35548 /* Compute costs correctly for widening multiplication. */
35549 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
35550 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
35551 == GET_MODE_SIZE (mode
))
35553 int is_mulwiden
= 0;
35554 enum machine_mode inner_mode
= GET_MODE (op0
);
35556 if (GET_CODE (op0
) == GET_CODE (op1
))
35557 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
35558 else if (CONST_INT_P (op1
))
35560 if (GET_CODE (op0
) == SIGN_EXTEND
)
35561 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
35564 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
35568 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
35571 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
35572 + nbits
* cost
->mult_bit
35573 + rtx_cost (op0
, outer_code
, opno
, speed
)
35574 + rtx_cost (op1
, outer_code
, opno
, speed
));
35583 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35584 /* ??? SSE cost should be used here. */
35585 *total
= cost
->fdiv
;
35586 else if (X87_FLOAT_MODE_P (mode
))
35587 *total
= cost
->fdiv
;
35588 else if (FLOAT_MODE_P (mode
))
35589 /* ??? SSE vector cost should be used here. */
35590 *total
= cost
->fdiv
;
35592 *total
= cost
->divide
[MODE_INDEX (mode
)];
35596 if (GET_MODE_CLASS (mode
) == MODE_INT
35597 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
35599 if (GET_CODE (XEXP (x
, 0)) == PLUS
35600 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
35601 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
35602 && CONSTANT_P (XEXP (x
, 1)))
35604 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
35605 if (val
== 2 || val
== 4 || val
== 8)
35607 *total
= cost
->lea
;
35608 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35609 outer_code
, opno
, speed
);
35610 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
35611 outer_code
, opno
, speed
);
35612 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35616 else if (GET_CODE (XEXP (x
, 0)) == MULT
35617 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
35619 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
35620 if (val
== 2 || val
== 4 || val
== 8)
35622 *total
= cost
->lea
;
35623 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35624 outer_code
, opno
, speed
);
35625 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35629 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
35631 *total
= cost
->lea
;
35632 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35633 outer_code
, opno
, speed
);
35634 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35635 outer_code
, opno
, speed
);
35636 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35643 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35645 /* ??? SSE cost should be used here. */
35646 *total
= cost
->fadd
;
35649 else if (X87_FLOAT_MODE_P (mode
))
35651 *total
= cost
->fadd
;
35654 else if (FLOAT_MODE_P (mode
))
35656 /* ??? SSE vector cost should be used here. */
35657 *total
= cost
->fadd
;
35665 if (GET_MODE_CLASS (mode
) == MODE_INT
35666 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35668 *total
= (cost
->add
* 2
35669 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
35670 << (GET_MODE (XEXP (x
, 0)) != DImode
))
35671 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
35672 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
35678 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35680 /* ??? SSE cost should be used here. */
35681 *total
= cost
->fchs
;
35684 else if (X87_FLOAT_MODE_P (mode
))
35686 *total
= cost
->fchs
;
35689 else if (FLOAT_MODE_P (mode
))
35691 /* ??? SSE vector cost should be used here. */
35692 *total
= cost
->fchs
;
35698 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35700 /* ??? Should be SSE vector operation cost. */
35701 /* At least for published AMD latencies, this really is the same
35702 as the latency for a simple fpu operation like fabs. */
35703 *total
= cost
->fabs
;
35705 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35706 *total
= cost
->add
* 2;
35708 *total
= cost
->add
;
35712 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
35713 && XEXP (XEXP (x
, 0), 1) == const1_rtx
35714 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
35715 && XEXP (x
, 1) == const0_rtx
)
35717 /* This kind of construct is implemented using test[bwl].
35718 Treat it as if we had an AND. */
35719 *total
= (cost
->add
35720 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
35721 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
35727 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35732 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35733 /* ??? SSE cost should be used here. */
35734 *total
= cost
->fabs
;
35735 else if (X87_FLOAT_MODE_P (mode
))
35736 *total
= cost
->fabs
;
35737 else if (FLOAT_MODE_P (mode
))
35738 /* ??? SSE vector cost should be used here. */
35739 *total
= cost
->fabs
;
35743 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35744 /* ??? SSE cost should be used here. */
35745 *total
= cost
->fsqrt
;
35746 else if (X87_FLOAT_MODE_P (mode
))
35747 *total
= cost
->fsqrt
;
35748 else if (FLOAT_MODE_P (mode
))
35749 /* ??? SSE vector cost should be used here. */
35750 *total
= cost
->fsqrt
;
35754 if (XINT (x
, 1) == UNSPEC_TP
)
35761 case VEC_DUPLICATE
:
35762 /* ??? Assume all of these vector manipulation patterns are
35763 recognizable. In which case they all pretty much have the
35765 *total
= cost
->fabs
;
35775 static int current_machopic_label_num
;
35777 /* Given a symbol name and its associated stub, write out the
35778 definition of the stub. */
35781 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35783 unsigned int length
;
35784 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35785 int label
= ++current_machopic_label_num
;
35787 /* For 64-bit we shouldn't get here. */
35788 gcc_assert (!TARGET_64BIT
);
35790 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35791 symb
= targetm
.strip_name_encoding (symb
);
35793 length
= strlen (stub
);
35794 binder_name
= XALLOCAVEC (char, length
+ 32);
35795 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35797 length
= strlen (symb
);
35798 symbol_name
= XALLOCAVEC (char, length
+ 32);
35799 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35801 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35803 if (MACHOPIC_ATT_STUB
)
35804 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35805 else if (MACHOPIC_PURE
)
35806 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
35808 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
35810 fprintf (file
, "%s:\n", stub
);
35811 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35813 if (MACHOPIC_ATT_STUB
)
35815 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
35817 else if (MACHOPIC_PURE
)
35820 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35821 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
35822 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
35823 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
35824 label
, lazy_ptr_name
, label
);
35825 fprintf (file
, "\tjmp\t*%%ecx\n");
35828 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
35830 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
35831 it needs no stub-binding-helper. */
35832 if (MACHOPIC_ATT_STUB
)
35835 fprintf (file
, "%s:\n", binder_name
);
35839 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
35840 fprintf (file
, "\tpushl\t%%ecx\n");
35843 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
35845 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
35847 /* N.B. Keep the correspondence of these
35848 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
35849 old-pic/new-pic/non-pic stubs; altering this will break
35850 compatibility with existing dylibs. */
35853 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35854 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
35857 /* 16-byte -mdynamic-no-pic stub. */
35858 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
35860 fprintf (file
, "%s:\n", lazy_ptr_name
);
35861 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35862 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
35864 #endif /* TARGET_MACHO */
35866 /* Order the registers for register allocator. */
35869 x86_order_regs_for_local_alloc (void)
35874 /* First allocate the local general purpose registers. */
35875 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35876 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
35877 reg_alloc_order
[pos
++] = i
;
35879 /* Global general purpose registers. */
35880 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35881 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
35882 reg_alloc_order
[pos
++] = i
;
35884 /* x87 registers come first in case we are doing FP math
35886 if (!TARGET_SSE_MATH
)
35887 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35888 reg_alloc_order
[pos
++] = i
;
35890 /* SSE registers. */
35891 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
35892 reg_alloc_order
[pos
++] = i
;
35893 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
35894 reg_alloc_order
[pos
++] = i
;
35896 /* Extended REX SSE registers. */
35897 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
35898 reg_alloc_order
[pos
++] = i
;
35900 /* Mask register. */
35901 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
35902 reg_alloc_order
[pos
++] = i
;
35904 /* MPX bound registers. */
35905 for (i
= FIRST_BND_REG
; i
<= LAST_BND_REG
; i
++)
35906 reg_alloc_order
[pos
++] = i
;
35908 /* x87 registers. */
35909 if (TARGET_SSE_MATH
)
35910 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35911 reg_alloc_order
[pos
++] = i
;
35913 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
35914 reg_alloc_order
[pos
++] = i
;
35916 /* Initialize the rest of array as we do not allocate some registers
35918 while (pos
< FIRST_PSEUDO_REGISTER
)
35919 reg_alloc_order
[pos
++] = 0;
35922 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
35923 in struct attribute_spec handler. */
35925 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
35927 int flags ATTRIBUTE_UNUSED
,
35928 bool *no_add_attrs
)
35930 if (TREE_CODE (*node
) != FUNCTION_TYPE
35931 && TREE_CODE (*node
) != METHOD_TYPE
35932 && TREE_CODE (*node
) != FIELD_DECL
35933 && TREE_CODE (*node
) != TYPE_DECL
)
35935 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35937 *no_add_attrs
= true;
35942 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
35944 *no_add_attrs
= true;
35947 if (is_attribute_p ("callee_pop_aggregate_return", name
))
35951 cst
= TREE_VALUE (args
);
35952 if (TREE_CODE (cst
) != INTEGER_CST
)
35954 warning (OPT_Wattributes
,
35955 "%qE attribute requires an integer constant argument",
35957 *no_add_attrs
= true;
35959 else if (compare_tree_int (cst
, 0) != 0
35960 && compare_tree_int (cst
, 1) != 0)
35962 warning (OPT_Wattributes
,
35963 "argument to %qE attribute is neither zero, nor one",
35965 *no_add_attrs
= true;
35974 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
35975 struct attribute_spec.handler. */
35977 ix86_handle_abi_attribute (tree
*node
, tree name
,
35978 tree args ATTRIBUTE_UNUSED
,
35979 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35981 if (TREE_CODE (*node
) != FUNCTION_TYPE
35982 && TREE_CODE (*node
) != METHOD_TYPE
35983 && TREE_CODE (*node
) != FIELD_DECL
35984 && TREE_CODE (*node
) != TYPE_DECL
)
35986 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35988 *no_add_attrs
= true;
35992 /* Can combine regparm with all attributes but fastcall. */
35993 if (is_attribute_p ("ms_abi", name
))
35995 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
35997 error ("ms_abi and sysv_abi attributes are not compatible");
36002 else if (is_attribute_p ("sysv_abi", name
))
36004 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
36006 error ("ms_abi and sysv_abi attributes are not compatible");
36015 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
36016 struct attribute_spec.handler. */
36018 ix86_handle_struct_attribute (tree
*node
, tree name
,
36019 tree args ATTRIBUTE_UNUSED
,
36020 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36023 if (DECL_P (*node
))
36025 if (TREE_CODE (*node
) == TYPE_DECL
)
36026 type
= &TREE_TYPE (*node
);
36031 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
36033 warning (OPT_Wattributes
, "%qE attribute ignored",
36035 *no_add_attrs
= true;
36038 else if ((is_attribute_p ("ms_struct", name
)
36039 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
36040 || ((is_attribute_p ("gcc_struct", name
)
36041 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
36043 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
36045 *no_add_attrs
= true;
36052 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
36053 tree args ATTRIBUTE_UNUSED
,
36054 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36056 if (TREE_CODE (*node
) != FUNCTION_DECL
)
36058 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36060 *no_add_attrs
= true;
36066 ix86_ms_bitfield_layout_p (const_tree record_type
)
36068 return ((TARGET_MS_BITFIELD_LAYOUT
36069 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
36070 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
36073 /* Returns an expression indicating where the this parameter is
36074 located on entry to the FUNCTION. */
36077 x86_this_parameter (tree function
)
36079 tree type
= TREE_TYPE (function
);
36080 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
36085 const int *parm_regs
;
36087 if (ix86_function_type_abi (type
) == MS_ABI
)
36088 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
36090 parm_regs
= x86_64_int_parameter_registers
;
36091 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
36094 nregs
= ix86_function_regparm (type
, function
);
36096 if (nregs
> 0 && !stdarg_p (type
))
36099 unsigned int ccvt
= ix86_get_callcvt (type
);
36101 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36102 regno
= aggr
? DX_REG
: CX_REG
;
36103 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36107 return gen_rtx_MEM (SImode
,
36108 plus_constant (Pmode
, stack_pointer_rtx
, 4));
36117 return gen_rtx_MEM (SImode
,
36118 plus_constant (Pmode
,
36119 stack_pointer_rtx
, 4));
36122 return gen_rtx_REG (SImode
, regno
);
36125 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
36129 /* Determine whether x86_output_mi_thunk can succeed. */
36132 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
36133 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
36134 HOST_WIDE_INT vcall_offset
, const_tree function
)
36136 /* 64-bit can handle anything. */
36140 /* For 32-bit, everything's fine if we have one free register. */
36141 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
36144 /* Need a free register for vcall_offset. */
36148 /* Need a free register for GOT references. */
36149 if (flag_pic
&& !targetm
.binds_local_p (function
))
36152 /* Otherwise ok. */
36156 /* Output the assembler code for a thunk function. THUNK_DECL is the
36157 declaration for the thunk function itself, FUNCTION is the decl for
36158 the target function. DELTA is an immediate constant offset to be
36159 added to THIS. If VCALL_OFFSET is nonzero, the word at
36160 *(*this + vcall_offset) should be added to THIS. */
36163 x86_output_mi_thunk (FILE *file
,
36164 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
36165 HOST_WIDE_INT vcall_offset
, tree function
)
36167 rtx this_param
= x86_this_parameter (function
);
36168 rtx this_reg
, tmp
, fnaddr
;
36169 unsigned int tmp_regno
;
36172 tmp_regno
= R10_REG
;
36175 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
36176 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36177 tmp_regno
= AX_REG
;
36178 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36179 tmp_regno
= DX_REG
;
36181 tmp_regno
= CX_REG
;
36184 emit_note (NOTE_INSN_PROLOGUE_END
);
36186 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
36187 pull it in now and let DELTA benefit. */
36188 if (REG_P (this_param
))
36189 this_reg
= this_param
;
36190 else if (vcall_offset
)
36192 /* Put the this parameter into %eax. */
36193 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
36194 emit_move_insn (this_reg
, this_param
);
36197 this_reg
= NULL_RTX
;
36199 /* Adjust the this parameter by a fixed constant. */
36202 rtx delta_rtx
= GEN_INT (delta
);
36203 rtx delta_dst
= this_reg
? this_reg
: this_param
;
36207 if (!x86_64_general_operand (delta_rtx
, Pmode
))
36209 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36210 emit_move_insn (tmp
, delta_rtx
);
36215 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
36218 /* Adjust the this parameter by a value stored in the vtable. */
36221 rtx vcall_addr
, vcall_mem
, this_mem
;
36223 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36225 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
36226 if (Pmode
!= ptr_mode
)
36227 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
36228 emit_move_insn (tmp
, this_mem
);
36230 /* Adjust the this parameter. */
36231 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
36233 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
36235 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
36236 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
36237 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
36240 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
36241 if (Pmode
!= ptr_mode
)
36242 emit_insn (gen_addsi_1_zext (this_reg
,
36243 gen_rtx_REG (ptr_mode
,
36247 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
36250 /* If necessary, drop THIS back to its stack slot. */
36251 if (this_reg
&& this_reg
!= this_param
)
36252 emit_move_insn (this_param
, this_reg
);
36254 fnaddr
= XEXP (DECL_RTL (function
), 0);
36257 if (!flag_pic
|| targetm
.binds_local_p (function
)
36262 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
36263 tmp
= gen_rtx_CONST (Pmode
, tmp
);
36264 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
36269 if (!flag_pic
|| targetm
.binds_local_p (function
))
36272 else if (TARGET_MACHO
)
36274 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
36275 fnaddr
= XEXP (fnaddr
, 0);
36277 #endif /* TARGET_MACHO */
36280 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
36281 output_set_got (tmp
, NULL_RTX
);
36283 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
36284 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
36285 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
36289 /* Our sibling call patterns do not allow memories, because we have no
36290 predicate that can distinguish between frame and non-frame memory.
36291 For our purposes here, we can get away with (ab)using a jump pattern,
36292 because we're going to do no optimization. */
36293 if (MEM_P (fnaddr
))
36294 emit_jump_insn (gen_indirect_jump (fnaddr
));
36297 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
36298 fnaddr
= legitimize_pic_address (fnaddr
,
36299 gen_rtx_REG (Pmode
, tmp_regno
));
36301 if (!sibcall_insn_operand (fnaddr
, word_mode
))
36303 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
36304 if (GET_MODE (fnaddr
) != word_mode
)
36305 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
36306 emit_move_insn (tmp
, fnaddr
);
36310 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
36311 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
36312 tmp
= emit_call_insn (tmp
);
36313 SIBLING_CALL_P (tmp
) = 1;
36317 /* Emit just enough of rest_of_compilation to get the insns emitted.
36318 Note that use_thunk calls assemble_start_function et al. */
36319 tmp
= get_insns ();
36320 shorten_branches (tmp
);
36321 final_start_function (tmp
, file
, 1);
36322 final (tmp
, file
, 1);
36323 final_end_function ();
36327 x86_file_start (void)
36329 default_file_start ();
36331 darwin_file_start ();
36333 if (X86_FILE_START_VERSION_DIRECTIVE
)
36334 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
36335 if (X86_FILE_START_FLTUSED
)
36336 fputs ("\t.global\t__fltused\n", asm_out_file
);
36337 if (ix86_asm_dialect
== ASM_INTEL
)
36338 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
36342 x86_field_alignment (tree field
, int computed
)
36344 enum machine_mode mode
;
36345 tree type
= TREE_TYPE (field
);
36347 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
36349 mode
= TYPE_MODE (strip_array_types (type
));
36350 if (mode
== DFmode
|| mode
== DCmode
36351 || GET_MODE_CLASS (mode
) == MODE_INT
36352 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
36353 return MIN (32, computed
);
36357 /* Output assembler code to FILE to increment profiler label # LABELNO
36358 for profiling a function entry. */
36360 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
36362 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
36367 #ifndef NO_PROFILE_COUNTERS
36368 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
36371 if (!TARGET_PECOFF
&& flag_pic
)
36372 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
36374 fprintf (file
, "\tcall\t%s\n", mcount_name
);
36378 #ifndef NO_PROFILE_COUNTERS
36379 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
36382 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
36386 #ifndef NO_PROFILE_COUNTERS
36387 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
36390 fprintf (file
, "\tcall\t%s\n", mcount_name
);
36394 /* We don't have exact information about the insn sizes, but we may assume
36395 quite safely that we are informed about all 1 byte insns and memory
36396 address sizes. This is enough to eliminate unnecessary padding in
36400 min_insn_size (rtx insn
)
36404 if (!INSN_P (insn
) || !active_insn_p (insn
))
36407 /* Discard alignments we've emit and jump instructions. */
36408 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
36409 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
36412 /* Important case - calls are always 5 bytes.
36413 It is common to have many calls in the row. */
36415 && symbolic_reference_mentioned_p (PATTERN (insn
))
36416 && !SIBLING_CALL_P (insn
))
36418 len
= get_attr_length (insn
);
36422 /* For normal instructions we rely on get_attr_length being exact,
36423 with a few exceptions. */
36424 if (!JUMP_P (insn
))
36426 enum attr_type type
= get_attr_type (insn
);
36431 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
36432 || asm_noperands (PATTERN (insn
)) >= 0)
36439 /* Otherwise trust get_attr_length. */
36443 l
= get_attr_length_address (insn
);
36444 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
36453 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36455 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
36459 ix86_avoid_jump_mispredicts (void)
36461 rtx insn
, start
= get_insns ();
36462 int nbytes
= 0, njumps
= 0;
36465 /* Look for all minimal intervals of instructions containing 4 jumps.
36466 The intervals are bounded by START and INSN. NBYTES is the total
36467 size of instructions in the interval including INSN and not including
36468 START. When the NBYTES is smaller than 16 bytes, it is possible
36469 that the end of START and INSN ends up in the same 16byte page.
36471 The smallest offset in the page INSN can start is the case where START
36472 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
36473 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
36475 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
36479 if (LABEL_P (insn
))
36481 int align
= label_to_alignment (insn
);
36482 int max_skip
= label_to_max_skip (insn
);
36486 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
36487 already in the current 16 byte page, because otherwise
36488 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
36489 bytes to reach 16 byte boundary. */
36491 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
36494 fprintf (dump_file
, "Label %i with max_skip %i\n",
36495 INSN_UID (insn
), max_skip
);
36498 while (nbytes
+ max_skip
>= 16)
36500 start
= NEXT_INSN (start
);
36501 if (JUMP_P (start
) || CALL_P (start
))
36502 njumps
--, isjump
= 1;
36505 nbytes
-= min_insn_size (start
);
36511 min_size
= min_insn_size (insn
);
36512 nbytes
+= min_size
;
36514 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
36515 INSN_UID (insn
), min_size
);
36516 if (JUMP_P (insn
) || CALL_P (insn
))
36523 start
= NEXT_INSN (start
);
36524 if (JUMP_P (start
) || CALL_P (start
))
36525 njumps
--, isjump
= 1;
36528 nbytes
-= min_insn_size (start
);
36530 gcc_assert (njumps
>= 0);
36532 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
36533 INSN_UID (start
), INSN_UID (insn
), nbytes
);
36535 if (njumps
== 3 && isjump
&& nbytes
< 16)
36537 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
36540 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
36541 INSN_UID (insn
), padsize
);
36542 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
36548 /* AMD Athlon works faster
36549 when RET is not destination of conditional jump or directly preceded
36550 by other jump instruction. We avoid the penalty by inserting NOP just
36551 before the RET instructions in such cases. */
36553 ix86_pad_returns (void)
36558 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36560 basic_block bb
= e
->src
;
36561 rtx ret
= BB_END (bb
);
36563 bool replace
= false;
36565 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
36566 || optimize_bb_for_size_p (bb
))
36568 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
36569 if (active_insn_p (prev
) || LABEL_P (prev
))
36571 if (prev
&& LABEL_P (prev
))
36576 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36577 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
36578 && !(e
->flags
& EDGE_FALLTHRU
))
36586 prev
= prev_active_insn (ret
);
36588 && ((JUMP_P (prev
) && any_condjump_p (prev
))
36591 /* Empty functions get branch mispredict even when
36592 the jump destination is not visible to us. */
36593 if (!prev
&& !optimize_function_for_size_p (cfun
))
36598 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
36604 /* Count the minimum number of instructions in BB. Return 4 if the
36605 number of instructions >= 4. */
36608 ix86_count_insn_bb (basic_block bb
)
36611 int insn_count
= 0;
36613 /* Count number of instructions in this block. Return 4 if the number
36614 of instructions >= 4. */
36615 FOR_BB_INSNS (bb
, insn
)
36617 /* Only happen in exit blocks. */
36619 && ANY_RETURN_P (PATTERN (insn
)))
36622 if (NONDEBUG_INSN_P (insn
)
36623 && GET_CODE (PATTERN (insn
)) != USE
36624 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
36627 if (insn_count
>= 4)
36636 /* Count the minimum number of instructions in code path in BB.
36637 Return 4 if the number of instructions >= 4. */
36640 ix86_count_insn (basic_block bb
)
36644 int min_prev_count
;
36646 /* Only bother counting instructions along paths with no
36647 more than 2 basic blocks between entry and exit. Given
36648 that BB has an edge to exit, determine if a predecessor
36649 of BB has an edge from entry. If so, compute the number
36650 of instructions in the predecessor block. If there
36651 happen to be multiple such blocks, compute the minimum. */
36652 min_prev_count
= 4;
36653 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36656 edge_iterator prev_ei
;
36658 if (e
->src
== ENTRY_BLOCK_PTR
)
36660 min_prev_count
= 0;
36663 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
36665 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
36667 int count
= ix86_count_insn_bb (e
->src
);
36668 if (count
< min_prev_count
)
36669 min_prev_count
= count
;
36675 if (min_prev_count
< 4)
36676 min_prev_count
+= ix86_count_insn_bb (bb
);
36678 return min_prev_count
;
36681 /* Pad short function to 4 instructions. */
36684 ix86_pad_short_function (void)
36689 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36691 rtx ret
= BB_END (e
->src
);
36692 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
36694 int insn_count
= ix86_count_insn (e
->src
);
36696 /* Pad short function. */
36697 if (insn_count
< 4)
36701 /* Find epilogue. */
36704 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
36705 insn
= PREV_INSN (insn
);
36710 /* Two NOPs count as one instruction. */
36711 insn_count
= 2 * (4 - insn_count
);
36712 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
36718 /* Fix up a Windows system unwinder issue. If an EH region falls through into
36719 the epilogue, the Windows system unwinder will apply epilogue logic and
36720 produce incorrect offsets. This can be avoided by adding a nop between
36721 the last insn that can throw and the first insn of the epilogue. */
36724 ix86_seh_fixup_eh_fallthru (void)
36729 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36733 /* Find the beginning of the epilogue. */
36734 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36735 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36740 /* We only care about preceding insns that can throw. */
36741 insn
= prev_active_insn (insn
);
36742 if (insn
== NULL
|| !can_throw_internal (insn
))
36745 /* Do not separate calls from their debug information. */
36746 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36748 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36749 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
36754 emit_insn_after (gen_nops (const1_rtx
), insn
);
36758 /* Implement machine specific optimizations. We implement padding of returns
36759 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
36763 /* We are freeing block_for_insn in the toplev to keep compatibility
36764 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36765 compute_bb_for_insn ();
36767 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36768 ix86_seh_fixup_eh_fallthru ();
36770 if (optimize
&& optimize_function_for_speed_p (cfun
))
36772 if (TARGET_PAD_SHORT_FUNCTION
)
36773 ix86_pad_short_function ();
36774 else if (TARGET_PAD_RETURNS
)
36775 ix86_pad_returns ();
36776 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36777 if (TARGET_FOUR_JUMP_LIMIT
)
36778 ix86_avoid_jump_mispredicts ();
36783 /* Return nonzero when QImode register that must be represented via REX prefix
36786 x86_extended_QIreg_mentioned_p (rtx insn
)
36789 extract_insn_cached (insn
);
36790 for (i
= 0; i
< recog_data
.n_operands
; i
++)
36791 if (GENERAL_REG_P (recog_data
.operand
[i
])
36792 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
36797 /* Return nonzero when P points to register encoded via REX prefix.
36798 Called via for_each_rtx. */
36800 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36802 unsigned int regno
;
36805 regno
= REGNO (*p
);
36806 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
36809 /* Return true when INSN mentions register that must be encoded using REX
36812 x86_extended_reg_mentioned_p (rtx insn
)
36814 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
36815 extended_reg_mentioned_1
, NULL
);
36818 /* If profitable, negate (without causing overflow) integer constant
36819 of mode MODE at location LOC. Return true in this case. */
36821 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
36825 if (!CONST_INT_P (*loc
))
36831 /* DImode x86_64 constants must fit in 32 bits. */
36832 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
36843 gcc_unreachable ();
36846 /* Avoid overflows. */
36847 if (mode_signbit_p (mode
, *loc
))
36850 val
= INTVAL (*loc
);
36852 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
36853 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
36854 if ((val
< 0 && val
!= -128)
36857 *loc
= GEN_INT (-val
);
36864 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
36865 optabs would emit if we didn't have TFmode patterns. */
36868 x86_emit_floatuns (rtx operands
[2])
36870 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
36871 enum machine_mode mode
, inmode
;
36873 inmode
= GET_MODE (operands
[1]);
36874 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
36877 in
= force_reg (inmode
, operands
[1]);
36878 mode
= GET_MODE (out
);
36879 neglab
= gen_label_rtx ();
36880 donelab
= gen_label_rtx ();
36881 f0
= gen_reg_rtx (mode
);
36883 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
36885 expand_float (out
, in
, 0);
36887 emit_jump_insn (gen_jump (donelab
));
36890 emit_label (neglab
);
36892 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
36894 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
36896 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
36898 expand_float (f0
, i0
, 0);
36900 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
36902 emit_label (donelab
);
36905 /* AVX512F does support 64-byte integer vector operations,
36906 thus the longest vector we are faced with is V64QImode. */
36907 #define MAX_VECT_LEN 64
36909 struct expand_vec_perm_d
36911 rtx target
, op0
, op1
;
36912 unsigned char perm
[MAX_VECT_LEN
];
36913 enum machine_mode vmode
;
36914 unsigned char nelt
;
36915 bool one_operand_p
;
36919 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
36920 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
36921 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
36923 /* Get a vector mode of the same size as the original but with elements
36924 twice as wide. This is only guaranteed to apply to integral vectors. */
36926 static inline enum machine_mode
36927 get_mode_wider_vector (enum machine_mode o
)
36929 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
36930 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
36931 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
36932 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
36936 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36937 with all elements equal to VAR. Return true if successful. */
36940 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
36941 rtx target
, rtx val
)
36964 /* First attempt to recognize VAL as-is. */
36965 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36966 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
36967 if (recog_memoized (insn
) < 0)
36970 /* If that fails, force VAL into a register. */
36973 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
36974 seq
= get_insns ();
36977 emit_insn_before (seq
, insn
);
36979 ok
= recog_memoized (insn
) >= 0;
36988 if (TARGET_SSE
|| TARGET_3DNOW_A
)
36992 val
= gen_lowpart (SImode
, val
);
36993 x
= gen_rtx_TRUNCATE (HImode
, val
);
36994 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
36995 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37008 struct expand_vec_perm_d dperm
;
37012 memset (&dperm
, 0, sizeof (dperm
));
37013 dperm
.target
= target
;
37014 dperm
.vmode
= mode
;
37015 dperm
.nelt
= GET_MODE_NUNITS (mode
);
37016 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
37017 dperm
.one_operand_p
= true;
37019 /* Extend to SImode using a paradoxical SUBREG. */
37020 tmp1
= gen_reg_rtx (SImode
);
37021 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
37023 /* Insert the SImode value as low element of a V4SImode vector. */
37024 tmp2
= gen_reg_rtx (V4SImode
);
37025 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
37026 emit_move_insn (dperm
.op0
, gen_lowpart (mode
, tmp2
));
37028 ok
= (expand_vec_perm_1 (&dperm
)
37029 || expand_vec_perm_broadcast_1 (&dperm
));
37041 /* Replicate the value once into the next wider mode and recurse. */
37043 enum machine_mode smode
, wsmode
, wvmode
;
37046 smode
= GET_MODE_INNER (mode
);
37047 wvmode
= get_mode_wider_vector (mode
);
37048 wsmode
= GET_MODE_INNER (wvmode
);
37050 val
= convert_modes (wsmode
, smode
, val
, true);
37051 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
37052 GEN_INT (GET_MODE_BITSIZE (smode
)),
37053 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37054 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
37056 x
= gen_reg_rtx (wvmode
);
37057 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
37059 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), x
));
37066 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
37067 rtx x
= gen_reg_rtx (hvmode
);
37069 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
37072 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
37073 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37082 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37083 whose ONE_VAR element is VAR, and other elements are zero. Return true
37087 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
37088 rtx target
, rtx var
, int one_var
)
37090 enum machine_mode vsimode
;
37093 bool use_vector_set
= false;
37098 /* For SSE4.1, we normally use vector set. But if the second
37099 element is zero and inter-unit moves are OK, we use movq
37101 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
37102 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
37108 use_vector_set
= TARGET_SSE4_1
;
37111 use_vector_set
= TARGET_SSE2
;
37114 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
37121 use_vector_set
= TARGET_AVX
;
37124 /* Use ix86_expand_vector_set in 64bit mode only. */
37125 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
37131 if (use_vector_set
)
37133 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
37134 var
= force_reg (GET_MODE_INNER (mode
), var
);
37135 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37151 var
= force_reg (GET_MODE_INNER (mode
), var
);
37152 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
37153 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37158 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
37159 new_target
= gen_reg_rtx (mode
);
37161 new_target
= target
;
37162 var
= force_reg (GET_MODE_INNER (mode
), var
);
37163 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
37164 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
37165 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
37168 /* We need to shuffle the value to the correct position, so
37169 create a new pseudo to store the intermediate result. */
37171 /* With SSE2, we can use the integer shuffle insns. */
37172 if (mode
!= V4SFmode
&& TARGET_SSE2
)
37174 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
37176 GEN_INT (one_var
== 1 ? 0 : 1),
37177 GEN_INT (one_var
== 2 ? 0 : 1),
37178 GEN_INT (one_var
== 3 ? 0 : 1)));
37179 if (target
!= new_target
)
37180 emit_move_insn (target
, new_target
);
37184 /* Otherwise convert the intermediate result to V4SFmode and
37185 use the SSE1 shuffle instructions. */
37186 if (mode
!= V4SFmode
)
37188 tmp
= gen_reg_rtx (V4SFmode
);
37189 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
37194 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
37196 GEN_INT (one_var
== 1 ? 0 : 1),
37197 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
37198 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
37200 if (mode
!= V4SFmode
)
37201 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
37202 else if (tmp
!= target
)
37203 emit_move_insn (target
, tmp
);
37205 else if (target
!= new_target
)
37206 emit_move_insn (target
, new_target
);
37211 vsimode
= V4SImode
;
37217 vsimode
= V2SImode
;
37223 /* Zero extend the variable element to SImode and recurse. */
37224 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
37226 x
= gen_reg_rtx (vsimode
);
37227 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
37229 gcc_unreachable ();
37231 emit_move_insn (target
, gen_lowpart (mode
, x
));
37239 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37240 consisting of the values in VALS. It is known that all elements
37241 except ONE_VAR are constants. Return true if successful. */
37244 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
37245 rtx target
, rtx vals
, int one_var
)
37247 rtx var
= XVECEXP (vals
, 0, one_var
);
37248 enum machine_mode wmode
;
37251 const_vec
= copy_rtx (vals
);
37252 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
37253 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
37261 /* For the two element vectors, it's just as easy to use
37262 the general case. */
37266 /* Use ix86_expand_vector_set in 64bit mode only. */
37289 /* There's no way to set one QImode entry easily. Combine
37290 the variable value with its adjacent constant value, and
37291 promote to an HImode set. */
37292 x
= XVECEXP (vals
, 0, one_var
^ 1);
37295 var
= convert_modes (HImode
, QImode
, var
, true);
37296 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
37297 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37298 x
= GEN_INT (INTVAL (x
) & 0xff);
37302 var
= convert_modes (HImode
, QImode
, var
, true);
37303 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
37305 if (x
!= const0_rtx
)
37306 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
37307 1, OPTAB_LIB_WIDEN
);
37309 x
= gen_reg_rtx (wmode
);
37310 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
37311 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
37313 emit_move_insn (target
, gen_lowpart (mode
, x
));
37320 emit_move_insn (target
, const_vec
);
37321 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37325 /* A subroutine of ix86_expand_vector_init_general. Use vector
37326 concatenate to handle the most general case: all values variable,
37327 and none identical. */
37330 ix86_expand_vector_init_concat (enum machine_mode mode
,
37331 rtx target
, rtx
*ops
, int n
)
37333 enum machine_mode cmode
, hmode
= VOIDmode
;
37334 rtx first
[8], second
[4];
37374 gcc_unreachable ();
37377 if (!register_operand (ops
[1], cmode
))
37378 ops
[1] = force_reg (cmode
, ops
[1]);
37379 if (!register_operand (ops
[0], cmode
))
37380 ops
[0] = force_reg (cmode
, ops
[0]);
37381 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37382 gen_rtx_VEC_CONCAT (mode
, ops
[0],
37402 gcc_unreachable ();
37418 gcc_unreachable ();
37423 /* FIXME: We process inputs backward to help RA. PR 36222. */
37426 for (; i
> 0; i
-= 2, j
--)
37428 first
[j
] = gen_reg_rtx (cmode
);
37429 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
37430 ix86_expand_vector_init (false, first
[j
],
37431 gen_rtx_PARALLEL (cmode
, v
));
37437 gcc_assert (hmode
!= VOIDmode
);
37438 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37440 second
[j
] = gen_reg_rtx (hmode
);
37441 ix86_expand_vector_init_concat (hmode
, second
[j
],
37445 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
37448 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
37452 gcc_unreachable ();
37456 /* A subroutine of ix86_expand_vector_init_general. Use vector
37457 interleave to handle the most general case: all values variable,
37458 and none identical. */
37461 ix86_expand_vector_init_interleave (enum machine_mode mode
,
37462 rtx target
, rtx
*ops
, int n
)
37464 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
37467 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
37468 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
37469 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
37474 gen_load_even
= gen_vec_setv8hi
;
37475 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
37476 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37477 inner_mode
= HImode
;
37478 first_imode
= V4SImode
;
37479 second_imode
= V2DImode
;
37480 third_imode
= VOIDmode
;
37483 gen_load_even
= gen_vec_setv16qi
;
37484 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
37485 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
37486 inner_mode
= QImode
;
37487 first_imode
= V8HImode
;
37488 second_imode
= V4SImode
;
37489 third_imode
= V2DImode
;
37492 gcc_unreachable ();
37495 for (i
= 0; i
< n
; i
++)
37497 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
37498 op0
= gen_reg_rtx (SImode
);
37499 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
37501 /* Insert the SImode value as low element of V4SImode vector. */
37502 op1
= gen_reg_rtx (V4SImode
);
37503 op0
= gen_rtx_VEC_MERGE (V4SImode
,
37504 gen_rtx_VEC_DUPLICATE (V4SImode
,
37506 CONST0_RTX (V4SImode
),
37508 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
37510 /* Cast the V4SImode vector back to a vector in orignal mode. */
37511 op0
= gen_reg_rtx (mode
);
37512 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
37514 /* Load even elements into the second position. */
37515 emit_insn (gen_load_even (op0
,
37516 force_reg (inner_mode
,
37520 /* Cast vector to FIRST_IMODE vector. */
37521 ops
[i
] = gen_reg_rtx (first_imode
);
37522 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
37525 /* Interleave low FIRST_IMODE vectors. */
37526 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37528 op0
= gen_reg_rtx (first_imode
);
37529 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
37531 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
37532 ops
[j
] = gen_reg_rtx (second_imode
);
37533 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
37536 /* Interleave low SECOND_IMODE vectors. */
37537 switch (second_imode
)
37540 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
37542 op0
= gen_reg_rtx (second_imode
);
37543 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
37546 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
37548 ops
[j
] = gen_reg_rtx (third_imode
);
37549 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
37551 second_imode
= V2DImode
;
37552 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37556 op0
= gen_reg_rtx (second_imode
);
37557 emit_insn (gen_interleave_second_low (op0
, ops
[0],
37560 /* Cast the SECOND_IMODE vector back to a vector on original
37562 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37563 gen_lowpart (mode
, op0
)));
37567 gcc_unreachable ();
37571 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
37572 all values variable, and none identical. */
37575 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
37576 rtx target
, rtx vals
)
37578 rtx ops
[32], op0
, op1
;
37579 enum machine_mode half_mode
= VOIDmode
;
37586 if (!mmx_ok
&& !TARGET_SSE
)
37598 n
= GET_MODE_NUNITS (mode
);
37599 for (i
= 0; i
< n
; i
++)
37600 ops
[i
] = XVECEXP (vals
, 0, i
);
37601 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
37605 half_mode
= V16QImode
;
37609 half_mode
= V8HImode
;
37613 n
= GET_MODE_NUNITS (mode
);
37614 for (i
= 0; i
< n
; i
++)
37615 ops
[i
] = XVECEXP (vals
, 0, i
);
37616 op0
= gen_reg_rtx (half_mode
);
37617 op1
= gen_reg_rtx (half_mode
);
37618 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
37620 ix86_expand_vector_init_interleave (half_mode
, op1
,
37621 &ops
[n
>> 1], n
>> 2);
37622 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37623 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
37627 if (!TARGET_SSE4_1
)
37635 /* Don't use ix86_expand_vector_init_interleave if we can't
37636 move from GPR to SSE register directly. */
37637 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
37640 n
= GET_MODE_NUNITS (mode
);
37641 for (i
= 0; i
< n
; i
++)
37642 ops
[i
] = XVECEXP (vals
, 0, i
);
37643 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
37651 gcc_unreachable ();
37655 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
37656 enum machine_mode inner_mode
;
37657 rtx words
[4], shift
;
37659 inner_mode
= GET_MODE_INNER (mode
);
37660 n_elts
= GET_MODE_NUNITS (mode
);
37661 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
37662 n_elt_per_word
= n_elts
/ n_words
;
37663 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
37665 for (i
= 0; i
< n_words
; ++i
)
37667 rtx word
= NULL_RTX
;
37669 for (j
= 0; j
< n_elt_per_word
; ++j
)
37671 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
37672 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
37678 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
37679 word
, 1, OPTAB_LIB_WIDEN
);
37680 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
37681 word
, 1, OPTAB_LIB_WIDEN
);
37689 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
37690 else if (n_words
== 2)
37692 rtx tmp
= gen_reg_rtx (mode
);
37693 emit_clobber (tmp
);
37694 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
37695 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
37696 emit_move_insn (target
, tmp
);
37698 else if (n_words
== 4)
37700 rtx tmp
= gen_reg_rtx (V4SImode
);
37701 gcc_assert (word_mode
== SImode
);
37702 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
37703 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
37704 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
37707 gcc_unreachable ();
37711 /* Initialize vector TARGET via VALS. Suppress the use of MMX
37712 instructions unless MMX_OK is true. */
37715 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
37717 enum machine_mode mode
= GET_MODE (target
);
37718 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37719 int n_elts
= GET_MODE_NUNITS (mode
);
37720 int n_var
= 0, one_var
= -1;
37721 bool all_same
= true, all_const_zero
= true;
37725 for (i
= 0; i
< n_elts
; ++i
)
37727 x
= XVECEXP (vals
, 0, i
);
37728 if (!(CONST_INT_P (x
)
37729 || GET_CODE (x
) == CONST_DOUBLE
37730 || GET_CODE (x
) == CONST_FIXED
))
37731 n_var
++, one_var
= i
;
37732 else if (x
!= CONST0_RTX (inner_mode
))
37733 all_const_zero
= false;
37734 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37738 /* Constants are best loaded from the constant pool. */
37741 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37745 /* If all values are identical, broadcast the value. */
37747 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37748 XVECEXP (vals
, 0, 0)))
37751 /* Values where only one field is non-constant are best loaded from
37752 the pool and overwritten via move later. */
37756 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37757 XVECEXP (vals
, 0, one_var
),
37761 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37765 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37769 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37771 enum machine_mode mode
= GET_MODE (target
);
37772 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37773 enum machine_mode half_mode
;
37774 bool use_vec_merge
= false;
37776 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37778 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37779 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37780 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37781 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37782 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37783 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37785 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37787 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37788 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37789 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37790 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37791 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37792 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37802 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37803 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37805 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37807 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37808 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37814 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
37818 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37819 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
37821 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37823 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37824 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37831 /* For the two element vectors, we implement a VEC_CONCAT with
37832 the extraction of the other element. */
37834 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
37835 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
37838 op0
= val
, op1
= tmp
;
37840 op0
= tmp
, op1
= val
;
37842 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
37843 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37848 use_vec_merge
= TARGET_SSE4_1
;
37855 use_vec_merge
= true;
37859 /* tmp = target = A B C D */
37860 tmp
= copy_to_reg (target
);
37861 /* target = A A B B */
37862 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
37863 /* target = X A B B */
37864 ix86_expand_vector_set (false, target
, val
, 0);
37865 /* target = A X C D */
37866 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37867 const1_rtx
, const0_rtx
,
37868 GEN_INT (2+4), GEN_INT (3+4)));
37872 /* tmp = target = A B C D */
37873 tmp
= copy_to_reg (target
);
37874 /* tmp = X B C D */
37875 ix86_expand_vector_set (false, tmp
, val
, 0);
37876 /* target = A B X D */
37877 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37878 const0_rtx
, const1_rtx
,
37879 GEN_INT (0+4), GEN_INT (3+4)));
37883 /* tmp = target = A B C D */
37884 tmp
= copy_to_reg (target
);
37885 /* tmp = X B C D */
37886 ix86_expand_vector_set (false, tmp
, val
, 0);
37887 /* target = A B X D */
37888 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37889 const0_rtx
, const1_rtx
,
37890 GEN_INT (2+4), GEN_INT (0+4)));
37894 gcc_unreachable ();
37899 use_vec_merge
= TARGET_SSE4_1
;
37903 /* Element 0 handled by vec_merge below. */
37906 use_vec_merge
= true;
37912 /* With SSE2, use integer shuffles to swap element 0 and ELT,
37913 store into element 0, then shuffle them back. */
37917 order
[0] = GEN_INT (elt
);
37918 order
[1] = const1_rtx
;
37919 order
[2] = const2_rtx
;
37920 order
[3] = GEN_INT (3);
37921 order
[elt
] = const0_rtx
;
37923 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37924 order
[1], order
[2], order
[3]));
37926 ix86_expand_vector_set (false, target
, val
, 0);
37928 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37929 order
[1], order
[2], order
[3]));
37933 /* For SSE1, we have to reuse the V4SF code. */
37934 rtx t
= gen_reg_rtx (V4SFmode
);
37935 ix86_expand_vector_set (false, t
, gen_lowpart (SFmode
, val
), elt
);
37936 emit_move_insn (target
, gen_lowpart (mode
, t
));
37941 use_vec_merge
= TARGET_SSE2
;
37944 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37948 use_vec_merge
= TARGET_SSE4_1
;
37955 half_mode
= V16QImode
;
37961 half_mode
= V8HImode
;
37967 half_mode
= V4SImode
;
37973 half_mode
= V2DImode
;
37979 half_mode
= V4SFmode
;
37985 half_mode
= V2DFmode
;
37991 /* Compute offset. */
37995 gcc_assert (i
<= 1);
37997 /* Extract the half. */
37998 tmp
= gen_reg_rtx (half_mode
);
37999 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
38001 /* Put val in tmp at elt. */
38002 ix86_expand_vector_set (false, tmp
, val
, elt
);
38005 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
38014 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
38015 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
38016 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38020 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38022 emit_move_insn (mem
, target
);
38024 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38025 emit_move_insn (tmp
, val
);
38027 emit_move_insn (target
, mem
);
38032 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
38034 enum machine_mode mode
= GET_MODE (vec
);
38035 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
38036 bool use_vec_extr
= false;
38049 use_vec_extr
= true;
38053 use_vec_extr
= TARGET_SSE4_1
;
38065 tmp
= gen_reg_rtx (mode
);
38066 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
38067 GEN_INT (elt
), GEN_INT (elt
),
38068 GEN_INT (elt
+4), GEN_INT (elt
+4)));
38072 tmp
= gen_reg_rtx (mode
);
38073 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
38077 gcc_unreachable ();
38080 use_vec_extr
= true;
38085 use_vec_extr
= TARGET_SSE4_1
;
38099 tmp
= gen_reg_rtx (mode
);
38100 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
38101 GEN_INT (elt
), GEN_INT (elt
),
38102 GEN_INT (elt
), GEN_INT (elt
)));
38106 tmp
= gen_reg_rtx (mode
);
38107 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
38111 gcc_unreachable ();
38114 use_vec_extr
= true;
38119 /* For SSE1, we have to reuse the V4SF code. */
38120 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
38121 gen_lowpart (V4SFmode
, vec
), elt
);
38127 use_vec_extr
= TARGET_SSE2
;
38130 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
38134 use_vec_extr
= TARGET_SSE4_1
;
38140 tmp
= gen_reg_rtx (V4SFmode
);
38142 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
38144 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
38145 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38153 tmp
= gen_reg_rtx (V2DFmode
);
38155 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
38157 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
38158 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38166 tmp
= gen_reg_rtx (V16QImode
);
38168 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
38170 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
38171 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
38179 tmp
= gen_reg_rtx (V8HImode
);
38181 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
38183 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
38184 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
38192 tmp
= gen_reg_rtx (V4SImode
);
38194 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
38196 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
38197 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38205 tmp
= gen_reg_rtx (V2DImode
);
38207 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
38209 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
38210 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38216 /* ??? Could extract the appropriate HImode element and shift. */
38223 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
38224 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
38226 /* Let the rtl optimizers know about the zero extension performed. */
38227 if (inner_mode
== QImode
|| inner_mode
== HImode
)
38229 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
38230 target
= gen_lowpart (SImode
, target
);
38233 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38237 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38239 emit_move_insn (mem
, vec
);
38241 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38242 emit_move_insn (target
, tmp
);
38246 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
38247 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
38248 The upper bits of DEST are undefined, though they shouldn't cause
38249 exceptions (some bits from src or all zeros are ok). */
38252 emit_reduc_half (rtx dest
, rtx src
, int i
)
38255 switch (GET_MODE (src
))
38259 tem
= gen_sse_movhlps (dest
, src
, src
);
38261 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
38262 GEN_INT (1 + 4), GEN_INT (1 + 4));
38265 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
38271 d
= gen_reg_rtx (V1TImode
);
38272 tem
= gen_sse2_lshrv1ti3 (d
, gen_lowpart (V1TImode
, src
),
38277 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
38279 tem
= gen_avx_shufps256 (dest
, src
, src
,
38280 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
38284 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
38286 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
38294 if (GET_MODE (dest
) != V4DImode
)
38295 d
= gen_reg_rtx (V4DImode
);
38296 tem
= gen_avx2_permv2ti (d
, gen_lowpart (V4DImode
, src
),
38297 gen_lowpart (V4DImode
, src
),
38302 d
= gen_reg_rtx (V2TImode
);
38303 tem
= gen_avx2_lshrv2ti3 (d
, gen_lowpart (V2TImode
, src
),
38308 gcc_unreachable ();
38312 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
38315 /* Expand a vector reduction. FN is the binary pattern to reduce;
38316 DEST is the destination; IN is the input vector. */
38319 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
38321 rtx half
, dst
, vec
= in
;
38322 enum machine_mode mode
= GET_MODE (in
);
38325 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
38327 && mode
== V8HImode
38328 && fn
== gen_uminv8hi3
)
38330 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
38334 for (i
= GET_MODE_BITSIZE (mode
);
38335 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
38338 half
= gen_reg_rtx (mode
);
38339 emit_reduc_half (half
, vec
, i
);
38340 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
38343 dst
= gen_reg_rtx (mode
);
38344 emit_insn (fn (dst
, half
, vec
));
38349 /* Target hook for scalar_mode_supported_p. */
38351 ix86_scalar_mode_supported_p (enum machine_mode mode
)
38353 if (DECIMAL_FLOAT_MODE_P (mode
))
38354 return default_decimal_float_supported_p ();
38355 else if (mode
== TFmode
)
38358 return default_scalar_mode_supported_p (mode
);
38361 /* Implements target hook vector_mode_supported_p. */
38363 ix86_vector_mode_supported_p (enum machine_mode mode
)
38365 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
38367 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
38369 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
38371 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
38373 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
38378 /* Target hook for c_mode_for_suffix. */
38379 static enum machine_mode
38380 ix86_c_mode_for_suffix (char suffix
)
38390 /* Worker function for TARGET_MD_ASM_CLOBBERS.
38392 We do this in the new i386 backend to maintain source compatibility
38393 with the old cc0-based compiler. */
38396 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
38397 tree inputs ATTRIBUTE_UNUSED
,
38400 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
38402 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
38407 /* Implements target vector targetm.asm.encode_section_info. */
38409 static void ATTRIBUTE_UNUSED
38410 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
38412 default_encode_section_info (decl
, rtl
, first
);
38414 if (TREE_CODE (decl
) == VAR_DECL
38415 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
38416 && ix86_in_large_data_p (decl
))
38417 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
38420 /* Worker function for REVERSE_CONDITION. */
38423 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
38425 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
38426 ? reverse_condition (code
)
38427 : reverse_condition_maybe_unordered (code
));
38430 /* Output code to perform an x87 FP register move, from OPERANDS[1]
38434 output_387_reg_move (rtx insn
, rtx
*operands
)
38436 if (REG_P (operands
[0]))
38438 if (REG_P (operands
[1])
38439 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38441 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
38442 return output_387_ffreep (operands
, 0);
38443 return "fstp\t%y0";
38445 if (STACK_TOP_P (operands
[0]))
38446 return "fld%Z1\t%y1";
38449 else if (MEM_P (operands
[0]))
38451 gcc_assert (REG_P (operands
[1]));
38452 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38453 return "fstp%Z0\t%y0";
38456 /* There is no non-popping store to memory for XFmode.
38457 So if we need one, follow the store with a load. */
38458 if (GET_MODE (operands
[0]) == XFmode
)
38459 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
38461 return "fst%Z0\t%y0";
38468 /* Output code to perform a conditional jump to LABEL, if C2 flag in
38469 FP status register is set. */
38472 ix86_emit_fp_unordered_jump (rtx label
)
38474 rtx reg
= gen_reg_rtx (HImode
);
38477 emit_insn (gen_x86_fnstsw_1 (reg
));
38479 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
38481 emit_insn (gen_x86_sahf_1 (reg
));
38483 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
38484 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
38488 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
38490 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38491 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
38494 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
38495 gen_rtx_LABEL_REF (VOIDmode
, label
),
38497 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
38499 emit_jump_insn (temp
);
38500 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
38503 /* Output code to perform a log1p XFmode calculation. */
38505 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
38507 rtx label1
= gen_label_rtx ();
38508 rtx label2
= gen_label_rtx ();
38510 rtx tmp
= gen_reg_rtx (XFmode
);
38511 rtx tmp2
= gen_reg_rtx (XFmode
);
38514 emit_insn (gen_absxf2 (tmp
, op1
));
38515 test
= gen_rtx_GE (VOIDmode
, tmp
,
38516 CONST_DOUBLE_FROM_REAL_VALUE (
38517 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
38519 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
38521 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38522 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
38523 emit_jump (label2
);
38525 emit_label (label1
);
38526 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
38527 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
38528 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38529 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
38531 emit_label (label2
);
38534 /* Emit code for round calculation. */
38535 void ix86_emit_i387_round (rtx op0
, rtx op1
)
38537 enum machine_mode inmode
= GET_MODE (op1
);
38538 enum machine_mode outmode
= GET_MODE (op0
);
38539 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
38540 rtx scratch
= gen_reg_rtx (HImode
);
38541 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38542 rtx jump_label
= gen_label_rtx ();
38544 rtx (*gen_abs
) (rtx
, rtx
);
38545 rtx (*gen_neg
) (rtx
, rtx
);
38550 gen_abs
= gen_abssf2
;
38553 gen_abs
= gen_absdf2
;
38556 gen_abs
= gen_absxf2
;
38559 gcc_unreachable ();
38565 gen_neg
= gen_negsf2
;
38568 gen_neg
= gen_negdf2
;
38571 gen_neg
= gen_negxf2
;
38574 gen_neg
= gen_neghi2
;
38577 gen_neg
= gen_negsi2
;
38580 gen_neg
= gen_negdi2
;
38583 gcc_unreachable ();
38586 e1
= gen_reg_rtx (inmode
);
38587 e2
= gen_reg_rtx (inmode
);
38588 res
= gen_reg_rtx (outmode
);
38590 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
38592 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
38594 /* scratch = fxam(op1) */
38595 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
38596 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
38598 /* e1 = fabs(op1) */
38599 emit_insn (gen_abs (e1
, op1
));
38601 /* e2 = e1 + 0.5 */
38602 half
= force_reg (inmode
, half
);
38603 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38604 gen_rtx_PLUS (inmode
, e1
, half
)));
38606 /* res = floor(e2) */
38607 if (inmode
!= XFmode
)
38609 tmp1
= gen_reg_rtx (XFmode
);
38611 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
38612 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
38622 rtx tmp0
= gen_reg_rtx (XFmode
);
38624 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
38626 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38627 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
38628 UNSPEC_TRUNC_NOOP
)));
38632 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
38635 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
38638 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
38641 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
38644 gcc_unreachable ();
38647 /* flags = signbit(a) */
38648 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
38650 /* if (flags) then res = -res */
38651 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
38652 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
38653 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
38655 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38656 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
38657 JUMP_LABEL (insn
) = jump_label
;
38659 emit_insn (gen_neg (res
, res
));
38661 emit_label (jump_label
);
38662 LABEL_NUSES (jump_label
) = 1;
38664 emit_move_insn (op0
, res
);
38667 /* Output code to perform a Newton-Rhapson approximation of a single precision
38668 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
38670 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
38672 rtx x0
, x1
, e0
, e1
;
38674 x0
= gen_reg_rtx (mode
);
38675 e0
= gen_reg_rtx (mode
);
38676 e1
= gen_reg_rtx (mode
);
38677 x1
= gen_reg_rtx (mode
);
38679 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
38681 b
= force_reg (mode
, b
);
38683 /* x0 = rcp(b) estimate */
38684 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38685 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
38688 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38689 gen_rtx_MULT (mode
, x0
, b
)));
38692 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38693 gen_rtx_MULT (mode
, x0
, e0
)));
38696 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38697 gen_rtx_PLUS (mode
, x0
, x0
)));
38700 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
38701 gen_rtx_MINUS (mode
, e1
, e0
)));
38704 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38705 gen_rtx_MULT (mode
, a
, x1
)));
38708 /* Output code to perform a Newton-Rhapson approximation of a
38709 single precision floating point [reciprocal] square root. */
38711 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
38714 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
38717 x0
= gen_reg_rtx (mode
);
38718 e0
= gen_reg_rtx (mode
);
38719 e1
= gen_reg_rtx (mode
);
38720 e2
= gen_reg_rtx (mode
);
38721 e3
= gen_reg_rtx (mode
);
38723 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
38724 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38726 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
38727 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38729 if (VECTOR_MODE_P (mode
))
38731 mthree
= ix86_build_const_vector (mode
, true, mthree
);
38732 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
38735 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
38736 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
38738 a
= force_reg (mode
, a
);
38740 /* x0 = rsqrt(a) estimate */
38741 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38742 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38745 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38750 zero
= gen_reg_rtx (mode
);
38751 mask
= gen_reg_rtx (mode
);
38753 zero
= force_reg (mode
, CONST0_RTX(mode
));
38754 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38755 gen_rtx_NE (mode
, zero
, a
)));
38757 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38758 gen_rtx_AND (mode
, x0
, mask
)));
38762 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38763 gen_rtx_MULT (mode
, x0
, a
)));
38765 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38766 gen_rtx_MULT (mode
, e0
, x0
)));
38769 mthree
= force_reg (mode
, mthree
);
38770 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38771 gen_rtx_PLUS (mode
, e1
, mthree
)));
38773 mhalf
= force_reg (mode
, mhalf
);
38775 /* e3 = -.5 * x0 */
38776 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38777 gen_rtx_MULT (mode
, x0
, mhalf
)));
38779 /* e3 = -.5 * e0 */
38780 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38781 gen_rtx_MULT (mode
, e0
, mhalf
)));
38782 /* ret = e2 * e3 */
38783 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38784 gen_rtx_MULT (mode
, e2
, e3
)));
38787 #ifdef TARGET_SOLARIS
38788 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
38791 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
38794 /* With Binutils 2.15, the "@unwind" marker must be specified on
38795 every occurrence of the ".eh_frame" section, not just the first
38798 && strcmp (name
, ".eh_frame") == 0)
38800 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
38801 flags
& SECTION_WRITE
? "aw" : "a");
38806 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
38808 solaris_elf_asm_comdat_section (name
, flags
, decl
);
38813 default_elf_asm_named_section (name
, flags
, decl
);
38815 #endif /* TARGET_SOLARIS */
38817 /* Return the mangling of TYPE if it is an extended fundamental type. */
38819 static const char *
38820 ix86_mangle_type (const_tree type
)
38822 type
= TYPE_MAIN_VARIANT (type
);
38824 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
38825 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
38828 switch (TYPE_MODE (type
))
38831 /* __float128 is "g". */
38834 /* "long double" or __float80 is "e". */
38841 /* For 32-bit code we can save PIC register setup by using
38842 __stack_chk_fail_local hidden function instead of calling
38843 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
38844 register, so it is better to call __stack_chk_fail directly. */
38846 static tree ATTRIBUTE_UNUSED
38847 ix86_stack_protect_fail (void)
38849 return TARGET_64BIT
38850 ? default_external_stack_protect_fail ()
38851 : default_hidden_stack_protect_fail ();
38854 /* Select a format to encode pointers in exception handling data. CODE
38855 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
38856 true if the symbol may be affected by dynamic relocations.
38858 ??? All x86 object file formats are capable of representing this.
38859 After all, the relocation needed is the same as for the call insn.
38860 Whether or not a particular assembler allows us to enter such, I
38861 guess we'll have to see. */
38863 asm_preferred_eh_data_format (int code
, int global
)
38867 int type
= DW_EH_PE_sdata8
;
38869 || ix86_cmodel
== CM_SMALL_PIC
38870 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
38871 type
= DW_EH_PE_sdata4
;
38872 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
38874 if (ix86_cmodel
== CM_SMALL
38875 || (ix86_cmodel
== CM_MEDIUM
&& code
))
38876 return DW_EH_PE_udata4
;
38877 return DW_EH_PE_absptr
;
38880 /* Expand copysign from SIGN to the positive value ABS_VALUE
38881 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
38884 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
38886 enum machine_mode mode
= GET_MODE (sign
);
38887 rtx sgn
= gen_reg_rtx (mode
);
38888 if (mask
== NULL_RTX
)
38890 enum machine_mode vmode
;
38892 if (mode
== SFmode
)
38894 else if (mode
== DFmode
)
38899 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
38900 if (!VECTOR_MODE_P (mode
))
38902 /* We need to generate a scalar mode mask in this case. */
38903 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38904 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38905 mask
= gen_reg_rtx (mode
);
38906 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38910 mask
= gen_rtx_NOT (mode
, mask
);
38911 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
38912 gen_rtx_AND (mode
, mask
, sign
)));
38913 emit_insn (gen_rtx_SET (VOIDmode
, result
,
38914 gen_rtx_IOR (mode
, abs_value
, sgn
)));
38917 /* Expand fabs (OP0) and return a new rtx that holds the result. The
38918 mask for masking out the sign-bit is stored in *SMASK, if that is
38921 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
38923 enum machine_mode vmode
, mode
= GET_MODE (op0
);
38926 xa
= gen_reg_rtx (mode
);
38927 if (mode
== SFmode
)
38929 else if (mode
== DFmode
)
38933 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
38934 if (!VECTOR_MODE_P (mode
))
38936 /* We need to generate a scalar mode mask in this case. */
38937 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38938 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38939 mask
= gen_reg_rtx (mode
);
38940 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38942 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
38943 gen_rtx_AND (mode
, op0
, mask
)));
38951 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
38952 swapping the operands if SWAP_OPERANDS is true. The expanded
38953 code is a forward jump to a newly created label in case the
38954 comparison is true. The generated label rtx is returned. */
38956 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
38957 bool swap_operands
)
38959 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
38969 label
= gen_label_rtx ();
38970 tmp
= gen_rtx_REG (fpcmp_mode
, FLAGS_REG
);
38971 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38972 gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
)));
38973 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
38974 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
38975 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
38976 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38977 JUMP_LABEL (tmp
) = label
;
38982 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
38983 using comparison code CODE. Operands are swapped for the comparison if
38984 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
38986 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
38987 bool swap_operands
)
38989 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
38990 enum machine_mode mode
= GET_MODE (op0
);
38991 rtx mask
= gen_reg_rtx (mode
);
39000 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
39002 emit_insn (insn (mask
, op0
, op1
,
39003 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
39007 /* Generate and return a rtx of mode MODE for 2**n where n is the number
39008 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
39010 ix86_gen_TWO52 (enum machine_mode mode
)
39012 REAL_VALUE_TYPE TWO52r
;
39015 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
39016 TWO52
= const_double_from_real_value (TWO52r
, mode
);
39017 TWO52
= force_reg (mode
, TWO52
);
39022 /* Expand SSE sequence for computing lround from OP1 storing
39025 ix86_expand_lround (rtx op0
, rtx op1
)
39027 /* C code for the stuff we're doing below:
39028 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
39031 enum machine_mode mode
= GET_MODE (op1
);
39032 const struct real_format
*fmt
;
39033 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39036 /* load nextafter (0.5, 0.0) */
39037 fmt
= REAL_MODE_FORMAT (mode
);
39038 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39039 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39041 /* adj = copysign (0.5, op1) */
39042 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39043 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
39045 /* adj = op1 + adj */
39046 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39048 /* op0 = (imode)adj */
39049 expand_fix (op0
, adj
, 0);
39052 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
39055 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
39057 /* C code for the stuff we're doing below (for do_floor):
39059 xi -= (double)xi > op1 ? 1 : 0;
39062 enum machine_mode fmode
= GET_MODE (op1
);
39063 enum machine_mode imode
= GET_MODE (op0
);
39064 rtx ireg
, freg
, label
, tmp
;
39066 /* reg = (long)op1 */
39067 ireg
= gen_reg_rtx (imode
);
39068 expand_fix (ireg
, op1
, 0);
39070 /* freg = (double)reg */
39071 freg
= gen_reg_rtx (fmode
);
39072 expand_float (freg
, ireg
, 0);
39074 /* ireg = (freg > op1) ? ireg - 1 : ireg */
39075 label
= ix86_expand_sse_compare_and_jump (UNLE
,
39076 freg
, op1
, !do_floor
);
39077 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
39078 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
39079 emit_move_insn (ireg
, tmp
);
39081 emit_label (label
);
39082 LABEL_NUSES (label
) = 1;
39084 emit_move_insn (op0
, ireg
);
39087 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
39088 result in OPERAND0. */
39090 ix86_expand_rint (rtx operand0
, rtx operand1
)
39092 /* C code for the stuff we're doing below:
39093 xa = fabs (operand1);
39094 if (!isless (xa, 2**52))
39096 xa = xa + 2**52 - 2**52;
39097 return copysign (xa, operand1);
39099 enum machine_mode mode
= GET_MODE (operand0
);
39100 rtx res
, xa
, label
, TWO52
, mask
;
39102 res
= gen_reg_rtx (mode
);
39103 emit_move_insn (res
, operand1
);
39105 /* xa = abs (operand1) */
39106 xa
= ix86_expand_sse_fabs (res
, &mask
);
39108 /* if (!isless (xa, TWO52)) goto label; */
39109 TWO52
= ix86_gen_TWO52 (mode
);
39110 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39112 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39113 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39115 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
39117 emit_label (label
);
39118 LABEL_NUSES (label
) = 1;
39120 emit_move_insn (operand0
, res
);
39123 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39126 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
39128 /* C code for the stuff we expand below.
39129 double xa = fabs (x), x2;
39130 if (!isless (xa, TWO52))
39132 xa = xa + TWO52 - TWO52;
39133 x2 = copysign (xa, x);
39142 enum machine_mode mode
= GET_MODE (operand0
);
39143 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
39145 TWO52
= ix86_gen_TWO52 (mode
);
39147 /* Temporary for holding the result, initialized to the input
39148 operand to ease control flow. */
39149 res
= gen_reg_rtx (mode
);
39150 emit_move_insn (res
, operand1
);
39152 /* xa = abs (operand1) */
39153 xa
= ix86_expand_sse_fabs (res
, &mask
);
39155 /* if (!isless (xa, TWO52)) goto label; */
39156 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39158 /* xa = xa + TWO52 - TWO52; */
39159 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39160 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39162 /* xa = copysign (xa, operand1) */
39163 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
39165 /* generate 1.0 or -1.0 */
39166 one
= force_reg (mode
,
39167 const_double_from_real_value (do_floor
39168 ? dconst1
: dconstm1
, mode
));
39170 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39171 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39172 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39173 gen_rtx_AND (mode
, one
, tmp
)));
39174 /* We always need to subtract here to preserve signed zero. */
39175 tmp
= expand_simple_binop (mode
, MINUS
,
39176 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39177 emit_move_insn (res
, tmp
);
39179 emit_label (label
);
39180 LABEL_NUSES (label
) = 1;
39182 emit_move_insn (operand0
, res
);
39185 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39188 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
39190 /* C code for the stuff we expand below.
39191 double xa = fabs (x), x2;
39192 if (!isless (xa, TWO52))
39194 x2 = (double)(long)x;
39201 if (HONOR_SIGNED_ZEROS (mode))
39202 return copysign (x2, x);
39205 enum machine_mode mode
= GET_MODE (operand0
);
39206 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
39208 TWO52
= ix86_gen_TWO52 (mode
);
39210 /* Temporary for holding the result, initialized to the input
39211 operand to ease control flow. */
39212 res
= gen_reg_rtx (mode
);
39213 emit_move_insn (res
, operand1
);
39215 /* xa = abs (operand1) */
39216 xa
= ix86_expand_sse_fabs (res
, &mask
);
39218 /* if (!isless (xa, TWO52)) goto label; */
39219 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39221 /* xa = (double)(long)x */
39222 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39223 expand_fix (xi
, res
, 0);
39224 expand_float (xa
, xi
, 0);
39227 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39229 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39230 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39231 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39232 gen_rtx_AND (mode
, one
, tmp
)));
39233 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
39234 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39235 emit_move_insn (res
, tmp
);
39237 if (HONOR_SIGNED_ZEROS (mode
))
39238 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39240 emit_label (label
);
39241 LABEL_NUSES (label
) = 1;
39243 emit_move_insn (operand0
, res
);
39246 /* Expand SSE sequence for computing round from OPERAND1 storing
39247 into OPERAND0. Sequence that works without relying on DImode truncation
39248 via cvttsd2siq that is only available on 64bit targets. */
39250 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
39252 /* C code for the stuff we expand below.
39253 double xa = fabs (x), xa2, x2;
39254 if (!isless (xa, TWO52))
39256 Using the absolute value and copying back sign makes
39257 -0.0 -> -0.0 correct.
39258 xa2 = xa + TWO52 - TWO52;
39263 else if (dxa > 0.5)
39265 x2 = copysign (xa2, x);
39268 enum machine_mode mode
= GET_MODE (operand0
);
39269 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
39271 TWO52
= ix86_gen_TWO52 (mode
);
39273 /* Temporary for holding the result, initialized to the input
39274 operand to ease control flow. */
39275 res
= gen_reg_rtx (mode
);
39276 emit_move_insn (res
, operand1
);
39278 /* xa = abs (operand1) */
39279 xa
= ix86_expand_sse_fabs (res
, &mask
);
39281 /* if (!isless (xa, TWO52)) goto label; */
39282 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39284 /* xa2 = xa + TWO52 - TWO52; */
39285 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39286 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
39288 /* dxa = xa2 - xa; */
39289 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
39291 /* generate 0.5, 1.0 and -0.5 */
39292 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
39293 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39294 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
39298 tmp
= gen_reg_rtx (mode
);
39299 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
39300 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
39301 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39302 gen_rtx_AND (mode
, one
, tmp
)));
39303 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39304 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
39305 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
39306 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39307 gen_rtx_AND (mode
, one
, tmp
)));
39308 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39310 /* res = copysign (xa2, operand1) */
39311 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
39313 emit_label (label
);
39314 LABEL_NUSES (label
) = 1;
39316 emit_move_insn (operand0
, res
);
39319 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39322 ix86_expand_trunc (rtx operand0
, rtx operand1
)
39324 /* C code for SSE variant we expand below.
39325 double xa = fabs (x), x2;
39326 if (!isless (xa, TWO52))
39328 x2 = (double)(long)x;
39329 if (HONOR_SIGNED_ZEROS (mode))
39330 return copysign (x2, x);
39333 enum machine_mode mode
= GET_MODE (operand0
);
39334 rtx xa
, xi
, TWO52
, label
, res
, mask
;
39336 TWO52
= ix86_gen_TWO52 (mode
);
39338 /* Temporary for holding the result, initialized to the input
39339 operand to ease control flow. */
39340 res
= gen_reg_rtx (mode
);
39341 emit_move_insn (res
, operand1
);
39343 /* xa = abs (operand1) */
39344 xa
= ix86_expand_sse_fabs (res
, &mask
);
39346 /* if (!isless (xa, TWO52)) goto label; */
39347 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39349 /* x = (double)(long)x */
39350 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39351 expand_fix (xi
, res
, 0);
39352 expand_float (res
, xi
, 0);
39354 if (HONOR_SIGNED_ZEROS (mode
))
39355 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39357 emit_label (label
);
39358 LABEL_NUSES (label
) = 1;
39360 emit_move_insn (operand0
, res
);
39363 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39366 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
39368 enum machine_mode mode
= GET_MODE (operand0
);
39369 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
39371 /* C code for SSE variant we expand below.
39372 double xa = fabs (x), x2;
39373 if (!isless (xa, TWO52))
39375 xa2 = xa + TWO52 - TWO52;
39379 x2 = copysign (xa2, x);
39383 TWO52
= ix86_gen_TWO52 (mode
);
39385 /* Temporary for holding the result, initialized to the input
39386 operand to ease control flow. */
39387 res
= gen_reg_rtx (mode
);
39388 emit_move_insn (res
, operand1
);
39390 /* xa = abs (operand1) */
39391 xa
= ix86_expand_sse_fabs (res
, &smask
);
39393 /* if (!isless (xa, TWO52)) goto label; */
39394 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39396 /* res = xa + TWO52 - TWO52; */
39397 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39398 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
39399 emit_move_insn (res
, tmp
);
39402 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39404 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
39405 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
39406 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
39407 gen_rtx_AND (mode
, mask
, one
)));
39408 tmp
= expand_simple_binop (mode
, MINUS
,
39409 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
39410 emit_move_insn (res
, tmp
);
39412 /* res = copysign (res, operand1) */
39413 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
39415 emit_label (label
);
39416 LABEL_NUSES (label
) = 1;
39418 emit_move_insn (operand0
, res
);
39421 /* Expand SSE sequence for computing round from OPERAND1 storing
39424 ix86_expand_round (rtx operand0
, rtx operand1
)
39426 /* C code for the stuff we're doing below:
39427 double xa = fabs (x);
39428 if (!isless (xa, TWO52))
39430 xa = (double)(long)(xa + nextafter (0.5, 0.0));
39431 return copysign (xa, x);
39433 enum machine_mode mode
= GET_MODE (operand0
);
39434 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
39435 const struct real_format
*fmt
;
39436 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39438 /* Temporary for holding the result, initialized to the input
39439 operand to ease control flow. */
39440 res
= gen_reg_rtx (mode
);
39441 emit_move_insn (res
, operand1
);
39443 TWO52
= ix86_gen_TWO52 (mode
);
39444 xa
= ix86_expand_sse_fabs (res
, &mask
);
39445 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39447 /* load nextafter (0.5, 0.0) */
39448 fmt
= REAL_MODE_FORMAT (mode
);
39449 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39450 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39452 /* xa = xa + 0.5 */
39453 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39454 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39456 /* xa = (double)(int64_t)xa */
39457 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39458 expand_fix (xi
, xa
, 0);
39459 expand_float (xa
, xi
, 0);
39461 /* res = copysign (xa, operand1) */
39462 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
39464 emit_label (label
);
39465 LABEL_NUSES (label
) = 1;
39467 emit_move_insn (operand0
, res
);
39470 /* Expand SSE sequence for computing round
39471 from OP1 storing into OP0 using sse4 round insn. */
39473 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
39475 enum machine_mode mode
= GET_MODE (op0
);
39476 rtx e1
, e2
, res
, half
;
39477 const struct real_format
*fmt
;
39478 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39479 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
39480 rtx (*gen_round
) (rtx
, rtx
, rtx
);
39485 gen_copysign
= gen_copysignsf3
;
39486 gen_round
= gen_sse4_1_roundsf2
;
39489 gen_copysign
= gen_copysigndf3
;
39490 gen_round
= gen_sse4_1_rounddf2
;
39493 gcc_unreachable ();
39496 /* round (a) = trunc (a + copysign (0.5, a)) */
39498 /* load nextafter (0.5, 0.0) */
39499 fmt
= REAL_MODE_FORMAT (mode
);
39500 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39501 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39502 half
= const_double_from_real_value (pred_half
, mode
);
39504 /* e1 = copysign (0.5, op1) */
39505 e1
= gen_reg_rtx (mode
);
39506 emit_insn (gen_copysign (e1
, half
, op1
));
39508 /* e2 = op1 + e1 */
39509 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39511 /* res = trunc (e2) */
39512 res
= gen_reg_rtx (mode
);
39513 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
39515 emit_move_insn (op0
, res
);
39519 /* Table of valid machine attributes. */
39520 static const struct attribute_spec ix86_attribute_table
[] =
39522 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
39523 affects_type_identity } */
39524 /* Stdcall attribute says callee is responsible for popping arguments
39525 if they are not variable. */
39526 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39528 /* Fastcall attribute says callee is responsible for popping arguments
39529 if they are not variable. */
39530 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39532 /* Thiscall attribute says callee is responsible for popping arguments
39533 if they are not variable. */
39534 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39536 /* Cdecl attribute says the callee is a normal C declaration */
39537 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39539 /* Regparm attribute specifies how many integer arguments are to be
39540 passed in registers. */
39541 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
39543 /* Sseregparm attribute says we are using x86_64 calling conventions
39544 for FP arguments. */
39545 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39547 /* The transactional memory builtins are implicitly regparm or fastcall
39548 depending on the ABI. Override the generic do-nothing attribute that
39549 these builtins were declared with. */
39550 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
39552 /* force_align_arg_pointer says this function realigns the stack at entry. */
39553 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
39554 false, true, true, ix86_handle_cconv_attribute
, false },
39555 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39556 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
39557 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
39558 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
39561 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39563 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39565 #ifdef SUBTARGET_ATTRIBUTE_TABLE
39566 SUBTARGET_ATTRIBUTE_TABLE
,
39568 /* ms_abi and sysv_abi calling convention function attributes. */
39569 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39570 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39571 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
39573 { "callee_pop_aggregate_return", 1, 1, false, true, true,
39574 ix86_handle_callee_pop_aggregate_return
, true },
39576 { NULL
, 0, 0, false, false, false, NULL
, false }
39579 /* Implement targetm.vectorize.builtin_vectorization_cost. */
39581 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
39583 int misalign ATTRIBUTE_UNUSED
)
39587 switch (type_of_cost
)
39590 return ix86_cost
->scalar_stmt_cost
;
39593 return ix86_cost
->scalar_load_cost
;
39596 return ix86_cost
->scalar_store_cost
;
39599 return ix86_cost
->vec_stmt_cost
;
39602 return ix86_cost
->vec_align_load_cost
;
39605 return ix86_cost
->vec_store_cost
;
39607 case vec_to_scalar
:
39608 return ix86_cost
->vec_to_scalar_cost
;
39610 case scalar_to_vec
:
39611 return ix86_cost
->scalar_to_vec_cost
;
39613 case unaligned_load
:
39614 case unaligned_store
:
39615 return ix86_cost
->vec_unalign_load_cost
;
39617 case cond_branch_taken
:
39618 return ix86_cost
->cond_taken_branch_cost
;
39620 case cond_branch_not_taken
:
39621 return ix86_cost
->cond_not_taken_branch_cost
;
39624 case vec_promote_demote
:
39625 return ix86_cost
->vec_stmt_cost
;
39627 case vec_construct
:
39628 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
39629 return elements
/ 2 + 1;
39632 gcc_unreachable ();
39636 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
39637 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
39638 insn every time. */
39640 static GTY(()) rtx vselect_insn
;
39642 /* Initialize vselect_insn. */
39645 init_vselect_insn (void)
39650 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
39651 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
39652 XVECEXP (x
, 0, i
) = const0_rtx
;
39653 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
39655 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
39657 vselect_insn
= emit_insn (x
);
39661 /* Construct (set target (vec_select op0 (parallel perm))) and
39662 return true if that's a valid instruction in the active ISA. */
39665 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
39666 unsigned nelt
, bool testing_p
)
39669 rtx x
, save_vconcat
;
39672 if (vselect_insn
== NULL_RTX
)
39673 init_vselect_insn ();
39675 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
39676 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
39677 for (i
= 0; i
< nelt
; ++i
)
39678 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
39679 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39680 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
39681 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
39682 SET_DEST (PATTERN (vselect_insn
)) = target
;
39683 icode
= recog_memoized (vselect_insn
);
39685 if (icode
>= 0 && !testing_p
)
39686 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
39688 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
39689 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
39690 INSN_CODE (vselect_insn
) = -1;
39695 /* Similar, but generate a vec_concat from op0 and op1 as well. */
39698 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
39699 const unsigned char *perm
, unsigned nelt
,
39702 enum machine_mode v2mode
;
39706 if (vselect_insn
== NULL_RTX
)
39707 init_vselect_insn ();
39709 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
39710 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39711 PUT_MODE (x
, v2mode
);
39714 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
39715 XEXP (x
, 0) = const0_rtx
;
39716 XEXP (x
, 1) = const0_rtx
;
39720 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39721 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
39724 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
39726 enum machine_mode vmode
= d
->vmode
;
39727 unsigned i
, mask
, nelt
= d
->nelt
;
39728 rtx target
, op0
, op1
, x
;
39729 rtx rperm
[32], vperm
;
39731 if (d
->one_operand_p
)
39733 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
39735 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
39737 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39742 /* This is a blend, not a permute. Elements must stay in their
39743 respective lanes. */
39744 for (i
= 0; i
< nelt
; ++i
)
39746 unsigned e
= d
->perm
[i
];
39747 if (!(e
== i
|| e
== i
+ nelt
))
39754 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39755 decision should be extracted elsewhere, so that we only try that
39756 sequence once all budget==3 options have been tried. */
39757 target
= d
->target
;
39770 for (i
= 0; i
< nelt
; ++i
)
39771 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39775 for (i
= 0; i
< 2; ++i
)
39776 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39781 for (i
= 0; i
< 4; ++i
)
39782 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39787 /* See if bytes move in pairs so we can use pblendw with
39788 an immediate argument, rather than pblendvb with a vector
39790 for (i
= 0; i
< 16; i
+= 2)
39791 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39794 for (i
= 0; i
< nelt
; ++i
)
39795 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39798 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39799 vperm
= force_reg (vmode
, vperm
);
39801 if (GET_MODE_SIZE (vmode
) == 16)
39802 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39804 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39805 if (target
!= d
->target
)
39806 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
39810 for (i
= 0; i
< 8; ++i
)
39811 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39816 target
= gen_reg_rtx (vmode
);
39817 op0
= gen_lowpart (vmode
, op0
);
39818 op1
= gen_lowpart (vmode
, op1
);
39822 /* See if bytes move in pairs. If not, vpblendvb must be used. */
39823 for (i
= 0; i
< 32; i
+= 2)
39824 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39826 /* See if bytes move in quadruplets. If yes, vpblendd
39827 with immediate can be used. */
39828 for (i
= 0; i
< 32; i
+= 4)
39829 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
39833 /* See if bytes move the same in both lanes. If yes,
39834 vpblendw with immediate can be used. */
39835 for (i
= 0; i
< 16; i
+= 2)
39836 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
39839 /* Use vpblendw. */
39840 for (i
= 0; i
< 16; ++i
)
39841 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
39846 /* Use vpblendd. */
39847 for (i
= 0; i
< 8; ++i
)
39848 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
39853 /* See if words move in pairs. If yes, vpblendd can be used. */
39854 for (i
= 0; i
< 16; i
+= 2)
39855 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39859 /* See if words move the same in both lanes. If not,
39860 vpblendvb must be used. */
39861 for (i
= 0; i
< 8; i
++)
39862 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
39864 /* Use vpblendvb. */
39865 for (i
= 0; i
< 32; ++i
)
39866 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
39870 target
= gen_reg_rtx (vmode
);
39871 op0
= gen_lowpart (vmode
, op0
);
39872 op1
= gen_lowpart (vmode
, op1
);
39873 goto finish_pblendvb
;
39876 /* Use vpblendw. */
39877 for (i
= 0; i
< 16; ++i
)
39878 mask
|= (d
->perm
[i
] >= 16) << i
;
39882 /* Use vpblendd. */
39883 for (i
= 0; i
< 8; ++i
)
39884 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39889 /* Use vpblendd. */
39890 for (i
= 0; i
< 4; ++i
)
39891 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39896 gcc_unreachable ();
39899 /* This matches five different patterns with the different modes. */
39900 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
39901 x
= gen_rtx_SET (VOIDmode
, target
, x
);
39903 if (target
!= d
->target
)
39904 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
39909 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39910 in terms of the variable form of vpermilps.
39912 Note that we will have already failed the immediate input vpermilps,
39913 which requires that the high and low part shuffle be identical; the
39914 variable form doesn't require that. */
39917 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
39919 rtx rperm
[8], vperm
;
39922 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
39925 /* We can only permute within the 128-bit lane. */
39926 for (i
= 0; i
< 8; ++i
)
39928 unsigned e
= d
->perm
[i
];
39929 if (i
< 4 ? e
>= 4 : e
< 4)
39936 for (i
= 0; i
< 8; ++i
)
39938 unsigned e
= d
->perm
[i
];
39940 /* Within each 128-bit lane, the elements of op0 are numbered
39941 from 0 and the elements of op1 are numbered from 4. */
39947 rperm
[i
] = GEN_INT (e
);
39950 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
39951 vperm
= force_reg (V8SImode
, vperm
);
39952 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
39957 /* Return true if permutation D can be performed as VMODE permutation
39961 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
39963 unsigned int i
, j
, chunk
;
39965 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
39966 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
39967 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
39970 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
39973 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
39974 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
39975 if (d
->perm
[i
] & (chunk
- 1))
39978 for (j
= 1; j
< chunk
; ++j
)
39979 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
39985 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39986 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
39989 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
39991 unsigned i
, nelt
, eltsz
, mask
;
39992 unsigned char perm
[32];
39993 enum machine_mode vmode
= V16QImode
;
39994 rtx rperm
[32], vperm
, target
, op0
, op1
;
39998 if (!d
->one_operand_p
)
40000 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
40003 && valid_perm_using_mode_p (V2TImode
, d
))
40008 /* Use vperm2i128 insn. The pattern uses
40009 V4DImode instead of V2TImode. */
40010 target
= d
->target
;
40011 if (d
->vmode
!= V4DImode
)
40012 target
= gen_reg_rtx (V4DImode
);
40013 op0
= gen_lowpart (V4DImode
, d
->op0
);
40014 op1
= gen_lowpart (V4DImode
, d
->op1
);
40016 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
40017 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
40018 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
40019 if (target
!= d
->target
)
40020 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40028 if (GET_MODE_SIZE (d
->vmode
) == 16)
40033 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40038 /* V4DImode should be already handled through
40039 expand_vselect by vpermq instruction. */
40040 gcc_assert (d
->vmode
!= V4DImode
);
40043 if (d
->vmode
== V8SImode
40044 || d
->vmode
== V16HImode
40045 || d
->vmode
== V32QImode
)
40047 /* First see if vpermq can be used for
40048 V8SImode/V16HImode/V32QImode. */
40049 if (valid_perm_using_mode_p (V4DImode
, d
))
40051 for (i
= 0; i
< 4; i
++)
40052 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
40055 target
= gen_reg_rtx (V4DImode
);
40056 if (expand_vselect (target
, gen_lowpart (V4DImode
, d
->op0
),
40059 emit_move_insn (d
->target
,
40060 gen_lowpart (d
->vmode
, target
));
40066 /* Next see if vpermd can be used. */
40067 if (valid_perm_using_mode_p (V8SImode
, d
))
40070 /* Or if vpermps can be used. */
40071 else if (d
->vmode
== V8SFmode
)
40074 if (vmode
== V32QImode
)
40076 /* vpshufb only works intra lanes, it is not
40077 possible to shuffle bytes in between the lanes. */
40078 for (i
= 0; i
< nelt
; ++i
)
40079 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
40090 if (vmode
== V8SImode
)
40091 for (i
= 0; i
< 8; ++i
)
40092 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
40095 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40096 if (!d
->one_operand_p
)
40097 mask
= 2 * nelt
- 1;
40098 else if (vmode
== V16QImode
)
40101 mask
= nelt
/ 2 - 1;
40103 for (i
= 0; i
< nelt
; ++i
)
40105 unsigned j
, e
= d
->perm
[i
] & mask
;
40106 for (j
= 0; j
< eltsz
; ++j
)
40107 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
40111 vperm
= gen_rtx_CONST_VECTOR (vmode
,
40112 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
40113 vperm
= force_reg (vmode
, vperm
);
40115 target
= d
->target
;
40116 if (d
->vmode
!= vmode
)
40117 target
= gen_reg_rtx (vmode
);
40118 op0
= gen_lowpart (vmode
, d
->op0
);
40119 if (d
->one_operand_p
)
40121 if (vmode
== V16QImode
)
40122 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
40123 else if (vmode
== V32QImode
)
40124 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
40125 else if (vmode
== V8SFmode
)
40126 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
40128 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
40132 op1
= gen_lowpart (vmode
, d
->op1
);
40133 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
40135 if (target
!= d
->target
)
40136 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40141 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
40142 in a single instruction. */
40145 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
40147 unsigned i
, nelt
= d
->nelt
;
40148 unsigned char perm2
[MAX_VECT_LEN
];
40150 /* Check plain VEC_SELECT first, because AVX has instructions that could
40151 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
40152 input where SEL+CONCAT may not. */
40153 if (d
->one_operand_p
)
40155 int mask
= nelt
- 1;
40156 bool identity_perm
= true;
40157 bool broadcast_perm
= true;
40159 for (i
= 0; i
< nelt
; i
++)
40161 perm2
[i
] = d
->perm
[i
] & mask
;
40163 identity_perm
= false;
40165 broadcast_perm
= false;
40171 emit_move_insn (d
->target
, d
->op0
);
40174 else if (broadcast_perm
&& TARGET_AVX2
)
40176 /* Use vpbroadcast{b,w,d}. */
40177 rtx (*gen
) (rtx
, rtx
) = NULL
;
40181 gen
= gen_avx2_pbroadcastv32qi_1
;
40184 gen
= gen_avx2_pbroadcastv16hi_1
;
40187 gen
= gen_avx2_pbroadcastv8si_1
;
40190 gen
= gen_avx2_pbroadcastv16qi
;
40193 gen
= gen_avx2_pbroadcastv8hi
;
40196 gen
= gen_avx2_vec_dupv8sf_1
;
40198 /* For other modes prefer other shuffles this function creates. */
40204 emit_insn (gen (d
->target
, d
->op0
));
40209 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
40212 /* There are plenty of patterns in sse.md that are written for
40213 SEL+CONCAT and are not replicated for a single op. Perhaps
40214 that should be changed, to avoid the nastiness here. */
40216 /* Recognize interleave style patterns, which means incrementing
40217 every other permutation operand. */
40218 for (i
= 0; i
< nelt
; i
+= 2)
40220 perm2
[i
] = d
->perm
[i
] & mask
;
40221 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
40223 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40227 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
40230 for (i
= 0; i
< nelt
; i
+= 4)
40232 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
40233 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
40234 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
40235 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
40238 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40244 /* Finally, try the fully general two operand permute. */
40245 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
40249 /* Recognize interleave style patterns with reversed operands. */
40250 if (!d
->one_operand_p
)
40252 for (i
= 0; i
< nelt
; ++i
)
40254 unsigned e
= d
->perm
[i
];
40262 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
40267 /* Try the SSE4.1 blend variable merge instructions. */
40268 if (expand_vec_perm_blend (d
))
40271 /* Try one of the AVX vpermil variable permutations. */
40272 if (expand_vec_perm_vpermil (d
))
40275 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
40276 vpshufb, vpermd, vpermps or vpermq variable permutation. */
40277 if (expand_vec_perm_pshufb (d
))
40283 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40284 in terms of a pair of pshuflw + pshufhw instructions. */
40287 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
40289 unsigned char perm2
[MAX_VECT_LEN
];
40293 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
40296 /* The two permutations only operate in 64-bit lanes. */
40297 for (i
= 0; i
< 4; ++i
)
40298 if (d
->perm
[i
] >= 4)
40300 for (i
= 4; i
< 8; ++i
)
40301 if (d
->perm
[i
] < 4)
40307 /* Emit the pshuflw. */
40308 memcpy (perm2
, d
->perm
, 4);
40309 for (i
= 4; i
< 8; ++i
)
40311 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
40314 /* Emit the pshufhw. */
40315 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
40316 for (i
= 0; i
< 4; ++i
)
40318 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
40324 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40325 the permutation using the SSSE3 palignr instruction. This succeeds
40326 when all of the elements in PERM fit within one vector and we merely
40327 need to shift them down so that a single vector permutation has a
40328 chance to succeed. */
40331 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
40333 unsigned i
, nelt
= d
->nelt
;
40337 struct expand_vec_perm_d dcopy
;
40339 /* Even with AVX, palignr only operates on 128-bit vectors. */
40340 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40343 min
= nelt
, max
= 0;
40344 for (i
= 0; i
< nelt
; ++i
)
40346 unsigned e
= d
->perm
[i
];
40352 if (min
== 0 || max
- min
>= nelt
)
40355 /* Given that we have SSSE3, we know we'll be able to implement the
40356 single operand permutation after the palignr with pshufb. */
40361 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
40362 target
= gen_reg_rtx (TImode
);
40363 emit_insn (gen_ssse3_palignrti (target
, gen_lowpart (TImode
, d
->op1
),
40364 gen_lowpart (TImode
, d
->op0
), shift
));
40366 dcopy
.op0
= dcopy
.op1
= gen_lowpart (d
->vmode
, target
);
40367 dcopy
.one_operand_p
= true;
40370 for (i
= 0; i
< nelt
; ++i
)
40372 unsigned e
= dcopy
.perm
[i
] - min
;
40378 /* Test for the degenerate case where the alignment by itself
40379 produces the desired permutation. */
40382 emit_move_insn (d
->target
, dcopy
.op0
);
40386 ok
= expand_vec_perm_1 (&dcopy
);
40392 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
40394 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40395 a two vector permutation into a single vector permutation by using
40396 an interleave operation to merge the vectors. */
40399 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
40401 struct expand_vec_perm_d dremap
, dfinal
;
40402 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40403 unsigned HOST_WIDE_INT contents
;
40404 unsigned char remap
[2 * MAX_VECT_LEN
];
40406 bool ok
, same_halves
= false;
40408 if (GET_MODE_SIZE (d
->vmode
) == 16)
40410 if (d
->one_operand_p
)
40413 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40417 /* For 32-byte modes allow even d->one_operand_p.
40418 The lack of cross-lane shuffling in some instructions
40419 might prevent a single insn shuffle. */
40421 dfinal
.testing_p
= true;
40422 /* If expand_vec_perm_interleave3 can expand this into
40423 a 3 insn sequence, give up and let it be expanded as
40424 3 insn sequence. While that is one insn longer,
40425 it doesn't need a memory operand and in the common
40426 case that both interleave low and high permutations
40427 with the same operands are adjacent needs 4 insns
40428 for both after CSE. */
40429 if (expand_vec_perm_interleave3 (&dfinal
))
40435 /* Examine from whence the elements come. */
40437 for (i
= 0; i
< nelt
; ++i
)
40438 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
40440 memset (remap
, 0xff, sizeof (remap
));
40443 if (GET_MODE_SIZE (d
->vmode
) == 16)
40445 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
40447 /* Split the two input vectors into 4 halves. */
40448 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
40453 /* If the elements from the low halves use interleave low, and similarly
40454 for interleave high. If the elements are from mis-matched halves, we
40455 can use shufps for V4SF/V4SI or do a DImode shuffle. */
40456 if ((contents
& (h1
| h3
)) == contents
)
40459 for (i
= 0; i
< nelt2
; ++i
)
40462 remap
[i
+ nelt
] = i
* 2 + 1;
40463 dremap
.perm
[i
* 2] = i
;
40464 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40466 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40467 dremap
.vmode
= V4SFmode
;
40469 else if ((contents
& (h2
| h4
)) == contents
)
40472 for (i
= 0; i
< nelt2
; ++i
)
40474 remap
[i
+ nelt2
] = i
* 2;
40475 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
40476 dremap
.perm
[i
* 2] = i
+ nelt2
;
40477 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
40479 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40480 dremap
.vmode
= V4SFmode
;
40482 else if ((contents
& (h1
| h4
)) == contents
)
40485 for (i
= 0; i
< nelt2
; ++i
)
40488 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
40489 dremap
.perm
[i
] = i
;
40490 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
40495 dremap
.vmode
= V2DImode
;
40497 dremap
.perm
[0] = 0;
40498 dremap
.perm
[1] = 3;
40501 else if ((contents
& (h2
| h3
)) == contents
)
40504 for (i
= 0; i
< nelt2
; ++i
)
40506 remap
[i
+ nelt2
] = i
;
40507 remap
[i
+ nelt
] = i
+ nelt2
;
40508 dremap
.perm
[i
] = i
+ nelt2
;
40509 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
40514 dremap
.vmode
= V2DImode
;
40516 dremap
.perm
[0] = 1;
40517 dremap
.perm
[1] = 2;
40525 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
40526 unsigned HOST_WIDE_INT q
[8];
40527 unsigned int nonzero_halves
[4];
40529 /* Split the two input vectors into 8 quarters. */
40530 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
40531 for (i
= 1; i
< 8; ++i
)
40532 q
[i
] = q
[0] << (nelt4
* i
);
40533 for (i
= 0; i
< 4; ++i
)
40534 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
40536 nonzero_halves
[nzcnt
] = i
;
40542 gcc_assert (d
->one_operand_p
);
40543 nonzero_halves
[1] = nonzero_halves
[0];
40544 same_halves
= true;
40546 else if (d
->one_operand_p
)
40548 gcc_assert (nonzero_halves
[0] == 0);
40549 gcc_assert (nonzero_halves
[1] == 1);
40554 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
40556 /* Attempt to increase the likelihood that dfinal
40557 shuffle will be intra-lane. */
40558 char tmph
= nonzero_halves
[0];
40559 nonzero_halves
[0] = nonzero_halves
[1];
40560 nonzero_halves
[1] = tmph
;
40563 /* vperm2f128 or vperm2i128. */
40564 for (i
= 0; i
< nelt2
; ++i
)
40566 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
40567 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
40568 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
40569 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
40572 if (d
->vmode
!= V8SFmode
40573 && d
->vmode
!= V4DFmode
40574 && d
->vmode
!= V8SImode
)
40576 dremap
.vmode
= V8SImode
;
40578 for (i
= 0; i
< 4; ++i
)
40580 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
40581 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
40585 else if (d
->one_operand_p
)
40587 else if (TARGET_AVX2
40588 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
40591 for (i
= 0; i
< nelt4
; ++i
)
40594 remap
[i
+ nelt
] = i
* 2 + 1;
40595 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
40596 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
40597 dremap
.perm
[i
* 2] = i
;
40598 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40599 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
40600 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
40603 else if (TARGET_AVX2
40604 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
40607 for (i
= 0; i
< nelt4
; ++i
)
40609 remap
[i
+ nelt4
] = i
* 2;
40610 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
40611 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
40612 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
40613 dremap
.perm
[i
* 2] = i
+ nelt4
;
40614 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
40615 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
40616 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
40623 /* Use the remapping array set up above to move the elements from their
40624 swizzled locations into their final destinations. */
40626 for (i
= 0; i
< nelt
; ++i
)
40628 unsigned e
= remap
[d
->perm
[i
]];
40629 gcc_assert (e
< nelt
);
40630 /* If same_halves is true, both halves of the remapped vector are the
40631 same. Avoid cross-lane accesses if possible. */
40632 if (same_halves
&& i
>= nelt2
)
40634 gcc_assert (e
< nelt2
);
40635 dfinal
.perm
[i
] = e
+ nelt2
;
40638 dfinal
.perm
[i
] = e
;
40640 dremap
.target
= gen_reg_rtx (dremap
.vmode
);
40641 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40642 dfinal
.op1
= dfinal
.op0
;
40643 dfinal
.one_operand_p
= true;
40645 /* Test if the final remap can be done with a single insn. For V4SFmode or
40646 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
40648 ok
= expand_vec_perm_1 (&dfinal
);
40649 seq
= get_insns ();
40658 if (dremap
.vmode
!= dfinal
.vmode
)
40660 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
40661 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
40664 ok
= expand_vec_perm_1 (&dremap
);
40671 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40672 a single vector cross-lane permutation into vpermq followed
40673 by any of the single insn permutations. */
40676 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
40678 struct expand_vec_perm_d dremap
, dfinal
;
40679 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
40680 unsigned contents
[2];
40684 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
40685 && d
->one_operand_p
))
40690 for (i
= 0; i
< nelt2
; ++i
)
40692 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
40693 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
40696 for (i
= 0; i
< 2; ++i
)
40698 unsigned int cnt
= 0;
40699 for (j
= 0; j
< 4; ++j
)
40700 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
40708 dremap
.vmode
= V4DImode
;
40710 dremap
.target
= gen_reg_rtx (V4DImode
);
40711 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
40712 dremap
.op1
= dremap
.op0
;
40713 dremap
.one_operand_p
= true;
40714 for (i
= 0; i
< 2; ++i
)
40716 unsigned int cnt
= 0;
40717 for (j
= 0; j
< 4; ++j
)
40718 if ((contents
[i
] & (1u << j
)) != 0)
40719 dremap
.perm
[2 * i
+ cnt
++] = j
;
40720 for (; cnt
< 2; ++cnt
)
40721 dremap
.perm
[2 * i
+ cnt
] = 0;
40725 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40726 dfinal
.op1
= dfinal
.op0
;
40727 dfinal
.one_operand_p
= true;
40728 for (i
= 0, j
= 0; i
< nelt
; ++i
)
40732 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
40733 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
40735 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
40736 dfinal
.perm
[i
] |= nelt4
;
40738 gcc_unreachable ();
40741 ok
= expand_vec_perm_1 (&dremap
);
40744 ok
= expand_vec_perm_1 (&dfinal
);
40750 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
40751 a vector permutation using two instructions, vperm2f128 resp.
40752 vperm2i128 followed by any single in-lane permutation. */
40755 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
40757 struct expand_vec_perm_d dfirst
, dsecond
;
40758 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
40762 || GET_MODE_SIZE (d
->vmode
) != 32
40763 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40767 dsecond
.one_operand_p
= false;
40768 dsecond
.testing_p
= true;
40770 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40771 immediate. For perm < 16 the second permutation uses
40772 d->op0 as first operand, for perm >= 16 it uses d->op1
40773 as first operand. The second operand is the result of
40775 for (perm
= 0; perm
< 32; perm
++)
40777 /* Ignore permutations which do not move anything cross-lane. */
40780 /* The second shuffle for e.g. V4DFmode has
40781 0123 and ABCD operands.
40782 Ignore AB23, as 23 is already in the second lane
40783 of the first operand. */
40784 if ((perm
& 0xc) == (1 << 2)) continue;
40785 /* And 01CD, as 01 is in the first lane of the first
40787 if ((perm
& 3) == 0) continue;
40788 /* And 4567, as then the vperm2[fi]128 doesn't change
40789 anything on the original 4567 second operand. */
40790 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40794 /* The second shuffle for e.g. V4DFmode has
40795 4567 and ABCD operands.
40796 Ignore AB67, as 67 is already in the second lane
40797 of the first operand. */
40798 if ((perm
& 0xc) == (3 << 2)) continue;
40799 /* And 45CD, as 45 is in the first lane of the first
40801 if ((perm
& 3) == 2) continue;
40802 /* And 0123, as then the vperm2[fi]128 doesn't change
40803 anything on the original 0123 first operand. */
40804 if ((perm
& 0xf) == (1 << 2)) continue;
40807 for (i
= 0; i
< nelt
; i
++)
40809 j
= d
->perm
[i
] / nelt2
;
40810 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
40811 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
40812 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
40813 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
40821 ok
= expand_vec_perm_1 (&dsecond
);
40832 /* Found a usable second shuffle. dfirst will be
40833 vperm2f128 on d->op0 and d->op1. */
40834 dsecond
.testing_p
= false;
40836 dfirst
.target
= gen_reg_rtx (d
->vmode
);
40837 for (i
= 0; i
< nelt
; i
++)
40838 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
40839 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
40841 ok
= expand_vec_perm_1 (&dfirst
);
40844 /* And dsecond is some single insn shuffle, taking
40845 d->op0 and result of vperm2f128 (if perm < 16) or
40846 d->op1 and result of vperm2f128 (otherwise). */
40847 dsecond
.op1
= dfirst
.target
;
40849 dsecond
.op0
= dfirst
.op1
;
40851 ok
= expand_vec_perm_1 (&dsecond
);
40857 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
40858 if (d
->one_operand_p
)
40865 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40866 a two vector permutation using 2 intra-lane interleave insns
40867 and cross-lane shuffle for 32-byte vectors. */
40870 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
40873 rtx (*gen
) (rtx
, rtx
, rtx
);
40875 if (d
->one_operand_p
)
40877 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
40879 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
40885 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
40887 for (i
= 0; i
< nelt
; i
+= 2)
40888 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
40889 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
40899 gen
= gen_vec_interleave_highv32qi
;
40901 gen
= gen_vec_interleave_lowv32qi
;
40905 gen
= gen_vec_interleave_highv16hi
;
40907 gen
= gen_vec_interleave_lowv16hi
;
40911 gen
= gen_vec_interleave_highv8si
;
40913 gen
= gen_vec_interleave_lowv8si
;
40917 gen
= gen_vec_interleave_highv4di
;
40919 gen
= gen_vec_interleave_lowv4di
;
40923 gen
= gen_vec_interleave_highv8sf
;
40925 gen
= gen_vec_interleave_lowv8sf
;
40929 gen
= gen_vec_interleave_highv4df
;
40931 gen
= gen_vec_interleave_lowv4df
;
40934 gcc_unreachable ();
40937 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
40941 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
40942 a single vector permutation using a single intra-lane vector
40943 permutation, vperm2f128 swapping the lanes and vblend* insn blending
40944 the non-swapped and swapped vectors together. */
40947 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
40949 struct expand_vec_perm_d dfirst
, dsecond
;
40950 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40953 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
40957 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
40958 || !d
->one_operand_p
)
40962 for (i
= 0; i
< nelt
; i
++)
40963 dfirst
.perm
[i
] = 0xff;
40964 for (i
= 0, msk
= 0; i
< nelt
; i
++)
40966 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
40967 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
40969 dfirst
.perm
[j
] = d
->perm
[i
];
40973 for (i
= 0; i
< nelt
; i
++)
40974 if (dfirst
.perm
[i
] == 0xff)
40975 dfirst
.perm
[i
] = i
;
40978 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40981 ok
= expand_vec_perm_1 (&dfirst
);
40982 seq
= get_insns ();
40994 dsecond
.op0
= dfirst
.target
;
40995 dsecond
.op1
= dfirst
.target
;
40996 dsecond
.one_operand_p
= true;
40997 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40998 for (i
= 0; i
< nelt
; i
++)
40999 dsecond
.perm
[i
] = i
^ nelt2
;
41001 ok
= expand_vec_perm_1 (&dsecond
);
41004 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
41005 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
41009 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
41010 permutation using two vperm2f128, followed by a vshufpd insn blending
41011 the two vectors together. */
41014 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
41016 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
41019 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
41029 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
41030 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
41031 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
41032 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
41033 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
41034 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
41035 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
41036 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
41037 dthird
.perm
[0] = (d
->perm
[0] % 2);
41038 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
41039 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
41040 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
41042 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
41043 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
41044 dthird
.op0
= dfirst
.target
;
41045 dthird
.op1
= dsecond
.target
;
41046 dthird
.one_operand_p
= false;
41048 canonicalize_perm (&dfirst
);
41049 canonicalize_perm (&dsecond
);
41051 ok
= expand_vec_perm_1 (&dfirst
)
41052 && expand_vec_perm_1 (&dsecond
)
41053 && expand_vec_perm_1 (&dthird
);
41060 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
41061 permutation with two pshufb insns and an ior. We should have already
41062 failed all two instruction sequences. */
41065 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
41067 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
41068 unsigned int i
, nelt
, eltsz
;
41070 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
41072 gcc_assert (!d
->one_operand_p
);
41075 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41077 /* Generate two permutation masks. If the required element is within
41078 the given vector it is shuffled into the proper lane. If the required
41079 element is in the other vector, force a zero into the lane by setting
41080 bit 7 in the permutation mask. */
41081 m128
= GEN_INT (-128);
41082 for (i
= 0; i
< nelt
; ++i
)
41084 unsigned j
, e
= d
->perm
[i
];
41085 unsigned which
= (e
>= nelt
);
41089 for (j
= 0; j
< eltsz
; ++j
)
41091 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
41092 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
41096 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
41097 vperm
= force_reg (V16QImode
, vperm
);
41099 l
= gen_reg_rtx (V16QImode
);
41100 op
= gen_lowpart (V16QImode
, d
->op0
);
41101 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
41103 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
41104 vperm
= force_reg (V16QImode
, vperm
);
41106 h
= gen_reg_rtx (V16QImode
);
41107 op
= gen_lowpart (V16QImode
, d
->op1
);
41108 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
41111 if (d
->vmode
!= V16QImode
)
41112 op
= gen_reg_rtx (V16QImode
);
41113 emit_insn (gen_iorv16qi3 (op
, l
, h
));
41114 if (op
!= d
->target
)
41115 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41120 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
41121 with two vpshufb insns, vpermq and vpor. We should have already failed
41122 all two or three instruction sequences. */
41125 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
41127 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
41128 unsigned int i
, nelt
, eltsz
;
41131 || !d
->one_operand_p
41132 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41139 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41141 /* Generate two permutation masks. If the required element is within
41142 the same lane, it is shuffled in. If the required element from the
41143 other lane, force a zero by setting bit 7 in the permutation mask.
41144 In the other mask the mask has non-negative elements if element
41145 is requested from the other lane, but also moved to the other lane,
41146 so that the result of vpshufb can have the two V2TImode halves
41148 m128
= GEN_INT (-128);
41149 for (i
= 0; i
< nelt
; ++i
)
41151 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41152 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41154 for (j
= 0; j
< eltsz
; ++j
)
41156 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
41157 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
41161 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41162 vperm
= force_reg (V32QImode
, vperm
);
41164 h
= gen_reg_rtx (V32QImode
);
41165 op
= gen_lowpart (V32QImode
, d
->op0
);
41166 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41168 /* Swap the 128-byte lanes of h into hp. */
41169 hp
= gen_reg_rtx (V4DImode
);
41170 op
= gen_lowpart (V4DImode
, h
);
41171 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
41174 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41175 vperm
= force_reg (V32QImode
, vperm
);
41177 l
= gen_reg_rtx (V32QImode
);
41178 op
= gen_lowpart (V32QImode
, d
->op0
);
41179 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41182 if (d
->vmode
!= V32QImode
)
41183 op
= gen_reg_rtx (V32QImode
);
41184 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
41185 if (op
!= d
->target
)
41186 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41191 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
41192 and extract-odd permutations of two V32QImode and V16QImode operand
41193 with two vpshufb insns, vpor and vpermq. We should have already
41194 failed all two or three instruction sequences. */
41197 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
41199 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
41200 unsigned int i
, nelt
, eltsz
;
41203 || d
->one_operand_p
41204 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41207 for (i
= 0; i
< d
->nelt
; ++i
)
41208 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
41215 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41217 /* Generate two permutation masks. In the first permutation mask
41218 the first quarter will contain indexes for the first half
41219 of the op0, the second quarter will contain bit 7 set, third quarter
41220 will contain indexes for the second half of the op0 and the
41221 last quarter bit 7 set. In the second permutation mask
41222 the first quarter will contain bit 7 set, the second quarter
41223 indexes for the first half of the op1, the third quarter bit 7 set
41224 and last quarter indexes for the second half of the op1.
41225 I.e. the first mask e.g. for V32QImode extract even will be:
41226 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
41227 (all values masked with 0xf except for -128) and second mask
41228 for extract even will be
41229 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
41230 m128
= GEN_INT (-128);
41231 for (i
= 0; i
< nelt
; ++i
)
41233 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41234 unsigned which
= d
->perm
[i
] >= nelt
;
41235 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
41237 for (j
= 0; j
< eltsz
; ++j
)
41239 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
41240 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
41244 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41245 vperm
= force_reg (V32QImode
, vperm
);
41247 l
= gen_reg_rtx (V32QImode
);
41248 op
= gen_lowpart (V32QImode
, d
->op0
);
41249 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41251 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41252 vperm
= force_reg (V32QImode
, vperm
);
41254 h
= gen_reg_rtx (V32QImode
);
41255 op
= gen_lowpart (V32QImode
, d
->op1
);
41256 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41258 ior
= gen_reg_rtx (V32QImode
);
41259 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
41261 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
41262 op
= gen_reg_rtx (V4DImode
);
41263 ior
= gen_lowpart (V4DImode
, ior
);
41264 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
41265 const1_rtx
, GEN_INT (3)));
41266 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41271 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
41272 and extract-odd permutations. */
41275 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
41277 rtx t1
, t2
, t3
, t4
, t5
;
41282 t1
= gen_reg_rtx (V4DFmode
);
41283 t2
= gen_reg_rtx (V4DFmode
);
41285 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41286 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41287 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41289 /* Now an unpck[lh]pd will produce the result required. */
41291 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
41293 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
41299 int mask
= odd
? 0xdd : 0x88;
41301 t1
= gen_reg_rtx (V8SFmode
);
41302 t2
= gen_reg_rtx (V8SFmode
);
41303 t3
= gen_reg_rtx (V8SFmode
);
41305 /* Shuffle within the 128-bit lanes to produce:
41306 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
41307 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
41310 /* Shuffle the lanes around to produce:
41311 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
41312 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
41315 /* Shuffle within the 128-bit lanes to produce:
41316 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
41317 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
41319 /* Shuffle within the 128-bit lanes to produce:
41320 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
41321 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
41323 /* Shuffle the lanes around to produce:
41324 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
41325 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
41334 /* These are always directly implementable by expand_vec_perm_1. */
41335 gcc_unreachable ();
41339 return expand_vec_perm_pshufb2 (d
);
41342 /* We need 2*log2(N)-1 operations to achieve odd/even
41343 with interleave. */
41344 t1
= gen_reg_rtx (V8HImode
);
41345 t2
= gen_reg_rtx (V8HImode
);
41346 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
41347 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
41348 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
41349 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
41351 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
41353 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
41360 return expand_vec_perm_pshufb2 (d
);
41363 t1
= gen_reg_rtx (V16QImode
);
41364 t2
= gen_reg_rtx (V16QImode
);
41365 t3
= gen_reg_rtx (V16QImode
);
41366 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
41367 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
41368 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
41369 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
41370 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
41371 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
41373 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
41375 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
41382 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
41387 struct expand_vec_perm_d d_copy
= *d
;
41388 d_copy
.vmode
= V4DFmode
;
41389 d_copy
.target
= gen_reg_rtx (V4DFmode
);
41390 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
41391 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
41392 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41395 emit_move_insn (d
->target
,
41396 gen_lowpart (V4DImode
, d_copy
.target
));
41402 t1
= gen_reg_rtx (V4DImode
);
41403 t2
= gen_reg_rtx (V4DImode
);
41405 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41406 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41407 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41409 /* Now an vpunpck[lh]qdq will produce the result required. */
41411 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
41413 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
41420 struct expand_vec_perm_d d_copy
= *d
;
41421 d_copy
.vmode
= V8SFmode
;
41422 d_copy
.target
= gen_reg_rtx (V8SFmode
);
41423 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
41424 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
41425 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41428 emit_move_insn (d
->target
,
41429 gen_lowpart (V8SImode
, d_copy
.target
));
41435 t1
= gen_reg_rtx (V8SImode
);
41436 t2
= gen_reg_rtx (V8SImode
);
41437 t3
= gen_reg_rtx (V4DImode
);
41438 t4
= gen_reg_rtx (V4DImode
);
41439 t5
= gen_reg_rtx (V4DImode
);
41441 /* Shuffle the lanes around into
41442 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
41443 emit_insn (gen_avx2_permv2ti (t3
, gen_lowpart (V4DImode
, d
->op0
),
41444 gen_lowpart (V4DImode
, d
->op1
),
41446 emit_insn (gen_avx2_permv2ti (t4
, gen_lowpart (V4DImode
, d
->op0
),
41447 gen_lowpart (V4DImode
, d
->op1
),
41450 /* Swap the 2nd and 3rd position in each lane into
41451 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
41452 emit_insn (gen_avx2_pshufdv3 (t1
, gen_lowpart (V8SImode
, t3
),
41453 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41454 emit_insn (gen_avx2_pshufdv3 (t2
, gen_lowpart (V8SImode
, t4
),
41455 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41457 /* Now an vpunpck[lh]qdq will produce
41458 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
41460 t3
= gen_avx2_interleave_highv4di (t5
, gen_lowpart (V4DImode
, t1
),
41461 gen_lowpart (V4DImode
, t2
));
41463 t3
= gen_avx2_interleave_lowv4di (t5
, gen_lowpart (V4DImode
, t1
),
41464 gen_lowpart (V4DImode
, t2
));
41466 emit_move_insn (d
->target
, gen_lowpart (V8SImode
, t5
));
41470 gcc_unreachable ();
41476 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41477 extract-even and extract-odd permutations. */
41480 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
41482 unsigned i
, odd
, nelt
= d
->nelt
;
41485 if (odd
!= 0 && odd
!= 1)
41488 for (i
= 1; i
< nelt
; ++i
)
41489 if (d
->perm
[i
] != 2 * i
+ odd
)
41492 return expand_vec_perm_even_odd_1 (d
, odd
);
41495 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
41496 permutations. We assume that expand_vec_perm_1 has already failed. */
41499 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
41501 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
41502 enum machine_mode vmode
= d
->vmode
;
41503 unsigned char perm2
[4];
41504 rtx op0
= d
->op0
, dest
;
41511 /* These are special-cased in sse.md so that we can optionally
41512 use the vbroadcast instruction. They expand to two insns
41513 if the input happens to be in a register. */
41514 gcc_unreachable ();
41520 /* These are always implementable using standard shuffle patterns. */
41521 gcc_unreachable ();
41525 /* These can be implemented via interleave. We save one insn by
41526 stopping once we have promoted to V4SImode and then use pshufd. */
41530 rtx (*gen
) (rtx
, rtx
, rtx
)
41531 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
41532 : gen_vec_interleave_lowv8hi
;
41536 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
41537 : gen_vec_interleave_highv8hi
;
41542 dest
= gen_reg_rtx (vmode
);
41543 emit_insn (gen (dest
, op0
, op0
));
41544 vmode
= get_mode_wider_vector (vmode
);
41545 op0
= gen_lowpart (vmode
, dest
);
41547 while (vmode
!= V4SImode
);
41549 memset (perm2
, elt
, 4);
41550 dest
= gen_reg_rtx (V4SImode
);
41551 ok
= expand_vselect (dest
, op0
, perm2
, 4, d
->testing_p
);
41554 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, dest
));
41561 /* For AVX2 broadcasts of the first element vpbroadcast* or
41562 vpermq should be used by expand_vec_perm_1. */
41563 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
41567 gcc_unreachable ();
41571 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41572 broadcast permutations. */
41575 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
41577 unsigned i
, elt
, nelt
= d
->nelt
;
41579 if (!d
->one_operand_p
)
41583 for (i
= 1; i
< nelt
; ++i
)
41584 if (d
->perm
[i
] != elt
)
41587 return expand_vec_perm_broadcast_1 (d
);
41590 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
41591 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
41592 all the shorter instruction sequences. */
41595 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
41597 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
41598 unsigned int i
, nelt
, eltsz
;
41602 || d
->one_operand_p
41603 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41610 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41612 /* Generate 4 permutation masks. If the required element is within
41613 the same lane, it is shuffled in. If the required element from the
41614 other lane, force a zero by setting bit 7 in the permutation mask.
41615 In the other mask the mask has non-negative elements if element
41616 is requested from the other lane, but also moved to the other lane,
41617 so that the result of vpshufb can have the two V2TImode halves
41619 m128
= GEN_INT (-128);
41620 for (i
= 0; i
< 32; ++i
)
41622 rperm
[0][i
] = m128
;
41623 rperm
[1][i
] = m128
;
41624 rperm
[2][i
] = m128
;
41625 rperm
[3][i
] = m128
;
41631 for (i
= 0; i
< nelt
; ++i
)
41633 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41634 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41635 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
41637 for (j
= 0; j
< eltsz
; ++j
)
41638 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
41639 used
[which
] = true;
41642 for (i
= 0; i
< 2; ++i
)
41644 if (!used
[2 * i
+ 1])
41649 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
41650 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
41651 vperm
= force_reg (V32QImode
, vperm
);
41652 h
[i
] = gen_reg_rtx (V32QImode
);
41653 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41654 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
41657 /* Swap the 128-byte lanes of h[X]. */
41658 for (i
= 0; i
< 2; ++i
)
41660 if (h
[i
] == NULL_RTX
)
41662 op
= gen_reg_rtx (V4DImode
);
41663 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
41664 const2_rtx
, GEN_INT (3), const0_rtx
,
41666 h
[i
] = gen_lowpart (V32QImode
, op
);
41669 for (i
= 0; i
< 2; ++i
)
41676 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
41677 vperm
= force_reg (V32QImode
, vperm
);
41678 l
[i
] = gen_reg_rtx (V32QImode
);
41679 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41680 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
41683 for (i
= 0; i
< 2; ++i
)
41687 op
= gen_reg_rtx (V32QImode
);
41688 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
41695 gcc_assert (l
[0] && l
[1]);
41697 if (d
->vmode
!= V32QImode
)
41698 op
= gen_reg_rtx (V32QImode
);
41699 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
41700 if (op
!= d
->target
)
41701 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41705 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
41706 With all of the interface bits taken care of, perform the expansion
41707 in D and return true on success. */
41710 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
41712 /* Try a single instruction expansion. */
41713 if (expand_vec_perm_1 (d
))
41716 /* Try sequences of two instructions. */
41718 if (expand_vec_perm_pshuflw_pshufhw (d
))
41721 if (expand_vec_perm_palignr (d
))
41724 if (expand_vec_perm_interleave2 (d
))
41727 if (expand_vec_perm_broadcast (d
))
41730 if (expand_vec_perm_vpermq_perm_1 (d
))
41733 if (expand_vec_perm_vperm2f128 (d
))
41736 /* Try sequences of three instructions. */
41738 if (expand_vec_perm_2vperm2f128_vshuf (d
))
41741 if (expand_vec_perm_pshufb2 (d
))
41744 if (expand_vec_perm_interleave3 (d
))
41747 if (expand_vec_perm_vperm2f128_vblend (d
))
41750 /* Try sequences of four instructions. */
41752 if (expand_vec_perm_vpshufb2_vpermq (d
))
41755 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
41758 /* ??? Look for narrow permutations whose element orderings would
41759 allow the promotion to a wider mode. */
41761 /* ??? Look for sequences of interleave or a wider permute that place
41762 the data into the correct lanes for a half-vector shuffle like
41763 pshuf[lh]w or vpermilps. */
41765 /* ??? Look for sequences of interleave that produce the desired results.
41766 The combinatorics of punpck[lh] get pretty ugly... */
41768 if (expand_vec_perm_even_odd (d
))
41771 /* Even longer sequences. */
41772 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
41778 /* If a permutation only uses one operand, make it clear. Returns true
41779 if the permutation references both operands. */
41782 canonicalize_perm (struct expand_vec_perm_d
*d
)
41784 int i
, which
, nelt
= d
->nelt
;
41786 for (i
= which
= 0; i
< nelt
; ++i
)
41787 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
41789 d
->one_operand_p
= true;
41796 if (!rtx_equal_p (d
->op0
, d
->op1
))
41798 d
->one_operand_p
= false;
41801 /* The elements of PERM do not suggest that only the first operand
41802 is used, but both operands are identical. Allow easier matching
41803 of the permutation by folding the permutation into the single
41808 for (i
= 0; i
< nelt
; ++i
)
41809 d
->perm
[i
] &= nelt
- 1;
41818 return (which
== 3);
41822 ix86_expand_vec_perm_const (rtx operands
[4])
41824 struct expand_vec_perm_d d
;
41825 unsigned char perm
[MAX_VECT_LEN
];
41830 d
.target
= operands
[0];
41831 d
.op0
= operands
[1];
41832 d
.op1
= operands
[2];
41835 d
.vmode
= GET_MODE (d
.target
);
41836 gcc_assert (VECTOR_MODE_P (d
.vmode
));
41837 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41838 d
.testing_p
= false;
41840 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
41841 gcc_assert (XVECLEN (sel
, 0) == nelt
);
41842 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
41844 for (i
= 0; i
< nelt
; ++i
)
41846 rtx e
= XVECEXP (sel
, 0, i
);
41847 int ei
= INTVAL (e
) & (2 * nelt
- 1);
41852 two_args
= canonicalize_perm (&d
);
41854 if (ix86_expand_vec_perm_const_1 (&d
))
41857 /* If the selector says both arguments are needed, but the operands are the
41858 same, the above tried to expand with one_operand_p and flattened selector.
41859 If that didn't work, retry without one_operand_p; we succeeded with that
41861 if (two_args
&& d
.one_operand_p
)
41863 d
.one_operand_p
= false;
41864 memcpy (d
.perm
, perm
, sizeof (perm
));
41865 return ix86_expand_vec_perm_const_1 (&d
);
41871 /* Implement targetm.vectorize.vec_perm_const_ok. */
41874 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
41875 const unsigned char *sel
)
41877 struct expand_vec_perm_d d
;
41878 unsigned int i
, nelt
, which
;
41882 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41883 d
.testing_p
= true;
41885 /* Given sufficient ISA support we can just return true here
41886 for selected vector modes. */
41887 if (GET_MODE_SIZE (d
.vmode
) == 16)
41889 /* All implementable with a single vpperm insn. */
41892 /* All implementable with 2 pshufb + 1 ior. */
41895 /* All implementable with shufpd or unpck[lh]pd. */
41900 /* Extract the values from the vector CST into the permutation
41902 memcpy (d
.perm
, sel
, nelt
);
41903 for (i
= which
= 0; i
< nelt
; ++i
)
41905 unsigned char e
= d
.perm
[i
];
41906 gcc_assert (e
< 2 * nelt
);
41907 which
|= (e
< nelt
? 1 : 2);
41910 /* For all elements from second vector, fold the elements to first. */
41912 for (i
= 0; i
< nelt
; ++i
)
41915 /* Check whether the mask can be applied to the vector type. */
41916 d
.one_operand_p
= (which
!= 3);
41918 /* Implementable with shufps or pshufd. */
41919 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
41922 /* Otherwise we have to go through the motions and see if we can
41923 figure out how to generate the requested permutation. */
41924 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
41925 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
41926 if (!d
.one_operand_p
)
41927 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
41930 ret
= ix86_expand_vec_perm_const_1 (&d
);
41937 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
41939 struct expand_vec_perm_d d
;
41945 d
.vmode
= GET_MODE (targ
);
41946 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41947 d
.one_operand_p
= false;
41948 d
.testing_p
= false;
41950 for (i
= 0; i
< nelt
; ++i
)
41951 d
.perm
[i
] = i
* 2 + odd
;
41953 /* We'll either be able to implement the permutation directly... */
41954 if (expand_vec_perm_1 (&d
))
41957 /* ... or we use the special-case patterns. */
41958 expand_vec_perm_even_odd_1 (&d
, odd
);
41962 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
41964 struct expand_vec_perm_d d
;
41965 unsigned i
, nelt
, base
;
41971 d
.vmode
= GET_MODE (targ
);
41972 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41973 d
.one_operand_p
= false;
41974 d
.testing_p
= false;
41976 base
= high_p
? nelt
/ 2 : 0;
41977 for (i
= 0; i
< nelt
/ 2; ++i
)
41979 d
.perm
[i
* 2] = i
+ base
;
41980 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
41983 /* Note that for AVX this isn't one instruction. */
41984 ok
= ix86_expand_vec_perm_const_1 (&d
);
41989 /* Expand a vector operation CODE for a V*QImode in terms of the
41990 same operation on V*HImode. */
41993 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
41995 enum machine_mode qimode
= GET_MODE (dest
);
41996 enum machine_mode himode
;
41997 rtx (*gen_il
) (rtx
, rtx
, rtx
);
41998 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
41999 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
42000 struct expand_vec_perm_d d
;
42001 bool ok
, full_interleave
;
42002 bool uns_p
= false;
42009 gen_il
= gen_vec_interleave_lowv16qi
;
42010 gen_ih
= gen_vec_interleave_highv16qi
;
42013 himode
= V16HImode
;
42014 gen_il
= gen_avx2_interleave_lowv32qi
;
42015 gen_ih
= gen_avx2_interleave_highv32qi
;
42018 gcc_unreachable ();
42021 op2_l
= op2_h
= op2
;
42025 /* Unpack data such that we've got a source byte in each low byte of
42026 each word. We don't care what goes into the high byte of each word.
42027 Rather than trying to get zero in there, most convenient is to let
42028 it be a copy of the low byte. */
42029 op2_l
= gen_reg_rtx (qimode
);
42030 op2_h
= gen_reg_rtx (qimode
);
42031 emit_insn (gen_il (op2_l
, op2
, op2
));
42032 emit_insn (gen_ih (op2_h
, op2
, op2
));
42035 op1_l
= gen_reg_rtx (qimode
);
42036 op1_h
= gen_reg_rtx (qimode
);
42037 emit_insn (gen_il (op1_l
, op1
, op1
));
42038 emit_insn (gen_ih (op1_h
, op1
, op1
));
42039 full_interleave
= qimode
== V16QImode
;
42047 op1_l
= gen_reg_rtx (himode
);
42048 op1_h
= gen_reg_rtx (himode
);
42049 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
42050 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
42051 full_interleave
= true;
42054 gcc_unreachable ();
42057 /* Perform the operation. */
42058 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
42060 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
42062 gcc_assert (res_l
&& res_h
);
42064 /* Merge the data back into the right place. */
42066 d
.op0
= gen_lowpart (qimode
, res_l
);
42067 d
.op1
= gen_lowpart (qimode
, res_h
);
42069 d
.nelt
= GET_MODE_NUNITS (qimode
);
42070 d
.one_operand_p
= false;
42071 d
.testing_p
= false;
42073 if (full_interleave
)
42075 /* For SSE2, we used an full interleave, so the desired
42076 results are in the even elements. */
42077 for (i
= 0; i
< 32; ++i
)
42082 /* For AVX, the interleave used above was not cross-lane. So the
42083 extraction is evens but with the second and third quarter swapped.
42084 Happily, that is even one insn shorter than even extraction. */
42085 for (i
= 0; i
< 32; ++i
)
42086 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
42089 ok
= ix86_expand_vec_perm_const_1 (&d
);
42092 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42093 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
42096 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
42097 if op is CONST_VECTOR with all odd elements equal to their
42098 preceding element. */
42101 const_vector_equal_evenodd_p (rtx op
)
42103 enum machine_mode mode
= GET_MODE (op
);
42104 int i
, nunits
= GET_MODE_NUNITS (mode
);
42105 if (GET_CODE (op
) != CONST_VECTOR
42106 || nunits
!= CONST_VECTOR_NUNITS (op
))
42108 for (i
= 0; i
< nunits
; i
+= 2)
42109 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
42115 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
42116 bool uns_p
, bool odd_p
)
42118 enum machine_mode mode
= GET_MODE (op1
);
42119 enum machine_mode wmode
= GET_MODE (dest
);
42121 rtx orig_op1
= op1
, orig_op2
= op2
;
42123 if (!nonimmediate_operand (op1
, mode
))
42124 op1
= force_reg (mode
, op1
);
42125 if (!nonimmediate_operand (op2
, mode
))
42126 op2
= force_reg (mode
, op2
);
42128 /* We only play even/odd games with vectors of SImode. */
42129 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
42131 /* If we're looking for the odd results, shift those members down to
42132 the even slots. For some cpus this is faster than a PSHUFD. */
42135 /* For XOP use vpmacsdqh, but only for smult, as it is only
42137 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
42139 x
= force_reg (wmode
, CONST0_RTX (wmode
));
42140 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
42144 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
42145 if (!const_vector_equal_evenodd_p (orig_op1
))
42146 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
42147 x
, NULL
, 1, OPTAB_DIRECT
);
42148 if (!const_vector_equal_evenodd_p (orig_op2
))
42149 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
42150 x
, NULL
, 1, OPTAB_DIRECT
);
42151 op1
= gen_lowpart (mode
, op1
);
42152 op2
= gen_lowpart (mode
, op2
);
42155 if (mode
== V8SImode
)
42158 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
42160 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
42163 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
42164 else if (TARGET_SSE4_1
)
42165 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
42168 rtx s1
, s2
, t0
, t1
, t2
;
42170 /* The easiest way to implement this without PMULDQ is to go through
42171 the motions as if we are performing a full 64-bit multiply. With
42172 the exception that we need to do less shuffling of the elements. */
42174 /* Compute the sign-extension, aka highparts, of the two operands. */
42175 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42176 op1
, pc_rtx
, pc_rtx
);
42177 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42178 op2
, pc_rtx
, pc_rtx
);
42180 /* Multiply LO(A) * HI(B), and vice-versa. */
42181 t1
= gen_reg_rtx (wmode
);
42182 t2
= gen_reg_rtx (wmode
);
42183 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
42184 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
42186 /* Multiply LO(A) * LO(B). */
42187 t0
= gen_reg_rtx (wmode
);
42188 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
42190 /* Combine and shift the highparts into place. */
42191 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
42192 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
42195 /* Combine high and low parts. */
42196 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
42203 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
42204 bool uns_p
, bool high_p
)
42206 enum machine_mode wmode
= GET_MODE (dest
);
42207 enum machine_mode mode
= GET_MODE (op1
);
42208 rtx t1
, t2
, t3
, t4
, mask
;
42213 t1
= gen_reg_rtx (mode
);
42214 t2
= gen_reg_rtx (mode
);
42215 if (TARGET_XOP
&& !uns_p
)
42217 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
42218 shuffle the elements once so that all elements are in the right
42219 place for immediate use: { A C B D }. */
42220 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
42221 const1_rtx
, GEN_INT (3)));
42222 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
42223 const1_rtx
, GEN_INT (3)));
42227 /* Put the elements into place for the multiply. */
42228 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
42229 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
42232 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
42236 /* Shuffle the elements between the lanes. After this we
42237 have { A B E F | C D G H } for each operand. */
42238 t1
= gen_reg_rtx (V4DImode
);
42239 t2
= gen_reg_rtx (V4DImode
);
42240 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
42241 const0_rtx
, const2_rtx
,
42242 const1_rtx
, GEN_INT (3)));
42243 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
42244 const0_rtx
, const2_rtx
,
42245 const1_rtx
, GEN_INT (3)));
42247 /* Shuffle the elements within the lanes. After this we
42248 have { A A B B | C C D D } or { E E F F | G G H H }. */
42249 t3
= gen_reg_rtx (V8SImode
);
42250 t4
= gen_reg_rtx (V8SImode
);
42251 mask
= GEN_INT (high_p
42252 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
42253 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
42254 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
42255 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
42257 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
42262 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
42263 uns_p
, OPTAB_DIRECT
);
42264 t2
= expand_binop (mode
,
42265 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
42266 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
42267 gcc_assert (t1
&& t2
);
42269 t3
= gen_reg_rtx (mode
);
42270 ix86_expand_vec_interleave (t3
, t1
, t2
, high_p
);
42271 emit_move_insn (dest
, gen_lowpart (wmode
, t3
));
42276 t1
= gen_reg_rtx (wmode
);
42277 t2
= gen_reg_rtx (wmode
);
42278 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
42279 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
42281 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
42285 gcc_unreachable ();
42290 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
42292 rtx res_1
, res_2
, res_3
, res_4
;
42294 res_1
= gen_reg_rtx (V4SImode
);
42295 res_2
= gen_reg_rtx (V4SImode
);
42296 res_3
= gen_reg_rtx (V2DImode
);
42297 res_4
= gen_reg_rtx (V2DImode
);
42298 ix86_expand_mul_widen_evenodd (res_3
, op1
, op2
, true, false);
42299 ix86_expand_mul_widen_evenodd (res_4
, op1
, op2
, true, true);
42301 /* Move the results in element 2 down to element 1; we don't care
42302 what goes in elements 2 and 3. Then we can merge the parts
42303 back together with an interleave.
42305 Note that two other sequences were tried:
42306 (1) Use interleaves at the start instead of psrldq, which allows
42307 us to use a single shufps to merge things back at the end.
42308 (2) Use shufps here to combine the two vectors, then pshufd to
42309 put the elements in the correct order.
42310 In both cases the cost of the reformatting stall was too high
42311 and the overall sequence slower. */
42313 emit_insn (gen_sse2_pshufd_1 (res_1
, gen_lowpart (V4SImode
, res_3
),
42314 const0_rtx
, const2_rtx
,
42315 const0_rtx
, const0_rtx
));
42316 emit_insn (gen_sse2_pshufd_1 (res_2
, gen_lowpart (V4SImode
, res_4
),
42317 const0_rtx
, const2_rtx
,
42318 const0_rtx
, const0_rtx
));
42319 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
42321 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
42325 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
42327 enum machine_mode mode
= GET_MODE (op0
);
42328 rtx t1
, t2
, t3
, t4
, t5
, t6
;
42330 if (TARGET_XOP
&& mode
== V2DImode
)
42332 /* op1: A,B,C,D, op2: E,F,G,H */
42333 op1
= gen_lowpart (V4SImode
, op1
);
42334 op2
= gen_lowpart (V4SImode
, op2
);
42336 t1
= gen_reg_rtx (V4SImode
);
42337 t2
= gen_reg_rtx (V4SImode
);
42338 t3
= gen_reg_rtx (V2DImode
);
42339 t4
= gen_reg_rtx (V2DImode
);
42342 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
42348 /* t2: (B*E),(A*F),(D*G),(C*H) */
42349 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
42351 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
42352 emit_insn (gen_xop_phadddq (t3
, t2
));
42354 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
42355 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
42357 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
42358 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
42362 enum machine_mode nmode
;
42363 rtx (*umul
) (rtx
, rtx
, rtx
);
42365 if (mode
== V2DImode
)
42367 umul
= gen_vec_widen_umult_even_v4si
;
42370 else if (mode
== V4DImode
)
42372 umul
= gen_vec_widen_umult_even_v8si
;
42376 gcc_unreachable ();
42379 /* Multiply low parts. */
42380 t1
= gen_reg_rtx (mode
);
42381 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
42383 /* Shift input vectors right 32 bits so we can multiply high parts. */
42385 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
42386 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
42388 /* Multiply high parts by low parts. */
42389 t4
= gen_reg_rtx (mode
);
42390 t5
= gen_reg_rtx (mode
);
42391 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
42392 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
42394 /* Combine and shift the highparts back. */
42395 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
42396 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
42398 /* Combine high and low parts. */
42399 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
42402 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42403 gen_rtx_MULT (mode
, op1
, op2
));
42406 /* Return 1 if control tansfer instruction INSN
42407 should be encoded with bnd prefix.
42408 If insn is NULL then return 1 when control
42409 transfer instructions should be prefixed with
42410 bnd by default for current function. */
42413 ix86_bnd_prefixed_insn_p (rtx insn ATTRIBUTE_UNUSED
)
42418 /* Calculate integer abs() using only SSE2 instructions. */
42421 ix86_expand_sse2_abs (rtx target
, rtx input
)
42423 enum machine_mode mode
= GET_MODE (target
);
42428 /* For 32-bit signed integer X, the best way to calculate the absolute
42429 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
42431 tmp0
= expand_simple_binop (mode
, ASHIFTRT
, input
,
42432 GEN_INT (GET_MODE_BITSIZE
42433 (GET_MODE_INNER (mode
)) - 1),
42434 NULL
, 0, OPTAB_DIRECT
);
42435 tmp1
= expand_simple_binop (mode
, XOR
, tmp0
, input
,
42436 NULL
, 0, OPTAB_DIRECT
);
42437 x
= expand_simple_binop (mode
, MINUS
, tmp1
, tmp0
,
42438 target
, 0, OPTAB_DIRECT
);
42441 /* For 16-bit signed integer X, the best way to calculate the absolute
42442 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
42444 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42446 x
= expand_simple_binop (mode
, SMAX
, tmp0
, input
,
42447 target
, 0, OPTAB_DIRECT
);
42450 /* For 8-bit signed integer X, the best way to calculate the absolute
42451 value of X is min ((unsigned char) X, (unsigned char) (-X)),
42452 as SSE2 provides the PMINUB insn. */
42454 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42456 x
= expand_simple_binop (V16QImode
, UMIN
, tmp0
, input
,
42457 target
, 0, OPTAB_DIRECT
);
42461 gcc_unreachable ();
42465 emit_move_insn (target
, x
);
42468 /* Expand an insert into a vector register through pinsr insn.
42469 Return true if successful. */
42472 ix86_expand_pinsr (rtx
*operands
)
42474 rtx dst
= operands
[0];
42475 rtx src
= operands
[3];
42477 unsigned int size
= INTVAL (operands
[1]);
42478 unsigned int pos
= INTVAL (operands
[2]);
42480 if (GET_CODE (dst
) == SUBREG
)
42482 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
42483 dst
= SUBREG_REG (dst
);
42486 if (GET_CODE (src
) == SUBREG
)
42487 src
= SUBREG_REG (src
);
42489 switch (GET_MODE (dst
))
42496 enum machine_mode srcmode
, dstmode
;
42497 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
42499 srcmode
= mode_for_size (size
, MODE_INT
, 0);
42504 if (!TARGET_SSE4_1
)
42506 dstmode
= V16QImode
;
42507 pinsr
= gen_sse4_1_pinsrb
;
42513 dstmode
= V8HImode
;
42514 pinsr
= gen_sse2_pinsrw
;
42518 if (!TARGET_SSE4_1
)
42520 dstmode
= V4SImode
;
42521 pinsr
= gen_sse4_1_pinsrd
;
42525 gcc_assert (TARGET_64BIT
);
42526 if (!TARGET_SSE4_1
)
42528 dstmode
= V2DImode
;
42529 pinsr
= gen_sse4_1_pinsrq
;
42537 if (GET_MODE (dst
) != dstmode
)
42538 d
= gen_reg_rtx (dstmode
);
42539 src
= gen_lowpart (srcmode
, src
);
42543 emit_insn (pinsr (d
, gen_lowpart (dstmode
, dst
), src
,
42544 GEN_INT (1 << pos
)));
42546 emit_move_insn (dst
, gen_lowpart (GET_MODE (dst
), d
));
42555 /* This function returns the calling abi specific va_list type node.
42556 It returns the FNDECL specific va_list type. */
42559 ix86_fn_abi_va_list (tree fndecl
)
42562 return va_list_type_node
;
42563 gcc_assert (fndecl
!= NULL_TREE
);
42565 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
42566 return ms_va_list_type_node
;
42568 return sysv_va_list_type_node
;
42571 /* Returns the canonical va_list type specified by TYPE. If there
42572 is no valid TYPE provided, it return NULL_TREE. */
42575 ix86_canonical_va_list_type (tree type
)
42579 /* Resolve references and pointers to va_list type. */
42580 if (TREE_CODE (type
) == MEM_REF
)
42581 type
= TREE_TYPE (type
);
42582 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
42583 type
= TREE_TYPE (type
);
42584 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
42585 type
= TREE_TYPE (type
);
42587 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
42589 wtype
= va_list_type_node
;
42590 gcc_assert (wtype
!= NULL_TREE
);
42592 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42594 /* If va_list is an array type, the argument may have decayed
42595 to a pointer type, e.g. by being passed to another function.
42596 In that case, unwrap both types so that we can compare the
42597 underlying records. */
42598 if (TREE_CODE (htype
) == ARRAY_TYPE
42599 || POINTER_TYPE_P (htype
))
42601 wtype
= TREE_TYPE (wtype
);
42602 htype
= TREE_TYPE (htype
);
42605 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42606 return va_list_type_node
;
42607 wtype
= sysv_va_list_type_node
;
42608 gcc_assert (wtype
!= NULL_TREE
);
42610 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42612 /* If va_list is an array type, the argument may have decayed
42613 to a pointer type, e.g. by being passed to another function.
42614 In that case, unwrap both types so that we can compare the
42615 underlying records. */
42616 if (TREE_CODE (htype
) == ARRAY_TYPE
42617 || POINTER_TYPE_P (htype
))
42619 wtype
= TREE_TYPE (wtype
);
42620 htype
= TREE_TYPE (htype
);
42623 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42624 return sysv_va_list_type_node
;
42625 wtype
= ms_va_list_type_node
;
42626 gcc_assert (wtype
!= NULL_TREE
);
42628 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42630 /* If va_list is an array type, the argument may have decayed
42631 to a pointer type, e.g. by being passed to another function.
42632 In that case, unwrap both types so that we can compare the
42633 underlying records. */
42634 if (TREE_CODE (htype
) == ARRAY_TYPE
42635 || POINTER_TYPE_P (htype
))
42637 wtype
= TREE_TYPE (wtype
);
42638 htype
= TREE_TYPE (htype
);
42641 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42642 return ms_va_list_type_node
;
42645 return std_canonical_va_list_type (type
);
42648 /* Iterate through the target-specific builtin types for va_list.
42649 IDX denotes the iterator, *PTREE is set to the result type of
42650 the va_list builtin, and *PNAME to its internal type.
42651 Returns zero if there is no element for this index, otherwise
42652 IDX should be increased upon the next call.
42653 Note, do not iterate a base builtin's name like __builtin_va_list.
42654 Used from c_common_nodes_and_builtins. */
42657 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
42667 *ptree
= ms_va_list_type_node
;
42668 *pname
= "__builtin_ms_va_list";
42672 *ptree
= sysv_va_list_type_node
;
42673 *pname
= "__builtin_sysv_va_list";
42681 #undef TARGET_SCHED_DISPATCH
42682 #define TARGET_SCHED_DISPATCH has_dispatch
42683 #undef TARGET_SCHED_DISPATCH_DO
42684 #define TARGET_SCHED_DISPATCH_DO do_dispatch
42685 #undef TARGET_SCHED_REASSOCIATION_WIDTH
42686 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
42687 #undef TARGET_SCHED_REORDER
42688 #define TARGET_SCHED_REORDER ix86_sched_reorder
42689 #undef TARGET_SCHED_ADJUST_PRIORITY
42690 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
42691 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
42692 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
42693 ix86_dependencies_evaluation_hook
/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

/* Sentinel "unlimited / forbidden" count used in num_allowable_groups
   and count_num_restricted below.  */
#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};
42743 /* Number of allowable groups in a dispatch window. It is an array
42744 indexed by dispatch_group enum. 100 is used as a big number,
42745 because the number of these kind of operations does not have any
42746 effect in dispatch window, but we need them for other reasons in
42748 static unsigned int num_allowable_groups
[disp_last
] = {
42749 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
42752 char group_name
[disp_last
+ 1][16] = {
42753 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
42754 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
42755 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro op..  */
  last_path
};
42767 /* sched_insn_info defines a window to the instructions scheduled in
42768 the basic block. It contains a pointer to the insn_info table and
42769 the instruction scheduled.
42771 Windows are allocated for each basic block and are linked
42773 typedef struct sched_insn_info_s
{
42775 enum dispatch_group group
;
42776 enum insn_path path
;
42781 /* Linked list of dispatch windows. This is a two way list of
42782 dispatch windows of a basic block. It contains information about
42783 the number of uops in the window and the total number of
42784 instructions and of bytes in the object code for this dispatch
42786 typedef struct dispatch_windows_s
{
42787 int num_insn
; /* Number of insn in the window. */
42788 int num_uops
; /* Number of uops in the window. */
42789 int window_size
; /* Number of bytes in the window. */
42790 int window_num
; /* Window number between 0 or 1. */
42791 int num_imm
; /* Number of immediates in an insn. */
42792 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
42793 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
42794 int imm_size
; /* Total immediates in the window. */
42795 int num_loads
; /* Total memory loads in the window. */
42796 int num_stores
; /* Total memory stores in the window. */
42797 int violation
; /* Violation exists in window. */
42798 sched_insn_info
*window
; /* Pointer to the window. */
42799 struct dispatch_windows_s
*next
;
42800 struct dispatch_windows_s
*prev
;
42801 } dispatch_windows
;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;   /* Total number of immediate operands.  */
  int imm32; /* Number of 32-bit immediates.  */
  int imm64; /* Number of 64-bit immediates.  */
} imm_info;
42811 static dispatch_windows
*dispatch_window_list
;
42812 static dispatch_windows
*dispatch_window_list1
;
42814 /* Get dispatch group of insn. */
42816 static enum dispatch_group
42817 get_mem_group (rtx insn
)
42819 enum attr_memory memory
;
42821 if (INSN_CODE (insn
) < 0)
42822 return disp_no_group
;
42823 memory
= get_attr_memory (insn
);
42824 if (memory
== MEMORY_STORE
)
42827 if (memory
== MEMORY_LOAD
)
42830 if (memory
== MEMORY_BOTH
)
42831 return disp_load_store
;
42833 return disp_no_group
;
42836 /* Return true if insn is a compare instruction. */
42841 enum attr_type type
;
42843 type
= get_attr_type (insn
);
42844 return (type
== TYPE_TEST
42845 || type
== TYPE_ICMP
42846 || type
== TYPE_FCMP
42847 || GET_CODE (PATTERN (insn
)) == COMPARE
);
42850 /* Return true if a dispatch violation encountered. */
42853 dispatch_violation (void)
42855 if (dispatch_window_list
->next
)
42856 return dispatch_window_list
->next
->violation
;
42857 return dispatch_window_list
->violation
;
42860 /* Return true if insn is a branch instruction. */
42863 is_branch (rtx insn
)
42865 return (CALL_P (insn
) || JUMP_P (insn
));
42868 /* Return true if insn is a prefetch instruction. */
42871 is_prefetch (rtx insn
)
42873 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
42876 /* This function initializes a dispatch window and the list container holding a
42877 pointer to the window. */
42880 init_window (int window_num
)
42883 dispatch_windows
*new_list
;
42885 if (window_num
== 0)
42886 new_list
= dispatch_window_list
;
42888 new_list
= dispatch_window_list1
;
42890 new_list
->num_insn
= 0;
42891 new_list
->num_uops
= 0;
42892 new_list
->window_size
= 0;
42893 new_list
->next
= NULL
;
42894 new_list
->prev
= NULL
;
42895 new_list
->window_num
= window_num
;
42896 new_list
->num_imm
= 0;
42897 new_list
->num_imm_32
= 0;
42898 new_list
->num_imm_64
= 0;
42899 new_list
->imm_size
= 0;
42900 new_list
->num_loads
= 0;
42901 new_list
->num_stores
= 0;
42902 new_list
->violation
= false;
42904 for (i
= 0; i
< MAX_INSN
; i
++)
42906 new_list
->window
[i
].insn
= NULL
;
42907 new_list
->window
[i
].group
= disp_no_group
;
42908 new_list
->window
[i
].path
= no_path
;
42909 new_list
->window
[i
].byte_len
= 0;
42910 new_list
->window
[i
].imm_bytes
= 0;
42915 /* This function allocates and initializes a dispatch window and the
42916 list container holding a pointer to the window. */
42918 static dispatch_windows
*
42919 allocate_window (void)
42921 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
42922 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
42927 /* This routine initializes the dispatch scheduling information. It
42928 initiates building dispatch scheduler tables and constructs the
42929 first dispatch window. */
42932 init_dispatch_sched (void)
42934 /* Allocate a dispatch list and a window. */
42935 dispatch_window_list
= allocate_window ();
42936 dispatch_window_list1
= allocate_window ();
42941 /* This function returns true if a branch is detected. End of a basic block
42942 does not have to be a branch, but here we assume only branches end a
42946 is_end_basic_block (enum dispatch_group group
)
42948 return group
== disp_branch
;
42951 /* This function is called when the end of a window processing is reached. */
42954 process_end_window (void)
42956 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
42957 if (dispatch_window_list
->next
)
42959 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
42960 gcc_assert (dispatch_window_list
->window_size
42961 + dispatch_window_list1
->window_size
<= 48);
42967 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
42968 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
42969 for 48 bytes of instructions. Note that these windows are not dispatch
42970 windows that their sizes are DISPATCH_WINDOW_SIZE. */
42972 static dispatch_windows
*
42973 allocate_next_window (int window_num
)
42975 if (window_num
== 0)
42977 if (dispatch_window_list
->next
)
42980 return dispatch_window_list
;
42983 dispatch_window_list
->next
= dispatch_window_list1
;
42984 dispatch_window_list1
->prev
= dispatch_window_list
;
42986 return dispatch_window_list1
;
42989 /* Increment the number of immediate operands of an instruction. */
42992 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
42997 switch ( GET_CODE (*in_rtx
))
43002 (imm_values
->imm
)++;
43003 if (x86_64_immediate_operand (*in_rtx
, SImode
))
43004 (imm_values
->imm32
)++;
43006 (imm_values
->imm64
)++;
43010 (imm_values
->imm
)++;
43011 (imm_values
->imm64
)++;
43015 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
43017 (imm_values
->imm
)++;
43018 (imm_values
->imm32
)++;
43029 /* Compute number of immediate operands of an instruction. */
43032 find_constant (rtx in_rtx
, imm_info
*imm_values
)
43034 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
43035 (rtx_function
) find_constant_1
, (void *) imm_values
);
43038 /* Return total size of immediate operands of an instruction along with number
43039 of corresponding immediate-operands. It initializes its parameters to zero
43040 befor calling FIND_CONSTANT.
43041 INSN is the input instruction. IMM is the total of immediates.
43042 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
43046 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
43048 imm_info imm_values
= {0, 0, 0};
43050 find_constant (insn
, &imm_values
);
43051 *imm
= imm_values
.imm
;
43052 *imm32
= imm_values
.imm32
;
43053 *imm64
= imm_values
.imm64
;
43054 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
43057 /* This function indicates if an operand of an instruction is an
43061 has_immediate (rtx insn
)
43063 int num_imm_operand
;
43064 int num_imm32_operand
;
43065 int num_imm64_operand
;
43068 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43069 &num_imm64_operand
);
43073 /* Return single or double path for instructions. */
43075 static enum insn_path
43076 get_insn_path (rtx insn
)
43078 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
43080 if ((int)path
== 0)
43081 return path_single
;
43083 if ((int)path
== 1)
43084 return path_double
;
43089 /* Return insn dispatch group. */
43091 static enum dispatch_group
43092 get_insn_group (rtx insn
)
43094 enum dispatch_group group
= get_mem_group (insn
);
43098 if (is_branch (insn
))
43099 return disp_branch
;
43104 if (has_immediate (insn
))
43107 if (is_prefetch (insn
))
43108 return disp_prefetch
;
43110 return disp_no_group
;
43113 /* Count number of GROUP restricted instructions in a dispatch
43114 window WINDOW_LIST. */
43117 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
43119 enum dispatch_group group
= get_insn_group (insn
);
43121 int num_imm_operand
;
43122 int num_imm32_operand
;
43123 int num_imm64_operand
;
43125 if (group
== disp_no_group
)
43128 if (group
== disp_imm
)
43130 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43131 &num_imm64_operand
);
43132 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
43133 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
43134 || (num_imm32_operand
> 0
43135 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
43136 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
43137 || (num_imm64_operand
> 0
43138 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
43139 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
43140 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
43141 && num_imm64_operand
> 0
43142 && ((window_list
->num_imm_64
> 0
43143 && window_list
->num_insn
>= 2)
43144 || window_list
->num_insn
>= 3)))
43150 if ((group
== disp_load_store
43151 && (window_list
->num_loads
>= MAX_LOAD
43152 || window_list
->num_stores
>= MAX_STORE
))
43153 || ((group
== disp_load
43154 || group
== disp_prefetch
)
43155 && window_list
->num_loads
>= MAX_LOAD
)
43156 || (group
== disp_store
43157 && window_list
->num_stores
>= MAX_STORE
))
43163 /* This function returns true if insn satisfies dispatch rules on the
43164 last window scheduled. */
43167 fits_dispatch_window (rtx insn
)
43169 dispatch_windows
*window_list
= dispatch_window_list
;
43170 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
43171 unsigned int num_restrict
;
43172 enum dispatch_group group
= get_insn_group (insn
);
43173 enum insn_path path
= get_insn_path (insn
);
43176 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
43177 instructions should be given the lowest priority in the
43178 scheduling process in Haifa scheduler to make sure they will be
43179 scheduled in the same dispatch window as the reference to them. */
43180 if (group
== disp_jcc
|| group
== disp_cmp
)
43183 /* Check nonrestricted. */
43184 if (group
== disp_no_group
|| group
== disp_branch
)
43187 /* Get last dispatch window. */
43188 if (window_list_next
)
43189 window_list
= window_list_next
;
43191 if (window_list
->window_num
== 1)
43193 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
43196 || (min_insn_size (insn
) + sum
) >= 48)
43197 /* Window 1 is full. Go for next window. */
43201 num_restrict
= count_num_restricted (insn
, window_list
);
43203 if (num_restrict
> num_allowable_groups
[group
])
43206 /* See if it fits in the first window. */
43207 if (window_list
->window_num
== 0)
43209 /* The first widow should have only single and double path
43211 if (path
== path_double
43212 && (window_list
->num_uops
+ 2) > MAX_INSN
)
43214 else if (path
!= path_single
)
43220 /* Add an instruction INSN with NUM_UOPS micro-operations to the
43221 dispatch window WINDOW_LIST. */
43224 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
43226 int byte_len
= min_insn_size (insn
);
43227 int num_insn
= window_list
->num_insn
;
43229 sched_insn_info
*window
= window_list
->window
;
43230 enum dispatch_group group
= get_insn_group (insn
);
43231 enum insn_path path
= get_insn_path (insn
);
43232 int num_imm_operand
;
43233 int num_imm32_operand
;
43234 int num_imm64_operand
;
43236 if (!window_list
->violation
&& group
!= disp_cmp
43237 && !fits_dispatch_window (insn
))
43238 window_list
->violation
= true;
43240 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43241 &num_imm64_operand
);
43243 /* Initialize window with new instruction. */
43244 window
[num_insn
].insn
= insn
;
43245 window
[num_insn
].byte_len
= byte_len
;
43246 window
[num_insn
].group
= group
;
43247 window
[num_insn
].path
= path
;
43248 window
[num_insn
].imm_bytes
= imm_size
;
43250 window_list
->window_size
+= byte_len
;
43251 window_list
->num_insn
= num_insn
+ 1;
43252 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
43253 window_list
->imm_size
+= imm_size
;
43254 window_list
->num_imm
+= num_imm_operand
;
43255 window_list
->num_imm_32
+= num_imm32_operand
;
43256 window_list
->num_imm_64
+= num_imm64_operand
;
43258 if (group
== disp_store
)
43259 window_list
->num_stores
+= 1;
43260 else if (group
== disp_load
43261 || group
== disp_prefetch
)
43262 window_list
->num_loads
+= 1;
43263 else if (group
== disp_load_store
)
43265 window_list
->num_stores
+= 1;
43266 window_list
->num_loads
+= 1;
43270 /* Adds a scheduled instruction, INSN, to the current dispatch window.
43271 If the total bytes of instructions or the number of instructions in
43272 the window exceed allowable, it allocates a new window. */
43275 add_to_dispatch_window (rtx insn
)
43278 dispatch_windows
*window_list
;
43279 dispatch_windows
*next_list
;
43280 dispatch_windows
*window0_list
;
43281 enum insn_path path
;
43282 enum dispatch_group insn_group
;
43290 if (INSN_CODE (insn
) < 0)
43293 byte_len
= min_insn_size (insn
);
43294 window_list
= dispatch_window_list
;
43295 next_list
= window_list
->next
;
43296 path
= get_insn_path (insn
);
43297 insn_group
= get_insn_group (insn
);
43299 /* Get the last dispatch window. */
43301 window_list
= dispatch_window_list
->next
;
43303 if (path
== path_single
)
43305 else if (path
== path_double
)
43308 insn_num_uops
= (int) path
;
43310 /* If current window is full, get a new window.
43311 Window number zero is full, if MAX_INSN uops are scheduled in it.
43312 Window number one is full, if window zero's bytes plus window
43313 one's bytes is 32, or if the bytes of the new instruction added
43314 to the total makes it greater than 48, or it has already MAX_INSN
43315 instructions in it. */
43316 num_insn
= window_list
->num_insn
;
43317 num_uops
= window_list
->num_uops
;
43318 window_num
= window_list
->window_num
;
43319 insn_fits
= fits_dispatch_window (insn
);
43321 if (num_insn
>= MAX_INSN
43322 || num_uops
+ insn_num_uops
> MAX_INSN
43325 window_num
= ~window_num
& 1;
43326 window_list
= allocate_next_window (window_num
);
43329 if (window_num
== 0)
43331 add_insn_window (insn
, window_list
, insn_num_uops
);
43332 if (window_list
->num_insn
>= MAX_INSN
43333 && insn_group
== disp_branch
)
43335 process_end_window ();
43339 else if (window_num
== 1)
43341 window0_list
= window_list
->prev
;
43342 sum
= window0_list
->window_size
+ window_list
->window_size
;
43344 || (byte_len
+ sum
) >= 48)
43346 process_end_window ();
43347 window_list
= dispatch_window_list
;
43350 add_insn_window (insn
, window_list
, insn_num_uops
);
43353 gcc_unreachable ();
43355 if (is_end_basic_block (insn_group
))
43357 /* End of basic block is reached do end-basic-block process. */
43358 process_end_window ();
43363 /* Print the dispatch window, WINDOW_NUM, to FILE. */
43365 DEBUG_FUNCTION
static void
43366 debug_dispatch_window_file (FILE *file
, int window_num
)
43368 dispatch_windows
*list
;
43371 if (window_num
== 0)
43372 list
= dispatch_window_list
;
43374 list
= dispatch_window_list1
;
43376 fprintf (file
, "Window #%d:\n", list
->window_num
);
43377 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
43378 list
->num_insn
, list
->num_uops
, list
->window_size
);
43379 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43380 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
43382 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
43384 fprintf (file
, " insn info:\n");
43386 for (i
= 0; i
< MAX_INSN
; i
++)
43388 if (!list
->window
[i
].insn
)
43390 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
43391 i
, group_name
[list
->window
[i
].group
],
43392 i
, (void *)list
->window
[i
].insn
,
43393 i
, list
->window
[i
].path
,
43394 i
, list
->window
[i
].byte_len
,
43395 i
, list
->window
[i
].imm_bytes
);
43399 /* Print to stdout a dispatch window. */
43401 DEBUG_FUNCTION
void
43402 debug_dispatch_window (int window_num
)
43404 debug_dispatch_window_file (stdout
, window_num
);
43407 /* Print INSN dispatch information to FILE. */
43409 DEBUG_FUNCTION
static void
43410 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
43413 enum insn_path path
;
43414 enum dispatch_group group
;
43416 int num_imm_operand
;
43417 int num_imm32_operand
;
43418 int num_imm64_operand
;
43420 if (INSN_CODE (insn
) < 0)
43423 byte_len
= min_insn_size (insn
);
43424 path
= get_insn_path (insn
);
43425 group
= get_insn_group (insn
);
43426 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43427 &num_imm64_operand
);
43429 fprintf (file
, " insn info:\n");
43430 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
43431 group_name
[group
], path
, byte_len
);
43432 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43433 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
43436 /* Print to STDERR the status of the ready list with respect to
43437 dispatch windows. */
43439 DEBUG_FUNCTION
void
43440 debug_ready_dispatch (void)
43443 int no_ready
= number_in_ready ();
43445 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
43447 for (i
= 0; i
< no_ready
; i
++)
43448 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
43451 /* This routine is the driver of the dispatch scheduler. */
43454 do_dispatch (rtx insn
, int mode
)
43456 if (mode
== DISPATCH_INIT
)
43457 init_dispatch_sched ();
43458 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
43459 add_to_dispatch_window (insn
);
43462 /* Return TRUE if Dispatch Scheduling is supported. */
43465 has_dispatch (rtx insn
, int action
)
43467 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
|| TARGET_BDVER4
)
43468 && flag_dispatch_scheduler
)
43474 case IS_DISPATCH_ON
:
43479 return is_cmp (insn
);
43481 case DISPATCH_VIOLATION
:
43482 return dispatch_violation ();
43484 case FITS_DISPATCH_WINDOW
:
43485 return fits_dispatch_window (insn
);
43491 /* Implementation of reassociation_width target hook used by
43492 reassoc phase to identify parallelism level in reassociated
43493 tree. Statements tree_code is passed in OPC. Arguments type
43496 Currently parallel reassociation is enabled for Atom
43497 processors only and we set reassociation width to be 2
43498 because Atom may issue up to 2 instructions per cycle.
43500 Return value should be fixed if parallel reassociation is
43501 enabled for other processors. */
43504 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
43505 enum machine_mode mode
)
43509 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
43511 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
43517 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
43518 place emms and femms instructions. */
43520 static enum machine_mode
43521 ix86_preferred_simd_mode (enum machine_mode mode
)
43529 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
43531 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
43533 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
43535 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
43538 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43544 if (!TARGET_VECTORIZE_DOUBLE
)
43546 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43548 else if (TARGET_SSE2
)
43557 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
43560 static unsigned int
43561 ix86_autovectorize_vector_sizes (void)
43563 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
43568 /* Return class of registers which could be used for pseudo of MODE
43569 and of class RCLASS for spilling instead of memory. Return NO_REGS
43570 if it is not possible or non-profitable. */
43572 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
43574 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
43575 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
43576 && INTEGER_CLASS_P (rclass
))
43577 return ALL_SSE_REGS
;
43581 /* Implement targetm.vectorize.init_cost. */
43584 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
43586 unsigned *cost
= XNEWVEC (unsigned, 3);
43587 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
43591 /* Implement targetm.vectorize.add_stmt_cost. */
43594 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
43595 struct _stmt_vec_info
*stmt_info
, int misalign
,
43596 enum vect_cost_model_location where
)
43598 unsigned *cost
= (unsigned *) data
;
43599 unsigned retval
= 0;
43601 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
43602 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
43604 /* Statements in an inner loop relative to the loop being
43605 vectorized are weighted more heavily. The value here is
43606 arbitrary and could potentially be improved with analysis. */
43607 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
43608 count
*= 50; /* FIXME. */
43610 retval
= (unsigned) (count
* stmt_cost
);
43611 cost
[where
] += retval
;
43616 /* Implement targetm.vectorize.finish_cost. */
43619 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
43620 unsigned *body_cost
, unsigned *epilogue_cost
)
43622 unsigned *cost
= (unsigned *) data
;
43623 *prologue_cost
= cost
[vect_prologue
];
43624 *body_cost
= cost
[vect_body
];
43625 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Releases the
   accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
43636 /* Validate target specific memory model bits in VAL. */
43638 static unsigned HOST_WIDE_INT
43639 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
43641 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
43644 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
43646 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
43648 warning (OPT_Winvalid_memory_model
,
43649 "Unknown architecture specific memory model");
43650 return MEMMODEL_SEQ_CST
;
43652 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
43653 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
43655 warning (OPT_Winvalid_memory_model
,
43656 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
43657 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
43659 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
43661 warning (OPT_Winvalid_memory_model
,
43662 "HLE_RELEASE not used with RELEASE or stronger memory model");
43663 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
43668 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
43671 ix86_float_exceptions_rounding_supported_p (void)
43673 /* For x87 floating point with standard excess precision handling,
43674 there is no adddf3 pattern (since x87 floating point only has
43675 XFmode operations) so the default hook implementation gets this
43677 return TARGET_80387
|| TARGET_SSE_MATH
;
43680 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
43683 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
43685 if (!TARGET_80387
&& !TARGET_SSE_MATH
)
43687 tree exceptions_var
= create_tmp_var (integer_type_node
, NULL
);
43690 tree fenv_index_type
= build_index_type (size_int (6));
43691 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
43692 tree fenv_var
= create_tmp_var (fenv_type
, NULL
);
43693 mark_addressable (fenv_var
);
43694 tree fenv_ptr
= build_pointer_type (fenv_type
);
43695 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
43696 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
43697 tree fnstenv
= ix86_builtins
[IX86_BUILTIN_FNSTENV
];
43698 tree fldenv
= ix86_builtins
[IX86_BUILTIN_FLDENV
];
43699 tree fnstsw
= ix86_builtins
[IX86_BUILTIN_FNSTSW
];
43700 tree fnclex
= ix86_builtins
[IX86_BUILTIN_FNCLEX
];
43701 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
43702 tree hold_fnclex
= build_call_expr (fnclex
, 0);
43703 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_fnstenv
,
43705 *clear
= build_call_expr (fnclex
, 0);
43706 tree sw_var
= create_tmp_var (short_unsigned_type_node
, NULL
);
43707 mark_addressable (sw_var
);
43708 tree su_ptr
= build_pointer_type (short_unsigned_type_node
);
43709 tree sw_addr
= build1 (ADDR_EXPR
, su_ptr
, sw_var
);
43710 tree fnstsw_call
= build_call_expr (fnstsw
, 1, sw_addr
);
43711 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
43712 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
43713 exceptions_var
, exceptions_x87
);
43714 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
43715 fnstsw_call
, update_mod
);
43716 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
43717 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
43719 if (TARGET_SSE_MATH
)
43721 tree mxcsr_orig_var
= create_tmp_var (unsigned_type_node
, NULL
);
43722 tree mxcsr_mod_var
= create_tmp_var (unsigned_type_node
, NULL
);
43723 tree stmxcsr
= ix86_builtins
[IX86_BUILTIN_STMXCSR
];
43724 tree ldmxcsr
= ix86_builtins
[IX86_BUILTIN_LDMXCSR
];
43725 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
43726 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
43727 mxcsr_orig_var
, stmxcsr_hold_call
);
43728 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
43730 build_int_cst (unsigned_type_node
, 0x1f80));
43731 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
43732 build_int_cst (unsigned_type_node
, 0xffffffc0));
43733 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
43734 mxcsr_mod_var
, hold_mod_val
);
43735 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
43736 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
43737 hold_assign_orig
, hold_assign_mod
);
43738 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
43739 ldmxcsr_hold_call
);
43741 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
43744 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
43746 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
43747 ldmxcsr_clear_call
);
43749 *clear
= ldmxcsr_clear_call
;
43750 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
43751 tree exceptions_sse
= fold_convert (integer_type_node
,
43752 stxmcsr_update_call
);
43755 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
43756 exceptions_var
, exceptions_sse
);
43757 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
43758 exceptions_var
, exceptions_mod
);
43759 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
43760 exceptions_assign
);
43763 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
43764 exceptions_var
, exceptions_sse
);
43765 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
43766 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
43767 ldmxcsr_update_call
);
43769 tree atomic_feraiseexcept
43770 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
43771 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
43772 1, exceptions_var
);
43773 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
43774 atomic_feraiseexcept_call
);
/* Initialize the GCC target structure.  Each #undef/#define pair below
   installs the i386 implementation of one target hook; target-def.h's
   TARGET_INITIALIZER then collects these macros into the targetm
   structure defined at the end of this file.  Conditional groups are
   balanced with #endif so the table preprocesses cleanly on every
   subtarget.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

/* Subtargets (e.g. PE/COFF) may provide their own section-info encoder;
   fall back to the generic i386 one otherwise.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

/* x86 assemblers do not require alignment for data directives, so the
   unaligned ops simply reuse the aligned ones.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* Darwin-specific binding rules; a later PE/COFF redefinition takes
   precedence when dllimport attributes are in use.  */
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DTP-relative DWARF output is only meaningful when the assembler
   supports TLS relocations.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

/* Darwin renames some libfuncs; guard restored to match upstream —
   darwin_rename_builtins is only available when TARGET_MACHO.  */
#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
44160 struct gcc_target targetm
= TARGET_INITIALIZER
;
44162 #include "gt-i386.h"