1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
64 #include "tree-flow.h"
66 #include "pass_manager.h"
68 static rtx
legitimize_dllimport_symbol (rtx
, bool);
69 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
70 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
72 #ifndef CHECK_STACK_LIMIT
73 #define CHECK_STACK_LIMIT (-1)
76 /* Return index of given mode in mult and division cost tables. */
77 #define MODE_INDEX(mode) \
78 ((mode) == QImode ? 0 \
79 : (mode) == HImode ? 1 \
80 : (mode) == SImode ? 2 \
81 : (mode) == DImode ? 3 \
84 /* Processor costs (relative to an add) */
85 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
86 #define COSTS_N_BYTES(N) ((N) * 2)
88 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* Stringop expansion tables used when optimizing for size: always expand
   memcpy/memset as single-byte "rep" string instructions.  Each table has
   two entries -- presumably the 32-bit and 64-bit variants; TODO(review):
   confirm against the stringop_algs declaration.  */
90 static stringop_algs ix86_size_memcpy
[2] = {
91 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
92 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
93 static stringop_algs ix86_size_memset
[2] = {
94 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
95 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
/* Cost model used when tuning for code size (-Os); positional initializer
   of struct processor_costs, in COSTS_N_BYTES units (2 bytes per add).
   NOTE(review): the embedded original line numbers jump (117, 143, 150-151,
   163 are missing) and no closing brace is visible -- this extraction
   appears to have dropped initializer lines; verify against upstream
   i386.c before relying on field alignment.  */
98 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
99 COSTS_N_BYTES (2), /* cost of an add instruction */
100 COSTS_N_BYTES (3), /* cost of a lea instruction */
101 COSTS_N_BYTES (2), /* variable shift costs */
102 COSTS_N_BYTES (3), /* constant shift costs */
103 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
104 COSTS_N_BYTES (3), /* HI */
105 COSTS_N_BYTES (3), /* SI */
106 COSTS_N_BYTES (3), /* DI */
107 COSTS_N_BYTES (5)}, /* other */
108 0, /* cost of multiply per each bit set */
109 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
110 COSTS_N_BYTES (3), /* HI */
111 COSTS_N_BYTES (3), /* SI */
112 COSTS_N_BYTES (3), /* DI */
113 COSTS_N_BYTES (5)}, /* other */
114 COSTS_N_BYTES (3), /* cost of movsx */
115 COSTS_N_BYTES (3), /* cost of movzx */
116 0, /* "large" insn */
118 2, /* cost for loading QImode using movzbl */
119 {2, 2, 2}, /* cost of loading integer registers
120 in QImode, HImode and SImode.
121 Relative to reg-reg move (2). */
122 {2, 2, 2}, /* cost of storing integer registers */
123 2, /* cost of reg,reg fld/fst */
124 {2, 2, 2}, /* cost of loading fp registers
125 in SFmode, DFmode and XFmode */
126 {2, 2, 2}, /* cost of storing fp registers
127 in SFmode, DFmode and XFmode */
128 3, /* cost of moving MMX register */
129 {3, 3}, /* cost of loading MMX registers
130 in SImode and DImode */
131 {3, 3}, /* cost of storing MMX registers
132 in SImode and DImode */
133 3, /* cost of moving SSE register */
134 {3, 3, 3}, /* cost of loading SSE registers
135 in SImode, DImode and TImode */
136 {3, 3, 3}, /* cost of storing SSE registers
137 in SImode, DImode and TImode */
138 3, /* MMX or SSE register to integer */
139 0, /* size of l1 cache */
140 0, /* size of l2 cache */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
145 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
146 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
147 COSTS_N_BYTES (2), /* cost of FABS instruction. */
148 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
149 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
152 1, /* scalar_stmt_cost. */
153 1, /* scalar load_cost. */
154 1, /* scalar_store_cost. */
155 1, /* vec_stmt_cost. */
156 1, /* vec_to_scalar_cost. */
157 1, /* scalar_to_vec_cost. */
158 1, /* vec_align_load_cost. */
159 1, /* vec_unalign_load_cost. */
160 1, /* vec_store_cost. */
161 1, /* cond_taken_branch_cost. */
162 1, /* cond_not_taken_branch_cost. */
165 /* Processor costs (relative to an add) */
/* Stringop tables for the original 386: byte-wise rep prefix in the first
   entry; the second entry is the DUMMY_STRINGOP_ALGS (libcall-only)
   placeholder.  */
166 static stringop_algs i386_memcpy
[2] = {
167 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
168 DUMMY_STRINGOP_ALGS
};
169 static stringop_algs i386_memset
[2] = {
170 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
171 DUMMY_STRINGOP_ALGS
};
/* Cost model for the 386; positional initializer of struct processor_costs
   in COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; verify
   against upstream i386.c.  */
174 struct processor_costs i386_cost
= { /* 386 specific costs */
175 COSTS_N_INSNS (1), /* cost of an add instruction */
176 COSTS_N_INSNS (1), /* cost of a lea instruction */
177 COSTS_N_INSNS (3), /* variable shift costs */
178 COSTS_N_INSNS (2), /* constant shift costs */
179 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
180 COSTS_N_INSNS (6), /* HI */
181 COSTS_N_INSNS (6), /* SI */
182 COSTS_N_INSNS (6), /* DI */
183 COSTS_N_INSNS (6)}, /* other */
184 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
185 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
186 COSTS_N_INSNS (23), /* HI */
187 COSTS_N_INSNS (23), /* SI */
188 COSTS_N_INSNS (23), /* DI */
189 COSTS_N_INSNS (23)}, /* other */
190 COSTS_N_INSNS (3), /* cost of movsx */
191 COSTS_N_INSNS (2), /* cost of movzx */
192 15, /* "large" insn */
194 4, /* cost for loading QImode using movzbl */
195 {2, 4, 2}, /* cost of loading integer registers
196 in QImode, HImode and SImode.
197 Relative to reg-reg move (2). */
198 {2, 4, 2}, /* cost of storing integer registers */
199 2, /* cost of reg,reg fld/fst */
200 {8, 8, 8}, /* cost of loading fp registers
201 in SFmode, DFmode and XFmode */
202 {8, 8, 8}, /* cost of storing fp registers
203 in SFmode, DFmode and XFmode */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of l1 cache */
216 0, /* size of l2 cache */
217 0, /* size of prefetch block */
218 0, /* number of parallel prefetches */
220 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
221 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
222 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
223 COSTS_N_INSNS (22), /* cost of FABS instruction. */
224 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
225 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
228 1, /* scalar_stmt_cost. */
229 1, /* scalar load_cost. */
230 1, /* scalar_store_cost. */
231 1, /* vec_stmt_cost. */
232 1, /* vec_to_scalar_cost. */
233 1, /* scalar_to_vec_cost. */
234 1, /* vec_align_load_cost. */
235 2, /* vec_unalign_load_cost. */
236 1, /* vec_store_cost. */
237 3, /* cond_taken_branch_cost. */
238 1, /* cond_not_taken_branch_cost. */
/* Stringop tables for the 486: 4-byte (dword) rep prefix; second entry is
   the dummy (libcall-only) placeholder.  */
241 static stringop_algs i486_memcpy
[2] = {
242 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
243 DUMMY_STRINGOP_ALGS
};
244 static stringop_algs i486_memset
[2] = {
245 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
246 DUMMY_STRINGOP_ALGS
};
/* Cost model for the 486; positional initializer of struct processor_costs
   in COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; verify
   against upstream i386.c.  */
249 struct processor_costs i486_cost
= { /* 486 specific costs */
250 COSTS_N_INSNS (1), /* cost of an add instruction */
251 COSTS_N_INSNS (1), /* cost of a lea instruction */
252 COSTS_N_INSNS (3), /* variable shift costs */
253 COSTS_N_INSNS (2), /* constant shift costs */
254 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
255 COSTS_N_INSNS (12), /* HI */
256 COSTS_N_INSNS (12), /* SI */
257 COSTS_N_INSNS (12), /* DI */
258 COSTS_N_INSNS (12)}, /* other */
259 1, /* cost of multiply per each bit set */
260 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
261 COSTS_N_INSNS (40), /* HI */
262 COSTS_N_INSNS (40), /* SI */
263 COSTS_N_INSNS (40), /* DI */
264 COSTS_N_INSNS (40)}, /* other */
265 COSTS_N_INSNS (3), /* cost of movsx */
266 COSTS_N_INSNS (2), /* cost of movzx */
267 15, /* "large" insn */
269 4, /* cost for loading QImode using movzbl */
270 {2, 4, 2}, /* cost of loading integer registers
271 in QImode, HImode and SImode.
272 Relative to reg-reg move (2). */
273 {2, 4, 2}, /* cost of storing integer registers */
274 2, /* cost of reg,reg fld/fst */
275 {8, 8, 8}, /* cost of loading fp registers
276 in SFmode, DFmode and XFmode */
277 {8, 8, 8}, /* cost of storing fp registers
278 in SFmode, DFmode and XFmode */
279 2, /* cost of moving MMX register */
280 {4, 8}, /* cost of loading MMX registers
281 in SImode and DImode */
282 {4, 8}, /* cost of storing MMX registers
283 in SImode and DImode */
284 2, /* cost of moving SSE register */
285 {4, 8, 16}, /* cost of loading SSE registers
286 in SImode, DImode and TImode */
287 {4, 8, 16}, /* cost of storing SSE registers
288 in SImode, DImode and TImode */
289 3, /* MMX or SSE register to integer */
290 4, /* size of l1 cache. 486 has 8kB cache
291 shared for code and data, so 4kB is
292 not really precise. */
293 4, /* size of l2 cache */
294 0, /* size of prefetch block */
295 0, /* number of parallel prefetches */
297 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
298 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
299 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
300 COSTS_N_INSNS (3), /* cost of FABS instruction. */
301 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
302 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
305 1, /* scalar_stmt_cost. */
306 1, /* scalar load_cost. */
307 1, /* scalar_store_cost. */
308 1, /* vec_stmt_cost. */
309 1, /* vec_to_scalar_cost. */
310 1, /* scalar_to_vec_cost. */
311 1, /* vec_align_load_cost. */
312 2, /* vec_unalign_load_cost. */
313 1, /* vec_store_cost. */
314 3, /* cond_taken_branch_cost. */
315 1, /* cond_not_taken_branch_cost. */
/* Stringop tables for Pentium: dword rep prefix for blocks up to 256 bytes
   (memcpy) then libcall; memset always via dword rep prefix.  Second entry
   is the dummy placeholder.  */
318 static stringop_algs pentium_memcpy
[2] = {
319 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
320 DUMMY_STRINGOP_ALGS
};
321 static stringop_algs pentium_memset
[2] = {
322 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
323 DUMMY_STRINGOP_ALGS
};
/* Cost model for Pentium; positional initializer of struct processor_costs
   in COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; verify
   against upstream i386.c.  */
326 struct processor_costs pentium_cost
= {
327 COSTS_N_INSNS (1), /* cost of an add instruction */
328 COSTS_N_INSNS (1), /* cost of a lea instruction */
329 COSTS_N_INSNS (4), /* variable shift costs */
330 COSTS_N_INSNS (1), /* constant shift costs */
331 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
332 COSTS_N_INSNS (11), /* HI */
333 COSTS_N_INSNS (11), /* SI */
334 COSTS_N_INSNS (11), /* DI */
335 COSTS_N_INSNS (11)}, /* other */
336 0, /* cost of multiply per each bit set */
337 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
338 COSTS_N_INSNS (25), /* HI */
339 COSTS_N_INSNS (25), /* SI */
340 COSTS_N_INSNS (25), /* DI */
341 COSTS_N_INSNS (25)}, /* other */
342 COSTS_N_INSNS (3), /* cost of movsx */
343 COSTS_N_INSNS (2), /* cost of movzx */
344 8, /* "large" insn */
346 6, /* cost for loading QImode using movzbl */
347 {2, 4, 2}, /* cost of loading integer registers
348 in QImode, HImode and SImode.
349 Relative to reg-reg move (2). */
350 {2, 4, 2}, /* cost of storing integer registers */
351 2, /* cost of reg,reg fld/fst */
352 {2, 2, 6}, /* cost of loading fp registers
353 in SFmode, DFmode and XFmode */
354 {4, 4, 6}, /* cost of storing fp registers
355 in SFmode, DFmode and XFmode */
356 8, /* cost of moving MMX register */
357 {8, 8}, /* cost of loading MMX registers
358 in SImode and DImode */
359 {8, 8}, /* cost of storing MMX registers
360 in SImode and DImode */
361 2, /* cost of moving SSE register */
362 {4, 8, 16}, /* cost of loading SSE registers
363 in SImode, DImode and TImode */
364 {4, 8, 16}, /* cost of storing SSE registers
365 in SImode, DImode and TImode */
366 3, /* MMX or SSE register to integer */
367 8, /* size of l1 cache. */
368 8, /* size of l2 cache */
369 0, /* size of prefetch block */
370 0, /* number of parallel prefetches */
372 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
373 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
374 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
375 COSTS_N_INSNS (1), /* cost of FABS instruction. */
376 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
377 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
380 1, /* scalar_stmt_cost. */
381 1, /* scalar load_cost. */
382 1, /* scalar_store_cost. */
383 1, /* vec_stmt_cost. */
384 1, /* vec_to_scalar_cost. */
385 1, /* scalar_to_vec_cost. */
386 1, /* vec_align_load_cost. */
387 2, /* vec_unalign_load_cost. */
388 1, /* vec_store_cost. */
389 3, /* cond_taken_branch_cost. */
390 1, /* cond_not_taken_branch_cost. */
393 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
394 (we ensure the alignment). For small blocks inline loop is still a
395 noticeable win, for bigger blocks either rep movsl or rep movsb is
396 way to go. Rep movsb has apparently more expensive startup time in CPU,
397 but after 4K the difference is down in the noise. */
/* Size-threshold tables: loop up to 128 bytes, unrolled loop to 1K, dword
   rep prefix to 8K, then byte rep (memcpy) / libcall (memset).  Second
   entry is the dummy placeholder.  */
398 static stringop_algs pentiumpro_memcpy
[2] = {
399 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
400 {8192, rep_prefix_4_byte
, false},
401 {-1, rep_prefix_1_byte
, false}}},
402 DUMMY_STRINGOP_ALGS
};
403 static stringop_algs pentiumpro_memset
[2] = {
404 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
405 {8192, rep_prefix_4_byte
, false},
406 {-1, libcall
, false}}},
407 DUMMY_STRINGOP_ALGS
};
/* Cost model for PentiumPro; positional initializer of struct
   processor_costs in COSTS_N_INSNS units.  NOTE(review): original line
   numbers jump and no closing brace is visible -- lines appear dropped by
   extraction; verify against upstream i386.c.  */
409 struct processor_costs pentiumpro_cost
= {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1), /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (4), /* HI */
416 COSTS_N_INSNS (4), /* SI */
417 COSTS_N_INSNS (4), /* DI */
418 COSTS_N_INSNS (4)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (17), /* HI */
422 COSTS_N_INSNS (17), /* SI */
423 COSTS_N_INSNS (17), /* DI */
424 COSTS_N_INSNS (17)}, /* other */
425 COSTS_N_INSNS (1), /* cost of movsx */
426 COSTS_N_INSNS (1), /* cost of movzx */
427 8, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 4, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 2, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 2, /* cost of moving MMX register */
440 {2, 2}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {2, 2}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {2, 2, 8}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {2, 2, 8}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 256, /* size of l2 cache */
452 32, /* size of prefetch block */
453 6, /* number of parallel prefetches */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (2), /* cost of FABS instruction. */
459 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
/* Stringop tables for Geode: dword rep prefix up to 256 bytes, then
   libcall; second entry is the dummy placeholder.  */
476 static stringop_algs geode_memcpy
[2] = {
477 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
478 DUMMY_STRINGOP_ALGS
};
479 static stringop_algs geode_memset
[2] = {
480 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
481 DUMMY_STRINGOP_ALGS
};
/* Cost model for Geode; positional initializer of struct processor_costs
   in COSTS_N_INSNS units.  NOTE(review): original line numbers jump (513 is
   missing) and no closing brace is visible -- lines appear dropped by
   extraction; verify against upstream i386.c.  */
483 struct processor_costs geode_cost
= {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (1), /* cost of a lea instruction */
486 COSTS_N_INSNS (2), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (4), /* HI */
490 COSTS_N_INSNS (7), /* SI */
491 COSTS_N_INSNS (7), /* DI */
492 COSTS_N_INSNS (7)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (23), /* HI */
496 COSTS_N_INSNS (39), /* SI */
497 COSTS_N_INSNS (39), /* DI */
498 COSTS_N_INSNS (39)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
503 1, /* cost for loading QImode using movzbl */
504 {1, 1, 1}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {1, 1, 1}, /* cost of storing integer registers */
508 1, /* cost of reg,reg fld/fst */
509 {1, 1, 1}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {4, 6, 6}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
514 1, /* cost of moving MMX register */
515 {1, 1}, /* cost of loading MMX registers
516 in SImode and DImode */
517 {1, 1}, /* cost of storing MMX registers
518 in SImode and DImode */
519 1, /* cost of moving SSE register */
520 {1, 1, 1}, /* cost of loading SSE registers
521 in SImode, DImode and TImode */
522 {1, 1, 1}, /* cost of storing SSE registers
523 in SImode, DImode and TImode */
524 1, /* MMX or SSE register to integer */
525 64, /* size of l1 cache. */
526 128, /* size of l2 cache. */
527 32, /* size of prefetch block */
528 1, /* number of parallel prefetches */
530 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (1), /* cost of FABS instruction. */
534 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
538 1, /* scalar_stmt_cost. */
539 1, /* scalar load_cost. */
540 1, /* scalar_store_cost. */
541 1, /* vec_stmt_cost. */
542 1, /* vec_to_scalar_cost. */
543 1, /* scalar_to_vec_cost. */
544 1, /* vec_align_load_cost. */
545 2, /* vec_unalign_load_cost. */
546 1, /* vec_store_cost. */
547 3, /* cond_taken_branch_cost. */
548 1, /* cond_not_taken_branch_cost. */
/* Stringop tables for K6: dword rep prefix up to 256 bytes, then libcall;
   second entry is the dummy placeholder.  */
551 static stringop_algs k6_memcpy
[2] = {
552 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
553 DUMMY_STRINGOP_ALGS
};
554 static stringop_algs k6_memset
[2] = {
555 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
556 DUMMY_STRINGOP_ALGS
};
/* Cost model for K6; positional initializer of struct processor_costs in
   COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; verify
   against upstream i386.c.  */
558 struct processor_costs k6_cost
= {
559 COSTS_N_INSNS (1), /* cost of an add instruction */
560 COSTS_N_INSNS (2), /* cost of a lea instruction */
561 COSTS_N_INSNS (1), /* variable shift costs */
562 COSTS_N_INSNS (1), /* constant shift costs */
563 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
564 COSTS_N_INSNS (3), /* HI */
565 COSTS_N_INSNS (3), /* SI */
566 COSTS_N_INSNS (3), /* DI */
567 COSTS_N_INSNS (3)}, /* other */
568 0, /* cost of multiply per each bit set */
569 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
570 COSTS_N_INSNS (18), /* HI */
571 COSTS_N_INSNS (18), /* SI */
572 COSTS_N_INSNS (18), /* DI */
573 COSTS_N_INSNS (18)}, /* other */
574 COSTS_N_INSNS (2), /* cost of movsx */
575 COSTS_N_INSNS (2), /* cost of movzx */
576 8, /* "large" insn */
578 3, /* cost for loading QImode using movzbl */
579 {4, 5, 4}, /* cost of loading integer registers
580 in QImode, HImode and SImode.
581 Relative to reg-reg move (2). */
582 {2, 3, 2}, /* cost of storing integer registers */
583 4, /* cost of reg,reg fld/fst */
584 {6, 6, 6}, /* cost of loading fp registers
585 in SFmode, DFmode and XFmode */
586 {4, 4, 4}, /* cost of storing fp registers
587 in SFmode, DFmode and XFmode */
588 2, /* cost of moving MMX register */
589 {2, 2}, /* cost of loading MMX registers
590 in SImode and DImode */
591 {2, 2}, /* cost of storing MMX registers
592 in SImode and DImode */
593 2, /* cost of moving SSE register */
594 {2, 2, 8}, /* cost of loading SSE registers
595 in SImode, DImode and TImode */
596 {2, 2, 8}, /* cost of storing SSE registers
597 in SImode, DImode and TImode */
598 6, /* MMX or SSE register to integer */
599 32, /* size of l1 cache. */
600 32, /* size of l2 cache. Some models
601 have integrated l2 cache, but
602 optimizing for k6 is not important
603 enough to worry about that. */
604 32, /* size of prefetch block */
605 1, /* number of parallel prefetches */
607 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
608 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
609 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
610 COSTS_N_INSNS (2), /* cost of FABS instruction. */
611 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
612 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
615 1, /* scalar_stmt_cost. */
616 1, /* scalar load_cost. */
617 1, /* scalar_store_cost. */
618 1, /* vec_stmt_cost. */
619 1, /* vec_to_scalar_cost. */
620 1, /* scalar_to_vec_cost. */
621 1, /* vec_align_load_cost. */
622 2, /* vec_unalign_load_cost. */
623 1, /* vec_store_cost. */
624 3, /* cond_taken_branch_cost. */
625 1, /* cond_not_taken_branch_cost. */
628 /* For some reason, Athlon deals better with REP prefix (relative to loops)
629 compared to K8. Alignment becomes important after 8 bytes for memcpy and
630 128 bytes for memset. */
/* Stringop tables for Athlon: dword rep prefix up to 2K, then libcall;
   second entry is the dummy placeholder.  */
631 static stringop_algs athlon_memcpy
[2] = {
632 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
633 DUMMY_STRINGOP_ALGS
};
634 static stringop_algs athlon_memset
[2] = {
635 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
636 DUMMY_STRINGOP_ALGS
};
/* Cost model for Athlon; positional initializer of struct processor_costs
   in COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; verify
   against upstream i386.c.  */
638 struct processor_costs athlon_cost
= {
639 COSTS_N_INSNS (1), /* cost of an add instruction */
640 COSTS_N_INSNS (2), /* cost of a lea instruction */
641 COSTS_N_INSNS (1), /* variable shift costs */
642 COSTS_N_INSNS (1), /* constant shift costs */
643 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
644 COSTS_N_INSNS (5), /* HI */
645 COSTS_N_INSNS (5), /* SI */
646 COSTS_N_INSNS (5), /* DI */
647 COSTS_N_INSNS (5)}, /* other */
648 0, /* cost of multiply per each bit set */
649 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
650 COSTS_N_INSNS (26), /* HI */
651 COSTS_N_INSNS (42), /* SI */
652 COSTS_N_INSNS (74), /* DI */
653 COSTS_N_INSNS (74)}, /* other */
654 COSTS_N_INSNS (1), /* cost of movsx */
655 COSTS_N_INSNS (1), /* cost of movzx */
656 8, /* "large" insn */
658 4, /* cost for loading QImode using movzbl */
659 {3, 4, 3}, /* cost of loading integer registers
660 in QImode, HImode and SImode.
661 Relative to reg-reg move (2). */
662 {3, 4, 3}, /* cost of storing integer registers */
663 4, /* cost of reg,reg fld/fst */
664 {4, 4, 12}, /* cost of loading fp registers
665 in SFmode, DFmode and XFmode */
666 {6, 6, 8}, /* cost of storing fp registers
667 in SFmode, DFmode and XFmode */
668 2, /* cost of moving MMX register */
669 {4, 4}, /* cost of loading MMX registers
670 in SImode and DImode */
671 {4, 4}, /* cost of storing MMX registers
672 in SImode and DImode */
673 2, /* cost of moving SSE register */
674 {4, 4, 6}, /* cost of loading SSE registers
675 in SImode, DImode and TImode */
676 {4, 4, 5}, /* cost of storing SSE registers
677 in SImode, DImode and TImode */
678 5, /* MMX or SSE register to integer */
679 64, /* size of l1 cache. */
680 256, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 6, /* number of parallel prefetches */
684 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
685 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
686 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
687 COSTS_N_INSNS (2), /* cost of FABS instruction. */
688 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
689 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
692 1, /* scalar_stmt_cost. */
693 1, /* scalar load_cost. */
694 1, /* scalar_store_cost. */
695 1, /* vec_stmt_cost. */
696 1, /* vec_to_scalar_cost. */
697 1, /* scalar_to_vec_cost. */
698 1, /* vec_align_load_cost. */
699 2, /* vec_unalign_load_cost. */
700 1, /* vec_store_cost. */
701 3, /* cond_taken_branch_cost. */
702 1, /* cond_not_taken_branch_cost. */
705 /* K8 has optimized REP instruction for medium sized blocks, but for very
706 small blocks it is better to use loop. For large blocks, libcall can
707 do nontemporary accesses and beat inline considerably. */
/* Both table entries are populated here (no dummy placeholder) --
   presumably the 32-bit and 64-bit strategies; TODO(review): confirm.  */
708 static stringop_algs k8_memcpy
[2] = {
709 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
710 {-1, rep_prefix_4_byte
, false}}},
711 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
712 {-1, libcall
, false}}}};
713 static stringop_algs k8_memset
[2] = {
714 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
715 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
716 {libcall
, {{48, unrolled_loop
, false},
717 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
/* Cost model for K8; positional initializer of struct processor_costs in
   COSTS_N_INSNS units.  NOTE(review): original line numbers jump and no
   closing brace is visible -- lines appear dropped by extraction; the
   prefetch comment below lost its final "time).  */" line, which is
   restored here so the following initializer is not swallowed.  Verify
   against upstream i386.c.  */
719 struct processor_costs k8_cost
= {
720 COSTS_N_INSNS (1), /* cost of an add instruction */
721 COSTS_N_INSNS (2), /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (5)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
739 4, /* cost for loading QImode using movzbl */
740 {3, 4, 3}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {3, 4, 3}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {4, 4, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {3, 3}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {4, 4}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {4, 3, 6}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {4, 4, 5}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of l1 cache. */
761 512, /* size of l2 cache. */
762 64, /* size of prefetch block */
763 /* New AMD processors never drop prefetches; if they cannot be performed
764 immediately, they are queued. We set number of simultaneous prefetches
765 to a large constant to reflect this (it probably is not a good idea not
766 to limit number of prefetches at all, as their execution also takes some
time).  */
768 100, /* number of parallel prefetches */
770 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
771 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
772 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
773 COSTS_N_INSNS (2), /* cost of FABS instruction. */
774 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
775 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
779 4, /* scalar_stmt_cost. */
780 2, /* scalar load_cost. */
781 2, /* scalar_store_cost. */
782 5, /* vec_stmt_cost. */
783 0, /* vec_to_scalar_cost. */
784 2, /* scalar_to_vec_cost. */
785 2, /* vec_align_load_cost. */
786 3, /* vec_unalign_load_cost. */
787 3, /* vec_store_cost. */
788 3, /* cond_taken_branch_cost. */
789 2, /* cond_not_taken_branch_cost. */
792 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
793 very small blocks it is better to use loop. For large blocks, libcall can
794 do nontemporary accesses and beat inline considerably. */
/* Both table entries are populated (no dummy placeholder) -- presumably
   the 32-bit and 64-bit strategies; TODO(review): confirm.  */
795 static stringop_algs amdfam10_memcpy
[2] = {
796 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
797 {-1, rep_prefix_4_byte
, false}}},
798 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
799 {-1, libcall
, false}}}};
800 static stringop_algs amdfam10_memset
[2] = {
801 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
802 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
803 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
804 {-1, libcall
, false}}}};
/* Cost model for AMD Family 10h; positional initializer of struct
   processor_costs in COSTS_N_INSNS units.  NOTE(review): original line
   numbers jump and no closing brace is visible -- lines appear dropped by
   extraction.  Two comment-delimiter repairs below: the MOVD latency notes
   lost their enclosing comment markers, and the prefetch comment lost its
   final "time).  */" line; both are re-wrapped/restored so the initializer
   parses.  Verify against upstream i386.c.  */
805 struct processor_costs amdfam10_cost
= {
806 COSTS_N_INSNS (1), /* cost of an add instruction */
807 COSTS_N_INSNS (2), /* cost of a lea instruction */
808 COSTS_N_INSNS (1), /* variable shift costs */
809 COSTS_N_INSNS (1), /* constant shift costs */
810 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
811 COSTS_N_INSNS (4), /* HI */
812 COSTS_N_INSNS (3), /* SI */
813 COSTS_N_INSNS (4), /* DI */
814 COSTS_N_INSNS (5)}, /* other */
815 0, /* cost of multiply per each bit set */
816 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
817 COSTS_N_INSNS (35), /* HI */
818 COSTS_N_INSNS (51), /* SI */
819 COSTS_N_INSNS (83), /* DI */
820 COSTS_N_INSNS (83)}, /* other */
821 COSTS_N_INSNS (1), /* cost of movsx */
822 COSTS_N_INSNS (1), /* cost of movzx */
823 8, /* "large" insn */
825 4, /* cost for loading QImode using movzbl */
826 {3, 4, 3}, /* cost of loading integer registers
827 in QImode, HImode and SImode.
828 Relative to reg-reg move (2). */
829 {3, 4, 3}, /* cost of storing integer registers */
830 4, /* cost of reg,reg fld/fst */
831 {4, 4, 12}, /* cost of loading fp registers
832 in SFmode, DFmode and XFmode */
833 {6, 6, 8}, /* cost of storing fp registers
834 in SFmode, DFmode and XFmode */
835 2, /* cost of moving MMX register */
836 {3, 3}, /* cost of loading MMX registers
837 in SImode and DImode */
838 {4, 4}, /* cost of storing MMX registers
839 in SImode and DImode */
840 2, /* cost of moving SSE register */
841 {4, 4, 3}, /* cost of loading SSE registers
842 in SImode, DImode and TImode */
843 {4, 4, 5}, /* cost of storing SSE registers
844 in SImode, DImode and TImode */
845 3, /* MMX or SSE register to integer */
/* NOTE(review): latency notes below lost their comment delimiters in
   extraction; re-wrapped as a comment:
847 MOVD reg64, xmmreg Double FSTORE 4
848 MOVD reg32, xmmreg Double FSTORE 4
850 MOVD reg64, xmmreg Double FADD 3
852 MOVD reg32, xmmreg Double FADD 3
*/
854 64, /* size of l1 cache. */
855 512, /* size of l2 cache. */
856 64, /* size of prefetch block */
857 /* New AMD processors never drop prefetches; if they cannot be performed
858 immediately, they are queued. We set number of simultaneous prefetches
859 to a large constant to reflect this (it probably is not a good idea not
860 to limit number of prefetches at all, as their execution also takes some
time).  */
862 100, /* number of parallel prefetches */
864 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
865 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
866 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
867 COSTS_N_INSNS (2), /* cost of FABS instruction. */
868 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
869 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
873 4, /* scalar_stmt_cost. */
874 2, /* scalar load_cost. */
875 2, /* scalar_store_cost. */
876 6, /* vec_stmt_cost. */
877 0, /* vec_to_scalar_cost. */
878 2, /* scalar_to_vec_cost. */
879 2, /* vec_align_load_cost. */
880 2, /* vec_unalign_load_cost. */
881 2, /* vec_store_cost. */
882 2, /* cond_taken_branch_cost. */
883 1, /* cond_not_taken_branch_cost. */
886 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
887 very small blocks it is better to use loop. For large blocks, libcall
888 can do nontemporary accesses and beat inline considerably. */
889 static stringop_algs bdver1_memcpy
[2] = {
890 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
891 {-1, rep_prefix_4_byte
, false}}},
892 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
893 {-1, libcall
, false}}}};
894 static stringop_algs bdver1_memset
[2] = {
895 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
896 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
897 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
898 {-1, libcall
, false}}}};
900 const struct processor_costs bdver1_cost
= {
901 COSTS_N_INSNS (1), /* cost of an add instruction */
902 COSTS_N_INSNS (1), /* cost of a lea instruction */
903 COSTS_N_INSNS (1), /* variable shift costs */
904 COSTS_N_INSNS (1), /* constant shift costs */
905 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
906 COSTS_N_INSNS (4), /* HI */
907 COSTS_N_INSNS (4), /* SI */
908 COSTS_N_INSNS (6), /* DI */
909 COSTS_N_INSNS (6)}, /* other */
910 0, /* cost of multiply per each bit set */
911 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
912 COSTS_N_INSNS (35), /* HI */
913 COSTS_N_INSNS (51), /* SI */
914 COSTS_N_INSNS (83), /* DI */
915 COSTS_N_INSNS (83)}, /* other */
916 COSTS_N_INSNS (1), /* cost of movsx */
917 COSTS_N_INSNS (1), /* cost of movzx */
918 8, /* "large" insn */
920 4, /* cost for loading QImode using movzbl */
921 {5, 5, 4}, /* cost of loading integer registers
922 in QImode, HImode and SImode.
923 Relative to reg-reg move (2). */
924 {4, 4, 4}, /* cost of storing integer registers */
925 2, /* cost of reg,reg fld/fst */
926 {5, 5, 12}, /* cost of loading fp registers
927 in SFmode, DFmode and XFmode */
928 {4, 4, 8}, /* cost of storing fp registers
929 in SFmode, DFmode and XFmode */
930 2, /* cost of moving MMX register */
931 {4, 4}, /* cost of loading MMX registers
932 in SImode and DImode */
933 {4, 4}, /* cost of storing MMX registers
934 in SImode and DImode */
935 2, /* cost of moving SSE register */
936 {4, 4, 4}, /* cost of loading SSE registers
937 in SImode, DImode and TImode */
938 {4, 4, 4}, /* cost of storing SSE registers
939 in SImode, DImode and TImode */
940 2, /* MMX or SSE register to integer */
942 MOVD reg64, xmmreg Double FSTORE 4
943 MOVD reg32, xmmreg Double FSTORE 4
945 MOVD reg64, xmmreg Double FADD 3
947 MOVD reg32, xmmreg Double FADD 3
949 16, /* size of l1 cache. */
950 2048, /* size of l2 cache. */
951 64, /* size of prefetch block */
952 /* New AMD processors never drop prefetches; if they cannot be performed
953 immediately, they are queued. We set number of simultaneous prefetches
954 to a large constant to reflect this (it probably is not a good idea not
955 to limit number of prefetches at all, as their execution also takes some
957 100, /* number of parallel prefetches */
959 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
960 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
961 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
962 COSTS_N_INSNS (2), /* cost of FABS instruction. */
963 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
964 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
968 6, /* scalar_stmt_cost. */
969 4, /* scalar load_cost. */
970 4, /* scalar_store_cost. */
971 6, /* vec_stmt_cost. */
972 0, /* vec_to_scalar_cost. */
973 2, /* scalar_to_vec_cost. */
974 4, /* vec_align_load_cost. */
975 4, /* vec_unalign_load_cost. */
976 4, /* vec_store_cost. */
977 2, /* cond_taken_branch_cost. */
978 1, /* cond_not_taken_branch_cost. */
981 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
982 very small blocks it is better to use loop. For large blocks, libcall
983 can do nontemporary accesses and beat inline considerably. */
985 static stringop_algs bdver2_memcpy
[2] = {
986 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
987 {-1, rep_prefix_4_byte
, false}}},
988 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
989 {-1, libcall
, false}}}};
990 static stringop_algs bdver2_memset
[2] = {
991 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
992 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
993 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
994 {-1, libcall
, false}}}};
996 const struct processor_costs bdver2_cost
= {
997 COSTS_N_INSNS (1), /* cost of an add instruction */
998 COSTS_N_INSNS (1), /* cost of a lea instruction */
999 COSTS_N_INSNS (1), /* variable shift costs */
1000 COSTS_N_INSNS (1), /* constant shift costs */
1001 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1002 COSTS_N_INSNS (4), /* HI */
1003 COSTS_N_INSNS (4), /* SI */
1004 COSTS_N_INSNS (6), /* DI */
1005 COSTS_N_INSNS (6)}, /* other */
1006 0, /* cost of multiply per each bit set */
1007 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1008 COSTS_N_INSNS (35), /* HI */
1009 COSTS_N_INSNS (51), /* SI */
1010 COSTS_N_INSNS (83), /* DI */
1011 COSTS_N_INSNS (83)}, /* other */
1012 COSTS_N_INSNS (1), /* cost of movsx */
1013 COSTS_N_INSNS (1), /* cost of movzx */
1014 8, /* "large" insn */
1016 4, /* cost for loading QImode using movzbl */
1017 {5, 5, 4}, /* cost of loading integer registers
1018 in QImode, HImode and SImode.
1019 Relative to reg-reg move (2). */
1020 {4, 4, 4}, /* cost of storing integer registers */
1021 2, /* cost of reg,reg fld/fst */
1022 {5, 5, 12}, /* cost of loading fp registers
1023 in SFmode, DFmode and XFmode */
1024 {4, 4, 8}, /* cost of storing fp registers
1025 in SFmode, DFmode and XFmode */
1026 2, /* cost of moving MMX register */
1027 {4, 4}, /* cost of loading MMX registers
1028 in SImode and DImode */
1029 {4, 4}, /* cost of storing MMX registers
1030 in SImode and DImode */
1031 2, /* cost of moving SSE register */
1032 {4, 4, 4}, /* cost of loading SSE registers
1033 in SImode, DImode and TImode */
1034 {4, 4, 4}, /* cost of storing SSE registers
1035 in SImode, DImode and TImode */
1036 2, /* MMX or SSE register to integer */
1038 MOVD reg64, xmmreg Double FSTORE 4
1039 MOVD reg32, xmmreg Double FSTORE 4
1041 MOVD reg64, xmmreg Double FADD 3
1043 MOVD reg32, xmmreg Double FADD 3
1045 16, /* size of l1 cache. */
1046 2048, /* size of l2 cache. */
1047 64, /* size of prefetch block */
1048 /* New AMD processors never drop prefetches; if they cannot be performed
1049 immediately, they are queued. We set number of simultaneous prefetches
1050 to a large constant to reflect this (it probably is not a good idea not
1051 to limit number of prefetches at all, as their execution also takes some
1053 100, /* number of parallel prefetches */
1054 2, /* Branch cost */
1055 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1056 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1057 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1058 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1059 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1060 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1064 6, /* scalar_stmt_cost. */
1065 4, /* scalar load_cost. */
1066 4, /* scalar_store_cost. */
1067 6, /* vec_stmt_cost. */
1068 0, /* vec_to_scalar_cost. */
1069 2, /* scalar_to_vec_cost. */
1070 4, /* vec_align_load_cost. */
1071 4, /* vec_unalign_load_cost. */
1072 4, /* vec_store_cost. */
1073 2, /* cond_taken_branch_cost. */
1074 1, /* cond_not_taken_branch_cost. */
1078 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1079 very small blocks it is better to use loop. For large blocks, libcall
1080 can do nontemporary accesses and beat inline considerably. */
1081 static stringop_algs bdver3_memcpy
[2] = {
1082 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1083 {-1, rep_prefix_4_byte
, false}}},
1084 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}};
1086 static stringop_algs bdver3_memset
[2] = {
1087 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1088 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1089 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1090 {-1, libcall
, false}}}};
1091 struct processor_costs bdver3_cost
= {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (1), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (4), /* SI */
1099 COSTS_N_INSNS (6), /* DI */
1100 COSTS_N_INSNS (6)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (35), /* HI */
1104 COSTS_N_INSNS (51), /* SI */
1105 COSTS_N_INSNS (83), /* DI */
1106 COSTS_N_INSNS (83)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {5, 5, 4}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {4, 4, 4}, /* cost of storing integer registers */
1116 2, /* cost of reg,reg fld/fst */
1117 {5, 5, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {4, 4, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {4, 4}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 4, 4}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 4}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 2, /* MMX or SSE register to integer */
1132 16, /* size of l1 cache. */
1133 2048, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 2, /* Branch cost */
1142 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1151 6, /* scalar_stmt_cost. */
1152 4, /* scalar load_cost. */
1153 4, /* scalar_store_cost. */
1154 6, /* vec_stmt_cost. */
1155 0, /* vec_to_scalar_cost. */
1156 2, /* scalar_to_vec_cost. */
1157 4, /* vec_align_load_cost. */
1158 4, /* vec_unalign_load_cost. */
1159 4, /* vec_store_cost. */
1160 2, /* cond_taken_branch_cost. */
1161 1, /* cond_not_taken_branch_cost. */
1164 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1165 very small blocks it is better to use loop. For large blocks, libcall can
1166 do nontemporary accesses and beat inline considerably. */
1167 static stringop_algs btver1_memcpy
[2] = {
1168 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1169 {-1, rep_prefix_4_byte
, false}}},
1170 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1171 {-1, libcall
, false}}}};
1172 static stringop_algs btver1_memset
[2] = {
1173 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1174 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1175 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1176 {-1, libcall
, false}}}};
1177 const struct processor_costs btver1_cost
= {
1178 COSTS_N_INSNS (1), /* cost of an add instruction */
1179 COSTS_N_INSNS (2), /* cost of a lea instruction */
1180 COSTS_N_INSNS (1), /* variable shift costs */
1181 COSTS_N_INSNS (1), /* constant shift costs */
1182 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1183 COSTS_N_INSNS (4), /* HI */
1184 COSTS_N_INSNS (3), /* SI */
1185 COSTS_N_INSNS (4), /* DI */
1186 COSTS_N_INSNS (5)}, /* other */
1187 0, /* cost of multiply per each bit set */
1188 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1189 COSTS_N_INSNS (35), /* HI */
1190 COSTS_N_INSNS (51), /* SI */
1191 COSTS_N_INSNS (83), /* DI */
1192 COSTS_N_INSNS (83)}, /* other */
1193 COSTS_N_INSNS (1), /* cost of movsx */
1194 COSTS_N_INSNS (1), /* cost of movzx */
1195 8, /* "large" insn */
1197 4, /* cost for loading QImode using movzbl */
1198 {3, 4, 3}, /* cost of loading integer registers
1199 in QImode, HImode and SImode.
1200 Relative to reg-reg move (2). */
1201 {3, 4, 3}, /* cost of storing integer registers */
1202 4, /* cost of reg,reg fld/fst */
1203 {4, 4, 12}, /* cost of loading fp registers
1204 in SFmode, DFmode and XFmode */
1205 {6, 6, 8}, /* cost of storing fp registers
1206 in SFmode, DFmode and XFmode */
1207 2, /* cost of moving MMX register */
1208 {3, 3}, /* cost of loading MMX registers
1209 in SImode and DImode */
1210 {4, 4}, /* cost of storing MMX registers
1211 in SImode and DImode */
1212 2, /* cost of moving SSE register */
1213 {4, 4, 3}, /* cost of loading SSE registers
1214 in SImode, DImode and TImode */
1215 {4, 4, 5}, /* cost of storing SSE registers
1216 in SImode, DImode and TImode */
1217 3, /* MMX or SSE register to integer */
1219 MOVD reg64, xmmreg Double FSTORE 4
1220 MOVD reg32, xmmreg Double FSTORE 4
1222 MOVD reg64, xmmreg Double FADD 3
1224 MOVD reg32, xmmreg Double FADD 3
1226 32, /* size of l1 cache. */
1227 512, /* size of l2 cache. */
1228 64, /* size of prefetch block */
1229 100, /* number of parallel prefetches */
1230 2, /* Branch cost */
1231 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1232 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1233 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1234 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1235 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1236 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1240 4, /* scalar_stmt_cost. */
1241 2, /* scalar load_cost. */
1242 2, /* scalar_store_cost. */
1243 6, /* vec_stmt_cost. */
1244 0, /* vec_to_scalar_cost. */
1245 2, /* scalar_to_vec_cost. */
1246 2, /* vec_align_load_cost. */
1247 2, /* vec_unalign_load_cost. */
1248 2, /* vec_store_cost. */
1249 2, /* cond_taken_branch_cost. */
1250 1, /* cond_not_taken_branch_cost. */
1253 static stringop_algs btver2_memcpy
[2] = {
1254 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1255 {-1, rep_prefix_4_byte
, false}}},
1256 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1257 {-1, libcall
, false}}}};
1258 static stringop_algs btver2_memset
[2] = {
1259 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1260 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1261 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1262 {-1, libcall
, false}}}};
1263 const struct processor_costs btver2_cost
= {
1264 COSTS_N_INSNS (1), /* cost of an add instruction */
1265 COSTS_N_INSNS (2), /* cost of a lea instruction */
1266 COSTS_N_INSNS (1), /* variable shift costs */
1267 COSTS_N_INSNS (1), /* constant shift costs */
1268 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1269 COSTS_N_INSNS (4), /* HI */
1270 COSTS_N_INSNS (3), /* SI */
1271 COSTS_N_INSNS (4), /* DI */
1272 COSTS_N_INSNS (5)}, /* other */
1273 0, /* cost of multiply per each bit set */
1274 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1275 COSTS_N_INSNS (35), /* HI */
1276 COSTS_N_INSNS (51), /* SI */
1277 COSTS_N_INSNS (83), /* DI */
1278 COSTS_N_INSNS (83)}, /* other */
1279 COSTS_N_INSNS (1), /* cost of movsx */
1280 COSTS_N_INSNS (1), /* cost of movzx */
1281 8, /* "large" insn */
1283 4, /* cost for loading QImode using movzbl */
1284 {3, 4, 3}, /* cost of loading integer registers
1285 in QImode, HImode and SImode.
1286 Relative to reg-reg move (2). */
1287 {3, 4, 3}, /* cost of storing integer registers */
1288 4, /* cost of reg,reg fld/fst */
1289 {4, 4, 12}, /* cost of loading fp registers
1290 in SFmode, DFmode and XFmode */
1291 {6, 6, 8}, /* cost of storing fp registers
1292 in SFmode, DFmode and XFmode */
1293 2, /* cost of moving MMX register */
1294 {3, 3}, /* cost of loading MMX registers
1295 in SImode and DImode */
1296 {4, 4}, /* cost of storing MMX registers
1297 in SImode and DImode */
1298 2, /* cost of moving SSE register */
1299 {4, 4, 3}, /* cost of loading SSE registers
1300 in SImode, DImode and TImode */
1301 {4, 4, 5}, /* cost of storing SSE registers
1302 in SImode, DImode and TImode */
1303 3, /* MMX or SSE register to integer */
1305 MOVD reg64, xmmreg Double FSTORE 4
1306 MOVD reg32, xmmreg Double FSTORE 4
1308 MOVD reg64, xmmreg Double FADD 3
1310 MOVD reg32, xmmreg Double FADD 3
1312 32, /* size of l1 cache. */
1313 2048, /* size of l2 cache. */
1314 64, /* size of prefetch block */
1315 100, /* number of parallel prefetches */
1316 2, /* Branch cost */
1317 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1318 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1319 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1320 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1321 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1322 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1325 4, /* scalar_stmt_cost. */
1326 2, /* scalar load_cost. */
1327 2, /* scalar_store_cost. */
1328 6, /* vec_stmt_cost. */
1329 0, /* vec_to_scalar_cost. */
1330 2, /* scalar_to_vec_cost. */
1331 2, /* vec_align_load_cost. */
1332 2, /* vec_unalign_load_cost. */
1333 2, /* vec_store_cost. */
1334 2, /* cond_taken_branch_cost. */
1335 1, /* cond_not_taken_branch_cost. */
1338 static stringop_algs pentium4_memcpy
[2] = {
1339 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1340 DUMMY_STRINGOP_ALGS
};
1341 static stringop_algs pentium4_memset
[2] = {
1342 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1343 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1344 DUMMY_STRINGOP_ALGS
};
1347 struct processor_costs pentium4_cost
= {
1348 COSTS_N_INSNS (1), /* cost of an add instruction */
1349 COSTS_N_INSNS (3), /* cost of a lea instruction */
1350 COSTS_N_INSNS (4), /* variable shift costs */
1351 COSTS_N_INSNS (4), /* constant shift costs */
1352 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1353 COSTS_N_INSNS (15), /* HI */
1354 COSTS_N_INSNS (15), /* SI */
1355 COSTS_N_INSNS (15), /* DI */
1356 COSTS_N_INSNS (15)}, /* other */
1357 0, /* cost of multiply per each bit set */
1358 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1359 COSTS_N_INSNS (56), /* HI */
1360 COSTS_N_INSNS (56), /* SI */
1361 COSTS_N_INSNS (56), /* DI */
1362 COSTS_N_INSNS (56)}, /* other */
1363 COSTS_N_INSNS (1), /* cost of movsx */
1364 COSTS_N_INSNS (1), /* cost of movzx */
1365 16, /* "large" insn */
1367 2, /* cost for loading QImode using movzbl */
1368 {4, 5, 4}, /* cost of loading integer registers
1369 in QImode, HImode and SImode.
1370 Relative to reg-reg move (2). */
1371 {2, 3, 2}, /* cost of storing integer registers */
1372 2, /* cost of reg,reg fld/fst */
1373 {2, 2, 6}, /* cost of loading fp registers
1374 in SFmode, DFmode and XFmode */
1375 {4, 4, 6}, /* cost of storing fp registers
1376 in SFmode, DFmode and XFmode */
1377 2, /* cost of moving MMX register */
1378 {2, 2}, /* cost of loading MMX registers
1379 in SImode and DImode */
1380 {2, 2}, /* cost of storing MMX registers
1381 in SImode and DImode */
1382 12, /* cost of moving SSE register */
1383 {12, 12, 12}, /* cost of loading SSE registers
1384 in SImode, DImode and TImode */
1385 {2, 2, 8}, /* cost of storing SSE registers
1386 in SImode, DImode and TImode */
1387 10, /* MMX or SSE register to integer */
1388 8, /* size of l1 cache. */
1389 256, /* size of l2 cache. */
1390 64, /* size of prefetch block */
1391 6, /* number of parallel prefetches */
1392 2, /* Branch cost */
1393 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1394 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1395 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1396 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1397 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1398 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1401 1, /* scalar_stmt_cost. */
1402 1, /* scalar load_cost. */
1403 1, /* scalar_store_cost. */
1404 1, /* vec_stmt_cost. */
1405 1, /* vec_to_scalar_cost. */
1406 1, /* scalar_to_vec_cost. */
1407 1, /* vec_align_load_cost. */
1408 2, /* vec_unalign_load_cost. */
1409 1, /* vec_store_cost. */
1410 3, /* cond_taken_branch_cost. */
1411 1, /* cond_not_taken_branch_cost. */
1414 static stringop_algs nocona_memcpy
[2] = {
1415 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1416 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1417 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1419 static stringop_algs nocona_memset
[2] = {
1420 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1421 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1422 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1423 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1426 struct processor_costs nocona_cost
= {
1427 COSTS_N_INSNS (1), /* cost of an add instruction */
1428 COSTS_N_INSNS (1), /* cost of a lea instruction */
1429 COSTS_N_INSNS (1), /* variable shift costs */
1430 COSTS_N_INSNS (1), /* constant shift costs */
1431 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1432 COSTS_N_INSNS (10), /* HI */
1433 COSTS_N_INSNS (10), /* SI */
1434 COSTS_N_INSNS (10), /* DI */
1435 COSTS_N_INSNS (10)}, /* other */
1436 0, /* cost of multiply per each bit set */
1437 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1438 COSTS_N_INSNS (66), /* HI */
1439 COSTS_N_INSNS (66), /* SI */
1440 COSTS_N_INSNS (66), /* DI */
1441 COSTS_N_INSNS (66)}, /* other */
1442 COSTS_N_INSNS (1), /* cost of movsx */
1443 COSTS_N_INSNS (1), /* cost of movzx */
1444 16, /* "large" insn */
1445 17, /* MOVE_RATIO */
1446 4, /* cost for loading QImode using movzbl */
1447 {4, 4, 4}, /* cost of loading integer registers
1448 in QImode, HImode and SImode.
1449 Relative to reg-reg move (2). */
1450 {4, 4, 4}, /* cost of storing integer registers */
1451 3, /* cost of reg,reg fld/fst */
1452 {12, 12, 12}, /* cost of loading fp registers
1453 in SFmode, DFmode and XFmode */
1454 {4, 4, 4}, /* cost of storing fp registers
1455 in SFmode, DFmode and XFmode */
1456 6, /* cost of moving MMX register */
1457 {12, 12}, /* cost of loading MMX registers
1458 in SImode and DImode */
1459 {12, 12}, /* cost of storing MMX registers
1460 in SImode and DImode */
1461 6, /* cost of moving SSE register */
1462 {12, 12, 12}, /* cost of loading SSE registers
1463 in SImode, DImode and TImode */
1464 {12, 12, 12}, /* cost of storing SSE registers
1465 in SImode, DImode and TImode */
1466 8, /* MMX or SSE register to integer */
1467 8, /* size of l1 cache. */
1468 1024, /* size of l2 cache. */
1469 128, /* size of prefetch block */
1470 8, /* number of parallel prefetches */
1471 1, /* Branch cost */
1472 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1473 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1474 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1475 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1476 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1477 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1480 1, /* scalar_stmt_cost. */
1481 1, /* scalar load_cost. */
1482 1, /* scalar_store_cost. */
1483 1, /* vec_stmt_cost. */
1484 1, /* vec_to_scalar_cost. */
1485 1, /* scalar_to_vec_cost. */
1486 1, /* vec_align_load_cost. */
1487 2, /* vec_unalign_load_cost. */
1488 1, /* vec_store_cost. */
1489 3, /* cond_taken_branch_cost. */
1490 1, /* cond_not_taken_branch_cost. */
1493 static stringop_algs atom_memcpy
[2] = {
1494 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1495 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1496 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1497 static stringop_algs atom_memset
[2] = {
1498 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1499 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1500 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1501 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1503 struct processor_costs atom_cost
= {
1504 COSTS_N_INSNS (1), /* cost of an add instruction */
1505 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1506 COSTS_N_INSNS (1), /* variable shift costs */
1507 COSTS_N_INSNS (1), /* constant shift costs */
1508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1509 COSTS_N_INSNS (4), /* HI */
1510 COSTS_N_INSNS (3), /* SI */
1511 COSTS_N_INSNS (4), /* DI */
1512 COSTS_N_INSNS (2)}, /* other */
1513 0, /* cost of multiply per each bit set */
1514 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1515 COSTS_N_INSNS (26), /* HI */
1516 COSTS_N_INSNS (42), /* SI */
1517 COSTS_N_INSNS (74), /* DI */
1518 COSTS_N_INSNS (74)}, /* other */
1519 COSTS_N_INSNS (1), /* cost of movsx */
1520 COSTS_N_INSNS (1), /* cost of movzx */
1521 8, /* "large" insn */
1522 17, /* MOVE_RATIO */
1523 4, /* cost for loading QImode using movzbl */
1524 {4, 4, 4}, /* cost of loading integer registers
1525 in QImode, HImode and SImode.
1526 Relative to reg-reg move (2). */
1527 {4, 4, 4}, /* cost of storing integer registers */
1528 4, /* cost of reg,reg fld/fst */
1529 {12, 12, 12}, /* cost of loading fp registers
1530 in SFmode, DFmode and XFmode */
1531 {6, 6, 8}, /* cost of storing fp registers
1532 in SFmode, DFmode and XFmode */
1533 2, /* cost of moving MMX register */
1534 {8, 8}, /* cost of loading MMX registers
1535 in SImode and DImode */
1536 {8, 8}, /* cost of storing MMX registers
1537 in SImode and DImode */
1538 2, /* cost of moving SSE register */
1539 {8, 8, 8}, /* cost of loading SSE registers
1540 in SImode, DImode and TImode */
1541 {8, 8, 8}, /* cost of storing SSE registers
1542 in SImode, DImode and TImode */
1543 5, /* MMX or SSE register to integer */
1544 32, /* size of l1 cache. */
1545 256, /* size of l2 cache. */
1546 64, /* size of prefetch block */
1547 6, /* number of parallel prefetches */
1548 3, /* Branch cost */
1549 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1550 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1551 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1552 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1553 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1554 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1557 1, /* scalar_stmt_cost. */
1558 1, /* scalar load_cost. */
1559 1, /* scalar_store_cost. */
1560 1, /* vec_stmt_cost. */
1561 1, /* vec_to_scalar_cost. */
1562 1, /* scalar_to_vec_cost. */
1563 1, /* vec_align_load_cost. */
1564 2, /* vec_unalign_load_cost. */
1565 1, /* vec_store_cost. */
1566 3, /* cond_taken_branch_cost. */
1567 1, /* cond_not_taken_branch_cost. */
1570 static stringop_algs slm_memcpy
[2] = {
1571 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1572 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1573 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1574 static stringop_algs slm_memset
[2] = {
1575 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1576 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1577 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1578 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1580 struct processor_costs slm_cost
= {
1581 COSTS_N_INSNS (1), /* cost of an add instruction */
1582 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1583 COSTS_N_INSNS (1), /* variable shift costs */
1584 COSTS_N_INSNS (1), /* constant shift costs */
1585 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1586 COSTS_N_INSNS (4), /* HI */
1587 COSTS_N_INSNS (3), /* SI */
1588 COSTS_N_INSNS (4), /* DI */
1589 COSTS_N_INSNS (2)}, /* other */
1590 0, /* cost of multiply per each bit set */
1591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1592 COSTS_N_INSNS (26), /* HI */
1593 COSTS_N_INSNS (42), /* SI */
1594 COSTS_N_INSNS (74), /* DI */
1595 COSTS_N_INSNS (74)}, /* other */
1596 COSTS_N_INSNS (1), /* cost of movsx */
1597 COSTS_N_INSNS (1), /* cost of movzx */
1598 8, /* "large" insn */
1599 17, /* MOVE_RATIO */
1600 4, /* cost for loading QImode using movzbl */
1601 {4, 4, 4}, /* cost of loading integer registers
1602 in QImode, HImode and SImode.
1603 Relative to reg-reg move (2). */
1604 {4, 4, 4}, /* cost of storing integer registers */
1605 4, /* cost of reg,reg fld/fst */
1606 {12, 12, 12}, /* cost of loading fp registers
1607 in SFmode, DFmode and XFmode */
1608 {6, 6, 8}, /* cost of storing fp registers
1609 in SFmode, DFmode and XFmode */
1610 2, /* cost of moving MMX register */
1611 {8, 8}, /* cost of loading MMX registers
1612 in SImode and DImode */
1613 {8, 8}, /* cost of storing MMX registers
1614 in SImode and DImode */
1615 2, /* cost of moving SSE register */
1616 {8, 8, 8}, /* cost of loading SSE registers
1617 in SImode, DImode and TImode */
1618 {8, 8, 8}, /* cost of storing SSE registers
1619 in SImode, DImode and TImode */
1620 5, /* MMX or SSE register to integer */
1621 32, /* size of l1 cache. */
1622 256, /* size of l2 cache. */
1623 64, /* size of prefetch block */
1624 6, /* number of parallel prefetches */
1625 3, /* Branch cost */
1626 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1627 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1628 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1629 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1630 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1631 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1634 1, /* scalar_stmt_cost. */
1635 1, /* scalar load_cost. */
1636 1, /* scalar_store_cost. */
1637 1, /* vec_stmt_cost. */
1638 1, /* vec_to_scalar_cost. */
1639 1, /* scalar_to_vec_cost. */
1640 1, /* vec_align_load_cost. */
1641 2, /* vec_unalign_load_cost. */
1642 1, /* vec_store_cost. */
1643 3, /* cond_taken_branch_cost. */
1644 1, /* cond_not_taken_branch_cost. */
1647 /* Generic should produce code tuned for Core-i7 (and newer chips)
1648 and btver1 (and newer chips). */
1650 static stringop_algs generic_memcpy
[2] = {
1651 DUMMY_STRINGOP_ALGS
,
1652 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1653 {-1, libcall
, false}}}};
1654 static stringop_algs generic_memset
[2] = {
1655 DUMMY_STRINGOP_ALGS
,
1656 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1657 {-1, libcall
, false}}}};
1659 struct processor_costs generic_cost
= {
1660 COSTS_N_INSNS (1), /* cost of an add instruction */
1661 /* On all chips taken into consideration lea is 2 cycles and more. With
1662 this cost however our current implementation of synth_mult results in
1663 use of unnecessary temporary registers causing regression on several
1664 SPECfp benchmarks. */
1665 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1666 COSTS_N_INSNS (1), /* variable shift costs */
1667 COSTS_N_INSNS (1), /* constant shift costs */
1668 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1669 COSTS_N_INSNS (4), /* HI */
1670 COSTS_N_INSNS (3), /* SI */
1671 COSTS_N_INSNS (4), /* DI */
1672 COSTS_N_INSNS (2)}, /* other */
1673 0, /* cost of multiply per each bit set */
1674 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1675 COSTS_N_INSNS (26), /* HI */
1676 COSTS_N_INSNS (42), /* SI */
1677 COSTS_N_INSNS (74), /* DI */
1678 COSTS_N_INSNS (74)}, /* other */
1679 COSTS_N_INSNS (1), /* cost of movsx */
1680 COSTS_N_INSNS (1), /* cost of movzx */
1681 8, /* "large" insn */
1682 17, /* MOVE_RATIO */
1683 4, /* cost for loading QImode using movzbl */
1684 {4, 4, 4}, /* cost of loading integer registers
1685 in QImode, HImode and SImode.
1686 Relative to reg-reg move (2). */
1687 {4, 4, 4}, /* cost of storing integer registers */
1688 4, /* cost of reg,reg fld/fst */
1689 {12, 12, 12}, /* cost of loading fp registers
1690 in SFmode, DFmode and XFmode */
1691 {6, 6, 8}, /* cost of storing fp registers
1692 in SFmode, DFmode and XFmode */
1693 2, /* cost of moving MMX register */
1694 {8, 8}, /* cost of loading MMX registers
1695 in SImode and DImode */
1696 {8, 8}, /* cost of storing MMX registers
1697 in SImode and DImode */
1698 2, /* cost of moving SSE register */
1699 {8, 8, 8}, /* cost of loading SSE registers
1700 in SImode, DImode and TImode */
1701 {8, 8, 8}, /* cost of storing SSE registers
1702 in SImode, DImode and TImode */
1703 5, /* MMX or SSE register to integer */
1704 32, /* size of l1 cache. */
1705 512, /* size of l2 cache. */
1706 64, /* size of prefetch block */
1707 6, /* number of parallel prefetches */
1708 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1709 value is increased to perhaps more appropriate value of 5. */
1710 3, /* Branch cost */
1711 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1712 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1713 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1714 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1715 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1716 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1719 1, /* scalar_stmt_cost. */
1720 1, /* scalar load_cost. */
1721 1, /* scalar_store_cost. */
1722 1, /* vec_stmt_cost. */
1723 1, /* vec_to_scalar_cost. */
1724 1, /* scalar_to_vec_cost. */
1725 1, /* vec_align_load_cost. */
1726 2, /* vec_unalign_load_cost. */
1727 1, /* vec_store_cost. */
1728 3, /* cond_taken_branch_cost. */
1729 1, /* cond_not_taken_branch_cost. */
1732 /* core_cost should produce code tuned for Core family of CPUs. */
/* memcpy expansion strategy table for the Core family, indexed by
   TARGET_64BIT != 0.  [0] = 32-bit: rep_prefix_4_byte up to 1024 bytes,
   then a library call.  [1] = 64-bit: inline loop up to 24 bytes,
   rep_prefix_8_byte up to 128 bytes, then a library call (-1 = no upper
   bound).  NOTE(review): third field of each triple is presumably the
   "noalign" flag — confirm against the stringop_algs declaration.  */
1733 static stringop_algs core_memcpy
[2] = {
1734 {libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1735 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1736 {-1, libcall
, false}}}};
1737 static stringop_algs core_memset
[2] = {
1738 {libcall
, {{6, loop_1_byte
, true},
1740 {8192, rep_prefix_4_byte
, true},
1741 {-1, libcall
, false}}},
1742 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1743 {-1, libcall
, false}}}};
1746 struct processor_costs core_cost
= {
1747 COSTS_N_INSNS (1), /* cost of an add instruction */
1748 /* On all chips taken into consideration lea is 2 cycles and more. With
1749 this cost however our current implementation of synth_mult results in
1750 use of unnecessary temporary registers causing regression on several
1751 SPECfp benchmarks. */
1752 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1753 COSTS_N_INSNS (1), /* variable shift costs */
1754 COSTS_N_INSNS (1), /* constant shift costs */
1755 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1756 COSTS_N_INSNS (4), /* HI */
1757 COSTS_N_INSNS (3), /* SI */
1758 COSTS_N_INSNS (4), /* DI */
1759 COSTS_N_INSNS (2)}, /* other */
1760 0, /* cost of multiply per each bit set */
1761 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1762 COSTS_N_INSNS (26), /* HI */
1763 COSTS_N_INSNS (42), /* SI */
1764 COSTS_N_INSNS (74), /* DI */
1765 COSTS_N_INSNS (74)}, /* other */
1766 COSTS_N_INSNS (1), /* cost of movsx */
1767 COSTS_N_INSNS (1), /* cost of movzx */
1768 8, /* "large" insn */
1769 17, /* MOVE_RATIO */
1770 4, /* cost for loading QImode using movzbl */
1771 {4, 4, 4}, /* cost of loading integer registers
1772 in QImode, HImode and SImode.
1773 Relative to reg-reg move (2). */
1774 {4, 4, 4}, /* cost of storing integer registers */
1775 4, /* cost of reg,reg fld/fst */
1776 {12, 12, 12}, /* cost of loading fp registers
1777 in SFmode, DFmode and XFmode */
1778 {6, 6, 8}, /* cost of storing fp registers
1779 in SFmode, DFmode and XFmode */
1780 2, /* cost of moving MMX register */
1781 {8, 8}, /* cost of loading MMX registers
1782 in SImode and DImode */
1783 {8, 8}, /* cost of storing MMX registers
1784 in SImode and DImode */
1785 2, /* cost of moving SSE register */
1786 {8, 8, 8}, /* cost of loading SSE registers
1787 in SImode, DImode and TImode */
1788 {8, 8, 8}, /* cost of storing SSE registers
1789 in SImode, DImode and TImode */
1790 5, /* MMX or SSE register to integer */
1791 64, /* size of l1 cache. */
1792 512, /* size of l2 cache. */
1793 64, /* size of prefetch block */
1794 6, /* number of parallel prefetches */
1795 /* FIXME perhaps more appropriate value is 5. */
1796 3, /* Branch cost */
1797 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1798 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1799 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1800 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1801 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1802 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1805 1, /* scalar_stmt_cost. */
1806 1, /* scalar load_cost. */
1807 1, /* scalar_store_cost. */
1808 1, /* vec_stmt_cost. */
1809 1, /* vec_to_scalar_cost. */
1810 1, /* scalar_to_vec_cost. */
1811 1, /* vec_align_load_cost. */
1812 2, /* vec_unalign_load_cost. */
1813 1, /* vec_store_cost. */
1814 3, /* cond_taken_branch_cost. */
1815 1, /* cond_not_taken_branch_cost. */
1819 /* Set by -mtune.  Defaults to pentium_cost until option processing
   selects the entry for the chosen tuning.  */
1820 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1822 /* Set by -mtune or -Os.  Same default as ix86_tune_cost; this is the
   table actually consulted for size/speed trade-offs (see e.g.
   ix86_parse_stringop_strategy_string, which reads ix86_cost->memcpy).  */
1823 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1825 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; the m_* masks are OR-ed together to
   form the per-tuning selectors (see the x86-tune.def selector include and
   the x86_* mask constants below).  */
1826 #define m_386 (1<<PROCESSOR_I386)
1827 #define m_486 (1<<PROCESSOR_I486)
1828 #define m_PENT (1<<PROCESSOR_PENTIUM)
1829 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1830 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1831 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1832 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1833 #define m_CORE2 (1<<PROCESSOR_CORE2)
1834 #define m_COREI7 (1<<PROCESSOR_COREI7)
1835 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1836 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
1837 #define m_ATOM (1<<PROCESSOR_ATOM)
1838 #define m_SLM (1<<PROCESSOR_SLM)
1840 #define m_GEODE (1<<PROCESSOR_GEODE)
1841 #define m_K6 (1<<PROCESSOR_K6)
1842 #define m_K6_GEODE (m_K6 | m_GEODE)
1843 #define m_K8 (1<<PROCESSOR_K8)
1844 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1845 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1846 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1847 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1848 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1849 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1850 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1851 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1852 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
1853 #define m_BTVER (m_BTVER1 | m_BTVER2)
1854 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1856 #define m_GENERIC (1<<PROCESSOR_GENERIC)
1858 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1860 #define DEF_TUNE(tune, name, selector) name,
1861 #include "x86-tune.def"
1865 /* Feature tests against the various tunings. */
/* Filled from initial_ix86_tune_features (below) for the active tuning;
   one flag per X86_TUNE_* entry from x86-tune.def.  */
1866 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1868 /* Feature tests against the various tunings used to create ix86_tune_features
1869 based on the processor mask. */
1870 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1872 #define DEF_TUNE(tune, name, selector) selector,
1873 #include "x86-tune.def"
1877 /* Feature tests against the various architecture variations. */
/* Filled from initial_ix86_arch_features (below) for the active -march;
   one flag per X86_ARCH_* entry.  */
1878 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1880 /* Feature tests against the various architecture variations, used to create
1881 ix86_arch_features based on the processor mask. */
1882 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1883 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1884 ~(m_386
| m_486
| m_PENT
| m_K6
),
1886 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1889 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1892 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1895 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Per-processor tuning masks built from the m_* bits above; each constant
   names the set of processors for which the corresponding behavior is
   enabled.  */
1899 static const unsigned int x86_accumulate_outgoing_args
1900 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_SLM
| m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors on which the 387 math behavior is always "fancy"
   (i.e. -mno-fancy-math-387 is not the default).  */
1902 static const unsigned int x86_arch_always_fancy_math_387
1903 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors where unaligned 256-bit AVX loads are split in two.  */
1905 static const unsigned int x86_avx256_split_unaligned_load
1906 = m_COREI7
| m_GENERIC
;
/* Processors where unaligned 256-bit AVX stores are split in two.  */
1908 static const unsigned int x86_avx256_split_unaligned_store
1909 = m_COREI7
| m_BDVER
| m_GENERIC
;
1911 /* In case the average insn count for single function invocation is
1912 lower than this constant, emit fast (but longer) prologue and
1914 #define FAST_PROLOGUE_INSN_COUNT 20
1916 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The actual name lists come from the *_REGISTER_NAMES macros defined in
   the target headers.  */
1917 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
1918 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
1919 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
1921 /* Array of the smallest class containing reg number REGNO, indexed by
1922 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1924 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
1926 /* ax, dx, cx, bx */
1927 AREG
, DREG
, CREG
, BREG
,
1928 /* si, di, bp, sp */
1929 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
1931 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
1932 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
1935 /* flags, fpsr, fpcr, frame */
1936 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
1938 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1941 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
1944 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1945 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
1946 /* SSE REX registers */
1947 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
1949 /* AVX-512 SSE registers */
1950 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1951 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1952 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1953 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
1954 /* Mask registers. */
1955 MASK_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
1956 MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
1959 /* The "default" register map used in 32bit mode. */
1961 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
1963 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1964 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1965 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1966 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1967 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1968 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1969 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1970 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
1971 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
1972 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
1975 /* The "default" register map used in 64bit mode. */
1977 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
1979 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1980 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1981 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1982 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1983 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1984 8,9,10,11,12,13,14,15, /* extended integer registers */
1985 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1986 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
1987 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
1988 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
1991 /* Define the register numbers to be used in Dwarf debugging information.
1992 The SVR4 reference port C compiler uses the following register numbers
1993 in its Dwarf output code:
1994 0 for %eax (gcc regno = 0)
1995 1 for %ecx (gcc regno = 2)
1996 2 for %edx (gcc regno = 1)
1997 3 for %ebx (gcc regno = 3)
1998 4 for %esp (gcc regno = 7)
1999 5 for %ebp (gcc regno = 6)
2000 6 for %esi (gcc regno = 4)
2001 7 for %edi (gcc regno = 5)
2002 The following three DWARF register numbers are never generated by
2003 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2004 believes these numbers have these meanings.
2005 8 for %eip (no gcc equivalent)
2006 9 for %eflags (gcc regno = 17)
2007 10 for %trapno (no gcc equivalent)
2008 It is not at all clear how we should number the FP stack registers
2009 for the x86 architecture. If the version of SDB on x86/svr4 were
2010 a bit less brain dead with respect to floating-point then we would
2011 have a precedent to follow with respect to DWARF register numbers
2012 for x86 FP registers, but the SDB on x86/svr4 is so completely
2013 broken with respect to FP registers that it is hardly worth thinking
2014 of it as something to strive for compatibility with.
2015 The version of x86/svr4 SDB I have at the moment does (partially)
2016 seem to believe that DWARF register number 11 is associated with
2017 the x86 register %st(0), but that's about all. Higher DWARF
2018 register numbers don't seem to be associated with anything in
2019 particular, and even for DWARF regno 11, SDB only seems to under-
2020 stand that it should say that a variable lives in %st(0) (when
2021 asked via an `=' command) if we said it was in DWARF regno 11,
2022 but SDB still prints garbage when asked for the value of the
2023 variable in question (via a `/' command).
2024 (Also note that the labels SDB prints for various FP stack regs
2025 when doing an `x' command are all wrong.)
2026 Note that these problems generally don't affect the native SVR4
2027 C compiler because it doesn't allow the use of -O with -g and
2028 because when it is *not* optimizing, it allocates a memory
2029 location for each floating-point variable, and the memory
2030 location is what gets described in the DWARF AT_location
2031 attribute for the variable in question.
2032 Regardless of the severe mental illness of the x86/svr4 SDB, we
2033 do something sensible here and we use the following DWARF
2034 register numbers. Note that these are all stack-top-relative
2036 11 for %st(0) (gcc regno = 8)
2037 12 for %st(1) (gcc regno = 9)
2038 13 for %st(2) (gcc regno = 10)
2039 14 for %st(3) (gcc regno = 11)
2040 15 for %st(4) (gcc regno = 12)
2041 16 for %st(5) (gcc regno = 13)
2042 17 for %st(6) (gcc regno = 14)
2043 18 for %st(7) (gcc regno = 15)
2045 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2047 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2048 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2049 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2050 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2051 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2052 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2053 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2054 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2055 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2056 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2059 /* Define parameter passing and return registers. */
2061 static int const x86_64_int_parameter_registers
[6] =
2063 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2066 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2068 CX_REG
, DX_REG
, R8_REG
, R9_REG
2071 static int const x86_64_int_return_registers
[4] =
2073 AX_REG
, DX_REG
, DI_REG
, SI_REG
2076 /* Additional registers that are clobbered by SYSV calls. */
2078 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2082 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2083 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2086 /* Define the structure for the machine field in struct function. */
2088 struct GTY(()) stack_local_entry
{
2089 unsigned short mode
;
2092 struct stack_local_entry
*next
;
2095 /* Structure describing stack frame layout.
2096 Stack grows downward:
2102 saved static chain if ix86_static_chain_on_stack
2104 saved frame pointer if frame_pointer_needed
2105 <- HARD_FRAME_POINTER
2111 <- sse_regs_save_offset
2114 [va_arg registers] |
2118 [padding2] | = to_allocate
2127 int outgoing_arguments_size
;
2129 /* The offsets relative to ARG_POINTER. */
2130 HOST_WIDE_INT frame_pointer_offset
;
2131 HOST_WIDE_INT hard_frame_pointer_offset
;
2132 HOST_WIDE_INT stack_pointer_offset
;
2133 HOST_WIDE_INT hfp_save_offset
;
2134 HOST_WIDE_INT reg_save_offset
;
2135 HOST_WIDE_INT sse_reg_save_offset
;
2137 /* When save_regs_using_mov is set, emit prologue using
2138 move instead of push instructions. */
2139 bool save_regs_using_mov
;
2142 /* Which cpu are we scheduling for. */
2143 enum attr_cpu ix86_schedule
;
2145 /* Which cpu are we optimizing for. */
2146 enum processor_type ix86_tune
;
2148 /* Which instruction set architecture to use. */
2149 enum processor_type ix86_arch
;
2151 /* True if processor has SSE prefetch instruction. */
2152 unsigned char x86_prefetch_sse
;
2154 /* -mstackrealign option */
/* Attribute name spelled out once so option handling and the attribute
   table share the same string.  */
2155 static const char ix86_force_align_arg_pointer_string
[]
2156 = "force_align_arg_pointer";
/* Pointers to the word-size-specific RTL generator functions (leave,
   add/sub, monitor, stack probing, TLS, ...).  NOTE(review): presumably
   assigned to the SImode or DImode gen_* variants during option
   processing depending on TARGET_64BIT — confirm in the override code.  */
2158 static rtx (*ix86_gen_leave
) (void);
2159 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2160 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2161 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2162 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2163 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2164 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2165 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2166 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2167 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2168 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2169 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2171 /* Preferred alignment for stack boundary in bits. */
2172 unsigned int ix86_preferred_stack_boundary
;
2174 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
2176 static unsigned int ix86_user_incoming_stack_boundary
;
2178 /* Default alignment for incoming stack boundary in bits. */
2179 static unsigned int ix86_default_incoming_stack_boundary
;
2181 /* Alignment for incoming stack boundary in bits. */
2182 unsigned int ix86_incoming_stack_boundary
;
2184 /* Calling abi specific va_list type nodes. */
/* GTY(()) marks these as garbage-collector roots for precompiled
   headers / GC.  */
2185 static GTY(()) tree sysv_va_list_type_node
;
2186 static GTY(()) tree ms_va_list_type_node
;
2188 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2189 char internal_label_prefix
[16];
2190 int internal_label_prefix_len
;
2192 /* Fence to use after loop using movnt. */
2195 /* Register class used for passing given 64bit part of the argument.
2196 These represent classes as documented by the PS ABI, with the exception
2197 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2198 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2200 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2201 whenever possible (upper half does contain padding). */
2202 enum x86_64_reg_class
2205 X86_64_INTEGER_CLASS
,
2206 X86_64_INTEGERSI_CLASS
,
2213 X86_64_COMPLEX_X87_CLASS
,
2217 #define MAX_CLASSES 4
2219 /* Table of constants used by fldpi, fldln2, etc....  Lazily filled in;
   ext_80387_constants_init records whether the table has been built.  */
2220 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2221 static bool ext_80387_constants_init
= false;
2224 static struct machine_function
* ix86_init_machine_status (void);
2225 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2226 static bool ix86_function_value_regno_p (const unsigned int);
2227 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2229 static rtx
ix86_static_chain (const_tree
, bool);
2230 static int ix86_function_regparm (const_tree
, const_tree
);
2231 static void ix86_compute_frame_layout (struct ix86_frame
*);
2232 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2234 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2235 static tree
ix86_canonical_va_list_type (tree
);
2236 static void predict_jump (int);
2237 static unsigned int split_stack_prologue_scratch_regno (void);
2238 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2240 enum ix86_function_specific_strings
2242 IX86_FUNCTION_SPECIFIC_ARCH
,
2243 IX86_FUNCTION_SPECIFIC_TUNE
,
2244 IX86_FUNCTION_SPECIFIC_MAX
2247 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2248 const char *, enum fpmath_unit
, bool);
2249 static void ix86_function_specific_save (struct cl_target_option
*);
2250 static void ix86_function_specific_restore (struct cl_target_option
*);
2251 static void ix86_function_specific_print (FILE *, int,
2252 struct cl_target_option
*);
2253 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2254 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2255 struct gcc_options
*);
2256 static bool ix86_can_inline_p (tree
, tree
);
2257 static void ix86_set_current_function (tree
);
2258 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2260 static enum calling_abi
ix86_function_abi (const_tree
);
2263 #ifndef SUBTARGET32_DEFAULT_CPU
2264 #define SUBTARGET32_DEFAULT_CPU "i386"
2267 /* Whether -mtune= or -march= were specified */
2268 static int ix86_tune_defaulted
;
2269 static int ix86_arch_specified
;
2271 /* Vectorization library interface and handlers. */
2272 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2274 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2275 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2277 /* Processor target table, indexed by processor number */
2280 const struct processor_costs
*cost
; /* Processor costs */
2281 const int align_loop
; /* Default alignments. */
2282 const int align_loop_max_skip
;
2283 const int align_jump
;
2284 const int align_jump_max_skip
;
2285 const int align_func
;
2288 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2290 {&i386_cost
, 4, 3, 4, 3, 4},
2291 {&i486_cost
, 16, 15, 16, 15, 16},
2292 {&pentium_cost
, 16, 7, 16, 7, 16},
2293 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2294 {&geode_cost
, 0, 0, 0, 0, 0},
2295 {&k6_cost
, 32, 7, 32, 7, 32},
2296 {&athlon_cost
, 16, 7, 16, 7, 16},
2297 {&pentium4_cost
, 0, 0, 0, 0, 0},
2298 {&k8_cost
, 16, 7, 16, 7, 16},
2299 {&nocona_cost
, 0, 0, 0, 0, 0},
2301 {&core_cost
, 16, 10, 16, 10, 16},
2303 {&core_cost
, 16, 10, 16, 10, 16},
2305 {&core_cost
, 16, 10, 16, 10, 16},
2306 {&generic_cost
, 16, 10, 16, 10, 16},
2307 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2308 {&bdver1_cost
, 16, 10, 16, 7, 11},
2309 {&bdver2_cost
, 16, 10, 16, 7, 11},
2310 {&bdver3_cost
, 16, 10, 16, 7, 11},
2311 {&btver1_cost
, 16, 10, 16, 7, 11},
2312 {&btver2_cost
, 16, 10, 16, 7, 11},
2313 {&atom_cost
, 16, 15, 16, 7, 16},
2314 {&slm_cost
, 16, 15, 16, 7, 16}
2317 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2352 gate_insert_vzeroupper (void)
2354 return TARGET_AVX
&& !TARGET_AVX512F
&& TARGET_VZEROUPPER
;
2358 rest_of_handle_insert_vzeroupper (void)
2362 /* vzeroupper instructions are inserted immediately after reload to
2363 account for possible spills from 256bit registers. The pass
2364 reuses mode switching infrastructure by re-running mode insertion
2365 pass, so disable entities that have already been processed. */
2366 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2367 ix86_optimize_mode_switching
[i
] = 0;
2369 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2371 /* Call optimize_mode_switching. */
2372 g
->get_passes ()->execute_pass_mode_switching ();
2378 const pass_data pass_data_insert_vzeroupper
=
2380 RTL_PASS
, /* type */
2381 "vzeroupper", /* name */
2382 OPTGROUP_NONE
, /* optinfo_flags */
2383 true, /* has_gate */
2384 true, /* has_execute */
2385 TV_NONE
, /* tv_id */
2386 0, /* properties_required */
2387 0, /* properties_provided */
2388 0, /* properties_destroyed */
2389 0, /* todo_flags_start */
2390 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2393 class pass_insert_vzeroupper
: public rtl_opt_pass
2396 pass_insert_vzeroupper(gcc::context
*ctxt
)
2397 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2400 /* opt_pass methods: */
2401 bool gate () { return gate_insert_vzeroupper (); }
2402 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2404 }; // class pass_insert_vzeroupper
2409 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2411 return new pass_insert_vzeroupper (ctxt
);
2414 /* Return true if a red-zone is in use. */
2417 ix86_using_red_zone (void)
2419 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2422 /* Return a string that documents the current -m options. The caller is
2423 responsible for freeing the string. */
2426 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2427 const char *tune
, enum fpmath_unit fpmath
,
2430 struct ix86_target_opts
2432 const char *option
; /* option string */
2433 HOST_WIDE_INT mask
; /* isa mask options */
2436 /* This table is ordered so that options like -msse4.2 that imply
2437 preceding options while match those first. */
2438 static struct ix86_target_opts isa_opts
[] =
2440 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2441 { "-mfma", OPTION_MASK_ISA_FMA
},
2442 { "-mxop", OPTION_MASK_ISA_XOP
},
2443 { "-mlwp", OPTION_MASK_ISA_LWP
},
2444 { "-mavx512f", OPTION_MASK_ISA_AVX512F
},
2445 { "-mavx512er", OPTION_MASK_ISA_AVX512ER
},
2446 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD
},
2447 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF
},
2448 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2449 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2450 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2451 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2452 { "-msse3", OPTION_MASK_ISA_SSE3
},
2453 { "-msse2", OPTION_MASK_ISA_SSE2
},
2454 { "-msse", OPTION_MASK_ISA_SSE
},
2455 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2456 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2457 { "-mmmx", OPTION_MASK_ISA_MMX
},
2458 { "-mabm", OPTION_MASK_ISA_ABM
},
2459 { "-mbmi", OPTION_MASK_ISA_BMI
},
2460 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2461 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2462 { "-mhle", OPTION_MASK_ISA_HLE
},
2463 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2464 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2465 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2466 { "-madx", OPTION_MASK_ISA_ADX
},
2467 { "-mtbm", OPTION_MASK_ISA_TBM
},
2468 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2469 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2470 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2471 { "-maes", OPTION_MASK_ISA_AES
},
2472 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2473 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2474 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2475 { "-mf16c", OPTION_MASK_ISA_F16C
},
2476 { "-mrtm", OPTION_MASK_ISA_RTM
},
2477 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2478 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2482 static struct ix86_target_opts flag_opts
[] =
2484 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2485 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2486 { "-m80387", MASK_80387
},
2487 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2488 { "-malign-double", MASK_ALIGN_DOUBLE
},
2489 { "-mcld", MASK_CLD
},
2490 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2491 { "-mieee-fp", MASK_IEEE_FP
},
2492 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2493 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2494 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2495 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2496 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2497 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2498 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2499 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2500 { "-mrecip", MASK_RECIP
},
2501 { "-mrtd", MASK_RTD
},
2502 { "-msseregparm", MASK_SSEREGPARM
},
2503 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2504 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2505 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2506 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2507 { "-mvzeroupper", MASK_VZEROUPPER
},
2508 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2509 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2510 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2513 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2516 char target_other
[40];
2526 memset (opts
, '\0', sizeof (opts
));
2528 /* Add -march= option. */
2531 opts
[num
][0] = "-march=";
2532 opts
[num
++][1] = arch
;
2535 /* Add -mtune= option. */
2538 opts
[num
][0] = "-mtune=";
2539 opts
[num
++][1] = tune
;
2542 /* Add -m32/-m64/-mx32. */
2543 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2545 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2549 isa
&= ~ (OPTION_MASK_ISA_64BIT
2550 | OPTION_MASK_ABI_64
2551 | OPTION_MASK_ABI_X32
);
2555 opts
[num
++][0] = abi
;
2557 /* Pick out the options in isa options. */
2558 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2560 if ((isa
& isa_opts
[i
].mask
) != 0)
2562 opts
[num
++][0] = isa_opts
[i
].option
;
2563 isa
&= ~ isa_opts
[i
].mask
;
2567 if (isa
&& add_nl_p
)
2569 opts
[num
++][0] = isa_other
;
2570 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2574 /* Add flag options. */
2575 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2577 if ((flags
& flag_opts
[i
].mask
) != 0)
2579 opts
[num
++][0] = flag_opts
[i
].option
;
2580 flags
&= ~ flag_opts
[i
].mask
;
2584 if (flags
&& add_nl_p
)
2586 opts
[num
++][0] = target_other
;
2587 sprintf (target_other
, "(other flags: %#x)", flags
);
2590 /* Add -fpmath= option. */
2593 opts
[num
][0] = "-mfpmath=";
2594 switch ((int) fpmath
)
2597 opts
[num
++][1] = "387";
2601 opts
[num
++][1] = "sse";
2604 case FPMATH_387
| FPMATH_SSE
:
2605 opts
[num
++][1] = "sse+387";
2617 gcc_assert (num
< ARRAY_SIZE (opts
));
2619 /* Size the string. */
2621 sep_len
= (add_nl_p
) ? 3 : 1;
2622 for (i
= 0; i
< num
; i
++)
2625 for (j
= 0; j
< 2; j
++)
2627 len
+= strlen (opts
[i
][j
]);
2630 /* Build the string. */
2631 ret
= ptr
= (char *) xmalloc (len
);
2634 for (i
= 0; i
< num
; i
++)
2638 for (j
= 0; j
< 2; j
++)
2639 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2646 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2654 for (j
= 0; j
< 2; j
++)
2657 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2659 line_len
+= len2
[j
];
2664 gcc_assert (ret
+ len
>= ptr
);
2669 /* Return true, if profiling code should be emitted before
2670 prologue. Otherwise it returns false.
2671 Note: For x86 with "hotfix" it is sorried. */
2673 ix86_profile_before_prologue (void)
2675 return flag_fentry
!= 0;
2678 /* Function that is callable from the debugger to print the current
2680 void ATTRIBUTE_UNUSED
2681 ix86_debug_options (void)
2683 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2684 ix86_arch_string
, ix86_tune_string
,
2689 fprintf (stderr
, "%s\n\n", opts
);
2693 fputs ("<no options>\n\n", stderr
);
2698 static const char *stringop_alg_names
[] = {
2700 #define DEF_ALG(alg, name) #name,
2701 #include "stringop.def"
2706 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2707 The string is of the following form (or comma separated list of it):
2709 strategy_alg:max_size:[align|noalign]
2711 where the full size range for the strategy is either [0, max_size] or
2712 [min_size, max_size], in which min_size is the max_size + 1 of the
2713 preceding range. The last size range must have max_size == -1.
2718 -mmemcpy-strategy=libcall:-1:noalign
2720 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2724 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2726 This is to tell the compiler to use the following strategy for memset
2727 1) when the expected size is between [1, 16], use rep_8byte strategy;
2728 2) when the size is between [17, 2048], use vector_loop;
2729 3) when the size is > 2048, use libcall. */
2731 struct stringop_size_range
2739 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
2741 const struct stringop_algs
*default_algs
;
2742 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
2743 char *curr_range_str
, *next_range_str
;
2747 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
2749 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
2751 curr_range_str
= strategy_str
;
2759 next_range_str
= strchr (curr_range_str
, ',');
2761 *next_range_str
++ = '\0';
2763 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
2764 alg_name
, &maxs
, align
))
2766 error ("wrong arg %s to option %s", curr_range_str
,
2767 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2771 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
2773 error ("size ranges of option %s should be increasing",
2774 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2778 for (i
= 0; i
< last_alg
; i
++)
2780 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
2782 alg
= (stringop_alg
) i
;
2789 error ("wrong stringop strategy name %s specified for option %s",
2791 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2795 input_ranges
[n
].max
= maxs
;
2796 input_ranges
[n
].alg
= alg
;
2797 if (!strcmp (align
, "align"))
2798 input_ranges
[n
].noalign
= false;
2799 else if (!strcmp (align
, "noalign"))
2800 input_ranges
[n
].noalign
= true;
2803 error ("unknown alignment %s specified for option %s",
2804 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2808 curr_range_str
= next_range_str
;
2810 while (curr_range_str
);
2812 if (input_ranges
[n
- 1].max
!= -1)
2814 error ("the max value for the last size range should be -1"
2816 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2820 if (n
> MAX_STRINGOP_ALGS
)
2822 error ("too many size ranges specified in option %s",
2823 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2827 /* Now override the default algs array. */
2828 for (i
= 0; i
< n
; i
++)
2830 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
2831 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
2832 = input_ranges
[i
].alg
;
2833 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
2834 = input_ranges
[i
].noalign
;
2839 /* parse -mtune-ctrl= option. When DUMP is true,
2840 print the features that are explicitly set. */
2843 parse_mtune_ctrl_str (bool dump
)
2845 if (!ix86_tune_ctrl_string
)
2848 char *next_feature_string
= NULL
;
2849 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
2850 char *orig
= curr_feature_string
;
2856 next_feature_string
= strchr (curr_feature_string
, ',');
2857 if (next_feature_string
)
2858 *next_feature_string
++ = '\0';
2859 if (*curr_feature_string
== '^')
2861 curr_feature_string
++;
2864 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2866 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
2868 ix86_tune_features
[i
] = !clear
;
2870 fprintf (stderr
, "Explicitly %s feature %s\n",
2871 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
2875 if (i
== X86_TUNE_LAST
)
2876 error ("Unknown parameter to option -mtune-ctrl: %s",
2877 clear
? curr_feature_string
- 1 : curr_feature_string
);
2878 curr_feature_string
= next_feature_string
;
2880 while (curr_feature_string
);
2884 /* Helper function to set ix86_tune_features. IX86_TUNE is the
2888 set_ix86_tune_features (enum processor_type ix86_tune
, bool dump
)
2890 unsigned int ix86_tune_mask
= 1u << ix86_tune
;
2893 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
2895 if (ix86_tune_no_default
)
2896 ix86_tune_features
[i
] = 0;
2898 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
2903 fprintf (stderr
, "List of x86 specific tuning parameter names:\n");
2904 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2905 fprintf (stderr
, "%s : %s\n", ix86_tune_feature_names
[i
],
2906 ix86_tune_features
[i
] ? "on" : "off");
2909 parse_mtune_ctrl_str (dump
);
2913 /* Override various settings based on options. If MAIN_ARGS_P, the
2914 options are from the command line, otherwise they are from
2918 ix86_option_override_internal (bool main_args_p
)
2921 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2922 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2927 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2928 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2929 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2930 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2931 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2932 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2933 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2934 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2935 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2936 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2937 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2938 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2939 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2940 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2941 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2942 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2943 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2944 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2945 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2946 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2947 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2948 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2949 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2950 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2951 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2952 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2953 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2954 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2955 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2956 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2957 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2958 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2959 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2960 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2961 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2962 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2963 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2964 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2965 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2966 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2967 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
2968 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
2969 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
2970 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
2972 /* if this reaches 64, need to widen struct pta flags below */
2976 const char *const name
; /* processor name or nickname. */
2977 const enum processor_type processor
;
2978 const enum attr_cpu schedule
;
2979 const unsigned HOST_WIDE_INT flags
;
2981 const processor_alias_table
[] =
2983 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2984 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2985 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2986 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2987 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2988 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2989 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
2990 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
2991 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2992 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2993 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2994 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2995 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2996 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2997 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2998 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2999 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3000 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3001 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3002 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3003 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3004 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3005 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3006 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3007 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3008 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3009 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3010 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3011 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3012 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3013 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3014 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3015 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3016 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3017 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
3018 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3019 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3020 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3021 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3022 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
3023 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3024 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3025 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3026 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3027 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3028 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3029 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3030 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3031 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3032 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3034 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3035 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3036 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3037 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3038 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3039 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3041 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3042 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3043 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3044 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3045 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3046 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3047 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3048 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3049 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3050 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3051 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3052 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3053 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3054 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3055 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3056 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3057 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3058 {"k8", PROCESSOR_K8
, CPU_K8
,
3059 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3060 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3061 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3062 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3063 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3064 {"opteron", PROCESSOR_K8
, CPU_K8
,
3065 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3066 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3067 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3068 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3069 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3070 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3071 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3072 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3073 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3074 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3075 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3076 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3077 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3078 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3079 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3080 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3081 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3082 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3083 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3084 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3085 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3086 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3087 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3088 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3089 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3090 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3091 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3092 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3093 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3094 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3095 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3096 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3097 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3098 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3099 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3100 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3101 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3102 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3103 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3104 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3105 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3106 | PTA_FXSR
| PTA_XSAVE
},
3107 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3108 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3109 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3110 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3111 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3112 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3114 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3116 | PTA_HLE
/* flags are only used for -march switch. */ },
3119 /* -mrecip options. */
3122 const char *string
; /* option name */
3123 unsigned int mask
; /* mask bits to set */
3125 const recip_options
[] =
3127 { "all", RECIP_MASK_ALL
},
3128 { "none", RECIP_MASK_NONE
},
3129 { "div", RECIP_MASK_DIV
},
3130 { "sqrt", RECIP_MASK_SQRT
},
3131 { "vec-div", RECIP_MASK_VEC_DIV
},
3132 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3135 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3137 /* Set up prefix/suffix so the error messages refer to either the command
3138 line argument, or the attribute(target). */
3147 prefix
= "option(\"";
3152 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3153 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3154 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3155 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3156 #ifdef TARGET_BI_ARCH
3159 #if TARGET_BI_ARCH == 1
3160 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3161 is on and OPTION_MASK_ABI_X32 is off. We turn off
3162 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3165 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3167 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3168 on and OPTION_MASK_ABI_64 is off. We turn off
3169 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3172 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3179 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3180 OPTION_MASK_ABI_64 for TARGET_X32. */
3181 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3182 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3184 else if (TARGET_LP64
)
3186 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3187 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3188 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3189 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3192 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3193 SUBTARGET_OVERRIDE_OPTIONS
;
3196 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3197 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3200 /* -fPIC is the default for x86_64. */
3201 if (TARGET_MACHO
&& TARGET_64BIT
)
3204 /* Need to check -mtune=generic first. */
3205 if (ix86_tune_string
)
3207 if (!strcmp (ix86_tune_string
, "generic")
3208 || !strcmp (ix86_tune_string
, "i686")
3209 /* As special support for cross compilers we read -mtune=native
3210 as -mtune=generic. With native compilers we won't see the
3211 -mtune=native, as it was changed by the driver. */
3212 || !strcmp (ix86_tune_string
, "native"))
3214 ix86_tune_string
= "generic";
3216 /* If this call is for setting the option attribute, allow the
3217 generic that was previously set. */
3218 else if (!main_args_p
3219 && !strcmp (ix86_tune_string
, "generic"))
3221 else if (!strncmp (ix86_tune_string
, "generic", 7))
3222 error ("bad value (%s) for %stune=%s %s",
3223 ix86_tune_string
, prefix
, suffix
, sw
);
3224 else if (!strcmp (ix86_tune_string
, "x86-64"))
3225 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3226 "%stune=k8%s or %stune=generic%s instead as appropriate",
3227 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3231 if (ix86_arch_string
)
3232 ix86_tune_string
= ix86_arch_string
;
3233 if (!ix86_tune_string
)
3235 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3236 ix86_tune_defaulted
= 1;
3239 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3240 need to use a sensible tune option. */
3241 if (!strcmp (ix86_tune_string
, "generic")
3242 || !strcmp (ix86_tune_string
, "x86-64")
3243 || !strcmp (ix86_tune_string
, "i686"))
3245 ix86_tune_string
= "generic";
3249 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3251 /* rep; movq isn't available in 32-bit code. */
3252 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3253 ix86_stringop_alg
= no_stringop
;
3256 if (!ix86_arch_string
)
3257 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3259 ix86_arch_specified
= 1;
3261 if (global_options_set
.x_ix86_pmode
)
3263 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3264 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3265 error ("address mode %qs not supported in the %s bit mode",
3266 TARGET_64BIT
? "short" : "long",
3267 TARGET_64BIT
? "64" : "32");
3270 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3272 if (!global_options_set
.x_ix86_abi
)
3273 ix86_abi
= DEFAULT_ABI
;
3275 /* For targets using ms ABI enable ms-extensions, if not
3276 explicit turned off. For non-ms ABI we turn off this
3278 if (!global_options_set
.x_flag_ms_extensions
)
3279 flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3281 if (global_options_set
.x_ix86_cmodel
)
3283 switch (ix86_cmodel
)
3288 ix86_cmodel
= CM_SMALL_PIC
;
3290 error ("code model %qs not supported in the %s bit mode",
3297 ix86_cmodel
= CM_MEDIUM_PIC
;
3299 error ("code model %qs not supported in the %s bit mode",
3301 else if (TARGET_X32
)
3302 error ("code model %qs not supported in x32 mode",
3309 ix86_cmodel
= CM_LARGE_PIC
;
3311 error ("code model %qs not supported in the %s bit mode",
3313 else if (TARGET_X32
)
3314 error ("code model %qs not supported in x32 mode",
3320 error ("code model %s does not support PIC mode", "32");
3322 error ("code model %qs not supported in the %s bit mode",
3329 error ("code model %s does not support PIC mode", "kernel");
3330 ix86_cmodel
= CM_32
;
3333 error ("code model %qs not supported in the %s bit mode",
3343 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3344 use of rip-relative addressing. This eliminates fixups that
3345 would otherwise be needed if this object is to be placed in a
3346 DLL, and is essentially just as efficient as direct addressing. */
3347 if (TARGET_64BIT
&& (TARGET_RDOS
|| TARGET_PECOFF
))
3348 ix86_cmodel
= CM_MEDIUM_PIC
, flag_pic
= 1;
3349 else if (TARGET_64BIT
)
3350 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3352 ix86_cmodel
= CM_32
;
3354 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3356 error ("-masm=intel not supported in this configuration");
3357 ix86_asm_dialect
= ASM_ATT
;
3359 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3360 sorry ("%i-bit mode not compiled in",
3361 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3363 for (i
= 0; i
< pta_size
; i
++)
3364 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3366 ix86_schedule
= processor_alias_table
[i
].schedule
;
3367 ix86_arch
= processor_alias_table
[i
].processor
;
3368 /* Default cpu tuning to the architecture. */
3369 ix86_tune
= ix86_arch
;
3371 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3372 error ("CPU you selected does not support x86-64 "
3375 if (processor_alias_table
[i
].flags
& PTA_MMX
3376 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3377 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3378 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3379 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3380 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3381 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3382 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3383 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3384 if (processor_alias_table
[i
].flags
& PTA_SSE
3385 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3386 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3387 if (processor_alias_table
[i
].flags
& PTA_SSE2
3388 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3389 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3390 if (processor_alias_table
[i
].flags
& PTA_SSE3
3391 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3392 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3393 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3394 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3395 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3396 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3397 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3398 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3399 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3400 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3401 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3402 if (processor_alias_table
[i
].flags
& PTA_AVX
3403 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3404 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3405 if (processor_alias_table
[i
].flags
& PTA_AVX2
3406 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3407 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3408 if (processor_alias_table
[i
].flags
& PTA_FMA
3409 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3410 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3411 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3412 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3413 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3414 if (processor_alias_table
[i
].flags
& PTA_FMA4
3415 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3416 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3417 if (processor_alias_table
[i
].flags
& PTA_XOP
3418 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3419 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3420 if (processor_alias_table
[i
].flags
& PTA_LWP
3421 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3422 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3423 if (processor_alias_table
[i
].flags
& PTA_ABM
3424 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3425 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3426 if (processor_alias_table
[i
].flags
& PTA_BMI
3427 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3428 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3429 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3430 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3431 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3432 if (processor_alias_table
[i
].flags
& PTA_TBM
3433 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3434 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3435 if (processor_alias_table
[i
].flags
& PTA_BMI2
3436 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3437 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3438 if (processor_alias_table
[i
].flags
& PTA_CX16
3439 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3440 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3441 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3442 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3443 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3444 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3445 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3446 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3447 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3448 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3449 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3450 if (processor_alias_table
[i
].flags
& PTA_AES
3451 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3452 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3453 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3454 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3455 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3456 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3457 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3458 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3459 if (processor_alias_table
[i
].flags
& PTA_RDRND
3460 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3461 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3462 if (processor_alias_table
[i
].flags
& PTA_F16C
3463 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3464 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3465 if (processor_alias_table
[i
].flags
& PTA_RTM
3466 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3467 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3468 if (processor_alias_table
[i
].flags
& PTA_HLE
3469 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3470 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3471 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3472 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3473 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3474 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3475 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3476 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3477 if (processor_alias_table
[i
].flags
& PTA_ADX
3478 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3479 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3480 if (processor_alias_table
[i
].flags
& PTA_FXSR
3481 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3482 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3483 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3484 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3485 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3486 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3487 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3488 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3489 if (processor_alias_table
[i
].flags
& PTA_AVX512F
3490 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
3491 ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
3492 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
3493 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
3494 ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
3495 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
3496 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
3497 ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
3498 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
3499 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
3500 ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
3501 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3502 x86_prefetch_sse
= true;
3507 if (!strcmp (ix86_arch_string
, "generic"))
3508 error ("generic CPU can be used only for %stune=%s %s",
3509 prefix
, suffix
, sw
);
3510 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3511 error ("bad value (%s) for %sarch=%s %s",
3512 ix86_arch_string
, prefix
, suffix
, sw
);
3514 ix86_arch_mask
= 1u << ix86_arch
;
3515 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3516 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3518 for (i
= 0; i
< pta_size
; i
++)
3519 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3521 ix86_schedule
= processor_alias_table
[i
].schedule
;
3522 ix86_tune
= processor_alias_table
[i
].processor
;
3525 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3527 if (ix86_tune_defaulted
)
3529 ix86_tune_string
= "x86-64";
3530 for (i
= 0; i
< pta_size
; i
++)
3531 if (! strcmp (ix86_tune_string
,
3532 processor_alias_table
[i
].name
))
3534 ix86_schedule
= processor_alias_table
[i
].schedule
;
3535 ix86_tune
= processor_alias_table
[i
].processor
;
3538 error ("CPU you selected does not support x86-64 "
3542 /* Intel CPUs have always interpreted SSE prefetch instructions as
3543 NOPs; so, we can enable SSE prefetch instructions even when
3544 -mtune (rather than -march) points us to a processor that has them.
3545 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3546 higher processors. */
3548 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3549 x86_prefetch_sse
= true;
3553 if (ix86_tune_specified
&& i
== pta_size
)
3554 error ("bad value (%s) for %stune=%s %s",
3555 ix86_tune_string
, prefix
, suffix
, sw
);
3557 set_ix86_tune_features (ix86_tune
, ix86_dump_tunes
);
3559 #ifndef USE_IX86_FRAME_POINTER
3560 #define USE_IX86_FRAME_POINTER 0
3563 #ifndef USE_X86_64_FRAME_POINTER
3564 #define USE_X86_64_FRAME_POINTER 0
3567 /* Set the default values for switches whose default depends on TARGET_64BIT
3568 in case they weren't overwritten by command line options. */
3571 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3572 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3573 if (flag_asynchronous_unwind_tables
== 2)
3574 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3575 if (flag_pcc_struct_return
== 2)
3576 flag_pcc_struct_return
= 0;
3580 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3581 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3582 if (flag_asynchronous_unwind_tables
== 2)
3583 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3584 if (flag_pcc_struct_return
== 2)
3585 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3588 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3590 ix86_cost
= &ix86_size_cost
;
3592 ix86_cost
= ix86_tune_cost
;
3594 /* Arrange to set up i386_stack_locals for all functions. */
3595 init_machine_status
= ix86_init_machine_status
;
3597 /* Validate -mregparm= value. */
3598 if (global_options_set
.x_ix86_regparm
)
3601 warning (0, "-mregparm is ignored in 64-bit mode");
3602 if (ix86_regparm
> REGPARM_MAX
)
3604 error ("-mregparm=%d is not between 0 and %d",
3605 ix86_regparm
, REGPARM_MAX
);
3610 ix86_regparm
= REGPARM_MAX
;
3612 /* Default align_* from the processor table. */
3613 if (align_loops
== 0)
3615 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3616 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3618 if (align_jumps
== 0)
3620 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3621 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3623 if (align_functions
== 0)
3625 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3628 /* Provide default for -mbranch-cost= value. */
3629 if (!global_options_set
.x_ix86_branch_cost
)
3630 ix86_branch_cost
= ix86_cost
->branch_cost
;
3634 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3636 /* Enable by default the SSE and MMX builtins. Do allow the user to
3637 explicitly disable any of these. In particular, disabling SSE and
3638 MMX for kernel code is extremely useful. */
3639 if (!ix86_arch_specified
)
3641 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3642 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3645 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3649 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3651 if (!ix86_arch_specified
)
3653 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3655 /* i386 ABI does not specify red zone. It still makes sense to use it
3656 when programmer takes care to stack from being destroyed. */
3657 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3658 target_flags
|= MASK_NO_RED_ZONE
;
3661 /* Keep nonleaf frame pointers. */
3662 if (flag_omit_frame_pointer
)
3663 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3664 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3665 flag_omit_frame_pointer
= 1;
3667 /* If we're doing fast math, we don't care about comparison order
3668 wrt NaNs. This lets us use a shorter comparison sequence. */
3669 if (flag_finite_math_only
)
3670 target_flags
&= ~MASK_IEEE_FP
;
3672 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3673 since the insns won't need emulation. */
3674 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3675 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3677 /* Likewise, if the target doesn't have a 387, or we've specified
3678 software floating point, don't use 387 inline intrinsics. */
3680 target_flags
|= MASK_NO_FANCY_MATH_387
;
3682 /* Turn on MMX builtins for -msse. */
3684 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3686 /* Enable SSE prefetch. */
3687 if (TARGET_SSE
|| (TARGET_PRFCHW
&& !TARGET_3DNOW
))
3688 x86_prefetch_sse
= true;
3690 /* Enable prefetch{,w} instructions for -m3dnow. */
3692 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
& ~ix86_isa_flags_explicit
;
3694 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3695 if (TARGET_SSE4_2
|| TARGET_ABM
)
3696 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3698 /* Enable lzcnt instruction for -mabm. */
3700 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3702 /* Validate -mpreferred-stack-boundary= value or default it to
3703 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3704 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3705 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3707 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3708 int max
= (TARGET_SEH
? 4 : 12);
3710 if (ix86_preferred_stack_boundary_arg
< min
3711 || ix86_preferred_stack_boundary_arg
> max
)
3714 error ("-mpreferred-stack-boundary is not supported "
3717 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3718 ix86_preferred_stack_boundary_arg
, min
, max
);
3721 ix86_preferred_stack_boundary
3722 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3725 /* Set the default value for -mstackrealign. */
3726 if (ix86_force_align_arg_pointer
== -1)
3727 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3729 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3731 /* Validate -mincoming-stack-boundary= value or default it to
3732 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3733 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3734 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3736 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3737 || ix86_incoming_stack_boundary_arg
> 12)
3738 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3739 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3742 ix86_user_incoming_stack_boundary
3743 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3744 ix86_incoming_stack_boundary
3745 = ix86_user_incoming_stack_boundary
;
3749 /* Accept -msseregparm only if at least SSE support is enabled. */
3750 if (TARGET_SSEREGPARM
3752 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3754 if (global_options_set
.x_ix86_fpmath
)
3756 if (ix86_fpmath
& FPMATH_SSE
)
3760 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3761 ix86_fpmath
= FPMATH_387
;
3763 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3765 warning (0, "387 instruction set disabled, using SSE arithmetics");
3766 ix86_fpmath
= FPMATH_SSE
;
3771 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3773 /* If the i387 is disabled, then do not return values in it. */
3775 target_flags
&= ~MASK_FLOAT_RETURNS
;
3777 /* Use external vectorized library in vectorizing intrinsics. */
3778 if (global_options_set
.x_ix86_veclibabi_type
)
3779 switch (ix86_veclibabi_type
)
3781 case ix86_veclibabi_type_svml
:
3782 ix86_veclib_handler
= ix86_veclibabi_svml
;
3785 case ix86_veclibabi_type_acml
:
3786 ix86_veclib_handler
= ix86_veclibabi_acml
;
3793 ix86_tune_mask
= 1u << ix86_tune
;
3794 if ((!USE_IX86_FRAME_POINTER
3795 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3796 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3798 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3800 /* ??? Unwind info is not correct around the CFG unless either a frame
3801 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3802 unwind info generation to be aware of the CFG and propagating states
3804 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3805 || flag_exceptions
|| flag_non_call_exceptions
)
3806 && flag_omit_frame_pointer
3807 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3809 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3810 warning (0, "unwind tables currently require either a frame pointer "
3811 "or %saccumulate-outgoing-args%s for correctness",
3813 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3816 /* If stack probes are required, the space used for large function
3817 arguments on the stack must also be probed, so enable
3818 -maccumulate-outgoing-args so this happens in the prologue. */
3819 if (TARGET_STACK_PROBE
3820 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3822 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3823 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3824 "for correctness", prefix
, suffix
);
3825 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3828 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3831 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3832 p
= strchr (internal_label_prefix
, 'X');
3833 internal_label_prefix_len
= p
- internal_label_prefix
;
3837 /* When scheduling description is not available, disable scheduler pass
3838 so it won't slow down the compilation and make x87 code slower. */
3839 if (!TARGET_SCHEDULE
)
3840 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3842 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3843 ix86_tune_cost
->simultaneous_prefetches
,
3844 global_options
.x_param_values
,
3845 global_options_set
.x_param_values
);
3846 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3847 ix86_tune_cost
->prefetch_block
,
3848 global_options
.x_param_values
,
3849 global_options_set
.x_param_values
);
3850 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3851 ix86_tune_cost
->l1_cache_size
,
3852 global_options
.x_param_values
,
3853 global_options_set
.x_param_values
);
3854 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3855 ix86_tune_cost
->l2_cache_size
,
3856 global_options
.x_param_values
,
3857 global_options_set
.x_param_values
);
3859 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3860 if (flag_prefetch_loop_arrays
< 0
3862 && (optimize
>= 3 || flag_profile_use
)
3863 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3864 flag_prefetch_loop_arrays
= 1;
3866 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3867 can be optimized to ap = __builtin_next_arg (0). */
3868 if (!TARGET_64BIT
&& !flag_split_stack
)
3869 targetm
.expand_builtin_va_start
= NULL
;
3873 ix86_gen_leave
= gen_leave_rex64
;
3874 if (Pmode
== DImode
)
3876 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3877 ix86_gen_tls_local_dynamic_base_64
3878 = gen_tls_local_dynamic_base_64_di
;
3882 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3883 ix86_gen_tls_local_dynamic_base_64
3884 = gen_tls_local_dynamic_base_64_si
;
3888 ix86_gen_leave
= gen_leave
;
3890 if (Pmode
== DImode
)
3892 ix86_gen_add3
= gen_adddi3
;
3893 ix86_gen_sub3
= gen_subdi3
;
3894 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3895 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3896 ix86_gen_andsp
= gen_anddi3
;
3897 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3898 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3899 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3900 ix86_gen_monitor
= gen_sse3_monitor_di
;
3904 ix86_gen_add3
= gen_addsi3
;
3905 ix86_gen_sub3
= gen_subsi3
;
3906 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3907 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3908 ix86_gen_andsp
= gen_andsi3
;
3909 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3910 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3911 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3912 ix86_gen_monitor
= gen_sse3_monitor_si
;
3916 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3918 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3921 if (!TARGET_64BIT
&& flag_pic
)
3923 if (flag_fentry
> 0)
3924 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3928 else if (TARGET_SEH
)
3930 if (flag_fentry
== 0)
3931 sorry ("-mno-fentry isn%'t compatible with SEH");
3934 else if (flag_fentry
< 0)
3936 #if defined(PROFILE_BEFORE_PROLOGUE)
3943 /* When not optimize for size, enable vzeroupper optimization for
3944 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3945 AVX unaligned load/store. */
3948 if (flag_expensive_optimizations
3949 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3950 target_flags
|= MASK_VZEROUPPER
;
3951 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3952 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3953 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3954 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3955 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3956 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3957 /* Enable 128-bit AVX instruction generation
3958 for the auto-vectorizer. */
3959 if (TARGET_AVX128_OPTIMAL
3960 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3961 target_flags
|= MASK_PREFER_AVX128
;
3964 if (ix86_recip_name
)
3966 char *p
= ASTRDUP (ix86_recip_name
);
3968 unsigned int mask
, i
;
3971 while ((q
= strtok (p
, ",")) != NULL
)
3982 if (!strcmp (q
, "default"))
3983 mask
= RECIP_MASK_ALL
;
3986 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3987 if (!strcmp (q
, recip_options
[i
].string
))
3989 mask
= recip_options
[i
].mask
;
3993 if (i
== ARRAY_SIZE (recip_options
))
3995 error ("unknown option for -mrecip=%s", q
);
3997 mask
= RECIP_MASK_NONE
;
4001 recip_mask_explicit
|= mask
;
4003 recip_mask
&= ~mask
;
4010 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
4011 else if (target_flags_explicit
& MASK_RECIP
)
4012 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
4014 /* Default long double to 64-bit for Bionic. */
4015 if (TARGET_HAS_BIONIC
4016 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
4017 target_flags
|= MASK_LONG_DOUBLE_64
;
4019 /* Save the initial options in case the user does function specific
4022 target_option_default_node
= target_option_current_node
4023 = build_target_option_node ();
4025 /* Handle stack protector */
4026 if (!global_options_set
.x_ix86_stack_protector_guard
)
4027 ix86_stack_protector_guard
= TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4029 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4030 if (ix86_tune_memcpy_strategy
)
4032 char *str
= xstrdup (ix86_tune_memcpy_strategy
);
4033 ix86_parse_stringop_strategy_string (str
, false);
4037 if (ix86_tune_memset_strategy
)
4039 char *str
= xstrdup (ix86_tune_memset_strategy
);
4040 ix86_parse_stringop_strategy_string (str
, true);
4045 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4048 ix86_option_override (void)
4050 opt_pass
*pass_insert_vzeroupper
= make_pass_insert_vzeroupper (g
);
4051 static struct register_pass_info insert_vzeroupper_info
4052 = { pass_insert_vzeroupper
, "reload",
4053 1, PASS_POS_INSERT_AFTER
4056 ix86_option_override_internal (true);
4059 /* This needs to be done at start up. It's convenient to do it here. */
4060 register_pass (&insert_vzeroupper_info
);
4063 /* Update register usage after having seen the compiler flags. */
4066 ix86_conditional_register_usage (void)
4071 /* The PIC register, if it exists, is fixed. */
4072 j
= PIC_OFFSET_TABLE_REGNUM
;
4073 if (j
!= INVALID_REGNUM
)
4074 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4076 /* For 32-bit targets, squash the REX registers. */
4079 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4080 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4081 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4082 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4083 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4084 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4087 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4088 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4089 : TARGET_64BIT
? (1 << 2)
4092 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4094 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4096 /* Set/reset conditionally defined registers from
4097 CALL_USED_REGISTERS initializer. */
4098 if (call_used_regs
[i
] > 1)
4099 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4101 /* Calculate registers of CLOBBERED_REGS register set
4102 as call used registers from GENERAL_REGS register set. */
4103 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4104 && call_used_regs
[i
])
4105 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4108 /* If MMX is disabled, squash the registers. */
4110 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4111 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4112 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4114 /* If SSE is disabled, squash the registers. */
4116 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4117 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4118 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4120 /* If the FPU is disabled, squash the registers. */
4121 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4122 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4123 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4124 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4126 /* If AVX512F is disabled, squash the registers. */
4127 if (! TARGET_AVX512F
)
4129 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
4130 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4132 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
4133 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4138 /* Save the current options */
4141 ix86_function_specific_save (struct cl_target_option
*ptr
)
4143 ptr
->arch
= ix86_arch
;
4144 ptr
->schedule
= ix86_schedule
;
4145 ptr
->tune
= ix86_tune
;
4146 ptr
->branch_cost
= ix86_branch_cost
;
4147 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4148 ptr
->arch_specified
= ix86_arch_specified
;
4149 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4150 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4151 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4153 /* The fields are char but the variables are not; make sure the
4154 values fit in the fields. */
4155 gcc_assert (ptr
->arch
== ix86_arch
);
4156 gcc_assert (ptr
->schedule
== ix86_schedule
);
4157 gcc_assert (ptr
->tune
== ix86_tune
);
4158 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4161 /* Restore the current options */
4164 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4166 enum processor_type old_tune
= ix86_tune
;
4167 enum processor_type old_arch
= ix86_arch
;
4168 unsigned int ix86_arch_mask
;
4171 ix86_arch
= (enum processor_type
) ptr
->arch
;
4172 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4173 ix86_tune
= (enum processor_type
) ptr
->tune
;
4174 ix86_branch_cost
= ptr
->branch_cost
;
4175 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4176 ix86_arch_specified
= ptr
->arch_specified
;
4177 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4178 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4179 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4181 /* Recreate the arch feature tests if the arch changed */
4182 if (old_arch
!= ix86_arch
)
4184 ix86_arch_mask
= 1u << ix86_arch
;
4185 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4186 ix86_arch_features
[i
]
4187 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4190 /* Recreate the tune optimization tests */
4191 if (old_tune
!= ix86_tune
)
4192 set_ix86_tune_features (ix86_tune
, false);
4195 /* Print the current options */
4198 ix86_function_specific_print (FILE *file
, int indent
,
4199 struct cl_target_option
*ptr
)
4202 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4203 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4205 fprintf (file
, "%*sarch = %d (%s)\n",
4208 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4209 ? cpu_names
[ptr
->arch
]
4212 fprintf (file
, "%*stune = %d (%s)\n",
4215 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4216 ? cpu_names
[ptr
->tune
]
4219 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4223 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4224 free (target_string
);
4229 /* Inner function to process the attribute((target(...))), take an argument and
4230 set the current options from the argument. If we have a list, recursively go
4234 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4235 struct gcc_options
*enum_opts_set
)
4240 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4241 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4242 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4243 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4244 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4260 enum ix86_opt_type type
;
4265 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4266 IX86_ATTR_ISA ("abm", OPT_mabm
),
4267 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4268 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4269 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4270 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4271 IX86_ATTR_ISA ("aes", OPT_maes
),
4272 IX86_ATTR_ISA ("avx", OPT_mavx
),
4273 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4274 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4275 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4276 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4277 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4278 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4279 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4280 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4281 IX86_ATTR_ISA ("sse", OPT_msse
),
4282 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4283 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4284 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4285 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4286 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4287 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4288 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4289 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4290 IX86_ATTR_ISA ("fma", OPT_mfma
),
4291 IX86_ATTR_ISA ("xop", OPT_mxop
),
4292 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4293 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4294 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4295 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4296 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4297 IX86_ATTR_ISA ("hle", OPT_mhle
),
4298 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4299 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4300 IX86_ATTR_ISA ("adx", OPT_madx
),
4301 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4302 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4303 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4306 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4308 /* string options */
4309 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4310 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4313 IX86_ATTR_YES ("cld",
4317 IX86_ATTR_NO ("fancy-math-387",
4318 OPT_mfancy_math_387
,
4319 MASK_NO_FANCY_MATH_387
),
4321 IX86_ATTR_YES ("ieee-fp",
4325 IX86_ATTR_YES ("inline-all-stringops",
4326 OPT_minline_all_stringops
,
4327 MASK_INLINE_ALL_STRINGOPS
),
4329 IX86_ATTR_YES ("inline-stringops-dynamically",
4330 OPT_minline_stringops_dynamically
,
4331 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4333 IX86_ATTR_NO ("align-stringops",
4334 OPT_mno_align_stringops
,
4335 MASK_NO_ALIGN_STRINGOPS
),
4337 IX86_ATTR_YES ("recip",
4343 /* If this is a list, recurse to get the options. */
4344 if (TREE_CODE (args
) == TREE_LIST
)
4348 for (; args
; args
= TREE_CHAIN (args
))
4349 if (TREE_VALUE (args
)
4350 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4351 p_strings
, enum_opts_set
))
4357 else if (TREE_CODE (args
) != STRING_CST
)
4359 error ("attribute %<target%> argument not a string");
4363 /* Handle multiple arguments separated by commas. */
4364 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4366 while (next_optstr
&& *next_optstr
!= '\0')
4368 char *p
= next_optstr
;
4370 char *comma
= strchr (next_optstr
, ',');
4371 const char *opt_string
;
4372 size_t len
, opt_len
;
4377 enum ix86_opt_type type
= ix86_opt_unknown
;
4383 len
= comma
- next_optstr
;
4384 next_optstr
= comma
+ 1;
4392 /* Recognize no-xxx. */
4393 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4402 /* Find the option. */
4405 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4407 type
= attrs
[i
].type
;
4408 opt_len
= attrs
[i
].len
;
4409 if (ch
== attrs
[i
].string
[0]
4410 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4413 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4416 mask
= attrs
[i
].mask
;
4417 opt_string
= attrs
[i
].string
;
4422 /* Process the option. */
4425 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4429 else if (type
== ix86_opt_isa
)
4431 struct cl_decoded_option decoded
;
4433 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4434 ix86_handle_option (&global_options
, &global_options_set
,
4435 &decoded
, input_location
);
4438 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4440 if (type
== ix86_opt_no
)
4441 opt_set_p
= !opt_set_p
;
4444 target_flags
|= mask
;
4446 target_flags
&= ~mask
;
4449 else if (type
== ix86_opt_str
)
4453 error ("option(\"%s\") was already specified", opt_string
);
4457 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4460 else if (type
== ix86_opt_enum
)
4465 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4467 set_option (&global_options
, enum_opts_set
, opt
, value
,
4468 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4472 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4484 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4487 ix86_valid_target_attribute_tree (tree args
)
4489 const char *orig_arch_string
= ix86_arch_string
;
4490 const char *orig_tune_string
= ix86_tune_string
;
4491 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4492 int orig_tune_defaulted
= ix86_tune_defaulted
;
4493 int orig_arch_specified
= ix86_arch_specified
;
4494 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4497 struct cl_target_option
*def
4498 = TREE_TARGET_OPTION (target_option_default_node
);
4499 struct gcc_options enum_opts_set
;
4501 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4503 /* Process each of the options on the chain. */
4504 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4506 return error_mark_node
;
4508 /* If the changed options are different from the default, rerun
4509 ix86_option_override_internal, and then save the options away.
4510 The string options are are attribute options, and will be undone
4511 when we copy the save structure. */
4512 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4513 || target_flags
!= def
->x_target_flags
4514 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4515 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4516 || enum_opts_set
.x_ix86_fpmath
)
4518 /* If we are using the default tune= or arch=, undo the string assigned,
4519 and use the default. */
4520 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4521 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4522 else if (!orig_arch_specified
)
4523 ix86_arch_string
= NULL
;
4525 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4526 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4527 else if (orig_tune_defaulted
)
4528 ix86_tune_string
= NULL
;
4530 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4531 if (enum_opts_set
.x_ix86_fpmath
)
4532 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4533 else if (!TARGET_64BIT
&& TARGET_SSE
)
4535 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4536 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4539 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4540 ix86_option_override_internal (false);
4542 /* Add any builtin functions with the new isa if any. */
4543 ix86_add_new_builtins (ix86_isa_flags
);
4545 /* Save the current options unless we are validating options for
4547 t
= build_target_option_node ();
4549 ix86_arch_string
= orig_arch_string
;
4550 ix86_tune_string
= orig_tune_string
;
4551 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4553 /* Free up memory allocated to hold the strings */
4554 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4555 free (option_strings
[i
]);
4561 /* Hook to validate attribute((target("string"))). */
4564 ix86_valid_target_attribute_p (tree fndecl
,
4565 tree
ARG_UNUSED (name
),
4567 int ARG_UNUSED (flags
))
4569 struct cl_target_option cur_target
;
4572 /* attribute((target("default"))) does nothing, beyond
4573 affecting multi-versioning. */
4574 if (TREE_VALUE (args
)
4575 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4576 && TREE_CHAIN (args
) == NULL_TREE
4577 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4580 tree old_optimize
= build_optimization_node ();
4581 tree new_target
, new_optimize
;
4582 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4584 /* If the function changed the optimization levels as well as setting target
4585 options, start with the optimizations specified. */
4586 if (func_optimize
&& func_optimize
!= old_optimize
)
4587 cl_optimization_restore (&global_options
,
4588 TREE_OPTIMIZATION (func_optimize
));
4590 /* The target attributes may also change some optimization flags, so update
4591 the optimization options if necessary. */
4592 cl_target_option_save (&cur_target
, &global_options
);
4593 new_target
= ix86_valid_target_attribute_tree (args
);
4594 new_optimize
= build_optimization_node ();
4596 if (new_target
== error_mark_node
)
4599 else if (fndecl
&& new_target
)
4601 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4603 if (old_optimize
!= new_optimize
)
4604 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4607 cl_target_option_restore (&global_options
, &cur_target
);
4609 if (old_optimize
!= new_optimize
)
4610 cl_optimization_restore (&global_options
,
4611 TREE_OPTIMIZATION (old_optimize
));
4617 /* Hook to determine if one function can safely inline another. */
4620 ix86_can_inline_p (tree caller
, tree callee
)
4623 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4624 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4626 /* If callee has no option attributes, then it is ok to inline. */
4630 /* If caller has no option attributes, but callee does then it is not ok to
4632 else if (!caller_tree
)
4637 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4638 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4640 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4641 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4643 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4644 != callee_opts
->x_ix86_isa_flags
)
4647 /* See if we have the same non-isa options. */
4648 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4651 /* See if arch, tune, etc. are the same. */
4652 else if (caller_opts
->arch
!= callee_opts
->arch
)
4655 else if (caller_opts
->tune
!= callee_opts
->tune
)
4658 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4661 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4672 /* Remember the last target of ix86_set_current_function. */
4673 static GTY(()) tree ix86_previous_fndecl
;
4675 /* Invalidate ix86_previous_fndecl cache. */
4677 ix86_reset_previous_fndecl (void)
4679 ix86_previous_fndecl
= NULL_TREE
;
4682 /* Establish appropriate back-end context for processing the function
4683 FNDECL. The argument might be NULL to indicate processing at top
4684 level, outside of any function scope. */
4686 ix86_set_current_function (tree fndecl
)
4688 /* Only change the context if the function changes. This hook is called
4689 several times in the course of compiling a function, and we don't want to
4690 slow things down too much or call target_reinit when it isn't safe. */
4691 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4693 tree old_tree
= (ix86_previous_fndecl
4694 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4697 tree new_tree
= (fndecl
4698 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4701 ix86_previous_fndecl
= fndecl
;
4702 if (old_tree
== new_tree
)
4707 cl_target_option_restore (&global_options
,
4708 TREE_TARGET_OPTION (new_tree
));
4714 struct cl_target_option
*def
4715 = TREE_TARGET_OPTION (target_option_current_node
);
4717 cl_target_option_restore (&global_options
, def
);
4724 /* Return true if this goes in large data/bss. */
4727 ix86_in_large_data_p (tree exp
)
4729 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4732 /* Functions are never large data. */
4733 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4736 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4738 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4739 if (strcmp (section
, ".ldata") == 0
4740 || strcmp (section
, ".lbss") == 0)
4746 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4748 /* If this is an incomplete type with size 0, then we can't put it
4749 in data because it might be too big when completed. */
4750 if (!size
|| size
> ix86_section_threshold
)
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */
/* NOTE(review): the switch cases below lost their break statements and
   at least the SECCAT_BSS/SECCAT_RODATA labels in extraction; restore
   from upstream before compiling.  */
ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
  /* Medium-model large data is redirected to the .ldata family.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
          case SECCAT_DATA_REL:
            sname = ".ldata.rel";
          case SECCAT_DATA_REL_LOCAL:
            sname = ".ldata.rel.local";
          case SECCAT_DATA_REL_RO:
            sname = ".ldata.rel.ro";
          case SECCAT_DATA_REL_RO_LOCAL:
            sname = ".ldata.rel.ro.local";
            flags |= SECTION_BSS;
          case SECCAT_RODATA_MERGE_STR:
          case SECCAT_RODATA_MERGE_STR_INIT:
          case SECCAT_RODATA_MERGE_CONST:
          case SECCAT_SRODATA:
            /* We don't split these for medium model.  Place them into
               default sections and hope for best.  */
      /* We might get called with string constants, but get_named_section
         doesn't like them as they are not DECLs.  Also, we need to set
         flags in that case.  */
          return get_section (sname, flags, NULL);
      return get_named_section (decl, sname, reloc);
  /* Everything else uses the generic ELF selection logic.  */
  return default_elf_select_section (decl, reloc, align);
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */
/* NOTE(review): braces and the final "return flags;" were lost in
   extraction.  */
static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
  unsigned int flags = default_section_type_flags (decl, name, reloc);
  /* Anonymous .ldata.rel.ro* sections are read-only after relocation.  */
  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
          || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;
  /* Mark large-BSS sections.  NOTE(review): the strncmp lengths (5 and
     16) compare one character fewer than the literals (".lbss." is 6
     chars, ".gnu.linkonce.lb." is 17) -- verify against upstream whether
     the shorter prefix match is intended.  */
  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", 5) == 0
      || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
    flags |= SECTION_BSS;
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */
/* NOTE(review): braces, break statements, some case labels and the
   declaration of "string" were lost in extraction; restore from
   upstream before compiling.  */
static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
  /* Medium-model large data gets an ".l"-prefixed unique section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
      switch (categorize_decl_for_section (decl, reloc))
          case SECCAT_DATA_REL:
          case SECCAT_DATA_REL_LOCAL:
          case SECCAT_DATA_REL_RO:
          case SECCAT_DATA_REL_RO_LOCAL:
            prefix = one_only ? ".ld" : ".ldata";
            prefix = one_only ? ".lb" : ".lbss";
          case SECCAT_RODATA_MERGE_STR:
          case SECCAT_RODATA_MERGE_STR_INIT:
          case SECCAT_RODATA_MERGE_CONST:
            prefix = one_only ? ".lr" : ".lrodata";
          case SECCAT_SRODATA:
            /* We don't split these for medium model.  Place them into
               default sections and hope for best.  */
      const char *name, *linkonce;
      name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      name = targetm.strip_name_encoding (name);
      /* If we're using one_only, then there needs to be a .gnu.linkonce
         prefix to the section name.  */
      linkonce = one_only ? ".gnu.linkonce" : "";
      string = ACONCAT ((linkonce, prefix, ".", name, NULL));
      DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
  /* Otherwise fall back to the generic unique-section logic.  */
  default_unique_section (decl, reloc);
4910 #ifdef COMMON_ASM_OP
4911 /* This says how to output assembler code to declare an
4912 uninitialized external linkage data object.
4914 For medium model x86-64 we need to use .largecomm opcode for
4917 x86_elf_aligned_common (FILE *file
,
4918 const char *name
, unsigned HOST_WIDE_INT size
,
4921 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4922 && size
> (unsigned int)ix86_section_threshold
)
4923 fputs (".largecomm\t", file
);
4925 fputs (COMMON_ASM_OP
, file
);
4926 assemble_name (file
, name
);
4927 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4928 size
, align
/ BITS_PER_UNIT
);
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */
/* NOTE(review): the return type, the trailing "align" parameter and the
   else keyword between the two switch_to_section calls were lost in
   extraction.  */
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
                        const char *name, unsigned HOST_WIDE_INT size,
  /* Large BSS objects under the medium models go into .lbss;
     everything else goes into the regular bss section.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  /* Reserve the object's storage; at least one byte.  */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */
/* NOTE(review): the return type, braces and the early "return false"
   arm were lost in extraction.  */
ix86_target_stack_probe (void)
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
  return TARGET_STACK_PROBE;
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
/* NOTE(review): many lines were lost in extraction -- the return type,
   the rtx declarations of a/b, several enclosing conditions, braces and
   the true/false return statements.  Code tokens below are preserved as
   found; recover the full body from upstream.  */
ix86_function_ok_for_sibcall (tree decl, tree exp)
  tree type, decl_or_type;
  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
      && (!decl
          || !targetm.binds_local_p (decl)))
  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
      decl_or_type = decl;
      type = TREE_TYPE (decl);
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);        /* pointer expression */
      type = TREE_TYPE (type);          /* pointer type */
      type = TREE_TYPE (type);          /* function type */
      decl_or_type = type;
  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
  if (STACK_REG_P (a) || STACK_REG_P (b))
      if (!rtx_equal_p (a, b))
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
  else if (!rtx_equal_p (a, b))
  /* The SYSV ABI has more call-clobbered registers;
     disallow sibcalls from MS to SYSV.  */
  if (cfun->machine->call_abi == MS_ABI
      && ix86_function_type_abi (type) == SYSV_ABI)
  /* If this call is indirect, we'll need to be able to use a
     call-clobbered register for the address of the target function.
     Make sure that all such registers are not used for passing
     parameters.  Note that DLLIMPORT functions are indirect.  */
      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
      if (ix86_function_regparm (type, NULL) >= 3)
          /* ??? Need to count the actual number of registers to be used,
             not the possible number of registers.  Fix later.  */
  /* Otherwise okay.  That also includes certain types of indirect calls.  */
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */
/* NOTE(review): the return type, the "tree args" and "bool
   *no_add_attrs" parameters (both referenced below), the "cst"
   declaration, braces and return statements were lost in extraction.
   Code tokens below are preserved as found.  */
ix86_handle_cconv_attribute (tree *node, tree name,
                             int flags ATTRIBUTE_UNUSED,
  /* These attributes apply only to function-ish trees.  */
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
      *no_add_attrs = true;
  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
          /* NOTE(review): "regparam" looks like a typo for "regparm" in
             this diagnostic -- present as found; do not translate.  */
          error ("regparam and thiscall attributes are not compatible");
      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
          warning (OPT_Wattributes,
                   "%qE attribute requires an integer constant argument",
          *no_add_attrs = true;
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
          warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
          *no_add_attrs = true;
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
           && TREE_CODE (*node) != METHOD_TYPE)
          || ix86_function_type_abi (*node) != MS_ABI)
        warning (OPT_Wattributes, "%qE attribute ignored",
      *no_add_attrs = true;
  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and thiscall attributes are not compatible");
  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
          error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
          error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
          error ("stdcall and thiscall attributes are not compatible");
  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
          error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
          error ("cdecl and thiscall attributes are not compatible");
  else if (is_attribute_p ("thiscall", name))
      /* NOTE(review): "none class-method" reads like a grammar slip for
         "a non-class method" -- present as found; do not translate.  */
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
        warning (OPT_Wattributes, "%qE attribute is used for none class-method",
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
          error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
          error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
          error ("cdecl and thiscall attributes are not compatible");
  /* Can combine sseregparm with all attributes.  */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */
/* NOTE(review): the return type, braces, the "alt" declaration and the
   64-bit early return were lost in extraction.  */
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
                                  tree args ATTRIBUTE_UNUSED,
                                  int flags, bool *no_add_attrs)
  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;
  /* The 64-bit ABI is unchanged for transactional memory.  */
  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
      /* Otherwise synthesize regparm(2).  */
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
  decl_attributes (node, alt, flags);
/* This function determines from TYPE the calling-convention.  */
/* NOTE(review): the return type, the "attrs"/"is_stdarg" declarations,
   braces, the 64-bit early return and the condition preceding the
   "|| TREE_CODE" line were lost in extraction.  */
ix86_get_callcvt (const_tree type)
  unsigned int ret = 0;
    return IX86_CALLCVT_CDECL;
  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
      /* Exactly one base convention is taken from the attributes.  */
      if (lookup_attribute ("cdecl", attrs))
        ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
        ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
        ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
        ret |= IX86_CALLCVT_THISCALL;
      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
          if (lookup_attribute ("regparm", attrs))
            ret |= IX86_CALLCVT_REGPARM;
          if (lookup_attribute ("sseregparm", attrs))
            ret |= IX86_CALLCVT_SSEREGPARM;
      if (IX86_BASE_CALLCVT(ret) != 0)
  is_stdarg = stdarg_p (type);
  /* -mrtd makes non-variadic functions default to stdcall.  */
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;
  return IX86_CALLCVT_THISCALL;
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
/* NOTE(review): the return type, braces and the return statements of
   each arm were lost in extraction.  */
ix86_comp_type_attributes (const_tree type1, const_tree type2)
  unsigned int ccvt1, ccvt2;
  /* Only function-ish types carry calling-convention attributes.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  /* Differing regparm counts make the types incompatible.  */
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */
/* NOTE(review): the return type, the declarations of "ccvt", "attr" and
   "regparm", several guard conditions, braces and break/return
   statements were lost in extraction.  Code tokens below are preserved
   as found.  */
ix86_function_regparm (const_tree type, const_tree decl)
    /* 64-bit: the count is fixed by the ABI in effect.  */
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;
  /* An explicit regparm(N) attribute wins.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
        regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
  /* Use register calling convention for local functions when possible.  */
      && TREE_CODE (decl) == FUNCTION_DECL
      && !(profile_flag && !flag_fentry))
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
          int local_regparm, globals = 0, regno;
          /* Make sure no regparm register is taken by a
             fixed register variable.  */
          for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
            if (fixed_regs[local_regparm])
          /* We don't want to use regparm(3) for nested functions as
             these use a static chain pointer in the third argument.  */
          if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
          /* In 32-bit mode save a register for the split stack.  */
          if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
          /* Each fixed register usage increases register pressure,
             so less registers should be used for argument passing.
             This functionality can be overriden by an explicit
             regparm value.  */
          for (regno = AX_REG; regno <= DI_REG; regno++)
            if (fixed_regs[regno])
            = globals < local_regparm ? local_regparm - globals : 0;
          if (local_regparm > regparm)
            regparm = local_regparm;
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */
/* NOTE(review): the return type, braces, the guards around the two
   error calls and several return statements were lost in extraction.  */
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
  /* 32-bit only; callers must not use this in 64-bit mode.  */
  gcc_assert (!TARGET_64BIT);
  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
          error ("calling %qD with attribute sseregparm without "
                 "SSE/SSE2 enabled", decl);
          error ("calling %qT with attribute sseregparm without "
                 "SSE/SSE2 enabled", type);
  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
      if (i && i->local && i->can_change_signature)
        return TARGET_SSE2 ? 2 : 1;
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */
/* NOTE(review): the return type line was lost in extraction.  */
ix86_eax_live_at_start_p (void)
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
/* Decide whether the hidden aggregate-return pointer should be kept
   (i.e. popped by the caller rather than the callee).
   NOTE(review): the return type, the "attr" declaration, the enclosing
   conditions and some returns were lost in extraction.  */
ix86_keep_aggregate_return_pointer (tree fntype)
      /* An explicit attribute overrides everything else.  */
      attr = lookup_attribute ("callee_pop_aggregate_return",
                               TYPE_ATTRIBUTES (fntype));
        return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
      /* For 32-bit MS-ABI the default is to keep aggregate
         return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
/* NOTE(review): the return type, the "ccvt" declaration, braces and the
   return statements were lost in extraction.  */
ix86_return_pops_args (tree fundecl, tree funtype, int size)
  /* None of the 64-bit ABIs pop arguments.  */
  ccvt = ix86_get_callcvt (funtype);
  /* Stdcall-family conventions pop their own (fixed) arguments.  */
  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
               | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
      int nregs = ix86_function_regparm (funtype, fundecl);
        return GET_MODE_SIZE (Pmode);
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
/* NOTE(review): the return type, the loop-local declarations (i, j,
   offset, win), braces, break/continue statements and the final returns
   were lost in extraction.  Code tokens below are preserved as
   found.  */
ix86_legitimate_combined_insn (rtx insn)
  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */
  /* Deliberate assignment-in-condition: caches the recog result into
     INSN_CODE while testing whether the pattern was recognized.  */
  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
      extract_insn (insn);
      preprocess_constraints ();
      for (i = 0; i < recog_data.n_operands; i++)
          rtx op = recog_data.operand[i];
          enum machine_mode mode = GET_MODE (op);
          struct operand_alternative *op_alt;
          /* A unary operator may be accepted by the predicate, but it
             is irrelevant for matching constraints.  */
          /* Look through a SUBREG of a hard register and remember the
             byte offset for the class test below.  */
          if (GET_CODE (op) == SUBREG)
              if (REG_P (SUBREG_REG (op))
                  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
                offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
                                              GET_MODE (SUBREG_REG (op)),
              op = SUBREG_REG (op);
          /* Only hard-register operands need checking.  */
          if (!(REG_P (op) && HARD_REGISTER_P (op)))
          op_alt = recog_op_alt[i];
          /* Operand has no constraints, anything is OK.  */
          win = !recog_data.n_alternatives;
          for (j = 0; j < recog_data.n_alternatives; j++)
              if (op_alt[j].anything_ok
                  || (op_alt[j].matches != -1
                      (recog_data.operand[i],
                       recog_data.operand[op_alt[j].matches]))
                  || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
/* NOTE(review): the enclosing braces were lost in extraction.  */
static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
  /* LP64: 1<<44 on Mach-O, 0x7fff8000 elsewhere; ILP32: 1<<29.  */
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
                                     : HOST_WIDE_INT_C (0x7fff8000))
                     : (HOST_WIDE_INT_1 << 29);
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */
/* NOTE(review): the return type, the "i" declaration, the TARGET_64BIT
   guards, braces, the else keyword and several returns were lost in
   extraction.  Code tokens below are preserved as found.  */
ix86_function_arg_regno_p (int regno)
  const int *parm_regs;
      return (regno < REGPARM_MAX
              || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      return (regno < REGPARM_MAX
              || (TARGET_MMX && MMX_REGNO_P (regno)
                  && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
              || (TARGET_SSE && SSE_REGNO_P (regno)
                  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI here.  */
  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
  /* Pick the integer parameter-register table for the default ABI.  */
  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
                   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
/* Return if we do not know how to pass TYPE solely in registers.  */
/* NOTE(review): the return type, braces and the first arm's return were
   lost in extraction.  */
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
  /* Defer to the generic variable-size/padding test first.  */
  if (must_pass_in_stack_var_size_or_pad (mode, type))
  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
          && type && TREE_CODE (type) != VECTOR_TYPE);
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   ABI format.  */
/* NOTE(review): the return type, the else keyword and the return values
   were lost in extraction -- presumably the 64-bit MS-ABI arm returns
   the shadow-space size; confirm against upstream.  */
ix86_reg_parm_stack_space (const_tree fndecl)
  enum calling_abi call_abi = SYSV_ABI;
  /* FNDECL may be a real decl or (for libcalls) a type.  */
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call ABI used.  */
/* NOTE(review): the return type, braces, the abi assignments inside the
   branches and the returns were lost in extraction.  */
ix86_function_type_abi (const_tree fntype)
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
      enum calling_abi abi = ix86_abi;
      /* ms_abi/sysv_abi attributes flip the default ABI.  */
      if (abi == SYSV_ABI)
          if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
/* NOTE(review): the return type line was lost in extraction.  */
ix86_libc_has_function (enum function_class fn_class)
  /* Thin forwarder to the target hook.  */
  return targetm.libc_has_function (fn_class);
/* Return whether FN carries the ms_hook_prologue attribute, diagnosing
   the unsupported nested-function case.
   NOTE(review): the return type, braces and the true/false returns were
   lost in extraction.  */
ix86_function_ms_hook_prologue (const_tree fn)
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
      /* Nested functions cannot be hot-patched this way.  */
      if (decl_function_context (fn) != NULL_TREE)
        error_at (DECL_SOURCE_LOCATION (fn),
                  "ms_hook_prologue is not compatible with nested function");
/* Return the calling ABI of function declaration FNDECL.
   NOTE(review): a guard line (presumably for a NULL fndecl) appears to
   have been lost in extraction -- confirm against upstream.  */
static enum calling_abi
ix86_function_abi (const_tree fndecl)
  return ix86_function_type_abi (TREE_TYPE (fndecl));
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call ABI used.  */
/* NOTE(review): the return type line was lost in extraction.  */
ix86_cfun_abi (void)
  return cfun->machine->call_abi;
/* Write the extra assembler code needed to declare a function properly.  */
/* NOTE(review): the return type, the "tree decl" parameter (referenced
   below), braces and the conditions guarding the hot-patch output were
   lost in extraction.  Code tokens below are preserved as found.  */
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
      /* Pad with int3 (0xcc) bytes ahead of the label so a hot-patch
         jump target has room.  */
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;
      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
  ASM_OUTPUT_LABEL (asm_out_file, fname);
  /* Output magic byte marker, if hot-patch attribute is set.  */
      /* leaq [%rsp + 0], %rsp  */
      asm_fprintf (asm_out_file, ASM_BYTE
                   "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
      /* movl.s %edi, %edi
         (push %ebp -- comment line lost in extraction)
         movl.s %esp, %ebp  */
      asm_fprintf (asm_out_file, ASM_BYTE
                   "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
/* Forward declaration used by the ABI-switching logic below.  */
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
/* NOTE(review): the return type, braces and the else keyword were lost
   in extraction.  */
ix86_call_abi_override (const_tree fndecl)
  /* NULL means "use the default ABI".  */
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* 64-bit MS and SYSV ABI have different set of call used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function context
   since this is needed only during RTL expansion.  */
/* NOTE(review): the return type, the surrounding "if (...)" condition
   and the body that re-initializes registers were lost in extraction;
   the "==" below is a comparison inside the missing condition, not an
   assignment typo.  */
ix86_maybe_switch_abi (void)
      call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */
/* NOTE(review): the return type, the trailing parameters (fndecl and
   caller are both referenced below), braces, the else keywords and many
   TARGET_64BIT guards were lost in extraction.  Code tokens below are
   preserved as found.  */
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
  struct cgraph_local_info *i;
  /* Start from a zeroed state.  */
  memset (cum, 0, sizeof (*cum));
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      cum->call_abi = ix86_function_type_abi (fntype);
  cum->caller = caller;
  /* Set up the number of registers to use for passing arguments.  */
  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
           "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
      cum->nregs = (cum->call_abi == SYSV_ABI
                    ? X86_64_REGPARM_MAX
                    : X86_64_MS_REGPARM_MAX);
  cum->sse_nregs = SSE_REGPARM_MAX;
      cum->sse_nregs = (cum->call_abi == SYSV_ABI
                        ? X86_64_SSE_REGPARM_MAX
                        : X86_64_MS_SSE_REGPARM_MAX);
  cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;
  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesytem is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
              cum->fastcall = 1;        /* Same first register as in fastcall.  */
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        cum->nregs = ix86_function_regparm (fntype, fndecl);
      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The midde-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */
/* NOTE(review): braces, the else keyword, the guards around the two
   warnings (involving the warnedavx/warnedsse latches and CUM) and the
   final returns were lost in extraction.  Code tokens below are
   preserved as found.  */
static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
  enum machine_mode mode = TYPE_MODE (type);
  /* Only vector types whose TYPE_MODE isn't already a vector mode need
     remapping.  */
  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
          enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
          /* Start the search from the first float or int vector mode.  */
          if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
            mode = MIN_MODE_VECTOR_FLOAT;
            mode = MIN_MODE_VECTOR_INT;
          /* Get the mode which has this inner mode and number of units.  */
          for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
                if (size == 32 && !TARGET_AVX)
                    static bool warnedavx;
                      warning (0, "AVX vector argument without AVX "
                               "enabled changes the ABI");
                    return TYPE_MODE (type);
                else if ((size == 8 || size == 16) && !TARGET_SSE)
                    static bool warnedsse;
                      warning (0, "SSE vector argument without SSE "
                               "enabled changes the ABI");
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
/* NOTE(review): the return type, the "regno" parameter line, the "tmp"
   declaration, braces, the else keyword and the final return were lost
   in extraction.  */
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
  /* A concrete ORIG_MODE can be used directly.  */
  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
      /* Otherwise wrap the natural-mode register in a one-element
         PARALLEL at offset zero.  */
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6012 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6013 of this code is to classify each 8bytes of incoming argument by the register
6014 class and assign registers accordingly. */
6016 /* Return the union class of CLASS1 and CLASS2.
6017 See the x86-64 PS ABI for details. */
6019 static enum x86_64_reg_class
6020 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6022 /* Rule #1: If both classes are equal, this is the resulting class. */
6023 if (class1
== class2
)
6026 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6028 if (class1
== X86_64_NO_CLASS
)
6030 if (class2
== X86_64_NO_CLASS
)
6033 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6034 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6035 return X86_64_MEMORY_CLASS
;
6037 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6038 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6039 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6040 return X86_64_INTEGERSI_CLASS
;
6041 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6042 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6043 return X86_64_INTEGER_CLASS
;
6045 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6047 if (class1
== X86_64_X87_CLASS
6048 || class1
== X86_64_X87UP_CLASS
6049 || class1
== X86_64_COMPLEX_X87_CLASS
6050 || class2
== X86_64_X87_CLASS
6051 || class2
== X86_64_X87UP_CLASS
6052 || class2
== X86_64_COMPLEX_X87_CLASS
)
6053 return X86_64_MEMORY_CLASS
;
6055 /* Rule #6: Otherwise class SSE is used. */
6056 return X86_64_SSE_CLASS
;
6059 /* Classify the argument of type TYPE and mode MODE.
6060 CLASSES will be filled by the register class used to pass each word
6061 of the operand. The number of words is returned. In case the parameter
6062 should be passed in memory, 0 is returned. As a special case for zero
6063 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6065 BIT_OFFSET is used internally for handling records and specifies offset
6066 of the offset in bits modulo 256 to avoid overflow cases.
6068 See the x86-64 PS ABI for details.
6072 classify_argument (enum machine_mode mode
, const_tree type
,
6073 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6075 HOST_WIDE_INT bytes
=
6076 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6078 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6080 /* Variable sized entities are always passed/returned in memory. */
6084 if (mode
!= VOIDmode
6085 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6088 if (type
&& AGGREGATE_TYPE_P (type
))
6092 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6094 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6098 for (i
= 0; i
< words
; i
++)
6099 classes
[i
] = X86_64_NO_CLASS
;
6101 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6102 signalize memory class, so handle it as special case. */
6105 classes
[0] = X86_64_NO_CLASS
;
6109 /* Classify each field of record and merge classes. */
6110 switch (TREE_CODE (type
))
6113 /* And now merge the fields of structure. */
6114 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6116 if (TREE_CODE (field
) == FIELD_DECL
)
6120 if (TREE_TYPE (field
) == error_mark_node
)
6123 /* Bitfields are always classified as integer. Handle them
6124 early, since later code would consider them to be
6125 misaligned integers. */
6126 if (DECL_BIT_FIELD (field
))
6128 for (i
= (int_bit_position (field
)
6129 + (bit_offset
% 64)) / 8 / 8;
6130 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6131 + tree_low_cst (DECL_SIZE (field
), 0)
6134 merge_classes (X86_64_INTEGER_CLASS
,
6141 type
= TREE_TYPE (field
);
6143 /* Flexible array member is ignored. */
6144 if (TYPE_MODE (type
) == BLKmode
6145 && TREE_CODE (type
) == ARRAY_TYPE
6146 && TYPE_SIZE (type
) == NULL_TREE
6147 && TYPE_DOMAIN (type
) != NULL_TREE
6148 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6153 if (!warned
&& warn_psabi
)
6156 inform (input_location
,
6157 "the ABI of passing struct with"
6158 " a flexible array member has"
6159 " changed in GCC 4.4");
6163 num
= classify_argument (TYPE_MODE (type
), type
,
6165 (int_bit_position (field
)
6166 + bit_offset
) % 256);
6169 pos
= (int_bit_position (field
)
6170 + (bit_offset
% 64)) / 8 / 8;
6171 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6173 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6180 /* Arrays are handled as small records. */
6183 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6184 TREE_TYPE (type
), subclasses
, bit_offset
);
6188 /* The partial classes are now full classes. */
6189 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6190 subclasses
[0] = X86_64_SSE_CLASS
;
6191 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6192 && !((bit_offset
% 64) == 0 && bytes
== 4))
6193 subclasses
[0] = X86_64_INTEGER_CLASS
;
6195 for (i
= 0; i
< words
; i
++)
6196 classes
[i
] = subclasses
[i
% num
];
6201 case QUAL_UNION_TYPE
:
6202 /* Unions are similar to RECORD_TYPE but offset is always 0.
6204 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6206 if (TREE_CODE (field
) == FIELD_DECL
)
6210 if (TREE_TYPE (field
) == error_mark_node
)
6213 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6214 TREE_TYPE (field
), subclasses
,
6218 for (i
= 0; i
< num
; i
++)
6219 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6230 /* When size > 16 bytes, if the first one isn't
6231 X86_64_SSE_CLASS or any other ones aren't
6232 X86_64_SSEUP_CLASS, everything should be passed in
6234 if (classes
[0] != X86_64_SSE_CLASS
)
6237 for (i
= 1; i
< words
; i
++)
6238 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6242 /* Final merger cleanup. */
6243 for (i
= 0; i
< words
; i
++)
6245 /* If one class is MEMORY, everything should be passed in
6247 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6250 /* The X86_64_SSEUP_CLASS should be always preceded by
6251 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6252 if (classes
[i
] == X86_64_SSEUP_CLASS
6253 && classes
[i
- 1] != X86_64_SSE_CLASS
6254 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6256 /* The first one should never be X86_64_SSEUP_CLASS. */
6257 gcc_assert (i
!= 0);
6258 classes
[i
] = X86_64_SSE_CLASS
;
6261 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6262 everything should be passed in memory. */
6263 if (classes
[i
] == X86_64_X87UP_CLASS
6264 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6268 /* The first one should never be X86_64_X87UP_CLASS. */
6269 gcc_assert (i
!= 0);
6270 if (!warned
&& warn_psabi
)
6273 inform (input_location
,
6274 "the ABI of passing union with long double"
6275 " has changed in GCC 4.4");
6283 /* Compute alignment needed. We align all types to natural boundaries with
6284 exception of XFmode that is aligned to 64bits. */
6285 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6287 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6290 mode_alignment
= 128;
6291 else if (mode
== XCmode
)
6292 mode_alignment
= 256;
6293 if (COMPLEX_MODE_P (mode
))
6294 mode_alignment
/= 2;
6295 /* Misaligned fields are always returned in memory. */
6296 if (bit_offset
% mode_alignment
)
6300 /* for V1xx modes, just use the base mode */
6301 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6302 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6303 mode
= GET_MODE_INNER (mode
);
6305 /* Classification of atomic types. */
6310 classes
[0] = X86_64_SSE_CLASS
;
6313 classes
[0] = X86_64_SSE_CLASS
;
6314 classes
[1] = X86_64_SSEUP_CLASS
;
6324 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6328 classes
[0] = X86_64_INTEGERSI_CLASS
;
6331 else if (size
<= 64)
6333 classes
[0] = X86_64_INTEGER_CLASS
;
6336 else if (size
<= 64+32)
6338 classes
[0] = X86_64_INTEGER_CLASS
;
6339 classes
[1] = X86_64_INTEGERSI_CLASS
;
6342 else if (size
<= 64+64)
6344 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6352 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6356 /* OImode shouldn't be used directly. */
6361 if (!(bit_offset
% 64))
6362 classes
[0] = X86_64_SSESF_CLASS
;
6364 classes
[0] = X86_64_SSE_CLASS
;
6367 classes
[0] = X86_64_SSEDF_CLASS
;
6370 classes
[0] = X86_64_X87_CLASS
;
6371 classes
[1] = X86_64_X87UP_CLASS
;
6374 classes
[0] = X86_64_SSE_CLASS
;
6375 classes
[1] = X86_64_SSEUP_CLASS
;
6378 classes
[0] = X86_64_SSE_CLASS
;
6379 if (!(bit_offset
% 64))
6385 if (!warned
&& warn_psabi
)
6388 inform (input_location
,
6389 "the ABI of passing structure with complex float"
6390 " member has changed in GCC 4.4");
6392 classes
[1] = X86_64_SSESF_CLASS
;
6396 classes
[0] = X86_64_SSEDF_CLASS
;
6397 classes
[1] = X86_64_SSEDF_CLASS
;
6400 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6403 /* This modes is larger than 16 bytes. */
6411 classes
[0] = X86_64_SSE_CLASS
;
6412 classes
[1] = X86_64_SSEUP_CLASS
;
6413 classes
[2] = X86_64_SSEUP_CLASS
;
6414 classes
[3] = X86_64_SSEUP_CLASS
;
6422 classes
[0] = X86_64_SSE_CLASS
;
6423 classes
[1] = X86_64_SSEUP_CLASS
;
6431 classes
[0] = X86_64_SSE_CLASS
;
6437 gcc_assert (VECTOR_MODE_P (mode
));
6442 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6444 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6445 classes
[0] = X86_64_INTEGERSI_CLASS
;
6447 classes
[0] = X86_64_INTEGER_CLASS
;
6448 classes
[1] = X86_64_INTEGER_CLASS
;
6449 return 1 + (bytes
> 8);
6453 /* Examine the argument and return set number of register required in each
6454 class. Return 0 iff parameter should be passed in memory. */
6456 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6457 int *int_nregs
, int *sse_nregs
)
6459 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6460 int n
= classify_argument (mode
, type
, regclass
, 0);
6466 for (n
--; n
>= 0; n
--)
6467 switch (regclass
[n
])
6469 case X86_64_INTEGER_CLASS
:
6470 case X86_64_INTEGERSI_CLASS
:
6473 case X86_64_SSE_CLASS
:
6474 case X86_64_SSESF_CLASS
:
6475 case X86_64_SSEDF_CLASS
:
6478 case X86_64_NO_CLASS
:
6479 case X86_64_SSEUP_CLASS
:
6481 case X86_64_X87_CLASS
:
6482 case X86_64_X87UP_CLASS
:
6486 case X86_64_COMPLEX_X87_CLASS
:
6487 return in_return
? 2 : 0;
6488 case X86_64_MEMORY_CLASS
:
6494 /* Construct container for the argument used by GCC interface. See
6495 FUNCTION_ARG for the detailed description. */
6498 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6499 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6500 const int *intreg
, int sse_regno
)
6502 /* The following variables hold the static issued_error state. */
6503 static bool issued_sse_arg_error
;
6504 static bool issued_sse_ret_error
;
6505 static bool issued_x87_ret_error
;
6507 enum machine_mode tmpmode
;
6509 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6510 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6514 int needed_sseregs
, needed_intregs
;
6515 rtx exp
[MAX_CLASSES
];
6518 n
= classify_argument (mode
, type
, regclass
, 0);
6521 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6524 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6527 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6528 some less clueful developer tries to use floating-point anyway. */
6529 if (needed_sseregs
&& !TARGET_SSE
)
6533 if (!issued_sse_ret_error
)
6535 error ("SSE register return with SSE disabled");
6536 issued_sse_ret_error
= true;
6539 else if (!issued_sse_arg_error
)
6541 error ("SSE register argument with SSE disabled");
6542 issued_sse_arg_error
= true;
6547 /* Likewise, error if the ABI requires us to return values in the
6548 x87 registers and the user specified -mno-80387. */
6549 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6550 for (i
= 0; i
< n
; i
++)
6551 if (regclass
[i
] == X86_64_X87_CLASS
6552 || regclass
[i
] == X86_64_X87UP_CLASS
6553 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6555 if (!issued_x87_ret_error
)
6557 error ("x87 register return with x87 disabled");
6558 issued_x87_ret_error
= true;
6563 /* First construct simple cases. Avoid SCmode, since we want to use
6564 single register to pass this type. */
6565 if (n
== 1 && mode
!= SCmode
)
6566 switch (regclass
[0])
6568 case X86_64_INTEGER_CLASS
:
6569 case X86_64_INTEGERSI_CLASS
:
6570 return gen_rtx_REG (mode
, intreg
[0]);
6571 case X86_64_SSE_CLASS
:
6572 case X86_64_SSESF_CLASS
:
6573 case X86_64_SSEDF_CLASS
:
6574 if (mode
!= BLKmode
)
6575 return gen_reg_or_parallel (mode
, orig_mode
,
6576 SSE_REGNO (sse_regno
));
6578 case X86_64_X87_CLASS
:
6579 case X86_64_COMPLEX_X87_CLASS
:
6580 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6581 case X86_64_NO_CLASS
:
6582 /* Zero sized array, struct or class. */
6588 && regclass
[0] == X86_64_SSE_CLASS
6589 && regclass
[1] == X86_64_SSEUP_CLASS
6591 return gen_reg_or_parallel (mode
, orig_mode
,
6592 SSE_REGNO (sse_regno
));
6594 && regclass
[0] == X86_64_SSE_CLASS
6595 && regclass
[1] == X86_64_SSEUP_CLASS
6596 && regclass
[2] == X86_64_SSEUP_CLASS
6597 && regclass
[3] == X86_64_SSEUP_CLASS
6599 return gen_reg_or_parallel (mode
, orig_mode
,
6600 SSE_REGNO (sse_regno
));
6602 && regclass
[0] == X86_64_X87_CLASS
6603 && regclass
[1] == X86_64_X87UP_CLASS
)
6604 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6607 && regclass
[0] == X86_64_INTEGER_CLASS
6608 && regclass
[1] == X86_64_INTEGER_CLASS
6609 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6610 && intreg
[0] + 1 == intreg
[1])
6611 return gen_rtx_REG (mode
, intreg
[0]);
6613 /* Otherwise figure out the entries of the PARALLEL. */
6614 for (i
= 0; i
< n
; i
++)
6618 switch (regclass
[i
])
6620 case X86_64_NO_CLASS
:
6622 case X86_64_INTEGER_CLASS
:
6623 case X86_64_INTEGERSI_CLASS
:
6624 /* Merge TImodes on aligned occasions here too. */
6625 if (i
* 8 + 8 > bytes
)
6627 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6628 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6632 /* We've requested 24 bytes we
6633 don't have mode for. Use DImode. */
6634 if (tmpmode
== BLKmode
)
6637 = gen_rtx_EXPR_LIST (VOIDmode
,
6638 gen_rtx_REG (tmpmode
, *intreg
),
6642 case X86_64_SSESF_CLASS
:
6644 = gen_rtx_EXPR_LIST (VOIDmode
,
6645 gen_rtx_REG (SFmode
,
6646 SSE_REGNO (sse_regno
)),
6650 case X86_64_SSEDF_CLASS
:
6652 = gen_rtx_EXPR_LIST (VOIDmode
,
6653 gen_rtx_REG (DFmode
,
6654 SSE_REGNO (sse_regno
)),
6658 case X86_64_SSE_CLASS
:
6666 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6676 && regclass
[1] == X86_64_SSEUP_CLASS
6677 && regclass
[2] == X86_64_SSEUP_CLASS
6678 && regclass
[3] == X86_64_SSEUP_CLASS
);
6686 = gen_rtx_EXPR_LIST (VOIDmode
,
6687 gen_rtx_REG (tmpmode
,
6688 SSE_REGNO (sse_regno
)),
6697 /* Empty aligned struct, union or class. */
6701 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6702 for (i
= 0; i
< nexps
; i
++)
6703 XVECEXP (ret
, 0, i
) = exp
[i
];
6707 /* Update the data in CUM to advance over an argument of mode MODE
6708 and data type TYPE. (TYPE is null for libcalls where that information
6709 may not be available.) */
6712 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6713 const_tree type
, HOST_WIDE_INT bytes
,
6714 HOST_WIDE_INT words
)
6730 cum
->words
+= words
;
6731 cum
->nregs
-= words
;
6732 cum
->regno
+= words
;
6734 if (cum
->nregs
<= 0)
6742 /* OImode shouldn't be used directly. */
6746 if (cum
->float_in_sse
< 2)
6749 if (cum
->float_in_sse
< 1)
6766 if (!type
|| !AGGREGATE_TYPE_P (type
))
6768 cum
->sse_words
+= words
;
6769 cum
->sse_nregs
-= 1;
6770 cum
->sse_regno
+= 1;
6771 if (cum
->sse_nregs
<= 0)
6785 if (!type
|| !AGGREGATE_TYPE_P (type
))
6787 cum
->mmx_words
+= words
;
6788 cum
->mmx_nregs
-= 1;
6789 cum
->mmx_regno
+= 1;
6790 if (cum
->mmx_nregs
<= 0)
6801 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6802 const_tree type
, HOST_WIDE_INT words
, bool named
)
6804 int int_nregs
, sse_nregs
;
6806 /* Unnamed 256bit vector mode parameters are passed on stack. */
6807 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6810 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6811 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6813 cum
->nregs
-= int_nregs
;
6814 cum
->sse_nregs
-= sse_nregs
;
6815 cum
->regno
+= int_nregs
;
6816 cum
->sse_regno
+= sse_nregs
;
6820 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6821 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6822 cum
->words
+= words
;
6827 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6828 HOST_WIDE_INT words
)
6830 /* Otherwise, this should be passed indirect. */
6831 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6833 cum
->words
+= words
;
6841 /* Update the data in CUM to advance over an argument of mode MODE and
6842 data type TYPE. (TYPE is null for libcalls where that information
6843 may not be available.) */
6846 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6847 const_tree type
, bool named
)
6849 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6850 HOST_WIDE_INT bytes
, words
;
6852 if (mode
== BLKmode
)
6853 bytes
= int_size_in_bytes (type
);
6855 bytes
= GET_MODE_SIZE (mode
);
6856 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6859 mode
= type_natural_mode (type
, NULL
);
6861 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6862 function_arg_advance_ms_64 (cum
, bytes
, words
);
6863 else if (TARGET_64BIT
)
6864 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6866 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6869 /* Define where to put the arguments to a function.
6870 Value is zero to push the argument on the stack,
6871 or a hard register in which to store the argument.
6873 MODE is the argument's machine mode.
6874 TYPE is the data type of the argument (as a tree).
6875 This is null for libcalls where that information may
6877 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6878 the preceding args and about the function being called.
6879 NAMED is nonzero if this argument is a named parameter
6880 (otherwise it is an extra parameter matching an ellipsis). */
6883 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6884 enum machine_mode orig_mode
, const_tree type
,
6885 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6887 static bool warnedsse
, warnedmmx
;
6889 /* Avoid the AL settings for the Unix64 ABI. */
6890 if (mode
== VOIDmode
)
6906 if (words
<= cum
->nregs
)
6908 int regno
= cum
->regno
;
6910 /* Fastcall allocates the first two DWORD (SImode) or
6911 smaller arguments to ECX and EDX if it isn't an
6917 || (type
&& AGGREGATE_TYPE_P (type
)))
6920 /* ECX not EAX is the first allocated register. */
6921 if (regno
== AX_REG
)
6924 return gen_rtx_REG (mode
, regno
);
6929 if (cum
->float_in_sse
< 2)
6932 if (cum
->float_in_sse
< 1)
6936 /* In 32bit, we pass TImode in xmm registers. */
6943 if (!type
|| !AGGREGATE_TYPE_P (type
))
6945 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6948 warning (0, "SSE vector argument without SSE enabled "
6952 return gen_reg_or_parallel (mode
, orig_mode
,
6953 cum
->sse_regno
+ FIRST_SSE_REG
);
6958 /* OImode shouldn't be used directly. */
6967 if (!type
|| !AGGREGATE_TYPE_P (type
))
6970 return gen_reg_or_parallel (mode
, orig_mode
,
6971 cum
->sse_regno
+ FIRST_SSE_REG
);
6981 if (!type
|| !AGGREGATE_TYPE_P (type
))
6983 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6986 warning (0, "MMX vector argument without MMX enabled "
6990 return gen_reg_or_parallel (mode
, orig_mode
,
6991 cum
->mmx_regno
+ FIRST_MMX_REG
);
7000 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7001 enum machine_mode orig_mode
, const_tree type
, bool named
)
7003 /* Handle a hidden AL argument containing number of registers
7004 for varargs x86-64 functions. */
7005 if (mode
== VOIDmode
)
7006 return GEN_INT (cum
->maybe_vaarg
7007 ? (cum
->sse_nregs
< 0
7008 ? X86_64_SSE_REGPARM_MAX
7023 /* Unnamed 256bit vector mode parameters are passed on stack. */
7029 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7031 &x86_64_int_parameter_registers
[cum
->regno
],
7036 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7037 enum machine_mode orig_mode
, bool named
,
7038 HOST_WIDE_INT bytes
)
7042 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7043 We use value of -2 to specify that current function call is MSABI. */
7044 if (mode
== VOIDmode
)
7045 return GEN_INT (-2);
7047 /* If we've run out of registers, it goes on the stack. */
7048 if (cum
->nregs
== 0)
7051 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7053 /* Only floating point modes are passed in anything but integer regs. */
7054 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7057 regno
= cum
->regno
+ FIRST_SSE_REG
;
7062 /* Unnamed floating parameters are passed in both the
7063 SSE and integer registers. */
7064 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7065 t2
= gen_rtx_REG (mode
, regno
);
7066 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7067 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7068 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7071 /* Handle aggregated types passed in register. */
7072 if (orig_mode
== BLKmode
)
7074 if (bytes
> 0 && bytes
<= 8)
7075 mode
= (bytes
> 4 ? DImode
: SImode
);
7076 if (mode
== BLKmode
)
7080 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7083 /* Return where to put the arguments to a function.
7084 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7086 MODE is the argument's machine mode. TYPE is the data type of the
7087 argument. It is null for libcalls where that information may not be
7088 available. CUM gives information about the preceding args and about
7089 the function being called. NAMED is nonzero if this argument is a
7090 named parameter (otherwise it is an extra parameter matching an
7094 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7095 const_tree type
, bool named
)
7097 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7098 enum machine_mode mode
= omode
;
7099 HOST_WIDE_INT bytes
, words
;
7102 if (mode
== BLKmode
)
7103 bytes
= int_size_in_bytes (type
);
7105 bytes
= GET_MODE_SIZE (mode
);
7106 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7108 /* To simplify the code below, represent vector types with a vector mode
7109 even if MMX/SSE are not active. */
7110 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7111 mode
= type_natural_mode (type
, cum
);
7113 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7114 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7115 else if (TARGET_64BIT
)
7116 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7118 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7123 /* A C expression that indicates when an argument must be passed by
7124 reference. If nonzero for an argument, a copy of that argument is
7125 made in memory and a pointer to the argument is passed instead of
7126 the argument itself. The pointer is passed in whatever way is
7127 appropriate for passing a pointer to that type. */
7130 ix86_pass_by_reference (cumulative_args_t cum_v
, enum machine_mode mode
,
7131 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7133 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7135 /* See Windows x64 Software Convention. */
7136 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7138 int msize
= (int) GET_MODE_SIZE (mode
);
7141 /* Arrays are passed by reference. */
7142 if (TREE_CODE (type
) == ARRAY_TYPE
)
7145 if (AGGREGATE_TYPE_P (type
))
7147 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7148 are passed by reference. */
7149 msize
= int_size_in_bytes (type
);
7153 /* __m128 is passed by reference. */
7155 case 1: case 2: case 4: case 8:
7161 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7167 /* Return true when TYPE should be 128bit aligned for 32bit argument
7168 passing ABI. XXX: This function is obsolete and is only used for
7169 checking psABI compatibility with previous versions of GCC. */
7172 ix86_compat_aligned_value_p (const_tree type
)
7174 enum machine_mode mode
= TYPE_MODE (type
);
7175 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7179 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7181 if (TYPE_ALIGN (type
) < 128)
7184 if (AGGREGATE_TYPE_P (type
))
7186 /* Walk the aggregates recursively. */
7187 switch (TREE_CODE (type
))
7191 case QUAL_UNION_TYPE
:
7195 /* Walk all the structure fields. */
7196 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7198 if (TREE_CODE (field
) == FIELD_DECL
7199 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7206 /* Just for use if some languages passes arrays by value. */
7207 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7218 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7219 XXX: This function is obsolete and is only used for checking psABI
7220 compatibility with previous versions of GCC. */
7223 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7224 const_tree type
, unsigned int align
)
7226 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7227 natural boundaries. */
7228 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7230 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7231 make an exception for SSE modes since these require 128bit
7234 The handling here differs from field_alignment. ICC aligns MMX
7235 arguments to 4 byte boundaries, while structure fields are aligned
7236 to 8 byte boundaries. */
7239 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7240 align
= PARM_BOUNDARY
;
7244 if (!ix86_compat_aligned_value_p (type
))
7245 align
= PARM_BOUNDARY
;
7248 if (align
> BIGGEST_ALIGNMENT
)
7249 align
= BIGGEST_ALIGNMENT
;
7253 /* Return true when TYPE should be 128bit aligned for 32bit argument
7257 ix86_contains_aligned_value_p (const_tree type
)
7259 enum machine_mode mode
= TYPE_MODE (type
);
7261 if (mode
== XFmode
|| mode
== XCmode
)
7264 if (TYPE_ALIGN (type
) < 128)
7267 if (AGGREGATE_TYPE_P (type
))
7269 /* Walk the aggregates recursively. */
7270 switch (TREE_CODE (type
))
7274 case QUAL_UNION_TYPE
:
7278 /* Walk all the structure fields. */
7279 for (field
= TYPE_FIELDS (type
);
7281 field
= DECL_CHAIN (field
))
7283 if (TREE_CODE (field
) == FIELD_DECL
7284 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7291 /* Just for use if some languages passes arrays by value. */
7292 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7301 return TYPE_ALIGN (type
) >= 128;
7306 /* Gives the alignment boundary, in bits, of an argument with the
7307 specified mode and type. */
7310 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7315 /* Since the main variant type is used for call, we convert it to
7316 the main variant type. */
7317 type
= TYPE_MAIN_VARIANT (type
);
7318 align
= TYPE_ALIGN (type
);
7321 align
= GET_MODE_ALIGNMENT (mode
);
7322 if (align
< PARM_BOUNDARY
)
7323 align
= PARM_BOUNDARY
;
7327 unsigned int saved_align
= align
;
7331 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7334 if (mode
== XFmode
|| mode
== XCmode
)
7335 align
= PARM_BOUNDARY
;
7337 else if (!ix86_contains_aligned_value_p (type
))
7338 align
= PARM_BOUNDARY
;
7341 align
= PARM_BOUNDARY
;
7346 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7350 inform (input_location
,
7351 "The ABI for passing parameters with %d-byte"
7352 " alignment has changed in GCC 4.6",
7353 align
/ BITS_PER_UNIT
);
7360 /* Return true if N is a possible register number of function value. */
7363 ix86_function_value_regno_p (const unsigned int regno
)
7370 case FIRST_FLOAT_REG
:
7371 /* TODO: The function should depend on current function ABI but
7372 builtins.c would need updating then. Therefore we use the
7374 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7376 return TARGET_FLOAT_RETURNS_IN_80387
;
7382 if (TARGET_MACHO
|| TARGET_64BIT
)
7390 /* Define how to find the value returned by a function.
7391 VALTYPE is the data type of the value (as a tree).
7392 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7393 otherwise, FUNC is 0. */
7396 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7397 const_tree fntype
, const_tree fn
)
7401 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7402 we normally prevent this case when mmx is not available. However
7403 some ABIs may require the result to be returned like DImode. */
7404 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7405 regno
= FIRST_MMX_REG
;
7407 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7408 we prevent this case when sse is not available. However some ABIs
7409 may require the result to be returned like integer TImode. */
7410 else if (mode
== TImode
7411 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7412 regno
= FIRST_SSE_REG
;
7414 /* 32-byte vector modes in %ymm0. */
7415 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7416 regno
= FIRST_SSE_REG
;
7418 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7419 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7420 regno
= FIRST_FLOAT_REG
;
7422 /* Most things go in %eax. */
7425 /* Override FP return register with %xmm0 for local functions when
7426 SSE math is enabled or for functions with sseregparm attribute. */
7427 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7429 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7430 if ((sse_level
>= 1 && mode
== SFmode
)
7431 || (sse_level
== 2 && mode
== DFmode
))
7432 regno
= FIRST_SSE_REG
;
7435 /* OImode shouldn't be used directly. */
7436 gcc_assert (mode
!= OImode
);
7438 return gen_rtx_REG (orig_mode
, regno
);
7442 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7447 /* Handle libcalls, which don't provide a type node. */
7448 if (valtype
== NULL
)
7462 regno
= FIRST_SSE_REG
;
7466 regno
= FIRST_FLOAT_REG
;
7474 return gen_rtx_REG (mode
, regno
);
7476 else if (POINTER_TYPE_P (valtype
))
7478 /* Pointers are always returned in word_mode. */
7482 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7483 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7484 x86_64_int_return_registers
, 0);
7486 /* For zero sized structures, construct_container returns NULL, but we
7487 need to keep rest of compiler happy by returning meaningful value. */
7489 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7495 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7498 unsigned int regno
= AX_REG
;
7502 switch (GET_MODE_SIZE (mode
))
7505 if (valtype
!= NULL_TREE
7506 && !VECTOR_INTEGER_TYPE_P (valtype
)
7507 && !VECTOR_INTEGER_TYPE_P (valtype
)
7508 && !INTEGRAL_TYPE_P (valtype
)
7509 && !VECTOR_FLOAT_TYPE_P (valtype
))
7511 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7512 && !COMPLEX_MODE_P (mode
))
7513 regno
= FIRST_SSE_REG
;
7517 if (mode
== SFmode
|| mode
== DFmode
)
7518 regno
= FIRST_SSE_REG
;
7524 return gen_rtx_REG (orig_mode
, regno
);
7528 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7529 enum machine_mode orig_mode
, enum machine_mode mode
)
7531 const_tree fn
, fntype
;
7534 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7535 fn
= fntype_or_decl
;
7536 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7538 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7539 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7540 else if (TARGET_64BIT
)
7541 return function_value_64 (orig_mode
, mode
, valtype
);
7543 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7547 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7548 bool outgoing ATTRIBUTE_UNUSED
)
7550 enum machine_mode mode
, orig_mode
;
7552 orig_mode
= TYPE_MODE (valtype
);
7553 mode
= type_natural_mode (valtype
, NULL
);
7554 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7557 /* Pointer function arguments and return values are promoted to
7560 static enum machine_mode
7561 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7562 int *punsignedp
, const_tree fntype
,
7565 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7567 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7570 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7574 /* Return true if a structure, union or array with MODE containing FIELD
7575 should be accessed using BLKmode. */
7578 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7580 /* Union with XFmode must be in BLKmode. */
7581 return (mode
== XFmode
7582 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7583 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7587 ix86_libcall_value (enum machine_mode mode
)
7589 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7592 /* Return true iff type is returned in memory. */
7594 static bool ATTRIBUTE_UNUSED
7595 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7599 if (mode
== BLKmode
)
7602 size
= int_size_in_bytes (type
);
7604 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7607 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7609 /* User-created vectors small enough to fit in EAX. */
7613 /* MMX/3dNow values are returned in MM0,
7614 except when it doesn't exits or the ABI prescribes otherwise. */
7616 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7618 /* SSE values are returned in XMM0, except when it doesn't exist. */
7622 /* AVX values are returned in YMM0, except when it doesn't exist. */
7633 /* OImode shouldn't be used directly. */
7634 gcc_assert (mode
!= OImode
);
7639 static bool ATTRIBUTE_UNUSED
7640 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7642 int needed_intregs
, needed_sseregs
;
7643 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7646 static bool ATTRIBUTE_UNUSED
7647 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7649 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7651 /* __m128 is returned in xmm0. */
7652 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7653 || VECTOR_FLOAT_TYPE_P (type
))
7654 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7655 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7658 /* Otherwise, the size must be exactly in [1248]. */
7659 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7663 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7665 #ifdef SUBTARGET_RETURN_IN_MEMORY
7666 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7668 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7672 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7673 return return_in_memory_ms_64 (type
, mode
);
7675 return return_in_memory_64 (type
, mode
);
7678 return return_in_memory_32 (type
, mode
);
7682 /* When returning SSE vector types, we have a choice of either
7683 (1) being abi incompatible with a -march switch, or
7684 (2) generating an error.
7685 Given no good solution, I think the safest thing is one warning.
7686 The user won't be able to use -Werror, but....
7688 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7689 called in response to actually generating a caller or callee that
7690 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7691 via aggregate_value_p for general type probing from tree-ssa. */
7694 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7696 static bool warnedsse
, warnedmmx
;
7698 if (!TARGET_64BIT
&& type
)
7700 /* Look at the return type of the function, not the function type. */
7701 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7703 if (!TARGET_SSE
&& !warnedsse
)
7706 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7709 warning (0, "SSE vector return without SSE enabled "
7714 if (!TARGET_MMX
&& !warnedmmx
)
7716 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7719 warning (0, "MMX vector return without MMX enabled "
7729 /* Create the va_list data type. */
7731 /* Returns the calling convention specific va_list date type.
7732 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7735 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7737 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7739 /* For i386 we use plain pointer to argument area. */
7740 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7741 return build_pointer_type (char_type_node
);
7743 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7744 type_decl
= build_decl (BUILTINS_LOCATION
,
7745 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7747 f_gpr
= build_decl (BUILTINS_LOCATION
,
7748 FIELD_DECL
, get_identifier ("gp_offset"),
7749 unsigned_type_node
);
7750 f_fpr
= build_decl (BUILTINS_LOCATION
,
7751 FIELD_DECL
, get_identifier ("fp_offset"),
7752 unsigned_type_node
);
7753 f_ovf
= build_decl (BUILTINS_LOCATION
,
7754 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7756 f_sav
= build_decl (BUILTINS_LOCATION
,
7757 FIELD_DECL
, get_identifier ("reg_save_area"),
7760 va_list_gpr_counter_field
= f_gpr
;
7761 va_list_fpr_counter_field
= f_fpr
;
7763 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7764 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7765 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7766 DECL_FIELD_CONTEXT (f_sav
) = record
;
7768 TYPE_STUB_DECL (record
) = type_decl
;
7769 TYPE_NAME (record
) = type_decl
;
7770 TYPE_FIELDS (record
) = f_gpr
;
7771 DECL_CHAIN (f_gpr
) = f_fpr
;
7772 DECL_CHAIN (f_fpr
) = f_ovf
;
7773 DECL_CHAIN (f_ovf
) = f_sav
;
7775 layout_type (record
);
7777 /* The correct type is an array type of one element. */
7778 return build_array_type (record
, build_index_type (size_zero_node
));
7781 /* Setup the builtin va_list data type and for 64-bit the additional
7782 calling convention specific va_list data types. */
7785 ix86_build_builtin_va_list (void)
7787 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7789 /* Initialize abi specific va_list builtin types. */
7793 if (ix86_abi
== MS_ABI
)
7795 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7796 if (TREE_CODE (t
) != RECORD_TYPE
)
7797 t
= build_variant_type_copy (t
);
7798 sysv_va_list_type_node
= t
;
7803 if (TREE_CODE (t
) != RECORD_TYPE
)
7804 t
= build_variant_type_copy (t
);
7805 sysv_va_list_type_node
= t
;
7807 if (ix86_abi
!= MS_ABI
)
7809 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7810 if (TREE_CODE (t
) != RECORD_TYPE
)
7811 t
= build_variant_type_copy (t
);
7812 ms_va_list_type_node
= t
;
7817 if (TREE_CODE (t
) != RECORD_TYPE
)
7818 t
= build_variant_type_copy (t
);
7819 ms_va_list_type_node
= t
;
7826 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7829 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7835 /* GPR size of varargs save area. */
7836 if (cfun
->va_list_gpr_size
)
7837 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7839 ix86_varargs_gpr_size
= 0;
7841 /* FPR size of varargs save area. We don't need it if we don't pass
7842 anything in SSE registers. */
7843 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7844 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7846 ix86_varargs_fpr_size
= 0;
7848 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7851 save_area
= frame_pointer_rtx
;
7852 set
= get_varargs_alias_set ();
7854 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7855 if (max
> X86_64_REGPARM_MAX
)
7856 max
= X86_64_REGPARM_MAX
;
7858 for (i
= cum
->regno
; i
< max
; i
++)
7860 mem
= gen_rtx_MEM (word_mode
,
7861 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7862 MEM_NOTRAP_P (mem
) = 1;
7863 set_mem_alias_set (mem
, set
);
7864 emit_move_insn (mem
,
7865 gen_rtx_REG (word_mode
,
7866 x86_64_int_parameter_registers
[i
]));
7869 if (ix86_varargs_fpr_size
)
7871 enum machine_mode smode
;
7874 /* Now emit code to save SSE registers. The AX parameter contains number
7875 of SSE parameter registers used to call this function, though all we
7876 actually check here is the zero/non-zero status. */
7878 label
= gen_label_rtx ();
7879 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7880 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7883 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7884 we used movdqa (i.e. TImode) instead? Perhaps even better would
7885 be if we could determine the real mode of the data, via a hook
7886 into pass_stdarg. Ignore all that for now. */
7888 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7889 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7891 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7892 if (max
> X86_64_SSE_REGPARM_MAX
)
7893 max
= X86_64_SSE_REGPARM_MAX
;
7895 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7897 mem
= plus_constant (Pmode
, save_area
,
7898 i
* 16 + ix86_varargs_gpr_size
);
7899 mem
= gen_rtx_MEM (smode
, mem
);
7900 MEM_NOTRAP_P (mem
) = 1;
7901 set_mem_alias_set (mem
, set
);
7902 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7904 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7912 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7914 alias_set_type set
= get_varargs_alias_set ();
7917 /* Reset to zero, as there might be a sysv vaarg used
7919 ix86_varargs_gpr_size
= 0;
7920 ix86_varargs_fpr_size
= 0;
7922 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7926 mem
= gen_rtx_MEM (Pmode
,
7927 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7928 i
* UNITS_PER_WORD
));
7929 MEM_NOTRAP_P (mem
) = 1;
7930 set_mem_alias_set (mem
, set
);
7932 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7933 emit_move_insn (mem
, reg
);
7938 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7939 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7942 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7943 CUMULATIVE_ARGS next_cum
;
7946 /* This argument doesn't appear to be used anymore. Which is good,
7947 because the old code here didn't suppress rtl generation. */
7948 gcc_assert (!no_rtl
);
7953 fntype
= TREE_TYPE (current_function_decl
);
7955 /* For varargs, we do not want to skip the dummy va_dcl argument.
7956 For stdargs, we do want to skip the last named argument. */
7958 if (stdarg_p (fntype
))
7959 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7962 if (cum
->call_abi
== MS_ABI
)
7963 setup_incoming_varargs_ms_64 (&next_cum
);
7965 setup_incoming_varargs_64 (&next_cum
);
7968 /* Checks if TYPE is of kind va_list char *. */
7971 is_va_list_char_pointer (tree type
)
7975 /* For 32-bit it is always true. */
7978 canonic
= ix86_canonical_va_list_type (type
);
7979 return (canonic
== ms_va_list_type_node
7980 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7983 /* Implement va_start. */
7986 ix86_va_start (tree valist
, rtx nextarg
)
7988 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7989 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7990 tree gpr
, fpr
, ovf
, sav
, t
;
7994 if (flag_split_stack
7995 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7997 unsigned int scratch_regno
;
7999 /* When we are splitting the stack, we can't refer to the stack
8000 arguments using internal_arg_pointer, because they may be on
8001 the old stack. The split stack prologue will arrange to
8002 leave a pointer to the old stack arguments in a scratch
8003 register, which we here copy to a pseudo-register. The split
8004 stack prologue can't set the pseudo-register directly because
8005 it (the prologue) runs before any registers have been saved. */
8007 scratch_regno
= split_stack_prologue_scratch_regno ();
8008 if (scratch_regno
!= INVALID_REGNUM
)
8012 reg
= gen_reg_rtx (Pmode
);
8013 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8016 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8020 push_topmost_sequence ();
8021 emit_insn_after (seq
, entry_of_function ());
8022 pop_topmost_sequence ();
8026 /* Only 64bit target needs something special. */
8027 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8029 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8030 std_expand_builtin_va_start (valist
, nextarg
);
8035 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8036 next
= expand_binop (ptr_mode
, add_optab
,
8037 cfun
->machine
->split_stack_varargs_pointer
,
8038 crtl
->args
.arg_offset_rtx
,
8039 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8040 convert_move (va_r
, next
, 0);
8045 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8046 f_fpr
= DECL_CHAIN (f_gpr
);
8047 f_ovf
= DECL_CHAIN (f_fpr
);
8048 f_sav
= DECL_CHAIN (f_ovf
);
8050 valist
= build_simple_mem_ref (valist
);
8051 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8052 /* The following should be folded into the MEM_REF offset. */
8053 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8055 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8057 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8059 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8062 /* Count number of gp and fp argument registers used. */
8063 words
= crtl
->args
.info
.words
;
8064 n_gpr
= crtl
->args
.info
.regno
;
8065 n_fpr
= crtl
->args
.info
.sse_regno
;
8067 if (cfun
->va_list_gpr_size
)
8069 type
= TREE_TYPE (gpr
);
8070 t
= build2 (MODIFY_EXPR
, type
,
8071 gpr
, build_int_cst (type
, n_gpr
* 8));
8072 TREE_SIDE_EFFECTS (t
) = 1;
8073 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8076 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8078 type
= TREE_TYPE (fpr
);
8079 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8080 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8081 TREE_SIDE_EFFECTS (t
) = 1;
8082 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8085 /* Find the overflow area. */
8086 type
= TREE_TYPE (ovf
);
8087 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8088 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8090 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8091 t
= make_tree (type
, ovf_rtx
);
8093 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8094 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8095 TREE_SIDE_EFFECTS (t
) = 1;
8096 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8098 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8100 /* Find the register save area.
8101 Prologue of the function save it right above stack frame. */
8102 type
= TREE_TYPE (sav
);
8103 t
= make_tree (type
, frame_pointer_rtx
);
8104 if (!ix86_varargs_gpr_size
)
8105 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8106 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8107 TREE_SIDE_EFFECTS (t
) = 1;
8108 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8112 /* Implement va_arg. */
8115 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8118 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8119 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8120 tree gpr
, fpr
, ovf
, sav
, t
;
8122 tree lab_false
, lab_over
= NULL_TREE
;
8127 enum machine_mode nat_mode
;
8128 unsigned int arg_boundary
;
8130 /* Only 64bit target needs something special. */
8131 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8132 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8134 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8135 f_fpr
= DECL_CHAIN (f_gpr
);
8136 f_ovf
= DECL_CHAIN (f_fpr
);
8137 f_sav
= DECL_CHAIN (f_ovf
);
8139 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8140 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8141 valist
= build_va_arg_indirect_ref (valist
);
8142 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8143 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8144 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8146 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8148 type
= build_pointer_type (type
);
8149 size
= int_size_in_bytes (type
);
8150 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8152 nat_mode
= type_natural_mode (type
, NULL
);
8161 /* Unnamed 256bit vector mode parameters are passed on stack. */
8162 if (!TARGET_64BIT_MS_ABI
)
8169 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8170 type
, 0, X86_64_REGPARM_MAX
,
8171 X86_64_SSE_REGPARM_MAX
, intreg
,
8176 /* Pull the value out of the saved registers. */
8178 addr
= create_tmp_var (ptr_type_node
, "addr");
8182 int needed_intregs
, needed_sseregs
;
8184 tree int_addr
, sse_addr
;
8186 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8187 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8189 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8191 need_temp
= (!REG_P (container
)
8192 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8193 || TYPE_ALIGN (type
) > 128));
8195 /* In case we are passing structure, verify that it is consecutive block
8196 on the register save area. If not we need to do moves. */
8197 if (!need_temp
&& !REG_P (container
))
8199 /* Verify that all registers are strictly consecutive */
8200 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8204 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8206 rtx slot
= XVECEXP (container
, 0, i
);
8207 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8208 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8216 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8218 rtx slot
= XVECEXP (container
, 0, i
);
8219 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8220 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8232 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8233 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8236 /* First ensure that we fit completely in registers. */
8239 t
= build_int_cst (TREE_TYPE (gpr
),
8240 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8241 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8242 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8243 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8244 gimplify_and_add (t
, pre_p
);
8248 t
= build_int_cst (TREE_TYPE (fpr
),
8249 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8250 + X86_64_REGPARM_MAX
* 8);
8251 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8252 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8253 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8254 gimplify_and_add (t
, pre_p
);
8257 /* Compute index to start of area used for integer regs. */
8260 /* int_addr = gpr + sav; */
8261 t
= fold_build_pointer_plus (sav
, gpr
);
8262 gimplify_assign (int_addr
, t
, pre_p
);
8266 /* sse_addr = fpr + sav; */
8267 t
= fold_build_pointer_plus (sav
, fpr
);
8268 gimplify_assign (sse_addr
, t
, pre_p
);
8272 int i
, prev_size
= 0;
8273 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8276 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8277 gimplify_assign (addr
, t
, pre_p
);
8279 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8281 rtx slot
= XVECEXP (container
, 0, i
);
8282 rtx reg
= XEXP (slot
, 0);
8283 enum machine_mode mode
= GET_MODE (reg
);
8289 tree dest_addr
, dest
;
8290 int cur_size
= GET_MODE_SIZE (mode
);
8292 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8293 prev_size
= INTVAL (XEXP (slot
, 1));
8294 if (prev_size
+ cur_size
> size
)
8296 cur_size
= size
- prev_size
;
8297 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8298 if (mode
== BLKmode
)
8301 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8302 if (mode
== GET_MODE (reg
))
8303 addr_type
= build_pointer_type (piece_type
);
8305 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8307 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8310 if (SSE_REGNO_P (REGNO (reg
)))
8312 src_addr
= sse_addr
;
8313 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8317 src_addr
= int_addr
;
8318 src_offset
= REGNO (reg
) * 8;
8320 src_addr
= fold_convert (addr_type
, src_addr
);
8321 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8323 dest_addr
= fold_convert (daddr_type
, addr
);
8324 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8325 if (cur_size
== GET_MODE_SIZE (mode
))
8327 src
= build_va_arg_indirect_ref (src_addr
);
8328 dest
= build_va_arg_indirect_ref (dest_addr
);
8330 gimplify_assign (dest
, src
, pre_p
);
8335 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8336 3, dest_addr
, src_addr
,
8337 size_int (cur_size
));
8338 gimplify_and_add (copy
, pre_p
);
8340 prev_size
+= cur_size
;
8346 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8347 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8348 gimplify_assign (gpr
, t
, pre_p
);
8353 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8354 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8355 gimplify_assign (fpr
, t
, pre_p
);
8358 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8360 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8363 /* ... otherwise out of the overflow area. */
8365 /* When we align parameter on stack for caller, if the parameter
8366 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8367 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8368 here with caller. */
8369 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8370 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8371 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8373 /* Care for on-stack alignment if needed. */
8374 if (arg_boundary
<= 64 || size
== 0)
8378 HOST_WIDE_INT align
= arg_boundary
/ 8;
8379 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8380 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8381 build_int_cst (TREE_TYPE (t
), -align
));
8384 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8385 gimplify_assign (addr
, t
, pre_p
);
8387 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8388 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8391 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8393 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8394 addr
= fold_convert (ptrtype
, addr
);
8397 addr
= build_va_arg_indirect_ref (addr
);
8398 return build_va_arg_indirect_ref (addr
);
8401 /* Return true if OPNUM's MEM should be matched
8402 in movabs* patterns. */
8405 ix86_check_movabs (rtx insn
, int opnum
)
8409 set
= PATTERN (insn
);
8410 if (GET_CODE (set
) == PARALLEL
)
8411 set
= XVECEXP (set
, 0, 0);
8412 gcc_assert (GET_CODE (set
) == SET
);
8413 mem
= XEXP (set
, opnum
);
8414 while (GET_CODE (mem
) == SUBREG
)
8415 mem
= SUBREG_REG (mem
);
8416 gcc_assert (MEM_P (mem
));
8417 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8420 /* Initialize the table of extra 80387 mathematical constants. */
8423 init_ext_80387_constants (void)
8425 static const char * cst
[5] =
8427 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8428 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8429 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8430 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8431 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8435 for (i
= 0; i
< 5; i
++)
8437 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8438 /* Ensure each constant is rounded to XFmode precision. */
8439 real_convert (&ext_80387_constants_table
[i
],
8440 XFmode
, &ext_80387_constants_table
[i
]);
8443 ext_80387_constants_init
= 1;
8446 /* Return non-zero if the constant is something that
8447 can be loaded with a special instruction. */
8450 standard_80387_constant_p (rtx x
)
8452 enum machine_mode mode
= GET_MODE (x
);
8456 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8459 if (x
== CONST0_RTX (mode
))
8461 if (x
== CONST1_RTX (mode
))
8464 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8466 /* For XFmode constants, try to find a special 80387 instruction when
8467 optimizing for size or on those CPUs that benefit from them. */
8469 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8473 if (! ext_80387_constants_init
)
8474 init_ext_80387_constants ();
8476 for (i
= 0; i
< 5; i
++)
8477 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8481 /* Load of the constant -0.0 or -1.0 will be split as
8482 fldz;fchs or fld1;fchs sequence. */
8483 if (real_isnegzero (&r
))
8485 if (real_identical (&r
, &dconstm1
))
8491 /* Return the opcode of the special instruction to be used to load
8495 standard_80387_constant_opcode (rtx x
)
8497 switch (standard_80387_constant_p (x
))
8521 /* Return the CONST_DOUBLE representing the 80387 constant that is
8522 loaded by the specified special instruction. The argument IDX
8523 matches the return value from standard_80387_constant_p. */
8526 standard_80387_constant_rtx (int idx
)
8530 if (! ext_80387_constants_init
)
8531 init_ext_80387_constants ();
8547 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8551 /* Return 1 if X is all 0s and 2 if x is all 1s
8552 in supported SSE/AVX vector mode. */
8555 standard_sse_constant_p (rtx x
)
8557 enum machine_mode mode
= GET_MODE (x
);
8559 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8561 if (vector_all_ones_operand (x
, mode
))
8583 /* Return the opcode of the special instruction to be used to load
8587 standard_sse_constant_opcode (rtx insn
, rtx x
)
8589 switch (standard_sse_constant_p (x
))
8592 switch (get_attr_mode (insn
))
8595 return "%vpxor\t%0, %d0";
8597 return "%vxorpd\t%0, %d0";
8599 return "%vxorps\t%0, %d0";
8602 return "vpxor\t%x0, %x0, %x0";
8604 return "vxorpd\t%x0, %x0, %x0";
8606 return "vxorps\t%x0, %x0, %x0";
8613 if (get_attr_mode (insn
) == MODE_XI
8614 || get_attr_mode (insn
) == MODE_V8DF
8615 || get_attr_mode (insn
) == MODE_V16SF
)
8616 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8618 return "vpcmpeqd\t%0, %0, %0";
8620 return "pcmpeqd\t%0, %0";
8628 /* Returns true if OP contains a symbol reference */
8631 symbolic_reference_mentioned_p (rtx op
)
8636 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8639 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8640 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8646 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8647 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8651 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8658 /* Return true if it is appropriate to emit `ret' instructions in the
8659 body of a function. Do this only if the epilogue is simple, needing a
8660 couple of insns. Prior to reloading, we can't tell how many registers
8661 must be saved, so return false then. Return false if there is no frame
8662 marker to de-allocate. */
8665 ix86_can_use_return_insn_p (void)
8667 struct ix86_frame frame
;
8669 if (! reload_completed
|| frame_pointer_needed
)
8672 /* Don't allow more than 32k pop, since that's all we can do
8673 with one instruction. */
8674 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8677 ix86_compute_frame_layout (&frame
);
8678 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8679 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8682 /* Value should be nonzero if functions must have frame pointers.
8683 Zero means the frame pointer need not be set up (and parms may
8684 be accessed via the stack pointer) in functions that seem suitable. */
8687 ix86_frame_pointer_required (void)
8689 /* If we accessed previous frames, then the generated code expects
8690 to be able to access the saved ebp value in our frame. */
8691 if (cfun
->machine
->accesses_prev_frame
)
8694 /* Several x86 os'es need a frame pointer for other reasons,
8695 usually pertaining to setjmp. */
8696 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8699 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8700 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8703 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8704 allocation is 4GB. */
8705 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8708 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8709 turns off the frame pointer by default. Turn it back on now if
8710 we've not got a leaf function. */
8711 if (TARGET_OMIT_LEAF_FRAME_POINTER
8713 || ix86_current_function_calls_tls_descriptor
))
8716 if (crtl
->profile
&& !flag_fentry
)
8722 /* Record that the current function accesses previous call frames. */
8725 ix86_setup_frame_addresses (void)
8727 cfun
->machine
->accesses_prev_frame
= 1;
8730 #ifndef USE_HIDDEN_LINKONCE
8731 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8732 # define USE_HIDDEN_LINKONCE 1
8734 # define USE_HIDDEN_LINKONCE 0
8738 static int pic_labels_used
;
8740 /* Fills in the label name that should be used for a pc thunk for
8741 the given register. */
8744 get_pc_thunk_name (char name
[32], unsigned int regno
)
8746 gcc_assert (!TARGET_64BIT
);
8748 if (USE_HIDDEN_LINKONCE
)
8749 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8751 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8755 /* This function generates code for -fpic that loads %ebx with
8756 the return address of the caller and then returns. */
8759 ix86_code_end (void)
8764 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8769 if (!(pic_labels_used
& (1 << regno
)))
8772 get_pc_thunk_name (name
, regno
);
8774 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8775 get_identifier (name
),
8776 build_function_type_list (void_type_node
, NULL_TREE
));
8777 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8778 NULL_TREE
, void_type_node
);
8779 TREE_PUBLIC (decl
) = 1;
8780 TREE_STATIC (decl
) = 1;
8781 DECL_IGNORED_P (decl
) = 1;
8786 switch_to_section (darwin_sections
[text_coal_section
]);
8787 fputs ("\t.weak_definition\t", asm_out_file
);
8788 assemble_name (asm_out_file
, name
);
8789 fputs ("\n\t.private_extern\t", asm_out_file
);
8790 assemble_name (asm_out_file
, name
);
8791 putc ('\n', asm_out_file
);
8792 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8793 DECL_WEAK (decl
) = 1;
8797 if (USE_HIDDEN_LINKONCE
)
8799 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8801 targetm
.asm_out
.unique_section (decl
, 0);
8802 switch_to_section (get_named_section (decl
, NULL
, 0));
8804 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8805 fputs ("\t.hidden\t", asm_out_file
);
8806 assemble_name (asm_out_file
, name
);
8807 putc ('\n', asm_out_file
);
8808 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8812 switch_to_section (text_section
);
8813 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8816 DECL_INITIAL (decl
) = make_node (BLOCK
);
8817 current_function_decl
= decl
;
8818 init_function_start (decl
);
8819 first_function_block_is_cold
= false;
8820 /* Make sure unwind info is emitted for the thunk if needed. */
8821 final_start_function (emit_barrier (), asm_out_file
, 1);
8823 /* Pad stack IP move with 4 instructions (two NOPs count
8824 as one instruction). */
8825 if (TARGET_PAD_SHORT_FUNCTION
)
8830 fputs ("\tnop\n", asm_out_file
);
8833 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8834 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8835 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8836 fputs ("\tret\n", asm_out_file
);
8837 final_end_function ();
8838 init_insn_lengths ();
8839 free_after_compilation (cfun
);
8841 current_function_decl
= NULL
;
8844 if (flag_split_stack
)
8845 file_end_indicate_split_stack ();
8848 /* Emit code for the SET_GOT patterns. */
8851 output_set_got (rtx dest
, rtx label
)
8857 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8859 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8860 xops
[2] = gen_rtx_MEM (Pmode
,
8861 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8862 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8864 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8865 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8866 an unadorned address. */
8867 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8868 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8869 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8873 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8878 /* We don't need a pic base, we're not producing pic. */
8881 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8882 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8883 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8884 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8889 get_pc_thunk_name (name
, REGNO (dest
));
8890 pic_labels_used
|= 1 << REGNO (dest
);
8892 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8893 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8894 output_asm_insn ("call\t%X2", xops
);
8897 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
8898 This is what will be referenced by the Mach-O PIC subsystem. */
8899 if (machopic_should_output_picbase_label () || !label
)
8900 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8902 /* When we are restoring the pic base at the site of a nonlocal label,
8903 and we decided to emit the pic base above, we will still output a
8904 local label used for calculating the correction offset (even though
8905 the offset will be 0 in that case). */
8907 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8908 CODE_LABEL_NUMBER (label
));
8913 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8918 /* Generate an "push" pattern for input ARG. */
8923 struct machine_function
*m
= cfun
->machine
;
8925 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8926 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8927 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8929 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8930 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8932 return gen_rtx_SET (VOIDmode
,
8933 gen_rtx_MEM (word_mode
,
8934 gen_rtx_PRE_DEC (Pmode
,
8935 stack_pointer_rtx
)),
8939 /* Generate an "pop" pattern for input ARG. */
8944 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8945 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8947 return gen_rtx_SET (VOIDmode
,
8949 gen_rtx_MEM (word_mode
,
8950 gen_rtx_POST_INC (Pmode
,
8951 stack_pointer_rtx
)));
8954 /* Return >= 0 if there is an unused call-clobbered register available
8955 for the entire function. */
8958 ix86_select_alt_pic_regnum (void)
8962 && !ix86_current_function_calls_tls_descriptor
)
8965 /* Can't use the same register for both PIC and DRAP. */
8967 drap
= REGNO (crtl
->drap_reg
);
8970 for (i
= 2; i
>= 0; --i
)
8971 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8975 return INVALID_REGNUM
;
8978 /* Return TRUE if we need to save REGNO. */
8981 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8983 if (pic_offset_table_rtx
8984 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8985 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8987 || crtl
->calls_eh_return
8988 || crtl
->uses_const_pool
8989 || cfun
->has_nonlocal_label
))
8990 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8992 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8997 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8998 if (test
== INVALID_REGNUM
)
9005 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9008 return (df_regs_ever_live_p (regno
)
9009 && !call_used_regs
[regno
]
9010 && !fixed_regs
[regno
]
9011 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9014 /* Return number of saved general prupose registers. */
9017 ix86_nsaved_regs (void)
9022 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9023 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9028 /* Return number of saved SSE registrers. */
9031 ix86_nsaved_sseregs (void)
9036 if (!TARGET_64BIT_MS_ABI
)
9038 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9039 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9044 /* Given FROM and TO register numbers, say whether this elimination is
9045 allowed. If stack alignment is needed, we can only replace argument
9046 pointer with hard frame pointer, or replace frame pointer with stack
9047 pointer. Otherwise, frame pointer elimination is automatically
9048 handled and all other eliminations are valid. */
9051 ix86_can_eliminate (const int from
, const int to
)
9053 if (stack_realign_fp
)
9054 return ((from
== ARG_POINTER_REGNUM
9055 && to
== HARD_FRAME_POINTER_REGNUM
)
9056 || (from
== FRAME_POINTER_REGNUM
9057 && to
== STACK_POINTER_REGNUM
));
9059 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9062 /* Return the offset between two registers, one to be eliminated, and the other
9063 its replacement, at the start of a routine. */
9066 ix86_initial_elimination_offset (int from
, int to
)
9068 struct ix86_frame frame
;
9069 ix86_compute_frame_layout (&frame
);
9071 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9072 return frame
.hard_frame_pointer_offset
;
9073 else if (from
== FRAME_POINTER_REGNUM
9074 && to
== HARD_FRAME_POINTER_REGNUM
)
9075 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9078 gcc_assert (to
== STACK_POINTER_REGNUM
);
9080 if (from
== ARG_POINTER_REGNUM
)
9081 return frame
.stack_pointer_offset
;
9083 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9084 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9088 /* In a dynamically-aligned function, we can't know the offset from
9089 stack pointer to frame pointer, so we must ensure that setjmp
9090 eliminates fp against the hard fp (%ebp) rather than trying to
9091 index from %esp up to the top of the frame across a gap that is
9092 of unknown (at compile-time) size. */
9094 ix86_builtin_setjmp_frame_value (void)
9096 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9099 /* When using -fsplit-stack, the allocation routines set a field in
9100 the TCB to the bottom of the stack plus this much space, measured
9103 #define SPLIT_STACK_AVAILABLE 256
9105 /* Fill structure ix86_frame about frame of currently computed function. */
9108 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9110 unsigned HOST_WIDE_INT stack_alignment_needed
;
9111 HOST_WIDE_INT offset
;
9112 unsigned HOST_WIDE_INT preferred_alignment
;
9113 HOST_WIDE_INT size
= get_frame_size ();
9114 HOST_WIDE_INT to_allocate
;
9116 frame
->nregs
= ix86_nsaved_regs ();
9117 frame
->nsseregs
= ix86_nsaved_sseregs ();
9119 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9120 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9122 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9123 function prologues and leaf. */
9124 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9125 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9126 || ix86_current_function_calls_tls_descriptor
))
9128 preferred_alignment
= 16;
9129 stack_alignment_needed
= 16;
9130 crtl
->preferred_stack_boundary
= 128;
9131 crtl
->stack_alignment_needed
= 128;
9134 gcc_assert (!size
|| stack_alignment_needed
);
9135 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9136 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9138 /* For SEH we have to limit the amount of code movement into the prologue.
9139 At present we do this via a BLOCKAGE, at which point there's very little
9140 scheduling that can be done, which means that there's very little point
9141 in doing anything except PUSHs. */
9143 cfun
->machine
->use_fast_prologue_epilogue
= false;
9145 /* During reload iteration the amount of registers saved can change.
9146 Recompute the value as needed. Do not recompute when amount of registers
9147 didn't change as reload does multiple calls to the function and does not
9148 expect the decision to change within single iteration. */
9149 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR
)
9150 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9152 int count
= frame
->nregs
;
9153 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9155 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9157 /* The fast prologue uses move instead of push to save registers. This
9158 is significantly longer, but also executes faster as modern hardware
9159 can execute the moves in parallel, but can't do that for push/pop.
9161 Be careful about choosing what prologue to emit: When function takes
9162 many instructions to execute we may use slow version as well as in
9163 case function is known to be outside hot spot (this is known with
9164 feedback only). Weight the size of function by number of registers
9165 to save as it is cheap to use one or two push instructions but very
9166 slow to use many of them. */
9168 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9169 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9170 || (flag_branch_probabilities
9171 && node
->frequency
< NODE_FREQUENCY_HOT
))
9172 cfun
->machine
->use_fast_prologue_epilogue
= false;
9174 cfun
->machine
->use_fast_prologue_epilogue
9175 = !expensive_function_p (count
);
9178 frame
->save_regs_using_mov
9179 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9180 /* If static stack checking is enabled and done with probes,
9181 the registers need to be saved before allocating the frame. */
9182 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9184 /* Skip return address. */
9185 offset
= UNITS_PER_WORD
;
9187 /* Skip pushed static chain. */
9188 if (ix86_static_chain_on_stack
)
9189 offset
+= UNITS_PER_WORD
;
9191 /* Skip saved base pointer. */
9192 if (frame_pointer_needed
)
9193 offset
+= UNITS_PER_WORD
;
9194 frame
->hfp_save_offset
= offset
;
9196 /* The traditional frame pointer location is at the top of the frame. */
9197 frame
->hard_frame_pointer_offset
= offset
;
9199 /* Register save area */
9200 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9201 frame
->reg_save_offset
= offset
;
9203 /* On SEH target, registers are pushed just before the frame pointer
9206 frame
->hard_frame_pointer_offset
= offset
;
9208 /* Align and set SSE register save area. */
9209 if (frame
->nsseregs
)
9211 /* The only ABI that has saved SSE registers (Win64) also has a
9212 16-byte aligned default stack, and thus we don't need to be
9213 within the re-aligned local stack frame to save them. */
9214 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9215 offset
= (offset
+ 16 - 1) & -16;
9216 offset
+= frame
->nsseregs
* 16;
9218 frame
->sse_reg_save_offset
= offset
;
9220 /* The re-aligned stack starts here. Values before this point are not
9221 directly comparable with values below this point. In order to make
9222 sure that no value happens to be the same before and after, force
9223 the alignment computation below to add a non-zero value. */
9224 if (stack_realign_fp
)
9225 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9228 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9229 offset
+= frame
->va_arg_size
;
9231 /* Align start of frame for local function. */
9232 if (stack_realign_fp
9233 || offset
!= frame
->sse_reg_save_offset
9236 || cfun
->calls_alloca
9237 || ix86_current_function_calls_tls_descriptor
)
9238 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9240 /* Frame pointer points here. */
9241 frame
->frame_pointer_offset
= offset
;
9245 /* Add outgoing arguments area. Can be skipped if we eliminated
9246 all the function calls as dead code.
9247 Skipping is however impossible when function calls alloca. Alloca
9248 expander assumes that last crtl->outgoing_args_size
9249 of stack frame are unused. */
9250 if (ACCUMULATE_OUTGOING_ARGS
9251 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9252 || ix86_current_function_calls_tls_descriptor
))
9254 offset
+= crtl
->outgoing_args_size
;
9255 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9258 frame
->outgoing_arguments_size
= 0;
9260 /* Align stack boundary. Only needed if we're calling another function
9262 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9263 || ix86_current_function_calls_tls_descriptor
)
9264 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9266 /* We've reached end of stack frame. */
9267 frame
->stack_pointer_offset
= offset
;
9269 /* Size prologue needs to allocate. */
9270 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9272 if ((!to_allocate
&& frame
->nregs
<= 1)
9273 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9274 frame
->save_regs_using_mov
= false;
9276 if (ix86_using_red_zone ()
9277 && crtl
->sp_is_unchanging
9279 && !ix86_current_function_calls_tls_descriptor
)
9281 frame
->red_zone_size
= to_allocate
;
9282 if (frame
->save_regs_using_mov
)
9283 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9284 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9285 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9288 frame
->red_zone_size
= 0;
9289 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9291 /* The SEH frame pointer location is near the bottom of the frame.
9292 This is enforced by the fact that the difference between the
9293 stack pointer and the frame pointer is limited to 240 bytes in
9294 the unwind data structure. */
9299 /* If we can leave the frame pointer where it is, do so. Also, returns
9300 the establisher frame for __builtin_frame_address (0). */
9301 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9302 if (diff
<= SEH_MAX_FRAME_SIZE
9303 && (diff
> 240 || (diff
& 15) != 0)
9304 && !crtl
->accesses_prior_frames
)
9306 /* Ideally we'd determine what portion of the local stack frame
9307 (within the constraint of the lowest 240) is most heavily used.
9308 But without that complication, simply bias the frame pointer
9309 by 128 bytes so as to maximize the amount of the local stack
9310 frame that is addressable with 8-bit offsets. */
9311 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9316 /* This is semi-inlined memory_address_length, but simplified
9317 since we know that we're always dealing with reg+offset, and
9318 to avoid having to create and discard all that rtl. */
9321 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9327 /* EBP and R13 cannot be encoded without an offset. */
9328 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9330 else if (IN_RANGE (offset
, -128, 127))
9333 /* ESP and R12 must be encoded with a SIB byte. */
9334 if (regno
== SP_REG
|| regno
== R12_REG
)
9340 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9341 The valid base registers are taken from CFUN->MACHINE->FS. */
9344 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9346 const struct machine_function
*m
= cfun
->machine
;
9347 rtx base_reg
= NULL
;
9348 HOST_WIDE_INT base_offset
= 0;
9350 if (m
->use_fast_prologue_epilogue
)
9352 /* Choose the base register most likely to allow the most scheduling
9353 opportunities. Generally FP is valid throughout the function,
9354 while DRAP must be reloaded within the epilogue. But choose either
9355 over the SP due to increased encoding size. */
9359 base_reg
= hard_frame_pointer_rtx
;
9360 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9362 else if (m
->fs
.drap_valid
)
9364 base_reg
= crtl
->drap_reg
;
9365 base_offset
= 0 - cfa_offset
;
9367 else if (m
->fs
.sp_valid
)
9369 base_reg
= stack_pointer_rtx
;
9370 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9375 HOST_WIDE_INT toffset
;
9378 /* Choose the base register with the smallest address encoding.
9379 With a tie, choose FP > DRAP > SP. */
9382 base_reg
= stack_pointer_rtx
;
9383 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9384 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9386 if (m
->fs
.drap_valid
)
9388 toffset
= 0 - cfa_offset
;
9389 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9392 base_reg
= crtl
->drap_reg
;
9393 base_offset
= toffset
;
9399 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9400 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9403 base_reg
= hard_frame_pointer_rtx
;
9404 base_offset
= toffset
;
9409 gcc_assert (base_reg
!= NULL
);
9411 return plus_constant (Pmode
, base_reg
, base_offset
);
9414 /* Emit code to save registers in the prologue. */
9417 ix86_emit_save_regs (void)
9422 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9423 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9425 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9426 RTX_FRAME_RELATED_P (insn
) = 1;
9430 /* Emit a single register save at CFA - CFA_OFFSET. */
9433 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9434 HOST_WIDE_INT cfa_offset
)
9436 struct machine_function
*m
= cfun
->machine
;
9437 rtx reg
= gen_rtx_REG (mode
, regno
);
9438 rtx mem
, addr
, base
, insn
;
9440 addr
= choose_baseaddr (cfa_offset
);
9441 mem
= gen_frame_mem (mode
, addr
);
9443 /* For SSE saves, we need to indicate the 128-bit alignment. */
9444 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9446 insn
= emit_move_insn (mem
, reg
);
9447 RTX_FRAME_RELATED_P (insn
) = 1;
9450 if (GET_CODE (base
) == PLUS
)
9451 base
= XEXP (base
, 0);
9452 gcc_checking_assert (REG_P (base
));
9454 /* When saving registers into a re-aligned local stack frame, avoid
9455 any tricky guessing by dwarf2out. */
9456 if (m
->fs
.realigned
)
9458 gcc_checking_assert (stack_realign_drap
);
9460 if (regno
== REGNO (crtl
->drap_reg
))
9462 /* A bit of a hack. We force the DRAP register to be saved in
9463 the re-aligned stack frame, which provides us with a copy
9464 of the CFA that will last past the prologue. Install it. */
9465 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9466 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9467 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9468 mem
= gen_rtx_MEM (mode
, addr
);
9469 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9473 /* The frame pointer is a stable reference within the
9474 aligned frame. Use it. */
9475 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9476 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9477 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9478 mem
= gen_rtx_MEM (mode
, addr
);
9479 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9480 gen_rtx_SET (VOIDmode
, mem
, reg
));
9484 /* The memory may not be relative to the current CFA register,
9485 which means that we may need to generate a new pattern for
9486 use by the unwind info. */
9487 else if (base
!= m
->fs
.cfa_reg
)
9489 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9490 m
->fs
.cfa_offset
- cfa_offset
);
9491 mem
= gen_rtx_MEM (mode
, addr
);
9492 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9496 /* Emit code to save registers using MOV insns.
9497 First register is stored at CFA - CFA_OFFSET. */
9499 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9503 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9504 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9506 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9507 cfa_offset
-= UNITS_PER_WORD
;
9511 /* Emit code to save SSE registers using MOV insns.
9512 First register is stored at CFA - CFA_OFFSET. */
9514 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9518 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9519 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9521 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9526 static GTY(()) rtx queued_cfa_restores
;
9528 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9529 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9530 Don't add the note if the previously saved value will be left untouched
9531 within stack red-zone till return, as unwinders can find the same value
9532 in the register and on the stack. */
9535 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9537 if (!crtl
->shrink_wrapped
9538 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9543 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9544 RTX_FRAME_RELATED_P (insn
) = 1;
9548 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9551 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9554 ix86_add_queued_cfa_restore_notes (rtx insn
)
9557 if (!queued_cfa_restores
)
9559 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9561 XEXP (last
, 1) = REG_NOTES (insn
);
9562 REG_NOTES (insn
) = queued_cfa_restores
;
9563 queued_cfa_restores
= NULL_RTX
;
9564 RTX_FRAME_RELATED_P (insn
) = 1;
9567 /* Expand prologue or epilogue stack adjustment.
9568 The pattern exist to put a dependency on all ebp-based memory accesses.
9569 STYLE should be negative if instructions should be marked as frame related,
9570 zero if %r11 register is live and cannot be freely used and positive
9574 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9575 int style
, bool set_cfa
)
9577 struct machine_function
*m
= cfun
->machine
;
9579 bool add_frame_related_expr
= false;
9581 if (Pmode
== SImode
)
9582 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9583 else if (x86_64_immediate_operand (offset
, DImode
))
9584 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9588 /* r11 is used by indirect sibcall return as well, set before the
9589 epilogue and used after the epilogue. */
9591 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9594 gcc_assert (src
!= hard_frame_pointer_rtx
9595 && dest
!= hard_frame_pointer_rtx
);
9596 tmp
= hard_frame_pointer_rtx
;
9598 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9600 add_frame_related_expr
= true;
9602 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9605 insn
= emit_insn (insn
);
9607 ix86_add_queued_cfa_restore_notes (insn
);
9613 gcc_assert (m
->fs
.cfa_reg
== src
);
9614 m
->fs
.cfa_offset
+= INTVAL (offset
);
9615 m
->fs
.cfa_reg
= dest
;
9617 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9618 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9619 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9620 RTX_FRAME_RELATED_P (insn
) = 1;
9624 RTX_FRAME_RELATED_P (insn
) = 1;
9625 if (add_frame_related_expr
)
9627 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9628 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9629 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9633 if (dest
== stack_pointer_rtx
)
9635 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9636 bool valid
= m
->fs
.sp_valid
;
9638 if (src
== hard_frame_pointer_rtx
)
9640 valid
= m
->fs
.fp_valid
;
9641 ooffset
= m
->fs
.fp_offset
;
9643 else if (src
== crtl
->drap_reg
)
9645 valid
= m
->fs
.drap_valid
;
9650 /* Else there are two possibilities: SP itself, which we set
9651 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9652 taken care of this by hand along the eh_return path. */
9653 gcc_checking_assert (src
== stack_pointer_rtx
9654 || offset
== const0_rtx
);
9657 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9658 m
->fs
.sp_valid
= valid
;
9662 /* Find an available register to be used as dynamic realign argument
9663 pointer regsiter. Such a register will be written in prologue and
9664 used in begin of body, so it must not be
9665 1. parameter passing register.
9667 We reuse static-chain register if it is available. Otherwise, we
9668 use DI for i386 and R13 for x86-64. We chose R13 since it has
9671 Return: the regno of chosen register. */
9674 find_drap_reg (void)
9676 tree decl
= cfun
->decl
;
9680 /* Use R13 for nested function or function need static chain.
9681 Since function with tail call may use any caller-saved
9682 registers in epilogue, DRAP must not use caller-saved
9683 register in such case. */
9684 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9691 /* Use DI for nested function or function need static chain.
9692 Since function with tail call may use any caller-saved
9693 registers in epilogue, DRAP must not use caller-saved
9694 register in such case. */
9695 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9698 /* Reuse static chain register if it isn't used for parameter
9700 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9702 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9703 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9710 /* Return minimum incoming stack alignment. */
9713 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9715 unsigned int incoming_stack_boundary
;
9717 /* Prefer the one specified at command line. */
9718 if (ix86_user_incoming_stack_boundary
)
9719 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9720 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9721 if -mstackrealign is used, it isn't used for sibcall check and
9722 estimated stack alignment is 128bit. */
9725 && ix86_force_align_arg_pointer
9726 && crtl
->stack_alignment_estimated
== 128)
9727 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9729 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9731 /* Incoming stack alignment can be changed on individual functions
9732 via force_align_arg_pointer attribute. We use the smallest
9733 incoming stack boundary. */
9734 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9735 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9736 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9737 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9739 /* The incoming stack frame has to be aligned at least at
9740 parm_stack_boundary. */
9741 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9742 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9744 /* Stack at entrance of main is aligned by runtime. We use the
9745 smallest incoming stack boundary. */
9746 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9747 && DECL_NAME (current_function_decl
)
9748 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9749 && DECL_FILE_SCOPE_P (current_function_decl
))
9750 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9752 return incoming_stack_boundary
;
9755 /* Update incoming stack boundary and estimated stack alignment. */
9758 ix86_update_stack_boundary (void)
9760 ix86_incoming_stack_boundary
9761 = ix86_minimum_incoming_stack_boundary (false);
9763 /* x86_64 vararg needs 16byte stack alignment for register save
9767 && crtl
->stack_alignment_estimated
< 128)
9768 crtl
->stack_alignment_estimated
= 128;
9771 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9772 needed or an rtx for DRAP otherwise. */
9775 ix86_get_drap_rtx (void)
9777 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9778 crtl
->need_drap
= true;
9780 if (stack_realign_drap
)
9782 /* Assign DRAP to vDRAP and returns vDRAP */
9783 unsigned int regno
= find_drap_reg ();
9788 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9789 crtl
->drap_reg
= arg_ptr
;
9792 drap_vreg
= copy_to_reg (arg_ptr
);
9796 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9799 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9800 RTX_FRAME_RELATED_P (insn
) = 1;
9808 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9811 ix86_internal_arg_pointer (void)
9813 return virtual_incoming_args_rtx
;
9816 struct scratch_reg
{
9821 /* Return a short-lived scratch register for use on function entry.
9822 In 32-bit mode, it is valid only after the registers are saved
9823 in the prologue. This register must be released by means of
9824 release_scratch_register_on_entry once it is dead. */
9827 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9835 /* We always use R11 in 64-bit mode. */
9840 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9842 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9844 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9845 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9846 int regparm
= ix86_function_regparm (fntype
, decl
);
9848 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9850 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9851 for the static chain register. */
9852 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9853 && drap_regno
!= AX_REG
)
9855 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9856 for the static chain register. */
9857 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9859 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9861 /* ecx is the static chain register. */
9862 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9864 && drap_regno
!= CX_REG
)
9866 else if (ix86_save_reg (BX_REG
, true))
9868 /* esi is the static chain register. */
9869 else if (!(regparm
== 3 && static_chain_p
)
9870 && ix86_save_reg (SI_REG
, true))
9872 else if (ix86_save_reg (DI_REG
, true))
9876 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9881 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9884 rtx insn
= emit_insn (gen_push (sr
->reg
));
9885 RTX_FRAME_RELATED_P (insn
) = 1;
9889 /* Release a scratch register obtained from the preceding function. */
9892 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9896 struct machine_function
*m
= cfun
->machine
;
9897 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9899 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9900 RTX_FRAME_RELATED_P (insn
) = 1;
9901 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9902 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9903 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9904 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9908 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9910 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9913 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9915 /* We skip the probe for the first interval + a small dope of 4 words and
9916 probe that many bytes past the specified size to maintain a protection
9917 area at the botton of the stack. */
9918 const int dope
= 4 * UNITS_PER_WORD
;
9919 rtx size_rtx
= GEN_INT (size
), last
;
9921 /* See if we have a constant small number of probes to generate. If so,
9922 that's the easy case. The run-time loop is made up of 11 insns in the
9923 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9924 for n # of intervals. */
9925 if (size
<= 5 * PROBE_INTERVAL
)
9927 HOST_WIDE_INT i
, adjust
;
9928 bool first_probe
= true;
9930 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9931 values of N from 1 until it exceeds SIZE. If only one probe is
9932 needed, this will not generate any code. Then adjust and probe
9933 to PROBE_INTERVAL + SIZE. */
9934 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9938 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9939 first_probe
= false;
9942 adjust
= PROBE_INTERVAL
;
9944 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9945 plus_constant (Pmode
, stack_pointer_rtx
,
9947 emit_stack_probe (stack_pointer_rtx
);
9951 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9953 adjust
= size
+ PROBE_INTERVAL
- i
;
9955 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9956 plus_constant (Pmode
, stack_pointer_rtx
,
9958 emit_stack_probe (stack_pointer_rtx
);
9960 /* Adjust back to account for the additional first interval. */
9961 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9962 plus_constant (Pmode
, stack_pointer_rtx
,
9963 PROBE_INTERVAL
+ dope
)));
9966 /* Otherwise, do the same as above, but in a loop. Note that we must be
9967 extra careful with variables wrapping around because we might be at
9968 the very top (or the very bottom) of the address space and we have
9969 to be able to handle this case properly; in particular, we use an
9970 equality test for the loop condition. */
9973 HOST_WIDE_INT rounded_size
;
9974 struct scratch_reg sr
;
9976 get_scratch_register_on_entry (&sr
);
9979 /* Step 1: round SIZE to the previous multiple of the interval. */
9981 rounded_size
= size
& -PROBE_INTERVAL
;
9984 /* Step 2: compute initial and final value of the loop counter. */
9986 /* SP = SP_0 + PROBE_INTERVAL. */
9987 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9988 plus_constant (Pmode
, stack_pointer_rtx
,
9989 - (PROBE_INTERVAL
+ dope
))));
9991 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9992 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9993 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9994 gen_rtx_PLUS (Pmode
, sr
.reg
,
9995 stack_pointer_rtx
)));
10000 while (SP != LAST_ADDR)
10002 SP = SP + PROBE_INTERVAL
10006 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10007 values of N from 1 until it is equal to ROUNDED_SIZE. */
10009 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10012 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10013 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10015 if (size
!= rounded_size
)
10017 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10018 plus_constant (Pmode
, stack_pointer_rtx
,
10019 rounded_size
- size
)));
10020 emit_stack_probe (stack_pointer_rtx
);
10023 /* Adjust back to account for the additional first interval. */
10024 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10025 plus_constant (Pmode
, stack_pointer_rtx
,
10026 PROBE_INTERVAL
+ dope
)));
10028 release_scratch_register_on_entry (&sr
);
10031 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10033 /* Even if the stack pointer isn't the CFA register, we need to correctly
10034 describe the adjustments made to it, in particular differentiate the
10035 frame-related ones from the frame-unrelated ones. */
10038 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10039 XVECEXP (expr
, 0, 0)
10040 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10041 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10042 XVECEXP (expr
, 0, 1)
10043 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10044 plus_constant (Pmode
, stack_pointer_rtx
,
10045 PROBE_INTERVAL
+ dope
+ size
));
10046 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10047 RTX_FRAME_RELATED_P (last
) = 1;
10049 cfun
->machine
->fs
.sp_offset
+= size
;
10052 /* Make sure nothing is scheduled before we are done. */
10053 emit_insn (gen_blockage ());
10056 /* Adjust the stack pointer up to REG while probing it. */
10059 output_adjust_stack_and_probe (rtx reg
)
10061 static int labelno
= 0;
10062 char loop_lab
[32], end_lab
[32];
10065 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10066 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10068 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10070 /* Jump to END_LAB if SP == LAST_ADDR. */
10071 xops
[0] = stack_pointer_rtx
;
10073 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10074 fputs ("\tje\t", asm_out_file
);
10075 assemble_name_raw (asm_out_file
, end_lab
);
10076 fputc ('\n', asm_out_file
);
10078 /* SP = SP + PROBE_INTERVAL. */
10079 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10080 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10083 xops
[1] = const0_rtx
;
10084 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10086 fprintf (asm_out_file
, "\tjmp\t");
10087 assemble_name_raw (asm_out_file
, loop_lab
);
10088 fputc ('\n', asm_out_file
);
10090 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10095 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10096 inclusive. These are offsets from the current stack pointer. */
10099 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10101 /* See if we have a constant small number of probes to generate. If so,
10102 that's the easy case. The run-time loop is made up of 7 insns in the
10103 generic case while the compile-time loop is made up of n insns for n #
10105 if (size
<= 7 * PROBE_INTERVAL
)
10109 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10110 it exceeds SIZE. If only one probe is needed, this will not
10111 generate any code. Then probe at FIRST + SIZE. */
10112 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10113 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10116 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10120 /* Otherwise, do the same as above, but in a loop. Note that we must be
10121 extra careful with variables wrapping around because we might be at
10122 the very top (or the very bottom) of the address space and we have
10123 to be able to handle this case properly; in particular, we use an
10124 equality test for the loop condition. */
10127 HOST_WIDE_INT rounded_size
, last
;
10128 struct scratch_reg sr
;
10130 get_scratch_register_on_entry (&sr
);
10133 /* Step 1: round SIZE to the previous multiple of the interval. */
10135 rounded_size
= size
& -PROBE_INTERVAL
;
10138 /* Step 2: compute initial and final value of the loop counter. */
10140 /* TEST_OFFSET = FIRST. */
10141 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10143 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10144 last
= first
+ rounded_size
;
10147 /* Step 3: the loop
10149 while (TEST_ADDR != LAST_ADDR)
10151 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10155 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10156 until it is equal to ROUNDED_SIZE. */
10158 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10161 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10162 that SIZE is equal to ROUNDED_SIZE. */
10164 if (size
!= rounded_size
)
10165 emit_stack_probe (plus_constant (Pmode
,
10166 gen_rtx_PLUS (Pmode
,
10169 rounded_size
- size
));
10171 release_scratch_register_on_entry (&sr
);
10174 /* Make sure nothing is scheduled before we are done. */
10175 emit_insn (gen_blockage ());
10178 /* Probe a range of stack addresses from REG to END, inclusive. These are
10179 offsets from the current stack pointer. */
10182 output_probe_stack_range (rtx reg
, rtx end
)
10184 static int labelno
= 0;
10185 char loop_lab
[32], end_lab
[32];
10188 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10189 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10191 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10193 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10196 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10197 fputs ("\tje\t", asm_out_file
);
10198 assemble_name_raw (asm_out_file
, end_lab
);
10199 fputc ('\n', asm_out_file
);
10201 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10202 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10203 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10205 /* Probe at TEST_ADDR. */
10206 xops
[0] = stack_pointer_rtx
;
10208 xops
[2] = const0_rtx
;
10209 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10211 fprintf (asm_out_file
, "\tjmp\t");
10212 assemble_name_raw (asm_out_file
, loop_lab
);
10213 fputc ('\n', asm_out_file
);
10215 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10220 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10221 to be generated in correct form. */
10223 ix86_finalize_stack_realign_flags (void)
10225 /* Check if stack realign is really needed after reload, and
10226 stores result in cfun */
10227 unsigned int incoming_stack_boundary
10228 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10229 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10230 unsigned int stack_realign
= (incoming_stack_boundary
10232 ? crtl
->max_used_stack_slot_alignment
10233 : crtl
->stack_alignment_needed
));
10235 if (crtl
->stack_realign_finalized
)
10237 /* After stack_realign_needed is finalized, we can't no longer
10239 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10243 /* If the only reason for frame_pointer_needed is that we conservatively
10244 assumed stack realignment might be needed, but in the end nothing that
10245 needed the stack alignment had been spilled, clear frame_pointer_needed
10246 and say we don't need stack realignment. */
10248 && !crtl
->need_drap
10249 && frame_pointer_needed
10251 && flag_omit_frame_pointer
10252 && crtl
->sp_is_unchanging
10253 && !ix86_current_function_calls_tls_descriptor
10254 && !crtl
->accesses_prior_frames
10255 && !cfun
->calls_alloca
10256 && !crtl
->calls_eh_return
10257 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10258 && !ix86_frame_pointer_required ()
10259 && get_frame_size () == 0
10260 && ix86_nsaved_sseregs () == 0
10261 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10263 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10266 CLEAR_HARD_REG_SET (prologue_used
);
10267 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10268 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10269 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10270 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10271 HARD_FRAME_POINTER_REGNUM
);
10275 FOR_BB_INSNS (bb
, insn
)
10276 if (NONDEBUG_INSN_P (insn
)
10277 && requires_stack_frame_p (insn
, prologue_used
,
10278 set_up_by_prologue
))
10280 crtl
->stack_realign_needed
= stack_realign
;
10281 crtl
->stack_realign_finalized
= true;
10286 frame_pointer_needed
= false;
10287 stack_realign
= false;
10288 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10289 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10290 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10291 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10292 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10293 df_finish_pass (true);
10294 df_scan_alloc (NULL
);
10296 df_compute_regs_ever_live (true);
10300 crtl
->stack_realign_needed
= stack_realign
;
10301 crtl
->stack_realign_finalized
= true;
10304 /* Expand the prologue into a bunch of separate insns. */
10307 ix86_expand_prologue (void)
10309 struct machine_function
*m
= cfun
->machine
;
10312 struct ix86_frame frame
;
10313 HOST_WIDE_INT allocate
;
10314 bool int_registers_saved
;
10315 bool sse_registers_saved
;
10317 ix86_finalize_stack_realign_flags ();
10319 /* DRAP should not coexist with stack_realign_fp */
10320 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10322 memset (&m
->fs
, 0, sizeof (m
->fs
));
10324 /* Initialize CFA state for before the prologue. */
10325 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10326 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10328 /* Track SP offset to the CFA. We continue tracking this after we've
10329 swapped the CFA register away from SP. In the case of re-alignment
10330 this is fudged; we're interested to offsets within the local frame. */
10331 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10332 m
->fs
.sp_valid
= true;
10334 ix86_compute_frame_layout (&frame
);
10336 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10338 /* We should have already generated an error for any use of
10339 ms_hook on a nested function. */
10340 gcc_checking_assert (!ix86_static_chain_on_stack
);
10342 /* Check if profiling is active and we shall use profiling before
10343 prologue variant. If so sorry. */
10344 if (crtl
->profile
&& flag_fentry
!= 0)
10345 sorry ("ms_hook_prologue attribute isn%'t compatible "
10346 "with -mfentry for 32-bit");
10348 /* In ix86_asm_output_function_label we emitted:
10349 8b ff movl.s %edi,%edi
10351 8b ec movl.s %esp,%ebp
10353 This matches the hookable function prologue in Win32 API
10354 functions in Microsoft Windows XP Service Pack 2 and newer.
10355 Wine uses this to enable Windows apps to hook the Win32 API
10356 functions provided by Wine.
10358 What that means is that we've already set up the frame pointer. */
10360 if (frame_pointer_needed
10361 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10365 /* We've decided to use the frame pointer already set up.
10366 Describe this to the unwinder by pretending that both
10367 push and mov insns happen right here.
10369 Putting the unwind info here at the end of the ms_hook
10370 is done so that we can make absolutely certain we get
10371 the required byte sequence at the start of the function,
10372 rather than relying on an assembler that can produce
10373 the exact encoding required.
10375 However it does mean (in the unpatched case) that we have
10376 a 1 insn window where the asynchronous unwind info is
10377 incorrect. However, if we placed the unwind info at
10378 its correct location we would have incorrect unwind info
10379 in the patched case. Which is probably all moot since
10380 I don't expect Wine generates dwarf2 unwind info for the
10381 system libraries that use this feature. */
10383 insn
= emit_insn (gen_blockage ());
10385 push
= gen_push (hard_frame_pointer_rtx
);
10386 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10387 stack_pointer_rtx
);
10388 RTX_FRAME_RELATED_P (push
) = 1;
10389 RTX_FRAME_RELATED_P (mov
) = 1;
10391 RTX_FRAME_RELATED_P (insn
) = 1;
10392 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10393 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10395 /* Note that gen_push incremented m->fs.cfa_offset, even
10396 though we didn't emit the push insn here. */
10397 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10398 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10399 m
->fs
.fp_valid
= true;
10403 /* The frame pointer is not needed so pop %ebp again.
10404 This leaves us with a pristine state. */
10405 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10409 /* The first insn of a function that accepts its static chain on the
10410 stack is to push the register that would be filled in by a direct
10411 call. This insn will be skipped by the trampoline. */
10412 else if (ix86_static_chain_on_stack
)
10414 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10415 emit_insn (gen_blockage ());
10417 /* We don't want to interpret this push insn as a register save,
10418 only as a stack adjustment. The real copy of the register as
10419 a save will be done later, if needed. */
10420 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10421 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10422 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10423 RTX_FRAME_RELATED_P (insn
) = 1;
10426 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10427 of DRAP is needed and stack realignment is really needed after reload */
10428 if (stack_realign_drap
)
10430 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10432 /* Only need to push parameter pointer reg if it is caller saved. */
10433 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10435 /* Push arg pointer reg */
10436 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10437 RTX_FRAME_RELATED_P (insn
) = 1;
10440 /* Grab the argument pointer. */
10441 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10442 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10443 RTX_FRAME_RELATED_P (insn
) = 1;
10444 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10445 m
->fs
.cfa_offset
= 0;
10447 /* Align the stack. */
10448 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10450 GEN_INT (-align_bytes
)));
10451 RTX_FRAME_RELATED_P (insn
) = 1;
10453 /* Replicate the return address on the stack so that return
10454 address can be reached via (argp - 1) slot. This is needed
10455 to implement macro RETURN_ADDR_RTX and intrinsic function
10456 expand_builtin_return_addr etc. */
10457 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10458 t
= gen_frame_mem (word_mode
, t
);
10459 insn
= emit_insn (gen_push (t
));
10460 RTX_FRAME_RELATED_P (insn
) = 1;
10462 /* For the purposes of frame and register save area addressing,
10463 we've started over with a new frame. */
10464 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10465 m
->fs
.realigned
= true;
10468 int_registers_saved
= (frame
.nregs
== 0);
10469 sse_registers_saved
= (frame
.nsseregs
== 0);
10471 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10473 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10474 slower on all targets. Also sdb doesn't like it. */
10475 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10476 RTX_FRAME_RELATED_P (insn
) = 1;
10478 /* Push registers now, before setting the frame pointer
10480 if (!int_registers_saved
10482 && !frame
.save_regs_using_mov
)
10484 ix86_emit_save_regs ();
10485 int_registers_saved
= true;
10486 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10489 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10491 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10492 RTX_FRAME_RELATED_P (insn
) = 1;
10494 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10495 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10496 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10497 m
->fs
.fp_valid
= true;
10501 if (!int_registers_saved
)
10503 /* If saving registers via PUSH, do so now. */
10504 if (!frame
.save_regs_using_mov
)
10506 ix86_emit_save_regs ();
10507 int_registers_saved
= true;
10508 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10511 /* When using red zone we may start register saving before allocating
10512 the stack frame saving one cycle of the prologue. However, avoid
10513 doing this if we have to probe the stack; at least on x86_64 the
10514 stack probe can turn into a call that clobbers a red zone location. */
10515 else if (ix86_using_red_zone ()
10516 && (! TARGET_STACK_PROBE
10517 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10519 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10520 int_registers_saved
= true;
10524 if (stack_realign_fp
)
10526 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10527 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10529 /* The computation of the size of the re-aligned stack frame means
10530 that we must allocate the size of the register save area before
10531 performing the actual alignment. Otherwise we cannot guarantee
10532 that there's enough storage above the realignment point. */
10533 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10534 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10535 GEN_INT (m
->fs
.sp_offset
10536 - frame
.sse_reg_save_offset
),
10539 /* Align the stack. */
10540 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10542 GEN_INT (-align_bytes
)));
10544 /* For the purposes of register save area addressing, the stack
10545 pointer is no longer valid. As for the value of sp_offset,
10546 see ix86_compute_frame_layout, which we need to match in order
10547 to pass verification of stack_pointer_offset at the end. */
10548 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10549 m
->fs
.sp_valid
= false;
10552 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10554 if (flag_stack_usage_info
)
10556 /* We start to count from ARG_POINTER. */
10557 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10559 /* If it was realigned, take into account the fake frame. */
10560 if (stack_realign_drap
)
10562 if (ix86_static_chain_on_stack
)
10563 stack_size
+= UNITS_PER_WORD
;
10565 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10566 stack_size
+= UNITS_PER_WORD
;
10568 /* This over-estimates by 1 minimal-stack-alignment-unit but
10569 mitigates that by counting in the new return address slot. */
10570 current_function_dynamic_stack_size
10571 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10574 current_function_static_stack_size
= stack_size
;
10577 /* On SEH target with very large frame size, allocate an area to save
10578 SSE registers (as the very large allocation won't be described). */
10580 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10581 && !sse_registers_saved
)
10583 HOST_WIDE_INT sse_size
=
10584 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10586 gcc_assert (int_registers_saved
);
10588 /* No need to do stack checking as the area will be immediately
10590 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10591 GEN_INT (-sse_size
), -1,
10592 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10593 allocate
-= sse_size
;
10594 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10595 sse_registers_saved
= true;
10598 /* The stack has already been decremented by the instruction calling us
10599 so probe if the size is non-negative to preserve the protection area. */
10600 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10602 /* We expect the registers to be saved when probes are used. */
10603 gcc_assert (int_registers_saved
);
10605 if (STACK_CHECK_MOVING_SP
)
10607 ix86_adjust_stack_and_probe (allocate
);
10612 HOST_WIDE_INT size
= allocate
;
10614 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10615 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10617 if (TARGET_STACK_PROBE
)
10618 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10620 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10626 else if (!ix86_target_stack_probe ()
10627 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10629 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10630 GEN_INT (-allocate
), -1,
10631 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10635 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10637 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10638 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10639 bool eax_live
= false;
10640 bool r10_live
= false;
10643 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10644 if (!TARGET_64BIT_MS_ABI
)
10645 eax_live
= ix86_eax_live_at_start_p ();
10647 /* Note that SEH directives need to continue tracking the stack
10648 pointer even after the frame pointer has been set up. */
10651 insn
= emit_insn (gen_push (eax
));
10652 allocate
-= UNITS_PER_WORD
;
10653 if (sp_is_cfa_reg
|| TARGET_SEH
)
10656 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10657 RTX_FRAME_RELATED_P (insn
) = 1;
10663 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10664 insn
= emit_insn (gen_push (r10
));
10665 allocate
-= UNITS_PER_WORD
;
10666 if (sp_is_cfa_reg
|| TARGET_SEH
)
10669 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10670 RTX_FRAME_RELATED_P (insn
) = 1;
10674 emit_move_insn (eax
, GEN_INT (allocate
));
10675 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10677 /* Use the fact that AX still contains ALLOCATE. */
10678 adjust_stack_insn
= (Pmode
== DImode
10679 ? gen_pro_epilogue_adjust_stack_di_sub
10680 : gen_pro_epilogue_adjust_stack_si_sub
);
10682 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10683 stack_pointer_rtx
, eax
));
10685 if (sp_is_cfa_reg
|| TARGET_SEH
)
10688 m
->fs
.cfa_offset
+= allocate
;
10689 RTX_FRAME_RELATED_P (insn
) = 1;
10690 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10691 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10692 plus_constant (Pmode
, stack_pointer_rtx
,
10695 m
->fs
.sp_offset
+= allocate
;
10697 if (r10_live
&& eax_live
)
10699 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10700 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10701 gen_frame_mem (word_mode
, t
));
10702 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10703 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10704 gen_frame_mem (word_mode
, t
));
10706 else if (eax_live
|| r10_live
)
10708 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10709 emit_move_insn (gen_rtx_REG (word_mode
,
10710 (eax_live
? AX_REG
: R10_REG
)),
10711 gen_frame_mem (word_mode
, t
));
10714 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10716 /* If we havn't already set up the frame pointer, do so now. */
10717 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10719 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10720 GEN_INT (frame
.stack_pointer_offset
10721 - frame
.hard_frame_pointer_offset
));
10722 insn
= emit_insn (insn
);
10723 RTX_FRAME_RELATED_P (insn
) = 1;
10724 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10726 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10727 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10728 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10729 m
->fs
.fp_valid
= true;
10732 if (!int_registers_saved
)
10733 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10734 if (!sse_registers_saved
)
10735 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10737 pic_reg_used
= false;
10738 /* We don't use pic-register for pe-coff target. */
10739 if (pic_offset_table_rtx
10741 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10744 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10746 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10747 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10749 pic_reg_used
= true;
10756 if (ix86_cmodel
== CM_LARGE_PIC
)
10758 rtx label
, tmp_reg
;
10760 gcc_assert (Pmode
== DImode
);
10761 label
= gen_label_rtx ();
10762 emit_label (label
);
10763 LABEL_PRESERVE_P (label
) = 1;
10764 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10765 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10766 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10768 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10769 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10770 pic_offset_table_rtx
, tmp_reg
));
10773 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10777 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10778 RTX_FRAME_RELATED_P (insn
) = 1;
10779 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10783 /* In the pic_reg_used case, make sure that the got load isn't deleted
10784 when mcount needs it. Blockage to avoid call movement across mcount
10785 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10787 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10788 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10790 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10792 /* vDRAP is setup but after reload it turns out stack realign
10793 isn't necessary, here we will emit prologue to setup DRAP
10794 without stack realign adjustment */
10795 t
= choose_baseaddr (0);
10796 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10799 /* Prevent instructions from being scheduled into register save push
10800 sequence when access to the redzone area is done through frame pointer.
10801 The offset between the frame pointer and the stack pointer is calculated
10802 relative to the value of the stack pointer at the end of the function
10803 prologue, and moving instructions that access redzone area via frame
10804 pointer inside push sequence violates this assumption. */
10805 if (frame_pointer_needed
&& frame
.red_zone_size
)
10806 emit_insn (gen_memory_blockage ());
10808 /* Emit cld instruction if stringops are used in the function. */
10809 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10810 emit_insn (gen_cld ());
10812 /* SEH requires that the prologue end within 256 bytes of the start of
10813 the function. Prevent instruction schedules that would extend that.
10814 Further, prevent alloca modifications to the stack pointer from being
10815 combined with prologue modifications. */
10817 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10820 /* Emit code to restore REG using a POP insn. */
10823 ix86_emit_restore_reg_using_pop (rtx reg
)
10825 struct machine_function
*m
= cfun
->machine
;
10826 rtx insn
= emit_insn (gen_pop (reg
));
10828 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10829 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10831 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10832 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10834 /* Previously we'd represented the CFA as an expression
10835 like *(%ebp - 8). We've just popped that value from
10836 the stack, which means we need to reset the CFA to
10837 the drap register. This will remain until we restore
10838 the stack pointer. */
10839 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10840 RTX_FRAME_RELATED_P (insn
) = 1;
10842 /* This means that the DRAP register is valid for addressing too. */
10843 m
->fs
.drap_valid
= true;
10847 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10849 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10850 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10851 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10852 RTX_FRAME_RELATED_P (insn
) = 1;
10854 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10857 /* When the frame pointer is the CFA, and we pop it, we are
10858 swapping back to the stack pointer as the CFA. This happens
10859 for stack frames that don't allocate other data, so we assume
10860 the stack pointer is now pointing at the return address, i.e.
10861 the function entry state, which makes the offset be 1 word. */
10862 if (reg
== hard_frame_pointer_rtx
)
10864 m
->fs
.fp_valid
= false;
10865 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10867 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10868 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10870 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10871 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10872 GEN_INT (m
->fs
.cfa_offset
)));
10873 RTX_FRAME_RELATED_P (insn
) = 1;
10878 /* Emit code to restore saved registers using POP insns. */
10881 ix86_emit_restore_regs_using_pop (void)
10883 unsigned int regno
;
10885 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10886 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10887 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10890 /* Emit code and notes for the LEAVE instruction. */
10893 ix86_emit_leave (void)
10895 struct machine_function
*m
= cfun
->machine
;
10896 rtx insn
= emit_insn (ix86_gen_leave ());
10898 ix86_add_queued_cfa_restore_notes (insn
);
10900 gcc_assert (m
->fs
.fp_valid
);
10901 m
->fs
.sp_valid
= true;
10902 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10903 m
->fs
.fp_valid
= false;
10905 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10907 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10908 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10910 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10911 plus_constant (Pmode
, stack_pointer_rtx
,
10913 RTX_FRAME_RELATED_P (insn
) = 1;
10915 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10919 /* Emit code to restore saved registers using MOV insns.
10920 First register is restored from CFA - CFA_OFFSET. */
10922 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10923 bool maybe_eh_return
)
10925 struct machine_function
*m
= cfun
->machine
;
10926 unsigned int regno
;
10928 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10929 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10931 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10934 mem
= choose_baseaddr (cfa_offset
);
10935 mem
= gen_frame_mem (word_mode
, mem
);
10936 insn
= emit_move_insn (reg
, mem
);
10938 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10940 /* Previously we'd represented the CFA as an expression
10941 like *(%ebp - 8). We've just popped that value from
10942 the stack, which means we need to reset the CFA to
10943 the drap register. This will remain until we restore
10944 the stack pointer. */
10945 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10946 RTX_FRAME_RELATED_P (insn
) = 1;
10948 /* This means that the DRAP register is valid for addressing. */
10949 m
->fs
.drap_valid
= true;
10952 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10954 cfa_offset
-= UNITS_PER_WORD
;
10958 /* Emit code to restore saved registers using MOV insns.
10959 First register is restored from CFA - CFA_OFFSET. */
10961 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10962 bool maybe_eh_return
)
10964 unsigned int regno
;
10966 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10967 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10969 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10972 mem
= choose_baseaddr (cfa_offset
);
10973 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10974 set_mem_align (mem
, 128);
10975 emit_move_insn (reg
, mem
);
10977 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
/* NOTE(review): extraction artifact -- original source line numbers are
   fused into the text, statements are hard-wrapped, and gaps in the
   numbering show that some lines are missing.  Text kept byte-identical
   below; only comments were added.  Recover the canonical body from
   upstream GCC config/i386/i386.c before compiling.  */
/* Emits the epilogue RTL.  STYLE != 2 visibly adjusts the eh_return
   save locations below, and STYLE is forwarded to
   pro_epilogue_adjust_stack -- confirm exact semantics upstream.  */
10983 /* Restore function stack, frame, and registers. */
10986 ix86_expand_epilogue (int style
)
10988 struct machine_function
*m
= cfun
->machine
;
10989 struct machine_frame_state frame_state_save
= m
->fs
;
10990 struct ix86_frame frame
;
10991 bool restore_regs_via_mov
;
10994 ix86_finalize_stack_realign_flags ();
10995 ix86_compute_frame_layout (&frame
);
/* Establish which frame pointers (sp/fp) are currently valid and
   consistent with the computed frame layout.  */
10997 m
->fs
.sp_valid
= (!frame_pointer_needed
10998 || (crtl
->sp_is_unchanging
10999 && !stack_realign_fp
));
11000 gcc_assert (!m
->fs
.sp_valid
11001 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11003 /* The FP must be valid if the frame pointer is present. */
11004 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11005 gcc_assert (!m
->fs
.fp_valid
11006 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11008 /* We must have *some* valid pointer to the stack frame. */
11009 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11011 /* The DRAP is never valid at this point. */
11012 gcc_assert (!m
->fs
.drap_valid
);
11014 /* See the comment about red zone and frame
11015 pointer usage in ix86_expand_prologue. */
11016 if (frame_pointer_needed
&& frame
.red_zone_size
)
11017 emit_insn (gen_memory_blockage ());
11019 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11020 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11022 /* Determine the CFA offset of the end of the red-zone. */
11023 m
->fs
.red_zone_offset
= 0;
11024 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11026 /* The red-zone begins below the return address. */
11027 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11029 /* When the register save area is in the aligned portion of
11030 the stack, determine the maximum runtime displacement that
11031 matches up with the aligned frame. */
11032 if (stack_realign_drap
)
11033 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11037 /* Special care must be taken for the normal return case of a function
11038 using eh_return: the eax and edx registers are marked as saved, but
11039 not restored along this path. Adjust the save location to match. */
11040 if (crtl
->calls_eh_return
&& style
!= 2)
11041 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
/* Decision chain: choose MOV-based vs POP-based register restore.  */
11043 /* EH_RETURN requires the use of moves to function properly. */
11044 if (crtl
->calls_eh_return
)
11045 restore_regs_via_mov
= true;
11046 /* SEH requires the use of pops to identify the epilogue. */
11047 else if (TARGET_SEH
)
11048 restore_regs_via_mov
= false;
11049 /* If we're only restoring one register and sp is not valid then
11050 using a move instruction to restore the register since it's
11051 less work than reloading sp and popping the register. */
11052 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11053 restore_regs_via_mov
= true;
11054 else if (TARGET_EPILOGUE_USING_MOVE
11055 && cfun
->machine
->use_fast_prologue_epilogue
11056 && (frame
.nregs
> 1
11057 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11058 restore_regs_via_mov
= true;
11059 else if (frame_pointer_needed
11061 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11062 restore_regs_via_mov
= true;
11063 else if (frame_pointer_needed
11064 && TARGET_USE_LEAVE
11065 && cfun
->machine
->use_fast_prologue_epilogue
11066 && frame
.nregs
== 1)
11067 restore_regs_via_mov
= true;
/* NOTE(review): the "else" introducing this final default arm is one
   of the lines missing from the extract (original line ~11068).  */
11069 restore_regs_via_mov
= false;
11071 if (restore_regs_via_mov
|| frame
.nsseregs
)
11073 /* Ensure that the entire register save area is addressable via
11074 the stack pointer, if we will restore via sp. */
11076 && m
->fs
.sp_offset
> 0x7fffffff
11077 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11078 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11080 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11081 GEN_INT (m
->fs
.sp_offset
11082 - frame
.sse_reg_save_offset
),
11084 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11088 /* If there are any SSE registers to restore, then we have to do it
11089 via moves, since there's obviously no pop for SSE regs. */
11090 if (frame
.nsseregs
)
11091 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11094 if (restore_regs_via_mov
)
11099 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11101 /* eh_return epilogues need %ecx added to the stack pointer. */
11104 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11106 /* Stack align doesn't work with eh_return. */
11107 gcc_assert (!stack_realign_drap
);
11108 /* Neither does regparm nested functions. */
11109 gcc_assert (!ix86_static_chain_on_stack
);
11111 if (frame_pointer_needed
)
11113 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11114 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11115 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11117 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11118 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11120 /* Note that we use SA as a temporary CFA, as the return
11121 address is at the proper place relative to it. We
11122 pretend this happens at the FP restore insn because
11123 prior to this insn the FP would be stored at the wrong
11124 offset relative to SA, and after this insn we have no
11125 other reasonable register to use for the CFA. We don't
11126 bother resetting the CFA to the SP for the duration of
11127 the return insn. */
11128 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11129 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11130 ix86_add_queued_cfa_restore_notes (insn
);
11131 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11132 RTX_FRAME_RELATED_P (insn
) = 1;
11134 m
->fs
.cfa_reg
= sa
;
11135 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11136 m
->fs
.fp_valid
= false;
11138 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11139 const0_rtx
, style
, false);
11143 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11144 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11145 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11146 ix86_add_queued_cfa_restore_notes (insn
);
11148 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11149 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11151 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11152 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11153 plus_constant (Pmode
, stack_pointer_rtx
,
11155 RTX_FRAME_RELATED_P (insn
) = 1;
11158 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11159 m
->fs
.sp_valid
= true;
11164 /* SEH requires that the function end with (1) a stack adjustment
11165 if necessary, (2) a sequence of pops, and (3) a return or
11166 jump instruction. Prevent insns from the function body from
11167 being scheduled into this sequence. */
11170 /* Prevent a catch region from being adjacent to the standard
11171 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11172 several other flags that would be interesting to test are
11174 if (flag_non_call_exceptions
)
11175 emit_insn (gen_nops (const1_rtx
));
11177 emit_insn (gen_blockage ());
11180 /* First step is to deallocate the stack frame so that we can
11181 pop the registers. Also do it on SEH target for very large
11182 frame as the emitted instructions aren't allowed by the ABI in
11184 if (!m
->fs
.sp_valid
11186 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11187 >= SEH_MAX_FRAME_SIZE
)))
11189 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11190 GEN_INT (m
->fs
.fp_offset
11191 - frame
.reg_save_offset
),
11194 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11196 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11197 GEN_INT (m
->fs
.sp_offset
11198 - frame
.reg_save_offset
),
11200 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11203 ix86_emit_restore_regs_using_pop ();
11206 /* If we used a stack pointer and haven't already got rid of it,
11208 if (m
->fs
.fp_valid
)
11210 /* If the stack pointer is valid and pointing at the frame
11211 pointer store address, then we only need a pop. */
11212 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11213 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11214 /* Leave results in shorter dependency chains on CPUs that are
11215 able to grok it fast. */
11216 else if (TARGET_USE_LEAVE
11217 || optimize_bb_for_size_p (EXIT_BLOCK_PTR
)
11218 || !cfun
->machine
->use_fast_prologue_epilogue
)
11219 ix86_emit_leave ();
11222 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11223 hard_frame_pointer_rtx
,
11224 const0_rtx
, style
, !using_drap
);
11225 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
/* DRAP (dynamic realign argument pointer) teardown: recompute the
   stack pointer from the DRAP register and redefine the CFA.  */
11231 int param_ptr_offset
= UNITS_PER_WORD
;
11234 gcc_assert (stack_realign_drap
);
11236 if (ix86_static_chain_on_stack
)
11237 param_ptr_offset
+= UNITS_PER_WORD
;
11238 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11239 param_ptr_offset
+= UNITS_PER_WORD
;
11241 insn
= emit_insn (gen_rtx_SET
11242 (VOIDmode
, stack_pointer_rtx
,
11243 gen_rtx_PLUS (Pmode
,
11245 GEN_INT (-param_ptr_offset
))));
11246 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11247 m
->fs
.cfa_offset
= param_ptr_offset
;
11248 m
->fs
.sp_offset
= param_ptr_offset
;
11249 m
->fs
.realigned
= false;
11251 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11252 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11253 GEN_INT (param_ptr_offset
)));
11254 RTX_FRAME_RELATED_P (insn
) = 1;
11256 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11257 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11260 /* At this point the stack pointer must be valid, and we must have
11261 restored all of the registers. We may not have deallocated the
11262 entire stack frame. We've delayed this until now because it may
11263 be possible to merge the local stack deallocation with the
11264 deallocation forced by ix86_static_chain_on_stack. */
11265 gcc_assert (m
->fs
.sp_valid
);
11266 gcc_assert (!m
->fs
.fp_valid
);
11267 gcc_assert (!m
->fs
.realigned
);
11268 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11270 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11271 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11275 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11277 /* Sibcall epilogues don't want a return instruction. */
11280 m
->fs
= frame_state_save
;
/* Normal return: handle callee-popped argument bytes (stdcall et al).  */
11284 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11286 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11288 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11289 address, do explicit add, and jump indirectly to the caller. */
11291 if (crtl
->args
.pops_args
>= 65536)
11293 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11296 /* There is no "pascal" calling convention in any 64bit ABI. */
11297 gcc_assert (!TARGET_64BIT
);
11299 insn
= emit_insn (gen_pop (ecx
));
11300 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11301 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11303 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11304 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11305 add_reg_note (insn
, REG_CFA_REGISTER
,
11306 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11307 RTX_FRAME_RELATED_P (insn
) = 1;
11309 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11311 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11314 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11317 emit_jump_insn (gen_simple_return_internal ());
11319 /* Restore the state back to the state from the prologue,
11320 so that it's correct for the next epilogue. */
11321 m
->fs
= frame_state_save
;
/* NOTE(review): extraction artifact -- original line numbers are fused
   into the text, statements are hard-wrapped, and some lines are
   missing (e.g. the conditions at original lines 11338-11341 and
   11349-11352).  Kept byte-identical; only comments were added.  */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the PIC register number
   and, for Mach-O (per the comment below), may emit a trailing "nop"
   so no label falls at the end of the object.  */
11324 /* Reset from the function's potential modifications. */
11327 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11328 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11330 if (pic_offset_table_rtx
)
11331 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11333 /* Mach-O doesn't support labels at the end of objects, so if
11334 it looks like we might want one, insert a NOP. */
11336 rtx insn
= get_last_insn ();
11337 rtx deleted_debug_label
= NULL_RTX
;
/* Scan backwards over trailing notes; remember any deleted-debug-label
   note seen along the way.  */
11340 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11342 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11343 notes only, instead set their CODE_LABEL_NUMBER to -1,
11344 otherwise there would be code generation differences
11345 in between -g and -g0. */
11346 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11347 deleted_debug_label
= insn
;
11348 insn
= PREV_INSN (insn
);
11353 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11354 fputs ("\tnop\n", file
);
11355 else if (deleted_debug_label
)
11356 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11357 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11358 CODE_LABEL_NUMBER (insn
) = -1;
/* NOTE(review): extraction artifact -- original line numbers are fused
   into the text and whole arms of the register-selection logic are
   missing (gaps at original lines 11372-11376, 11387-11420).  Kept
   byte-identical; only comments were added.  */
11364 /* Return a scratch register to use in the split stack prologue. The
11365 split stack prologue is used for -fsplit-stack. It is the first
11366 instructions in the function, even before the regular prologue.
11367 The scratch register can be any caller-saved register which is not
11368 used for parameters or for the static chain. */
11370 static unsigned int
11371 split_stack_prologue_scratch_regno (void)
/* The visible logic inspects fastcall/thiscall attributes, the regparm
   count, and DECL_STATIC_CHAIN to decide whether a free caller-saved
   register exists; unsupported combinations report via sorry () and
   return INVALID_REGNUM.  */
11377 bool is_fastcall
, is_thiscall
;
11380 is_fastcall
= (lookup_attribute ("fastcall",
11381 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11383 is_thiscall
= (lookup_attribute ("thiscall",
11384 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11386 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11390 if (DECL_STATIC_CHAIN (cfun
->decl
))
11392 sorry ("-fsplit-stack does not support fastcall with "
11393 "nested function");
11394 return INVALID_REGNUM
;
11398 else if (is_thiscall
)
11400 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11404 else if (regparm
< 3)
11406 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11412 sorry ("-fsplit-stack does not support 2 register "
11413 " parameters for a nested function");
11414 return INVALID_REGNUM
;
11421 /* FIXME: We could make this work by pushing a register
11422 around the addition and comparison. */
11423 sorry ("-fsplit-stack does not support 3 register parameters");
11424 return INVALID_REGNUM
;
/* NOTE(review): the two block comments below are truncated by the
   extraction (their closing lines are missing); the declarations
   themselves are kept byte-identical.  These are GC-rooted (GTY)
   caches for the __morestack symbols, initialized lazily in
   ix86_expand_split_stack_prologue.  */
11429 /* A SYMBOL_REF for the function which allocates new stackspace for
11432 static GTY(()) rtx split_stack_fn
;
11434 /* A SYMBOL_REF for the more stack function when using the large
11437 static GTY(()) rtx split_stack_fn_large
;
/* NOTE(review): extraction artifact -- original line numbers are fused
   into the text, statements are hard-wrapped, and numbering gaps show
   missing lines throughout.  Kept byte-identical; only comments were
   added.  Recover the canonical body from upstream GCC before
   compiling.  */
11439 /* Handle -fsplit-stack. These are the first instructions in the
11440 function, even before the regular prologue. */
11443 ix86_expand_split_stack_prologue (void)
11445 struct ix86_frame frame
;
11446 HOST_WIDE_INT allocate
;
11447 unsigned HOST_WIDE_INT args_size
;
11448 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11449 rtx scratch_reg
= NULL_RTX
;
11450 rtx varargs_label
= NULL_RTX
;
11453 gcc_assert (flag_split_stack
&& reload_completed
);
11455 ix86_finalize_stack_realign_flags ();
11456 ix86_compute_frame_layout (&frame
);
11457 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11459 /* This is the label we will branch to if we have enough stack
11460 space. We expect the basic block reordering pass to reverse this
11461 branch if optimizing, so that we branch in the unlikely case. */
11462 label
= gen_label_rtx ();
11464 /* We need to compare the stack pointer minus the frame size with
11465 the stack boundary in the TCB. The stack boundary always gives
11466 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11467 can compare directly. Otherwise we need to do an addition. */
11469 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11470 UNSPEC_STACK_CHECK
);
11471 limit
= gen_rtx_CONST (Pmode
, limit
);
11472 limit
= gen_rtx_MEM (Pmode
, limit
);
11473 if (allocate
< SPLIT_STACK_AVAILABLE
)
11474 current
= stack_pointer_rtx
;
11477 unsigned int scratch_regno
;
11480 /* We need a scratch register to hold the stack pointer minus
11481 the required frame size. Since this is the very start of the
11482 function, the scratch register can be any caller-saved
11483 register which is not used for parameters. */
11484 offset
= GEN_INT (- allocate
);
11485 scratch_regno
= split_stack_prologue_scratch_regno ();
11486 if (scratch_regno
== INVALID_REGNUM
)
11488 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11489 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11491 /* We don't use ix86_gen_add3 in this case because it will
11492 want to split to lea, but when not optimizing the insn
11493 will not be split after this point. */
11494 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11495 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11500 emit_move_insn (scratch_reg
, offset
);
11501 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11502 stack_pointer_rtx
));
11504 current
= scratch_reg
;
/* Branch past the __morestack call when enough stack is available.  */
11507 ix86_expand_branch (GEU
, current
, limit
, label
);
11508 jump_insn
= get_last_insn ();
11509 JUMP_LABEL (jump_insn
) = label
;
11511 /* Mark the jump as very likely to be taken. */
11512 add_reg_note (jump_insn
, REG_BR_PROB
,
11513 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11515 if (split_stack_fn
== NULL_RTX
)
11516 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11517 fn
= split_stack_fn
;
11519 /* Get more stack space. We pass in the desired stack space and the
11520 size of the arguments to copy to the new stack. In 32-bit mode
11521 we push the parameters; __morestack will return on a new stack
11522 anyhow. In 64-bit mode we pass the parameters in r10 and
11524 allocate_rtx
= GEN_INT (allocate
);
11525 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11526 call_fusage
= NULL_RTX
;
11531 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11532 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11534 /* If this function uses a static chain, it will be in %r10.
11535 Preserve it across the call to __morestack. */
11536 if (DECL_STATIC_CHAIN (cfun
->decl
))
11540 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11541 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11542 use_reg (&call_fusage
, rax
);
11545 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11548 HOST_WIDE_INT argval
;
11550 gcc_assert (Pmode
== DImode
);
11551 /* When using the large model we need to load the address
11552 into a register, and we've run out of registers. So we
11553 switch to a different calling convention, and we call a
11554 different function: __morestack_large. We pass the
11555 argument size in the upper 32 bits of r10 and pass the
11556 frame size in the lower 32 bits. */
11557 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11558 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11560 if (split_stack_fn_large
== NULL_RTX
)
11561 split_stack_fn_large
=
11562 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11564 if (ix86_cmodel
== CM_LARGE_PIC
)
11568 label
= gen_label_rtx ();
11569 emit_label (label
);
11570 LABEL_PRESERVE_P (label
) = 1;
11571 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11572 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11573 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11574 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11576 x
= gen_rtx_CONST (Pmode
, x
);
11577 emit_move_insn (reg11
, x
);
11578 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11579 x
= gen_const_mem (Pmode
, x
);
11580 emit_move_insn (reg11
, x
);
11583 emit_move_insn (reg11
, split_stack_fn_large
);
11587 argval
= ((args_size
<< 16) << 16) + allocate
;
11588 emit_move_insn (reg10
, GEN_INT (argval
));
11592 emit_move_insn (reg10
, allocate_rtx
);
11593 emit_move_insn (reg11
, GEN_INT (args_size
));
11594 use_reg (&call_fusage
, reg11
);
11597 use_reg (&call_fusage
, reg10
);
/* 32-bit path: push both parameters on the stack instead.  */
11601 emit_insn (gen_push (GEN_INT (args_size
)));
11602 emit_insn (gen_push (allocate_rtx
));
11604 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11605 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11607 add_function_usage_to (call_insn
, call_fusage
);
11609 /* In order to make call/return prediction work right, we now need
11610 to execute a return instruction. See
11611 libgcc/config/i386/morestack.S for the details on how this works.
11613 For flow purposes gcc must not see this as a return
11614 instruction--we need control flow to continue at the subsequent
11615 label. Therefore, we use an unspec. */
11616 gcc_assert (crtl
->args
.pops_args
< 65536);
11617 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11619 /* If we are in 64-bit mode and this function uses a static chain,
11620 we saved %r10 in %rax before calling _morestack. */
11621 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11622 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11623 gen_rtx_REG (word_mode
, AX_REG
));
11625 /* If this function calls va_start, we need to store a pointer to
11626 the arguments on the old stack, because they may not have been
11627 all copied to the new stack. At this point the old stack can be
11628 found at the frame pointer value used by __morestack, because
11629 __morestack has set that up before calling back to us. Here we
11630 store that pointer in a scratch register, and in
11631 ix86_expand_prologue we store the scratch register in a stack
11633 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11635 unsigned int scratch_regno
;
11639 scratch_regno
= split_stack_prologue_scratch_regno ();
11640 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11641 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* NOTE(review): the lines below are remnants of a comment diagramming
   the __morestack stack layout; its delimiters were lost in
   extraction.  */
11645 return address within this function
11646 return address of caller of this function
11648 So we add three words to get to the stack arguments.
11652 return address within this function
11653 first argument to __morestack
11654 second argument to __morestack
11655 return address of caller of this function
11657 So we add five words to get to the stack arguments.
11659 words
= TARGET_64BIT
? 3 : 5;
11660 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11661 gen_rtx_PLUS (Pmode
, frame_reg
,
11662 GEN_INT (words
* UNITS_PER_WORD
))));
11664 varargs_label
= gen_label_rtx ();
11665 emit_jump_insn (gen_jump (varargs_label
));
11666 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11671 emit_label (label
);
11672 LABEL_NUSES (label
) = 1;
11674 /* If this function calls va_start, we now have to set the scratch
11675 register for the case where we do not call __morestack. In this
11676 case we need to set it based on the stack pointer. */
11677 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11679 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11680 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11681 GEN_INT (UNITS_PER_WORD
))));
11683 emit_label (varargs_label
);
11684 LABEL_NUSES (varargs_label
) = 1;
11688 /* We may have to tell the dataflow pass that the split stack prologue
11689 is initializing a scratch register. */
11692 ix86_live_on_entry (bitmap regs
)
11694 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11696 gcc_assert (flag_split_stack
);
11697 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11701 /* Determine if op is suitable SUBREG RTX for address. */
11704 ix86_address_subreg_operand (rtx op
)
11706 enum machine_mode mode
;
11711 mode
= GET_MODE (op
);
11713 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11716 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11717 failures when the register is one word out of a two word structure. */
11718 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11721 /* Allow only SUBREGs of non-eliminable hard registers. */
11722 return register_no_elim_operand (op
, mode
);
/* NOTE(review): extraction artifact -- original line numbers are fused
   into the text, statements are hard-wrapped, and numbering gaps show
   that many lines (branch bodies, returns, closing braces) are
   missing.  Kept byte-identical; only comments were added.  Recover
   the canonical body from upstream GCC before compiling.  */
11725 /* Extract the parts of an RTL expression that is a valid memory address
11726 for an instruction. Return 0 if the structure of the address is
11727 grossly off. Return -1 if the address contains ASHIFT, so it is not
11728 strictly valid, but still used for computing length of lea instruction. */
11731 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11733 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11734 rtx base_reg
, index_reg
;
11735 HOST_WIDE_INT scale
= 1;
11736 rtx scale_rtx
= NULL_RTX
;
11739 enum ix86_address_seg seg
= SEG_DEFAULT
;
11741 /* Allow zero-extended SImode addresses,
11742 they will be emitted with addr32 prefix. */
11743 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11745 if (GET_CODE (addr
) == ZERO_EXTEND
11746 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11748 addr
= XEXP (addr
, 0);
11749 if (CONST_INT_P (addr
))
11752 else if (GET_CODE (addr
) == AND
11753 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11755 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11756 if (addr
== NULL_RTX
)
11759 if (CONST_INT_P (addr
))
11764 /* Allow SImode subregs of DImode addresses,
11765 they will be emitted with addr32 prefix. */
11766 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11768 if (GET_CODE (addr
) == SUBREG
11769 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11771 addr
= SUBREG_REG (addr
);
11772 if (CONST_INT_P (addr
))
/* Main dispatch on the top-level RTX code of ADDR.  */
11779 else if (GET_CODE (addr
) == SUBREG
)
11781 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11786 else if (GET_CODE (addr
) == PLUS
)
11788 rtx addends
[4], op
;
/* Flatten the nested PLUS chain into ADDENDS, then classify each
   operand as base / index*scale / segment unspec / displacement.  */
11796 addends
[n
++] = XEXP (op
, 1);
11799 while (GET_CODE (op
) == PLUS
);
11804 for (i
= n
; i
>= 0; --i
)
11807 switch (GET_CODE (op
))
11812 index
= XEXP (op
, 0);
11813 scale_rtx
= XEXP (op
, 1);
11819 index
= XEXP (op
, 0);
11820 tmp
= XEXP (op
, 1);
11821 if (!CONST_INT_P (tmp
))
11823 scale
= INTVAL (tmp
);
11824 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11826 scale
= 1 << scale
;
11831 if (GET_CODE (op
) != UNSPEC
)
11836 if (XINT (op
, 1) == UNSPEC_TP
11837 && TARGET_TLS_DIRECT_SEG_REFS
11838 && seg
== SEG_DEFAULT
)
11839 seg
= DEFAULT_TLS_SEG_REG
;
11845 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11872 else if (GET_CODE (addr
) == MULT
)
11874 index
= XEXP (addr
, 0); /* index*scale */
11875 scale_rtx
= XEXP (addr
, 1);
11877 else if (GET_CODE (addr
) == ASHIFT
)
11879 /* We're called for lea too, which implements ashift on occasion. */
11880 index
= XEXP (addr
, 0);
11881 tmp
= XEXP (addr
, 1);
11882 if (!CONST_INT_P (tmp
))
11884 scale
= INTVAL (tmp
);
11885 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11887 scale
= 1 << scale
;
11890 else if (CONST_INT_P (addr
))
11892 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11895 /* Constant addresses are sign extended to 64bit, we have to
11896 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11898 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11904 disp
= addr
; /* displacement */
11910 else if (GET_CODE (index
) == SUBREG
11911 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11917 /* Address override works only on the (%reg) part of %fs:(%reg). */
11918 if (seg
!= SEG_DEFAULT
11919 && ((base
&& GET_MODE (base
) != word_mode
)
11920 || (index
&& GET_MODE (index
) != word_mode
)))
11923 /* Extract the integral value of scale. */
11926 if (!CONST_INT_P (scale_rtx
))
11928 scale
= INTVAL (scale_rtx
);
11931 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11932 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11934 /* Avoid useless 0 displacement. */
11935 if (disp
== const0_rtx
&& (base
|| index
))
11938 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11939 if (base_reg
&& index_reg
&& scale
== 1
11940 && (index_reg
== arg_pointer_rtx
11941 || index_reg
== frame_pointer_rtx
11942 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11945 tmp
= base
, base
= index
, index
= tmp
;
11946 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11949 /* Special case: %ebp cannot be encoded as a base without a displacement.
11953 && (base_reg
== hard_frame_pointer_rtx
11954 || base_reg
== frame_pointer_rtx
11955 || base_reg
== arg_pointer_rtx
11956 || (REG_P (base_reg
)
11957 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11958 || REGNO (base_reg
) == R13_REG
))))
11961 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11962 Avoid this by transforming to [%esi+0].
11963 Reload calls address legitimization without cfun defined, so we need
11964 to test cfun for being non-NULL. */
11965 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11966 && base_reg
&& !index_reg
&& !disp
11967 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11970 /* Special case: encode reg+reg instead of reg*2. */
11971 if (!base
&& index
&& scale
== 2)
11972 base
= index
, base_reg
= index_reg
, scale
= 1;
11974 /* Special case: scaling cannot be encoded without base or displacement. */
11975 if (!base
&& !disp
&& index
&& scale
!= 1)
11979 out
->index
= index
;
11981 out
->scale
= scale
;
/* NOTE(review): extraction artifact -- original line numbers are fused
   into the text and the cost-returning lines and several conditions
   are missing (gaps after original lines 11999, 12008, 12020, 12041).
   Kept byte-identical; only comments were added.  */
11987 /* Return cost of the memory address x.
11988 For i386, it is better to use a complex address than let gcc copy
11989 the address into a reg and make a new pseudo. But not if the address
11990 requires to two regs - that would mean more pseudos with longer
11993 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11994 addr_space_t as ATTRIBUTE_UNUSED
,
11995 bool speed ATTRIBUTE_UNUSED
)
11997 struct ix86_address parts
;
11999 int ok
= ix86_decompose_address (x
, &parts
);
/* Strip SUBREGs so the REG tests below see the underlying registers.  */
12003 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12004 parts
.base
= SUBREG_REG (parts
.base
);
12005 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12006 parts
.index
= SUBREG_REG (parts
.index
);
12008 /* Attempt to minimize number of registers in the address. */
12010 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12012 && (!REG_P (parts
.index
)
12013 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12017 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12019 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12020 && parts
.base
!= parts
.index
)
12023 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12024 since it's predecode logic can't detect the length of instructions
12025 and it degenerates to vector decoded. Increase cost of such
12026 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12027 to split such addresses or even refuse such addresses at all.
12029 Following addressing modes are affected:
12034 The first and last case may be avoidable by explicitly coding the zero in
12035 memory address, but I don't have AMD-K6 machine handy to check this
12039 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12040 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12041 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12047 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12048 this is used for to form addresses to local data when -fPIC is in
12052 darwin_local_data_pic (rtx disp
)
12054 return (GET_CODE (disp
) == UNSPEC
12055 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12058 /* Determine if a given RTX is a valid constant. We already know this
12059 satisfies CONSTANT_P. */
12062 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12064 switch (GET_CODE (x
))
12069 if (GET_CODE (x
) == PLUS
)
12071 if (!CONST_INT_P (XEXP (x
, 1)))
12076 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12079 /* Only some unspecs are valid as "constants". */
12080 if (GET_CODE (x
) == UNSPEC
)
12081 switch (XINT (x
, 1))
12084 case UNSPEC_GOTOFF
:
12085 case UNSPEC_PLTOFF
:
12086 return TARGET_64BIT
;
12088 case UNSPEC_NTPOFF
:
12089 x
= XVECEXP (x
, 0, 0);
12090 return (GET_CODE (x
) == SYMBOL_REF
12091 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12092 case UNSPEC_DTPOFF
:
12093 x
= XVECEXP (x
, 0, 0);
12094 return (GET_CODE (x
) == SYMBOL_REF
12095 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12100 /* We must have drilled down to a symbol. */
12101 if (GET_CODE (x
) == LABEL_REF
)
12103 if (GET_CODE (x
) != SYMBOL_REF
)
12108 /* TLS symbols are never valid. */
12109 if (SYMBOL_REF_TLS_MODEL (x
))
12112 /* DLLIMPORT symbols are never valid. */
12113 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12114 && SYMBOL_REF_DLLIMPORT_P (x
))
12118 /* mdynamic-no-pic */
12119 if (MACHO_DYNAMIC_NO_PIC_P
)
12120 return machopic_symbol_defined_p (x
);
12125 if (GET_MODE (x
) == TImode
12126 && x
!= CONST0_RTX (TImode
)
12132 if (!standard_sse_constant_p (x
))
12139 /* Otherwise we handle everything else in the move patterns. */
12143 /* Determine if it's legal to put X into the constant pool. This
12144 is not possible for the address of thread-local symbols, which
12145 is checked above. */
12148 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12150 /* We can always put integral constants and vectors in memory. */
12151 switch (GET_CODE (x
))
12161 return !ix86_legitimate_constant_p (mode
, x
);
12164 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12168 is_imported_p (rtx x
)
12170 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12171 || GET_CODE (x
) != SYMBOL_REF
)
12174 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
12178 /* Nonzero if the constant value X is a legitimate general operand
12179 when generating PIC code. It is given that flag_pic is on and
12180 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12183 legitimate_pic_operand_p (rtx x
)
12187 switch (GET_CODE (x
))
12190 inner
= XEXP (x
, 0);
12191 if (GET_CODE (inner
) == PLUS
12192 && CONST_INT_P (XEXP (inner
, 1)))
12193 inner
= XEXP (inner
, 0);
12195 /* Only some unspecs are valid as "constants". */
12196 if (GET_CODE (inner
) == UNSPEC
)
12197 switch (XINT (inner
, 1))
12200 case UNSPEC_GOTOFF
:
12201 case UNSPEC_PLTOFF
:
12202 return TARGET_64BIT
;
12204 x
= XVECEXP (inner
, 0, 0);
12205 return (GET_CODE (x
) == SYMBOL_REF
12206 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12207 case UNSPEC_MACHOPIC_OFFSET
:
12208 return legitimate_pic_address_disp_p (x
);
12216 return legitimate_pic_address_disp_p (x
);
12223 /* Determine if a given CONST RTX is a valid memory displacement
12227 legitimate_pic_address_disp_p (rtx disp
)
12231 /* In 64bit mode we can allow direct addresses of symbols and labels
12232 when they are not dynamic symbols. */
12235 rtx op0
= disp
, op1
;
12237 switch (GET_CODE (disp
))
12243 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12245 op0
= XEXP (XEXP (disp
, 0), 0);
12246 op1
= XEXP (XEXP (disp
, 0), 1);
12247 if (!CONST_INT_P (op1
)
12248 || INTVAL (op1
) >= 16*1024*1024
12249 || INTVAL (op1
) < -16*1024*1024)
12251 if (GET_CODE (op0
) == LABEL_REF
)
12253 if (GET_CODE (op0
) == CONST
12254 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12255 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12257 if (GET_CODE (op0
) == UNSPEC
12258 && XINT (op0
, 1) == UNSPEC_PCREL
)
12260 if (GET_CODE (op0
) != SYMBOL_REF
)
12265 /* TLS references should always be enclosed in UNSPEC.
12266 The dllimported symbol needs always to be resolved. */
12267 if (SYMBOL_REF_TLS_MODEL (op0
)
12268 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12273 if (is_imported_p (op0
))
12276 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12277 || !SYMBOL_REF_LOCAL_P (op0
))
12280 /* Function-symbols need to be resolved only for
12282 For the small-model we don't need to resolve anything
12284 if ((ix86_cmodel
!= CM_LARGE_PIC
12285 && SYMBOL_REF_FUNCTION_P (op0
))
12286 || ix86_cmodel
== CM_SMALL_PIC
)
12288 /* Non-external symbols don't need to be resolved for
12289 large, and medium-model. */
12290 if ((ix86_cmodel
== CM_LARGE_PIC
12291 || ix86_cmodel
== CM_MEDIUM_PIC
)
12292 && !SYMBOL_REF_EXTERNAL_P (op0
))
12295 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12296 && SYMBOL_REF_LOCAL_P (op0
)
12297 && ix86_cmodel
!= CM_LARGE_PIC
)
12305 if (GET_CODE (disp
) != CONST
)
12307 disp
= XEXP (disp
, 0);
12311 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12312 of GOT tables. We should not need these anyway. */
12313 if (GET_CODE (disp
) != UNSPEC
12314 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12315 && XINT (disp
, 1) != UNSPEC_GOTOFF
12316 && XINT (disp
, 1) != UNSPEC_PCREL
12317 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12320 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12321 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12327 if (GET_CODE (disp
) == PLUS
)
12329 if (!CONST_INT_P (XEXP (disp
, 1)))
12331 disp
= XEXP (disp
, 0);
12335 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12338 if (GET_CODE (disp
) != UNSPEC
)
12341 switch (XINT (disp
, 1))
12346 /* We need to check for both symbols and labels because VxWorks loads
12347 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12349 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12350 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12351 case UNSPEC_GOTOFF
:
12352 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12353 While ABI specify also 32bit relocation but we don't produce it in
12354 small PIC model at all. */
12355 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12356 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12358 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12360 case UNSPEC_GOTTPOFF
:
12361 case UNSPEC_GOTNTPOFF
:
12362 case UNSPEC_INDNTPOFF
:
12365 disp
= XVECEXP (disp
, 0, 0);
12366 return (GET_CODE (disp
) == SYMBOL_REF
12367 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12368 case UNSPEC_NTPOFF
:
12369 disp
= XVECEXP (disp
, 0, 0);
12370 return (GET_CODE (disp
) == SYMBOL_REF
12371 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12372 case UNSPEC_DTPOFF
:
12373 disp
= XVECEXP (disp
, 0, 0);
12374 return (GET_CODE (disp
) == SYMBOL_REF
12375 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12381 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12382 replace the input X, or the original X if no replacement is called for.
12383 The output parameter *WIN is 1 if the calling macro should goto WIN,
12384 0 if it should not. */
12387 ix86_legitimize_reload_address (rtx x
,
12388 enum machine_mode mode ATTRIBUTE_UNUSED
,
12389 int opnum
, int type
,
12390 int ind_levels ATTRIBUTE_UNUSED
)
12392 /* Reload can generate:
12394 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12398 This RTX is rejected from ix86_legitimate_address_p due to
12399 non-strictness of base register 97. Following this rejection,
12400 reload pushes all three components into separate registers,
12401 creating invalid memory address RTX.
12403 Following code reloads only the invalid part of the
12404 memory address RTX. */
12406 if (GET_CODE (x
) == PLUS
12407 && REG_P (XEXP (x
, 1))
12408 && GET_CODE (XEXP (x
, 0)) == PLUS
12409 && REG_P (XEXP (XEXP (x
, 0), 1)))
12412 bool something_reloaded
= false;
12414 base
= XEXP (XEXP (x
, 0), 1);
12415 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12417 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12418 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12419 opnum
, (enum reload_type
) type
);
12420 something_reloaded
= true;
12423 index
= XEXP (x
, 1);
12424 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12426 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12427 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12428 opnum
, (enum reload_type
) type
);
12429 something_reloaded
= true;
12432 gcc_assert (something_reloaded
);
12439 /* Recognizes RTL expressions that are valid memory addresses for an
12440 instruction. The MODE argument is the machine mode for the MEM
12441 expression that wants to use this address.
12443 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12444 convert common non-canonical forms to canonical form so that they will
12448 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12449 rtx addr
, bool strict
)
12451 struct ix86_address parts
;
12452 rtx base
, index
, disp
;
12453 HOST_WIDE_INT scale
;
12455 if (ix86_decompose_address (addr
, &parts
) <= 0)
12456 /* Decomposition failed. */
12460 index
= parts
.index
;
12462 scale
= parts
.scale
;
12464 /* Validate base register. */
12471 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12472 reg
= SUBREG_REG (base
);
12474 /* Base is not a register. */
12477 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12480 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12481 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12482 /* Base is not valid. */
12486 /* Validate index register. */
12493 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12494 reg
= SUBREG_REG (index
);
12496 /* Index is not a register. */
12499 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12502 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12503 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12504 /* Index is not valid. */
12508 /* Index and base should have the same mode. */
12510 && GET_MODE (base
) != GET_MODE (index
))
12513 /* Validate scale factor. */
12517 /* Scale without index. */
12520 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12521 /* Scale is not a valid multiplier. */
12525 /* Validate displacement. */
12528 if (GET_CODE (disp
) == CONST
12529 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12530 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12531 switch (XINT (XEXP (disp
, 0), 1))
12533 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12534 used. While ABI specify also 32bit relocations, we don't produce
12535 them at all and use IP relative instead. */
12537 case UNSPEC_GOTOFF
:
12538 gcc_assert (flag_pic
);
12540 goto is_legitimate_pic
;
12542 /* 64bit address unspec. */
12545 case UNSPEC_GOTPCREL
:
12547 gcc_assert (flag_pic
);
12548 goto is_legitimate_pic
;
12550 case UNSPEC_GOTTPOFF
:
12551 case UNSPEC_GOTNTPOFF
:
12552 case UNSPEC_INDNTPOFF
:
12553 case UNSPEC_NTPOFF
:
12554 case UNSPEC_DTPOFF
:
12557 case UNSPEC_STACK_CHECK
:
12558 gcc_assert (flag_split_stack
);
12562 /* Invalid address unspec. */
12566 else if (SYMBOLIC_CONST (disp
)
12570 && MACHOPIC_INDIRECT
12571 && !machopic_operand_p (disp
)
12577 if (TARGET_64BIT
&& (index
|| base
))
12579 /* foo@dtpoff(%rX) is ok. */
12580 if (GET_CODE (disp
) != CONST
12581 || GET_CODE (XEXP (disp
, 0)) != PLUS
12582 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12583 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12584 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12585 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12586 /* Non-constant pic memory reference. */
12589 else if ((!TARGET_MACHO
|| flag_pic
)
12590 && ! legitimate_pic_address_disp_p (disp
))
12591 /* Displacement is an invalid pic construct. */
12594 else if (MACHO_DYNAMIC_NO_PIC_P
12595 && !ix86_legitimate_constant_p (Pmode
, disp
))
12596 /* displacment must be referenced via non_lazy_pointer */
12600 /* This code used to verify that a symbolic pic displacement
12601 includes the pic_offset_table_rtx register.
12603 While this is good idea, unfortunately these constructs may
12604 be created by "adds using lea" optimization for incorrect
12613 This code is nonsensical, but results in addressing
12614 GOT table with pic_offset_table_rtx base. We can't
12615 just refuse it easily, since it gets matched by
12616 "addsi3" pattern, that later gets split to lea in the
12617 case output register differs from input. While this
12618 can be handled by separate addsi pattern for this case
12619 that never results in lea, this seems to be easier and
12620 correct fix for crash to disable this test. */
12622 else if (GET_CODE (disp
) != LABEL_REF
12623 && !CONST_INT_P (disp
)
12624 && (GET_CODE (disp
) != CONST
12625 || !ix86_legitimate_constant_p (Pmode
, disp
))
12626 && (GET_CODE (disp
) != SYMBOL_REF
12627 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12628 /* Displacement is not constant. */
12630 else if (TARGET_64BIT
12631 && !x86_64_immediate_operand (disp
, VOIDmode
))
12632 /* Displacement is out of range. */
12636 /* Everything looks valid. */
12640 /* Determine if a given RTX is a valid constant address. */
12643 constant_address_p (rtx x
)
12645 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12648 /* Return a unique alias set for the GOT. */
12650 static alias_set_type
12651 ix86_GOT_alias_set (void)
12653 static alias_set_type set
= -1;
12655 set
= new_alias_set ();
12659 /* Return a legitimate reference for ORIG (an address) using the
12660 register REG. If REG is 0, a new pseudo is generated.
12662 There are two types of references that must be handled:
12664 1. Global data references must load the address from the GOT, via
12665 the PIC reg. An insn is emitted to do this load, and the reg is
12668 2. Static data references, constant pool addresses, and code labels
12669 compute the address as an offset from the GOT, whose base is in
12670 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12671 differentiate them from global data objects. The returned
12672 address is the PIC reg + an unspec constant.
12674 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12675 reg also appears in the address. */
12678 legitimize_pic_address (rtx orig
, rtx reg
)
12681 rtx new_rtx
= orig
;
12684 if (TARGET_MACHO
&& !TARGET_64BIT
)
12687 reg
= gen_reg_rtx (Pmode
);
12688 /* Use the generic Mach-O PIC machinery. */
12689 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12693 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12695 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12700 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12702 else if (TARGET_64BIT
&& !TARGET_PECOFF
12703 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12706 /* This symbol may be referenced via a displacement from the PIC
12707 base address (@GOTOFF). */
12709 if (reload_in_progress
)
12710 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12711 if (GET_CODE (addr
) == CONST
)
12712 addr
= XEXP (addr
, 0);
12713 if (GET_CODE (addr
) == PLUS
)
12715 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12717 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12720 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12721 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12723 tmpreg
= gen_reg_rtx (Pmode
);
12726 emit_move_insn (tmpreg
, new_rtx
);
12730 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12731 tmpreg
, 1, OPTAB_DIRECT
);
12735 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12737 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
12739 /* This symbol may be referenced via a displacement from the PIC
12740 base address (@GOTOFF). */
12742 if (reload_in_progress
)
12743 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12744 if (GET_CODE (addr
) == CONST
)
12745 addr
= XEXP (addr
, 0);
12746 if (GET_CODE (addr
) == PLUS
)
12748 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12750 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12753 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12754 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12755 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12759 emit_move_insn (reg
, new_rtx
);
12763 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12764 /* We can't use @GOTOFF for text labels on VxWorks;
12765 see gotoff_operand. */
12766 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12768 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12772 /* For x64 PE-COFF there is no GOT table. So we use address
12774 if (TARGET_64BIT
&& TARGET_PECOFF
)
12776 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12777 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12780 reg
= gen_reg_rtx (Pmode
);
12781 emit_move_insn (reg
, new_rtx
);
12784 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12786 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12787 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12788 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12789 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12792 reg
= gen_reg_rtx (Pmode
);
12793 /* Use directly gen_movsi, otherwise the address is loaded
12794 into register for CSE. We don't want to CSE this addresses,
12795 instead we CSE addresses from the GOT table, so skip this. */
12796 emit_insn (gen_movsi (reg
, new_rtx
));
12801 /* This symbol must be referenced via a load from the
12802 Global Offset Table (@GOT). */
12804 if (reload_in_progress
)
12805 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12806 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12807 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12809 new_rtx
= force_reg (Pmode
, new_rtx
);
12810 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12811 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12812 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12815 reg
= gen_reg_rtx (Pmode
);
12816 emit_move_insn (reg
, new_rtx
);
12822 if (CONST_INT_P (addr
)
12823 && !x86_64_immediate_operand (addr
, VOIDmode
))
12827 emit_move_insn (reg
, addr
);
12831 new_rtx
= force_reg (Pmode
, addr
);
12833 else if (GET_CODE (addr
) == CONST
)
12835 addr
= XEXP (addr
, 0);
12837 /* We must match stuff we generate before. Assume the only
12838 unspecs that can get here are ours. Not that we could do
12839 anything with them anyway.... */
12840 if (GET_CODE (addr
) == UNSPEC
12841 || (GET_CODE (addr
) == PLUS
12842 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12844 gcc_assert (GET_CODE (addr
) == PLUS
);
12846 if (GET_CODE (addr
) == PLUS
)
12848 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12850 /* Check first to see if this is a constant offset from a @GOTOFF
12851 symbol reference. */
12852 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
12853 && CONST_INT_P (op1
))
12857 if (reload_in_progress
)
12858 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12859 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12861 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12862 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12863 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12867 emit_move_insn (reg
, new_rtx
);
12873 if (INTVAL (op1
) < -16*1024*1024
12874 || INTVAL (op1
) >= 16*1024*1024)
12876 if (!x86_64_immediate_operand (op1
, Pmode
))
12877 op1
= force_reg (Pmode
, op1
);
12878 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12884 rtx base
= legitimize_pic_address (op0
, reg
);
12885 enum machine_mode mode
= GET_MODE (base
);
12887 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12889 if (CONST_INT_P (new_rtx
))
12891 if (INTVAL (new_rtx
) < -16*1024*1024
12892 || INTVAL (new_rtx
) >= 16*1024*1024)
12894 if (!x86_64_immediate_operand (new_rtx
, mode
))
12895 new_rtx
= force_reg (mode
, new_rtx
);
12897 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12900 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12904 if (GET_CODE (new_rtx
) == PLUS
12905 && CONSTANT_P (XEXP (new_rtx
, 1)))
12907 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12908 new_rtx
= XEXP (new_rtx
, 1);
12910 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12918 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12921 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12923 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12925 if (GET_MODE (tp
) != tp_mode
)
12927 gcc_assert (GET_MODE (tp
) == SImode
);
12928 gcc_assert (tp_mode
== DImode
);
12930 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12934 tp
= copy_to_mode_reg (tp_mode
, tp
);
12939 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12941 static GTY(()) rtx ix86_tls_symbol
;
12944 ix86_tls_get_addr (void)
12946 if (!ix86_tls_symbol
)
12949 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12950 ? "___tls_get_addr" : "__tls_get_addr");
12952 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12955 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
12957 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
12959 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
12960 gen_rtx_CONST (Pmode
, unspec
));
12963 return ix86_tls_symbol
;
12966 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12968 static GTY(()) rtx ix86_tls_module_base_symbol
;
12971 ix86_tls_module_base (void)
12973 if (!ix86_tls_module_base_symbol
)
12975 ix86_tls_module_base_symbol
12976 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12978 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12979 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12982 return ix86_tls_module_base_symbol
;
12985 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12986 false if we expect this to be used for a memory address and true if
12987 we expect to load the address into a register. */
12990 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12992 rtx dest
, base
, off
;
12993 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12994 enum machine_mode tp_mode
= Pmode
;
12999 case TLS_MODEL_GLOBAL_DYNAMIC
:
13000 dest
= gen_reg_rtx (Pmode
);
13004 if (flag_pic
&& !TARGET_PECOFF
)
13005 pic
= pic_offset_table_rtx
;
13008 pic
= gen_reg_rtx (Pmode
);
13009 emit_insn (gen_set_got (pic
));
13013 if (TARGET_GNU2_TLS
)
13016 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13018 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13020 tp
= get_thread_pointer (Pmode
, true);
13021 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13023 if (GET_MODE (x
) != Pmode
)
13024 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13026 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13030 rtx caddr
= ix86_tls_get_addr ();
13034 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13039 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13040 insns
= get_insns ();
13043 if (GET_MODE (x
) != Pmode
)
13044 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13046 RTL_CONST_CALL_P (insns
) = 1;
13047 emit_libcall_block (insns
, dest
, rax
, x
);
13050 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13054 case TLS_MODEL_LOCAL_DYNAMIC
:
13055 base
= gen_reg_rtx (Pmode
);
13060 pic
= pic_offset_table_rtx
;
13063 pic
= gen_reg_rtx (Pmode
);
13064 emit_insn (gen_set_got (pic
));
13068 if (TARGET_GNU2_TLS
)
13070 rtx tmp
= ix86_tls_module_base ();
13073 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13075 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13077 tp
= get_thread_pointer (Pmode
, true);
13078 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13079 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13083 rtx caddr
= ix86_tls_get_addr ();
13087 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13092 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13093 insns
= get_insns ();
13096 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13097 share the LD_BASE result with other LD model accesses. */
13098 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13099 UNSPEC_TLS_LD_BASE
);
13101 RTL_CONST_CALL_P (insns
) = 1;
13102 emit_libcall_block (insns
, base
, rax
, eqv
);
13105 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13108 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13109 off
= gen_rtx_CONST (Pmode
, off
);
13111 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13113 if (TARGET_GNU2_TLS
)
13115 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13117 if (GET_MODE (x
) != Pmode
)
13118 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13120 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13124 case TLS_MODEL_INITIAL_EXEC
:
13127 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13129 /* The Sun linker took the AMD64 TLS spec literally
13130 and can only handle %rax as destination of the
13131 initial executable code sequence. */
13133 dest
= gen_reg_rtx (DImode
);
13134 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13138 /* Generate DImode references to avoid %fs:(%reg32)
13139 problems and linker IE->LE relaxation bug. */
13142 type
= UNSPEC_GOTNTPOFF
;
13146 if (reload_in_progress
)
13147 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13148 pic
= pic_offset_table_rtx
;
13149 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13151 else if (!TARGET_ANY_GNU_TLS
)
13153 pic
= gen_reg_rtx (Pmode
);
13154 emit_insn (gen_set_got (pic
));
13155 type
= UNSPEC_GOTTPOFF
;
13160 type
= UNSPEC_INDNTPOFF
;
13163 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13164 off
= gen_rtx_CONST (tp_mode
, off
);
13166 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13167 off
= gen_const_mem (tp_mode
, off
);
13168 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13170 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13172 base
= get_thread_pointer (tp_mode
,
13173 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13174 off
= force_reg (tp_mode
, off
);
13175 return gen_rtx_PLUS (tp_mode
, base
, off
);
13179 base
= get_thread_pointer (Pmode
, true);
13180 dest
= gen_reg_rtx (Pmode
);
13181 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13185 case TLS_MODEL_LOCAL_EXEC
:
13186 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13187 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13188 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13189 off
= gen_rtx_CONST (Pmode
, off
);
13191 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13193 base
= get_thread_pointer (Pmode
,
13194 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13195 return gen_rtx_PLUS (Pmode
, base
, off
);
13199 base
= get_thread_pointer (Pmode
, true);
13200 dest
= gen_reg_rtx (Pmode
);
13201 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13206 gcc_unreachable ();
13212 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13213 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13214 unique refptr-DECL symbol corresponding to symbol DECL. */
13216 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13217 htab_t dllimport_map
;
13220 get_dllimport_decl (tree decl
, bool beimport
)
13222 struct tree_map
*h
, in
;
13225 const char *prefix
;
13226 size_t namelen
, prefixlen
;
13231 if (!dllimport_map
)
13232 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13234 in
.hash
= htab_hash_pointer (decl
);
13235 in
.base
.from
= decl
;
13236 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13237 h
= (struct tree_map
*) *loc
;
13241 *loc
= h
= ggc_alloc_tree_map ();
13243 h
->base
.from
= decl
;
13244 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13245 VAR_DECL
, NULL
, ptr_type_node
);
13246 DECL_ARTIFICIAL (to
) = 1;
13247 DECL_IGNORED_P (to
) = 1;
13248 DECL_EXTERNAL (to
) = 1;
13249 TREE_READONLY (to
) = 1;
13251 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13252 name
= targetm
.strip_name_encoding (name
);
13254 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13255 ? "*__imp_" : "*__imp__";
13257 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13258 namelen
= strlen (name
);
13259 prefixlen
= strlen (prefix
);
13260 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13261 memcpy (imp_name
, prefix
, prefixlen
);
13262 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13264 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13265 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13266 SET_SYMBOL_REF_DECL (rtl
, to
);
13267 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13270 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13271 #ifdef SUB_TARGET_RECORD_STUB
13272 SUB_TARGET_RECORD_STUB (name
);
13276 rtl
= gen_const_mem (Pmode
, rtl
);
13277 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13279 SET_DECL_RTL (to
, rtl
);
13280 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13285 /* Expand SYMBOL into its corresponding far-addresse symbol.
13286 WANT_REG is true if we require the result be a register. */
13289 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13294 gcc_assert (SYMBOL_REF_DECL (symbol
));
13295 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13297 x
= DECL_RTL (imp_decl
);
13299 x
= force_reg (Pmode
, x
);
13303 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13304 true if we require the result be a register. */
13307 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13312 gcc_assert (SYMBOL_REF_DECL (symbol
));
13313 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13315 x
= DECL_RTL (imp_decl
);
13317 x
= force_reg (Pmode
, x
);
13321 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13322 is true if we require the result be a register. */
13325 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13327 if (!TARGET_PECOFF
)
13330 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13332 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13333 return legitimize_dllimport_symbol (addr
, inreg
);
13334 if (GET_CODE (addr
) == CONST
13335 && GET_CODE (XEXP (addr
, 0)) == PLUS
13336 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13337 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13339 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13340 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13344 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13346 if (GET_CODE (addr
) == SYMBOL_REF
13347 && !is_imported_p (addr
)
13348 && SYMBOL_REF_EXTERNAL_P (addr
)
13349 && SYMBOL_REF_DECL (addr
))
13350 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13352 if (GET_CODE (addr
) == CONST
13353 && GET_CODE (XEXP (addr
, 0)) == PLUS
13354 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13355 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13356 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13357 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13359 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13360 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13365 /* Try machine-dependent ways of modifying an illegitimate address
13366 to be legitimate. If we find one, return the new, valid address.
13367 This macro is used in only one place: `memory_address' in explow.c.
13369 OLDX is the address as it was before break_out_memory_refs was called.
13370 In some cases it is useful to look at this to decide what needs to be done.
13372 It is always safe for this macro to do nothing. It exists to recognize
13373 opportunities to optimize the output.
13375 For the 80386, we handle X+REG by loading X into a register R and
13376 using R+REG. R will go in a general reg and indexing will be used.
13377 However, if REG is a broken-out memory address or multiplication,
13378 nothing needs to be done because REG can certainly go in a general reg.
13380 When -fpic is used, special handling is needed for symbolic references.
13381 See comments by legitimize_pic_address in i386.c for details. */
13384 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13385 enum machine_mode mode
)
13390 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13392 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13393 if (GET_CODE (x
) == CONST
13394 && GET_CODE (XEXP (x
, 0)) == PLUS
13395 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13396 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13398 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13399 (enum tls_model
) log
, false);
13400 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13403 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13405 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13410 if (flag_pic
&& SYMBOLIC_CONST (x
))
13411 return legitimize_pic_address (x
, 0);
13414 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13415 return machopic_indirect_data_reference (x
, 0);
13418 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13419 if (GET_CODE (x
) == ASHIFT
13420 && CONST_INT_P (XEXP (x
, 1))
13421 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13424 log
= INTVAL (XEXP (x
, 1));
13425 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13426 GEN_INT (1 << log
));
13429 if (GET_CODE (x
) == PLUS
)
13431 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13433 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13434 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13435 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13438 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13439 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13440 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13441 GEN_INT (1 << log
));
13444 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13445 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13446 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13449 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13450 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13451 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13452 GEN_INT (1 << log
));
13455 /* Put multiply first if it isn't already. */
13456 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13458 rtx tmp
= XEXP (x
, 0);
13459 XEXP (x
, 0) = XEXP (x
, 1);
13464 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13465 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13466 created by virtual register instantiation, register elimination, and
13467 similar optimizations. */
13468 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13471 x
= gen_rtx_PLUS (Pmode
,
13472 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13473 XEXP (XEXP (x
, 1), 0)),
13474 XEXP (XEXP (x
, 1), 1));
13478 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13479 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13480 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13481 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13482 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13483 && CONSTANT_P (XEXP (x
, 1)))
13486 rtx other
= NULL_RTX
;
13488 if (CONST_INT_P (XEXP (x
, 1)))
13490 constant
= XEXP (x
, 1);
13491 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13493 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13495 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13496 other
= XEXP (x
, 1);
13504 x
= gen_rtx_PLUS (Pmode
,
13505 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13506 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13507 plus_constant (Pmode
, other
,
13508 INTVAL (constant
)));
13512 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13515 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13518 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13521 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13524 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13528 && REG_P (XEXP (x
, 1))
13529 && REG_P (XEXP (x
, 0)))
13532 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13535 x
= legitimize_pic_address (x
, 0);
13538 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13541 if (REG_P (XEXP (x
, 0)))
13543 rtx temp
= gen_reg_rtx (Pmode
);
13544 rtx val
= force_operand (XEXP (x
, 1), temp
);
13547 val
= convert_to_mode (Pmode
, val
, 1);
13548 emit_move_insn (temp
, val
);
13551 XEXP (x
, 1) = temp
;
13555 else if (REG_P (XEXP (x
, 1)))
13557 rtx temp
= gen_reg_rtx (Pmode
);
13558 rtx val
= force_operand (XEXP (x
, 0), temp
);
13561 val
= convert_to_mode (Pmode
, val
, 1);
13562 emit_move_insn (temp
, val
);
13565 XEXP (x
, 0) = temp
;
13573 /* Print an integer constant expression in assembler syntax. Addition
13574 and subtraction are the only arithmetic that may appear in these
13575 expressions. FILE is the stdio stream to write to, X is the rtx, and
13576 CODE is the operand print code from the output string. */
13579 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13583 switch (GET_CODE (x
))
13586 gcc_assert (flag_pic
);
13591 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13592 output_addr_const (file
, x
);
13595 const char *name
= XSTR (x
, 0);
13597 /* Mark the decl as referenced so that cgraph will
13598 output the function. */
13599 if (SYMBOL_REF_DECL (x
))
13600 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13603 if (MACHOPIC_INDIRECT
13604 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13605 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13607 assemble_name (file
, name
);
13609 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13610 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13611 fputs ("@PLT", file
);
13618 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13619 assemble_name (asm_out_file
, buf
);
13623 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13627 /* This used to output parentheses around the expression,
13628 but that does not work on the 386 (either ATT or BSD assembler). */
13629 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13633 if (GET_MODE (x
) == VOIDmode
)
13635 /* We can use %d if the number is <32 bits and positive. */
13636 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13637 fprintf (file
, "0x%lx%08lx",
13638 (unsigned long) CONST_DOUBLE_HIGH (x
),
13639 (unsigned long) CONST_DOUBLE_LOW (x
));
13641 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13644 /* We can't handle floating point constants;
13645 TARGET_PRINT_OPERAND must handle them. */
13646 output_operand_lossage ("floating constant misused");
13650 /* Some assemblers need integer constants to appear first. */
13651 if (CONST_INT_P (XEXP (x
, 0)))
13653 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13655 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13659 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13660 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13662 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13668 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13669 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13671 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13673 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13677 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13679 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13684 gcc_assert (XVECLEN (x
, 0) == 1);
13685 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13686 switch (XINT (x
, 1))
13689 fputs ("@GOT", file
);
13691 case UNSPEC_GOTOFF
:
13692 fputs ("@GOTOFF", file
);
13694 case UNSPEC_PLTOFF
:
13695 fputs ("@PLTOFF", file
);
13698 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13699 "(%rip)" : "[rip]", file
);
13701 case UNSPEC_GOTPCREL
:
13702 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13703 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13705 case UNSPEC_GOTTPOFF
:
13706 /* FIXME: This might be @TPOFF in Sun ld too. */
13707 fputs ("@gottpoff", file
);
13710 fputs ("@tpoff", file
);
13712 case UNSPEC_NTPOFF
:
13714 fputs ("@tpoff", file
);
13716 fputs ("@ntpoff", file
);
13718 case UNSPEC_DTPOFF
:
13719 fputs ("@dtpoff", file
);
13721 case UNSPEC_GOTNTPOFF
:
13723 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13724 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13726 fputs ("@gotntpoff", file
);
13728 case UNSPEC_INDNTPOFF
:
13729 fputs ("@indntpoff", file
);
13732 case UNSPEC_MACHOPIC_OFFSET
:
13734 machopic_output_function_base_name (file
);
13738 output_operand_lossage ("invalid UNSPEC as operand");
13744 output_operand_lossage ("invalid expression as operand");
13748 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13749 We need to emit DTP-relative relocations. */
13751 static void ATTRIBUTE_UNUSED
13752 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13754 fputs (ASM_LONG
, file
);
13755 output_addr_const (file
, x
);
13756 fputs ("@dtpoff", file
);
13762 fputs (", 0", file
);
13765 gcc_unreachable ();
13769 /* Return true if X is a representation of the PIC register. This copes
13770 with calls from ix86_find_base_term, where the register might have
13771 been replaced by a cselib value. */
13774 ix86_pic_register_p (rtx x
)
13776 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13777 return (pic_offset_table_rtx
13778 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13780 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13783 /* Helper function for ix86_delegitimize_address.
13784 Attempt to delegitimize TLS local-exec accesses. */
13787 ix86_delegitimize_tls_address (rtx orig_x
)
13789 rtx x
= orig_x
, unspec
;
13790 struct ix86_address addr
;
13792 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13796 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13798 if (ix86_decompose_address (x
, &addr
) == 0
13799 || addr
.seg
!= DEFAULT_TLS_SEG_REG
13800 || addr
.disp
== NULL_RTX
13801 || GET_CODE (addr
.disp
) != CONST
)
13803 unspec
= XEXP (addr
.disp
, 0);
13804 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13805 unspec
= XEXP (unspec
, 0);
13806 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13808 x
= XVECEXP (unspec
, 0, 0);
13809 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13810 if (unspec
!= XEXP (addr
.disp
, 0))
13811 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13814 rtx idx
= addr
.index
;
13815 if (addr
.scale
!= 1)
13816 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13817 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13820 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13821 if (MEM_P (orig_x
))
13822 x
= replace_equiv_address_nv (orig_x
, x
);
13826 /* In the name of slightly smaller debug output, and to cater to
13827 general assembler lossage, recognize PIC+GOTOFF and turn it back
13828 into a direct symbol reference.
13830 On Darwin, this is necessary to avoid a crash, because Darwin
13831 has a different PIC label for each routine but the DWARF debugging
13832 information is not associated with any particular routine, so it's
13833 necessary to remove references to the PIC label from RTL stored by
13834 the DWARF output code. */
13837 ix86_delegitimize_address (rtx x
)
13839 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13840 /* addend is NULL or some rtx if x is something+GOTOFF where
13841 something doesn't include the PIC register. */
13842 rtx addend
= NULL_RTX
;
13843 /* reg_addend is NULL or a multiple of some register. */
13844 rtx reg_addend
= NULL_RTX
;
13845 /* const_addend is NULL or a const_int. */
13846 rtx const_addend
= NULL_RTX
;
13847 /* This is the result, or NULL. */
13848 rtx result
= NULL_RTX
;
13857 if (GET_CODE (x
) == CONST
13858 && GET_CODE (XEXP (x
, 0)) == PLUS
13859 && GET_MODE (XEXP (x
, 0)) == Pmode
13860 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13861 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13862 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13864 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13865 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13866 if (MEM_P (orig_x
))
13867 x
= replace_equiv_address_nv (orig_x
, x
);
13871 if (GET_CODE (x
) == CONST
13872 && GET_CODE (XEXP (x
, 0)) == UNSPEC
13873 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
13874 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
13875 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
13877 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13878 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13880 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13888 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
13889 return ix86_delegitimize_tls_address (orig_x
);
13891 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
13892 and -mcmodel=medium -fpic. */
13895 if (GET_CODE (x
) != PLUS
13896 || GET_CODE (XEXP (x
, 1)) != CONST
)
13897 return ix86_delegitimize_tls_address (orig_x
);
13899 if (ix86_pic_register_p (XEXP (x
, 0)))
13900 /* %ebx + GOT/GOTOFF */
13902 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13904 /* %ebx + %reg * scale + GOT/GOTOFF */
13905 reg_addend
= XEXP (x
, 0);
13906 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13907 reg_addend
= XEXP (reg_addend
, 1);
13908 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13909 reg_addend
= XEXP (reg_addend
, 0);
13912 reg_addend
= NULL_RTX
;
13913 addend
= XEXP (x
, 0);
13917 addend
= XEXP (x
, 0);
13919 x
= XEXP (XEXP (x
, 1), 0);
13920 if (GET_CODE (x
) == PLUS
13921 && CONST_INT_P (XEXP (x
, 1)))
13923 const_addend
= XEXP (x
, 1);
13927 if (GET_CODE (x
) == UNSPEC
13928 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13929 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
13930 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
13931 && !MEM_P (orig_x
) && !addend
)))
13932 result
= XVECEXP (x
, 0, 0);
13934 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
13935 && !MEM_P (orig_x
))
13936 result
= XVECEXP (x
, 0, 0);
13939 return ix86_delegitimize_tls_address (orig_x
);
13942 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13944 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13947 /* If the rest of original X doesn't involve the PIC register, add
13948 addend and subtract pic_offset_table_rtx. This can happen e.g.
13950 leal (%ebx, %ecx, 4), %ecx
13952 movl foo@GOTOFF(%ecx), %edx
13953 in which case we return (%ecx - %ebx) + foo. */
13954 if (pic_offset_table_rtx
)
13955 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13956 pic_offset_table_rtx
),
13961 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13963 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13964 if (result
== NULL_RTX
)
13970 /* If X is a machine specific address (i.e. a symbol or label being
13971 referenced as a displacement from the GOT implemented using an
13972 UNSPEC), then return the base term. Otherwise return X. */
13975 ix86_find_base_term (rtx x
)
13981 if (GET_CODE (x
) != CONST
)
13983 term
= XEXP (x
, 0);
13984 if (GET_CODE (term
) == PLUS
13985 && (CONST_INT_P (XEXP (term
, 1))
13986 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13987 term
= XEXP (term
, 0);
13988 if (GET_CODE (term
) != UNSPEC
13989 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13990 && XINT (term
, 1) != UNSPEC_PCREL
))
13993 return XVECEXP (term
, 0, 0);
13996 return ix86_delegitimize_address (x
);
14000 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14001 bool fp
, FILE *file
)
14003 const char *suffix
;
14005 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14007 code
= ix86_fp_compare_code_to_integer (code
);
14011 code
= reverse_condition (code
);
14062 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14066 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14067 Those same assemblers have the same but opposite lossage on cmov. */
14068 if (mode
== CCmode
)
14069 suffix
= fp
? "nbe" : "a";
14070 else if (mode
== CCCmode
)
14073 gcc_unreachable ();
14089 gcc_unreachable ();
14093 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14110 gcc_unreachable ();
14114 /* ??? As above. */
14115 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14116 suffix
= fp
? "nb" : "ae";
14119 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14123 /* ??? As above. */
14124 if (mode
== CCmode
)
14126 else if (mode
== CCCmode
)
14127 suffix
= fp
? "nb" : "ae";
14129 gcc_unreachable ();
14132 suffix
= fp
? "u" : "p";
14135 suffix
= fp
? "nu" : "np";
14138 gcc_unreachable ();
14140 fputs (suffix
, file
);
14143 /* Print the name of register X to FILE based on its machine mode and number.
14144 If CODE is 'w', pretend the mode is HImode.
14145 If CODE is 'b', pretend the mode is QImode.
14146 If CODE is 'k', pretend the mode is SImode.
14147 If CODE is 'q', pretend the mode is DImode.
14148 If CODE is 'x', pretend the mode is V4SFmode.
14149 If CODE is 't', pretend the mode is V8SFmode.
14150 If CODE is 'g', pretend the mode is V16SFmode.
14151 If CODE is 'h', pretend the reg is the 'high' byte register.
14152 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14153 If CODE is 'd', duplicate the operand for AVX instruction.
14157 print_reg (rtx x
, int code
, FILE *file
)
14160 unsigned int regno
;
14161 bool duplicated
= code
== 'd' && TARGET_AVX
;
14163 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14168 gcc_assert (TARGET_64BIT
);
14169 fputs ("rip", file
);
14173 regno
= true_regnum (x
);
14174 gcc_assert (regno
!= ARG_POINTER_REGNUM
14175 && regno
!= FRAME_POINTER_REGNUM
14176 && regno
!= FLAGS_REG
14177 && regno
!= FPSR_REG
14178 && regno
!= FPCR_REG
);
14180 if (code
== 'w' || MMX_REG_P (x
))
14182 else if (code
== 'b')
14184 else if (code
== 'k')
14186 else if (code
== 'q')
14188 else if (code
== 'y')
14190 else if (code
== 'h')
14192 else if (code
== 'x')
14194 else if (code
== 't')
14196 else if (code
== 'g')
14199 code
= GET_MODE_SIZE (GET_MODE (x
));
14201 /* Irritatingly, AMD extended registers use different naming convention
14202 from the normal registers: "r%d[bwd]" */
14203 if (REX_INT_REGNO_P (regno
))
14205 gcc_assert (TARGET_64BIT
);
14207 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14211 error ("extended registers have no high halves");
14226 error ("unsupported operand size for extended register");
14236 if (STACK_TOP_P (x
))
14245 if (! ANY_FP_REG_P (x
))
14246 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14251 reg
= hi_reg_name
[regno
];
14254 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14256 reg
= qi_reg_name
[regno
];
14259 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14261 reg
= qi_high_reg_name
[regno
];
14266 gcc_assert (!duplicated
);
14268 fputs (hi_reg_name
[regno
] + 1, file
);
14274 gcc_assert (!duplicated
);
14276 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14281 gcc_unreachable ();
14287 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14288 fprintf (file
, ", %%%s", reg
);
14290 fprintf (file
, ", %s", reg
);
14294 /* Locate some local-dynamic symbol still in use by this function
14295 so that we can print its name in some tls_local_dynamic_base
14299 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14303 if (GET_CODE (x
) == SYMBOL_REF
14304 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14306 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14313 static const char *
14314 get_some_local_dynamic_name (void)
14318 if (cfun
->machine
->some_ld_name
)
14319 return cfun
->machine
->some_ld_name
;
14321 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14322 if (NONDEBUG_INSN_P (insn
)
14323 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14324 return cfun
->machine
->some_ld_name
;
14329 /* Meaning of CODE:
14330 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14331 C -- print opcode suffix for set/cmov insn.
14332 c -- like C, but print reversed condition
14333 F,f -- likewise, but for floating-point.
14334 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14336 R -- print the prefix for register names.
14337 z -- print the opcode suffix for the size of the current operand.
14338 Z -- likewise, with special suffixes for x87 instructions.
14339 * -- print a star (in certain assembler syntax)
14340 A -- print an absolute memory reference.
14341 E -- print address with DImode register names if TARGET_64BIT.
14342 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14343 s -- print a shift double count, followed by the assemblers argument
14345 b -- print the QImode name of the register for the indicated operand.
14346 %b0 would print %al if operands[0] is reg 0.
14347 w -- likewise, print the HImode name of the register.
14348 k -- likewise, print the SImode name of the register.
14349 q -- likewise, print the DImode name of the register.
14350 x -- likewise, print the V4SFmode name of the register.
14351 t -- likewise, print the V8SFmode name of the register.
14352 g -- likewise, print the V16SFmode name of the register.
14353 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14354 y -- print "st(0)" instead of "st" as a register.
14355 d -- print duplicated register operand for AVX instruction.
14356 D -- print condition for SSE cmp instruction.
14357 P -- if PIC, print an @PLT suffix.
14358 p -- print raw symbol name.
14359 X -- don't print any sort of PIC '@' suffix for a symbol.
14360 & -- print some in-use local-dynamic symbol name.
14361 H -- print a memory address offset by 8; used for sse high-parts
14362 Y -- print condition for XOP pcom* instruction.
14363 + -- print a branch hint as 'cs' or 'ds' prefix
14364 ; -- print a semicolon (after prefixes due to bug in older gas).
14365 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14366 @ -- print a segment register of thread base pointer load
14367 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14371 ix86_print_operand (FILE *file
, rtx x
, int code
)
14378 switch (ASSEMBLER_DIALECT
)
14385 /* Intel syntax. For absolute addresses, registers should not
14386 be surrounded by braces. */
14390 ix86_print_operand (file
, x
, 0);
14397 gcc_unreachable ();
14400 ix86_print_operand (file
, x
, 0);
14404 /* Wrap address in an UNSPEC to declare special handling. */
14406 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14408 output_address (x
);
14412 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14417 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14422 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14427 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14432 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14437 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14442 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14443 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14446 switch (GET_MODE_SIZE (GET_MODE (x
)))
14461 output_operand_lossage
14462 ("invalid operand size for operand code 'O'");
14471 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14473 /* Opcodes don't get size suffixes if using Intel opcodes. */
14474 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14477 switch (GET_MODE_SIZE (GET_MODE (x
)))
14496 output_operand_lossage
14497 ("invalid operand size for operand code 'z'");
14502 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14504 (0, "non-integer operand used with operand code 'z'");
14508 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14509 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14512 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14514 switch (GET_MODE_SIZE (GET_MODE (x
)))
14517 #ifdef HAVE_AS_IX86_FILDS
14527 #ifdef HAVE_AS_IX86_FILDQ
14530 fputs ("ll", file
);
14538 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14540 /* 387 opcodes don't get size suffixes
14541 if the operands are registers. */
14542 if (STACK_REG_P (x
))
14545 switch (GET_MODE_SIZE (GET_MODE (x
)))
14566 output_operand_lossage
14567 ("invalid operand type used with operand code 'Z'");
14571 output_operand_lossage
14572 ("invalid operand size for operand code 'Z'");
14591 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14593 ix86_print_operand (file
, x
, 0);
14594 fputs (", ", file
);
14599 switch (GET_CODE (x
))
14602 fputs ("neq", file
);
14605 fputs ("eq", file
);
14609 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14613 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14617 fputs ("le", file
);
14621 fputs ("lt", file
);
14624 fputs ("unord", file
);
14627 fputs ("ord", file
);
14630 fputs ("ueq", file
);
14633 fputs ("nlt", file
);
14636 fputs ("nle", file
);
14639 fputs ("ule", file
);
14642 fputs ("ult", file
);
14645 fputs ("une", file
);
14648 output_operand_lossage ("operand is not a condition code, "
14649 "invalid operand code 'Y'");
14655 /* Little bit of braindamage here. The SSE compare instructions
14656 does use completely different names for the comparisons that the
14657 fp conditional moves. */
14658 switch (GET_CODE (x
))
14663 fputs ("eq_us", file
);
14667 fputs ("eq", file
);
14672 fputs ("nge", file
);
14676 fputs ("lt", file
);
14681 fputs ("ngt", file
);
14685 fputs ("le", file
);
14688 fputs ("unord", file
);
14693 fputs ("neq_oq", file
);
14697 fputs ("neq", file
);
14702 fputs ("ge", file
);
14706 fputs ("nlt", file
);
14711 fputs ("gt", file
);
14715 fputs ("nle", file
);
14718 fputs ("ord", file
);
14721 output_operand_lossage ("operand is not a condition code, "
14722 "invalid operand code 'D'");
14729 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14730 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14736 if (!COMPARISON_P (x
))
14738 output_operand_lossage ("operand is not a condition code, "
14739 "invalid operand code '%c'", code
);
14742 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14743 code
== 'c' || code
== 'f',
14744 code
== 'F' || code
== 'f',
14749 if (!offsettable_memref_p (x
))
14751 output_operand_lossage ("operand is not an offsettable memory "
14752 "reference, invalid operand code 'H'");
14755 /* It doesn't actually matter what mode we use here, as we're
14756 only going to use this for printing. */
14757 x
= adjust_address_nv (x
, DImode
, 8);
14758 /* Output 'qword ptr' for intel assembler dialect. */
14759 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14764 gcc_assert (CONST_INT_P (x
));
14766 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14767 #ifdef HAVE_AS_IX86_HLE
14768 fputs ("xacquire ", file
);
14770 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14772 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14773 #ifdef HAVE_AS_IX86_HLE
14774 fputs ("xrelease ", file
);
14776 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14778 /* We do not want to print value of the operand. */
14782 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14788 const char *name
= get_some_local_dynamic_name ();
14790 output_operand_lossage ("'%%&' used without any "
14791 "local dynamic TLS references");
14793 assemble_name (file
, name
);
14802 || optimize_function_for_size_p (cfun
)
14803 || !TARGET_BRANCH_PREDICTION_HINTS
)
14806 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14809 int pred_val
= INTVAL (XEXP (x
, 0));
14811 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14812 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14814 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14816 = final_forward_branch_p (current_output_insn
) == 0;
14818 /* Emit hints only in the case default branch prediction
14819 heuristics would fail. */
14820 if (taken
!= cputaken
)
14822 /* We use 3e (DS) prefix for taken branches and
14823 2e (CS) prefix for not taken branches. */
14825 fputs ("ds ; ", file
);
14827 fputs ("cs ; ", file
);
14835 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14841 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14844 /* The kernel uses a different segment register for performance
14845 reasons; a system call would not have to trash the userspace
14846 segment register, which would be expensive. */
14847 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14848 fputs ("fs", file
);
14850 fputs ("gs", file
);
14854 putc (TARGET_AVX2
? 'i' : 'f', file
);
14858 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14859 fputs ("addr32 ", file
);
14863 output_operand_lossage ("invalid operand code '%c'", code
);
14868 print_reg (x
, code
, file
);
14870 else if (MEM_P (x
))
14872 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14873 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14874 && GET_MODE (x
) != BLKmode
)
14877 switch (GET_MODE_SIZE (GET_MODE (x
)))
14879 case 1: size
= "BYTE"; break;
14880 case 2: size
= "WORD"; break;
14881 case 4: size
= "DWORD"; break;
14882 case 8: size
= "QWORD"; break;
14883 case 12: size
= "TBYTE"; break;
14885 if (GET_MODE (x
) == XFmode
)
14890 case 32: size
= "YMMWORD"; break;
14891 case 64: size
= "ZMMWORD"; break;
14893 gcc_unreachable ();
14896 /* Check for explicit size override (codes 'b', 'w', 'k',
14900 else if (code
== 'w')
14902 else if (code
== 'k')
14904 else if (code
== 'q')
14906 else if (code
== 'x')
14909 fputs (size
, file
);
14910 fputs (" PTR ", file
);
14914 /* Avoid (%rip) for call operands. */
14915 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14916 && !CONST_INT_P (x
))
14917 output_addr_const (file
, x
);
14918 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14919 output_operand_lossage ("invalid constraints for operand");
14921 output_address (x
);
14924 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14929 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14930 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14932 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14934 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14936 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
14937 (unsigned long long) (int) l
);
14939 fprintf (file
, "0x%08x", (unsigned int) l
);
14942 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14947 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14948 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14950 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14952 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14955 /* These float cases don't actually occur as immediate operands. */
14956 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14960 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14961 fputs (dstr
, file
);
14966 /* We have patterns that allow zero sets of memory, for instance.
14967 In 64-bit mode, we should probably support all 8-byte vectors,
14968 since we can in fact encode that into an immediate. */
14969 if (GET_CODE (x
) == CONST_VECTOR
)
14971 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14975 if (code
!= 'P' && code
!= 'p')
14977 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14979 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14982 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14983 || GET_CODE (x
) == LABEL_REF
)
14985 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14988 fputs ("OFFSET FLAT:", file
);
14991 if (CONST_INT_P (x
))
14992 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14993 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14994 output_pic_addr_const (file
, x
, code
);
14996 output_addr_const (file
, x
);
15001 ix86_print_operand_punct_valid_p (unsigned char code
)
15003 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
15004 || code
== ';' || code
== '~' || code
== '^');
15007 /* Print a memory operand whose address is ADDR. */
15010 ix86_print_operand_address (FILE *file
, rtx addr
)
15012 struct ix86_address parts
;
15013 rtx base
, index
, disp
;
15019 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15021 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15022 gcc_assert (parts
.index
== NULL_RTX
);
15023 parts
.index
= XVECEXP (addr
, 0, 1);
15024 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15025 addr
= XVECEXP (addr
, 0, 0);
15028 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15030 gcc_assert (TARGET_64BIT
);
15031 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15035 ok
= ix86_decompose_address (addr
, &parts
);
15040 index
= parts
.index
;
15042 scale
= parts
.scale
;
15050 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15052 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15055 gcc_unreachable ();
15058 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15059 if (TARGET_64BIT
&& !base
&& !index
)
15063 if (GET_CODE (disp
) == CONST
15064 && GET_CODE (XEXP (disp
, 0)) == PLUS
15065 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15066 symbol
= XEXP (XEXP (disp
, 0), 0);
15068 if (GET_CODE (symbol
) == LABEL_REF
15069 || (GET_CODE (symbol
) == SYMBOL_REF
15070 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15073 if (!base
&& !index
)
15075 /* Displacement only requires special attention. */
15077 if (CONST_INT_P (disp
))
15079 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15080 fputs ("ds:", file
);
15081 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15084 output_pic_addr_const (file
, disp
, 0);
15086 output_addr_const (file
, disp
);
15090 /* Print SImode register names to force addr32 prefix. */
15091 if (SImode_address_operand (addr
, VOIDmode
))
15093 #ifdef ENABLE_CHECKING
15094 gcc_assert (TARGET_64BIT
);
15095 switch (GET_CODE (addr
))
15098 gcc_assert (GET_MODE (addr
) == SImode
);
15099 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15103 gcc_assert (GET_MODE (addr
) == DImode
);
15106 gcc_unreachable ();
15109 gcc_assert (!code
);
15115 && CONST_INT_P (disp
)
15116 && INTVAL (disp
) < -16*1024*1024)
15118 /* X32 runs in 64-bit mode, where displacement, DISP, in
15119 address DISP(%r64), is encoded as 32-bit immediate sign-
15120 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15121 address is %r64 + 0xffffffffbffffd00. When %r64 <
15122 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15123 which is invalid for x32. The correct address is %r64
15124 - 0x40000300 == 0xf7ffdd64. To properly encode
15125 -0x40000300(%r64) for x32, we zero-extend negative
15126 displacement by forcing addr32 prefix which truncates
15127 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15128 zero-extend all negative displacements, including -1(%rsp).
15129 However, for small negative displacements, sign-extension
15130 won't cause overflow. We only zero-extend negative
15131 displacements if they < -16*1024*1024, which is also used
15132 to check legitimate address displacements for PIC. */
15136 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15141 output_pic_addr_const (file
, disp
, 0);
15142 else if (GET_CODE (disp
) == LABEL_REF
)
15143 output_asm_label (disp
);
15145 output_addr_const (file
, disp
);
15150 print_reg (base
, code
, file
);
15154 print_reg (index
, vsib
? 0 : code
, file
);
15155 if (scale
!= 1 || vsib
)
15156 fprintf (file
, ",%d", scale
);
15162 rtx offset
= NULL_RTX
;
15166 /* Pull out the offset of a symbol; print any symbol itself. */
15167 if (GET_CODE (disp
) == CONST
15168 && GET_CODE (XEXP (disp
, 0)) == PLUS
15169 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15171 offset
= XEXP (XEXP (disp
, 0), 1);
15172 disp
= gen_rtx_CONST (VOIDmode
,
15173 XEXP (XEXP (disp
, 0), 0));
15177 output_pic_addr_const (file
, disp
, 0);
15178 else if (GET_CODE (disp
) == LABEL_REF
)
15179 output_asm_label (disp
);
15180 else if (CONST_INT_P (disp
))
15183 output_addr_const (file
, disp
);
15189 print_reg (base
, code
, file
);
15192 if (INTVAL (offset
) >= 0)
15194 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15198 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15205 print_reg (index
, vsib
? 0 : code
, file
);
15206 if (scale
!= 1 || vsib
)
15207 fprintf (file
, "*%d", scale
);
15214 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15217 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15221 if (GET_CODE (x
) != UNSPEC
)
15224 op
= XVECEXP (x
, 0, 0);
15225 switch (XINT (x
, 1))
15227 case UNSPEC_GOTTPOFF
:
15228 output_addr_const (file
, op
);
15229 /* FIXME: This might be @TPOFF in Sun ld. */
15230 fputs ("@gottpoff", file
);
15233 output_addr_const (file
, op
);
15234 fputs ("@tpoff", file
);
15236 case UNSPEC_NTPOFF
:
15237 output_addr_const (file
, op
);
15239 fputs ("@tpoff", file
);
15241 fputs ("@ntpoff", file
);
15243 case UNSPEC_DTPOFF
:
15244 output_addr_const (file
, op
);
15245 fputs ("@dtpoff", file
);
15247 case UNSPEC_GOTNTPOFF
:
15248 output_addr_const (file
, op
);
15250 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15251 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15253 fputs ("@gotntpoff", file
);
15255 case UNSPEC_INDNTPOFF
:
15256 output_addr_const (file
, op
);
15257 fputs ("@indntpoff", file
);
15260 case UNSPEC_MACHOPIC_OFFSET
:
15261 output_addr_const (file
, op
);
15263 machopic_output_function_base_name (file
);
15267 case UNSPEC_STACK_CHECK
:
15271 gcc_assert (flag_split_stack
);
15273 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15274 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15276 gcc_unreachable ();
15279 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15290 /* Split one or more double-mode RTL references into pairs of half-mode
15291 references. The RTL can be REG, offsettable MEM, integer constant, or
15292 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15293 split and "num" is its length. lo_half and hi_half are output arrays
15294 that parallel "operands". */
15297 split_double_mode (enum machine_mode mode
, rtx operands
[],
15298 int num
, rtx lo_half
[], rtx hi_half
[])
15300 enum machine_mode half_mode
;
15306 half_mode
= DImode
;
15309 half_mode
= SImode
;
15312 gcc_unreachable ();
15315 byte
= GET_MODE_SIZE (half_mode
);
15319 rtx op
= operands
[num
];
15321 /* simplify_subreg refuse to split volatile memory addresses,
15322 but we still have to handle it. */
15325 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15326 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15330 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15331 GET_MODE (op
) == VOIDmode
15332 ? mode
: GET_MODE (op
), 0);
15333 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15334 GET_MODE (op
) == VOIDmode
15335 ? mode
: GET_MODE (op
), byte
);
15340 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15341 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15342 is the expression of the binary operation. The output may either be
15343 emitted here, or returned to the caller, like all output_* functions.
15345 There is no guarantee that the operands are the same mode, as they
15346 might be within FLOAT or FLOAT_EXTEND expressions. */
15348 #ifndef SYSV386_COMPAT
15349 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15350 wants to fix the assemblers because that causes incompatibility
15351 with gcc. No-one wants to fix gcc because that causes
15352 incompatibility with assemblers... You can use the option of
15353 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15354 #define SYSV386_COMPAT 1
15358 output_387_binary_op (rtx insn
, rtx
*operands
)
15360 static char buf
[40];
15363 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15365 #ifdef ENABLE_CHECKING
15366 /* Even if we do not want to check the inputs, this documents input
15367 constraints. Which helps in understanding the following code. */
15368 if (STACK_REG_P (operands
[0])
15369 && ((REG_P (operands
[1])
15370 && REGNO (operands
[0]) == REGNO (operands
[1])
15371 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15372 || (REG_P (operands
[2])
15373 && REGNO (operands
[0]) == REGNO (operands
[2])
15374 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15375 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15378 gcc_assert (is_sse
);
15381 switch (GET_CODE (operands
[3]))
15384 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15385 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15393 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15394 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15402 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15403 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15411 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15412 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15420 gcc_unreachable ();
15427 strcpy (buf
, ssep
);
15428 if (GET_MODE (operands
[0]) == SFmode
)
15429 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15431 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15435 strcpy (buf
, ssep
+ 1);
15436 if (GET_MODE (operands
[0]) == SFmode
)
15437 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15439 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15445 switch (GET_CODE (operands
[3]))
15449 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15451 rtx temp
= operands
[2];
15452 operands
[2] = operands
[1];
15453 operands
[1] = temp
;
15456 /* know operands[0] == operands[1]. */
15458 if (MEM_P (operands
[2]))
15464 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15466 if (STACK_TOP_P (operands
[0]))
15467 /* How is it that we are storing to a dead operand[2]?
15468 Well, presumably operands[1] is dead too. We can't
15469 store the result to st(0) as st(0) gets popped on this
15470 instruction. Instead store to operands[2] (which I
15471 think has to be st(1)). st(1) will be popped later.
15472 gcc <= 2.8.1 didn't have this check and generated
15473 assembly code that the Unixware assembler rejected. */
15474 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15476 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15480 if (STACK_TOP_P (operands
[0]))
15481 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15483 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15488 if (MEM_P (operands
[1]))
15494 if (MEM_P (operands
[2]))
15500 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15503 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15504 derived assemblers, confusingly reverse the direction of
15505 the operation for fsub{r} and fdiv{r} when the
15506 destination register is not st(0). The Intel assembler
15507 doesn't have this brain damage. Read !SYSV386_COMPAT to
15508 figure out what the hardware really does. */
15509 if (STACK_TOP_P (operands
[0]))
15510 p
= "{p\t%0, %2|rp\t%2, %0}";
15512 p
= "{rp\t%2, %0|p\t%0, %2}";
15514 if (STACK_TOP_P (operands
[0]))
15515 /* As above for fmul/fadd, we can't store to st(0). */
15516 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15518 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15523 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15526 if (STACK_TOP_P (operands
[0]))
15527 p
= "{rp\t%0, %1|p\t%1, %0}";
15529 p
= "{p\t%1, %0|rp\t%0, %1}";
15531 if (STACK_TOP_P (operands
[0]))
15532 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15534 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15539 if (STACK_TOP_P (operands
[0]))
15541 if (STACK_TOP_P (operands
[1]))
15542 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15544 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15547 else if (STACK_TOP_P (operands
[1]))
15550 p
= "{\t%1, %0|r\t%0, %1}";
15552 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15558 p
= "{r\t%2, %0|\t%0, %2}";
15560 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15566 gcc_unreachable ();
15573 /* Check if a 256bit AVX register is referenced inside of EXP. */
15576 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15580 if (GET_CODE (exp
) == SUBREG
)
15581 exp
= SUBREG_REG (exp
);
15584 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15590 /* Return needed mode for entity in optimize_mode_switching pass. */
15593 ix86_avx_u128_mode_needed (rtx insn
)
15599 /* Needed mode is set to AVX_U128_CLEAN if there are
15600 no 256bit modes used in function arguments. */
15601 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15603 link
= XEXP (link
, 1))
15605 if (GET_CODE (XEXP (link
, 0)) == USE
)
15607 rtx arg
= XEXP (XEXP (link
, 0), 0);
15609 if (ix86_check_avx256_register (&arg
, NULL
))
15610 return AVX_U128_ANY
;
15614 return AVX_U128_CLEAN
;
15617 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15618 changes state only when a 256bit register is written to, but we need
15619 to prevent the compiler from moving optimal insertion point above
15620 eventual read from 256bit register. */
15621 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15622 return AVX_U128_DIRTY
;
15624 return AVX_U128_ANY
;
15627 /* Return mode that i387 must be switched into
15628 prior to the execution of insn. */
15631 ix86_i387_mode_needed (int entity
, rtx insn
)
15633 enum attr_i387_cw mode
;
15635 /* The mode UNINITIALIZED is used to store control word after a
15636 function call or ASM pattern. The mode ANY specify that function
15637 has no requirements on the control word and make no changes in the
15638 bits we are interested in. */
15641 || (NONJUMP_INSN_P (insn
)
15642 && (asm_noperands (PATTERN (insn
)) >= 0
15643 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15644 return I387_CW_UNINITIALIZED
;
15646 if (recog_memoized (insn
) < 0)
15647 return I387_CW_ANY
;
15649 mode
= get_attr_i387_cw (insn
);
15654 if (mode
== I387_CW_TRUNC
)
15659 if (mode
== I387_CW_FLOOR
)
15664 if (mode
== I387_CW_CEIL
)
15669 if (mode
== I387_CW_MASK_PM
)
15674 gcc_unreachable ();
15677 return I387_CW_ANY
;
15680 /* Return mode that entity must be switched into
15681 prior to the execution of insn. */
15684 ix86_mode_needed (int entity
, rtx insn
)
15689 return ix86_avx_u128_mode_needed (insn
);
15694 return ix86_i387_mode_needed (entity
, insn
);
15696 gcc_unreachable ();
15701 /* Check if a 256bit AVX register is referenced in stores. */
15704 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15706 if (ix86_check_avx256_register (&dest
, NULL
))
15708 bool *used
= (bool *) data
;
15713 /* Calculate mode of upper 128bit AVX registers after the insn. */
15716 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15718 rtx pat
= PATTERN (insn
);
15720 if (vzeroupper_operation (pat
, VOIDmode
)
15721 || vzeroall_operation (pat
, VOIDmode
))
15722 return AVX_U128_CLEAN
;
15724 /* We know that state is clean after CALL insn if there are no
15725 256bit registers used in the function return register. */
15728 bool avx_reg256_found
= false;
15729 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15730 if (!avx_reg256_found
)
15731 return AVX_U128_CLEAN
;
15734 /* Otherwise, return current mode. Remember that if insn
15735 references AVX 256bit registers, the mode was already changed
15736 to DIRTY from MODE_NEEDED. */
15740 /* Return the mode that an insn results in. */
15743 ix86_mode_after (int entity
, int mode
, rtx insn
)
15748 return ix86_avx_u128_mode_after (mode
, insn
);
15755 gcc_unreachable ();
15760 ix86_avx_u128_mode_entry (void)
15764 /* Entry mode is set to AVX_U128_DIRTY if there are
15765 256bit modes used in function arguments. */
15766 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15767 arg
= TREE_CHAIN (arg
))
15769 rtx incoming
= DECL_INCOMING_RTL (arg
);
15771 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15772 return AVX_U128_DIRTY
;
15775 return AVX_U128_CLEAN
;
15778 /* Return a mode that ENTITY is assumed to be
15779 switched to at function entry. */
15782 ix86_mode_entry (int entity
)
15787 return ix86_avx_u128_mode_entry ();
15792 return I387_CW_ANY
;
15794 gcc_unreachable ();
15799 ix86_avx_u128_mode_exit (void)
15801 rtx reg
= crtl
->return_rtx
;
15803 /* Exit mode is set to AVX_U128_DIRTY if there are
15804 256bit modes used in the function return register. */
15805 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15806 return AVX_U128_DIRTY
;
15808 return AVX_U128_CLEAN
;
15811 /* Return a mode that ENTITY is assumed to be
15812 switched to at function exit. */
15815 ix86_mode_exit (int entity
)
15820 return ix86_avx_u128_mode_exit ();
15825 return I387_CW_ANY
;
15827 gcc_unreachable ();
15831 /* Output code to initialize control word copies used by trunc?f?i and
15832 rounding patterns. CURRENT_MODE is set to current control word,
15833 while NEW_MODE is set to new control word. */
15836 emit_i387_cw_initialization (int mode
)
15838 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15841 enum ix86_stack_slot slot
;
15843 rtx reg
= gen_reg_rtx (HImode
);
15845 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15846 emit_move_insn (reg
, copy_rtx (stored_mode
));
15848 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15849 || optimize_insn_for_size_p ())
15853 case I387_CW_TRUNC
:
15854 /* round toward zero (truncate) */
15855 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15856 slot
= SLOT_CW_TRUNC
;
15859 case I387_CW_FLOOR
:
15860 /* round down toward -oo */
15861 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15862 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15863 slot
= SLOT_CW_FLOOR
;
15867 /* round up toward +oo */
15868 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15869 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15870 slot
= SLOT_CW_CEIL
;
15873 case I387_CW_MASK_PM
:
15874 /* mask precision exception for nearbyint() */
15875 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15876 slot
= SLOT_CW_MASK_PM
;
15880 gcc_unreachable ();
15887 case I387_CW_TRUNC
:
15888 /* round toward zero (truncate) */
15889 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15890 slot
= SLOT_CW_TRUNC
;
15893 case I387_CW_FLOOR
:
15894 /* round down toward -oo */
15895 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15896 slot
= SLOT_CW_FLOOR
;
15900 /* round up toward +oo */
15901 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15902 slot
= SLOT_CW_CEIL
;
15905 case I387_CW_MASK_PM
:
15906 /* mask precision exception for nearbyint() */
15907 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15908 slot
= SLOT_CW_MASK_PM
;
15912 gcc_unreachable ();
15916 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15918 new_mode
= assign_386_stack_local (HImode
, slot
);
15919 emit_move_insn (new_mode
, reg
);
15922 /* Emit vzeroupper. */
15925 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15929 /* Cancel automatic vzeroupper insertion if there are
15930 live call-saved SSE registers at the insertion point. */
15932 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15933 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15937 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15938 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15941 emit_insn (gen_avx_vzeroupper ());
15944 /* Generate one or more insns to set ENTITY to MODE. */
15947 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15952 if (mode
== AVX_U128_CLEAN
)
15953 ix86_avx_emit_vzeroupper (regs_live
);
15959 if (mode
!= I387_CW_ANY
15960 && mode
!= I387_CW_UNINITIALIZED
)
15961 emit_i387_cw_initialization (mode
);
15964 gcc_unreachable ();
15968 /* Output code for INSN to convert a float to a signed int. OPERANDS
15969 are the insn operands. The output may be [HSD]Imode and the input
15970 operand may be [SDX]Fmode. */
15973 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15975 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15976 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15977 int round_mode
= get_attr_i387_cw (insn
);
15979 /* Jump through a hoop or two for DImode, since the hardware has no
15980 non-popping instruction. We used to do this a different way, but
15981 that was somewhat fragile and broke with post-reload splitters. */
15982 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15983 output_asm_insn ("fld\t%y1", operands
);
15985 gcc_assert (STACK_TOP_P (operands
[1]));
15986 gcc_assert (MEM_P (operands
[0]));
15987 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15990 output_asm_insn ("fisttp%Z0\t%0", operands
);
15993 if (round_mode
!= I387_CW_ANY
)
15994 output_asm_insn ("fldcw\t%3", operands
);
15995 if (stack_top_dies
|| dimode_p
)
15996 output_asm_insn ("fistp%Z0\t%0", operands
);
15998 output_asm_insn ("fist%Z0\t%0", operands
);
15999 if (round_mode
!= I387_CW_ANY
)
16000 output_asm_insn ("fldcw\t%2", operands
);
16006 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16007 have the values zero or one, indicates the ffreep insn's operand
16008 from the OPERANDS array. */
16010 static const char *
16011 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16013 if (TARGET_USE_FFREEP
)
16014 #ifdef HAVE_AS_IX86_FFREEP
16015 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
16018 static char retval
[32];
16019 int regno
= REGNO (operands
[opno
]);
16021 gcc_assert (STACK_REGNO_P (regno
));
16023 regno
-= FIRST_STACK_REG
;
16025 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16030 return opno
? "fstp\t%y1" : "fstp\t%y0";
16034 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16035 should be used. UNORDERED_P is true when fucom should be used. */
16038 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16040 int stack_top_dies
;
16041 rtx cmp_op0
, cmp_op1
;
16042 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16046 cmp_op0
= operands
[0];
16047 cmp_op1
= operands
[1];
16051 cmp_op0
= operands
[1];
16052 cmp_op1
= operands
[2];
16057 if (GET_MODE (operands
[0]) == SFmode
)
16059 return "%vucomiss\t{%1, %0|%0, %1}";
16061 return "%vcomiss\t{%1, %0|%0, %1}";
16064 return "%vucomisd\t{%1, %0|%0, %1}";
16066 return "%vcomisd\t{%1, %0|%0, %1}";
16069 gcc_assert (STACK_TOP_P (cmp_op0
));
16071 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16073 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16075 if (stack_top_dies
)
16077 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16078 return output_387_ffreep (operands
, 1);
16081 return "ftst\n\tfnstsw\t%0";
16084 if (STACK_REG_P (cmp_op1
)
16086 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16087 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16089 /* If both the top of the 387 stack dies, and the other operand
16090 is also a stack register that dies, then this must be a
16091 `fcompp' float compare */
16095 /* There is no double popping fcomi variant. Fortunately,
16096 eflags is immune from the fstp's cc clobbering. */
16098 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16100 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16101 return output_387_ffreep (operands
, 0);
16106 return "fucompp\n\tfnstsw\t%0";
16108 return "fcompp\n\tfnstsw\t%0";
16113 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16115 static const char * const alt
[16] =
16117 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16118 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16119 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16120 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16122 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16123 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16127 "fcomi\t{%y1, %0|%0, %y1}",
16128 "fcomip\t{%y1, %0|%0, %y1}",
16129 "fucomi\t{%y1, %0|%0, %y1}",
16130 "fucomip\t{%y1, %0|%0, %y1}",
16141 mask
= eflags_p
<< 3;
16142 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16143 mask
|= unordered_p
<< 1;
16144 mask
|= stack_top_dies
;
16146 gcc_assert (mask
< 16);
16155 ix86_output_addr_vec_elt (FILE *file
, int value
)
16157 const char *directive
= ASM_LONG
;
16161 directive
= ASM_QUAD
;
16163 gcc_assert (!TARGET_64BIT
);
16166 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16170 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16172 const char *directive
= ASM_LONG
;
16175 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16176 directive
= ASM_QUAD
;
16178 gcc_assert (!TARGET_64BIT
);
16180 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16181 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16182 fprintf (file
, "%s%s%d-%s%d\n",
16183 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16184 else if (HAVE_AS_GOTOFF_IN_DATA
)
16185 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16187 else if (TARGET_MACHO
)
16189 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16190 machopic_output_function_base_name (file
);
16195 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16196 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16199 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16203 ix86_expand_clear (rtx dest
)
16207 /* We play register width games, which are only valid after reload. */
16208 gcc_assert (reload_completed
);
16210 /* Avoid HImode and its attendant prefix byte. */
16211 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16212 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16213 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16215 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16216 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16218 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16219 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16225 /* X is an unchanging MEM. If it is a constant pool reference, return
16226 the constant pool rtx, else NULL. */
16229 maybe_get_pool_constant (rtx x
)
16231 x
= ix86_delegitimize_address (XEXP (x
, 0));
16233 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16234 return get_pool_constant (x
);
16240 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16243 enum tls_model model
;
16248 if (GET_CODE (op1
) == SYMBOL_REF
)
16252 model
= SYMBOL_REF_TLS_MODEL (op1
);
16255 op1
= legitimize_tls_address (op1
, model
, true);
16256 op1
= force_operand (op1
, op0
);
16259 op1
= convert_to_mode (mode
, op1
, 1);
16261 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16264 else if (GET_CODE (op1
) == CONST
16265 && GET_CODE (XEXP (op1
, 0)) == PLUS
16266 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16268 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16269 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16272 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16274 tmp
= legitimize_tls_address (symbol
, model
, true);
16276 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16280 tmp
= force_operand (tmp
, NULL
);
16281 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16282 op0
, 1, OPTAB_DIRECT
);
16285 op1
= convert_to_mode (mode
, tmp
, 1);
16289 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16290 && symbolic_operand (op1
, mode
))
16292 if (TARGET_MACHO
&& !TARGET_64BIT
)
16295 /* dynamic-no-pic */
16296 if (MACHOPIC_INDIRECT
)
16298 rtx temp
= ((reload_in_progress
16299 || ((op0
&& REG_P (op0
))
16301 ? op0
: gen_reg_rtx (Pmode
));
16302 op1
= machopic_indirect_data_reference (op1
, temp
);
16304 op1
= machopic_legitimize_pic_address (op1
, mode
,
16305 temp
== op1
? 0 : temp
);
16307 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16309 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16313 if (GET_CODE (op0
) == MEM
)
16314 op1
= force_reg (Pmode
, op1
);
16318 if (GET_CODE (temp
) != REG
)
16319 temp
= gen_reg_rtx (Pmode
);
16320 temp
= legitimize_pic_address (op1
, temp
);
16325 /* dynamic-no-pic */
16331 op1
= force_reg (mode
, op1
);
16332 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16334 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16335 op1
= legitimize_pic_address (op1
, reg
);
16338 op1
= convert_to_mode (mode
, op1
, 1);
16345 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16346 || !push_operand (op0
, mode
))
16348 op1
= force_reg (mode
, op1
);
16350 if (push_operand (op0
, mode
)
16351 && ! general_no_elim_operand (op1
, mode
))
16352 op1
= copy_to_mode_reg (mode
, op1
);
16354 /* Force large constants in 64bit compilation into register
16355 to get them CSEed. */
16356 if (can_create_pseudo_p ()
16357 && (mode
== DImode
) && TARGET_64BIT
16358 && immediate_operand (op1
, mode
)
16359 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16360 && !register_operand (op0
, mode
)
16362 op1
= copy_to_mode_reg (mode
, op1
);
16364 if (can_create_pseudo_p ()
16365 && FLOAT_MODE_P (mode
)
16366 && GET_CODE (op1
) == CONST_DOUBLE
)
16368 /* If we are loading a floating point constant to a register,
16369 force the value to memory now, since we'll get better code
16370 out the back end. */
16372 op1
= validize_mem (force_const_mem (mode
, op1
));
16373 if (!register_operand (op0
, mode
))
16375 rtx temp
= gen_reg_rtx (mode
);
16376 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16377 emit_move_insn (op0
, temp
);
16383 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16387 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16389 rtx op0
= operands
[0], op1
= operands
[1];
16390 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16392 /* Force constants other than zero into memory. We do not know how
16393 the instructions used to build constants modify the upper 64 bits
16394 of the register, once we have that information we may be able
16395 to handle some of them more efficiently. */
16396 if (can_create_pseudo_p ()
16397 && register_operand (op0
, mode
)
16398 && (CONSTANT_P (op1
)
16399 || (GET_CODE (op1
) == SUBREG
16400 && CONSTANT_P (SUBREG_REG (op1
))))
16401 && !standard_sse_constant_p (op1
))
16402 op1
= validize_mem (force_const_mem (mode
, op1
));
16404 /* We need to check memory alignment for SSE mode since attribute
16405 can make operands unaligned. */
16406 if (can_create_pseudo_p ()
16407 && SSE_REG_MODE_P (mode
)
16408 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16409 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16413 /* ix86_expand_vector_move_misalign() does not like constants ... */
16414 if (CONSTANT_P (op1
)
16415 || (GET_CODE (op1
) == SUBREG
16416 && CONSTANT_P (SUBREG_REG (op1
))))
16417 op1
= validize_mem (force_const_mem (mode
, op1
));
16419 /* ... nor both arguments in memory. */
16420 if (!register_operand (op0
, mode
)
16421 && !register_operand (op1
, mode
))
16422 op1
= force_reg (mode
, op1
);
16424 tmp
[0] = op0
; tmp
[1] = op1
;
16425 ix86_expand_vector_move_misalign (mode
, tmp
);
16429 /* Make operand1 a register if it isn't already. */
16430 if (can_create_pseudo_p ()
16431 && !register_operand (op0
, mode
)
16432 && !register_operand (op1
, mode
))
16434 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16438 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16441 /* Split 32-byte AVX unaligned load and store if needed. */
16444 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16447 rtx (*extract
) (rtx
, rtx
, rtx
);
16448 rtx (*load_unaligned
) (rtx
, rtx
);
16449 rtx (*store_unaligned
) (rtx
, rtx
);
16450 enum machine_mode mode
;
16452 switch (GET_MODE (op0
))
16455 gcc_unreachable ();
16457 extract
= gen_avx_vextractf128v32qi
;
16458 load_unaligned
= gen_avx_loaddqu256
;
16459 store_unaligned
= gen_avx_storedqu256
;
16463 extract
= gen_avx_vextractf128v8sf
;
16464 load_unaligned
= gen_avx_loadups256
;
16465 store_unaligned
= gen_avx_storeups256
;
16469 extract
= gen_avx_vextractf128v4df
;
16470 load_unaligned
= gen_avx_loadupd256
;
16471 store_unaligned
= gen_avx_storeupd256
;
16478 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16480 rtx r
= gen_reg_rtx (mode
);
16481 m
= adjust_address (op1
, mode
, 0);
16482 emit_move_insn (r
, m
);
16483 m
= adjust_address (op1
, mode
, 16);
16484 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16485 emit_move_insn (op0
, r
);
16488 emit_insn (load_unaligned (op0
, op1
));
16490 else if (MEM_P (op0
))
16492 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16494 m
= adjust_address (op0
, mode
, 0);
16495 emit_insn (extract (m
, op1
, const0_rtx
));
16496 m
= adjust_address (op0
, mode
, 16);
16497 emit_insn (extract (m
, op1
, const1_rtx
));
16500 emit_insn (store_unaligned (op0
, op1
));
16503 gcc_unreachable ();
16506 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16507 straight to ix86_expand_vector_move. */
16508 /* Code generation for scalar reg-reg moves of single and double precision data:
16509 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16513 if (x86_sse_partial_reg_dependency == true)
16518 Code generation for scalar loads of double precision data:
16519 if (x86_sse_split_regs == true)
16520 movlpd mem, reg (gas syntax)
16524 Code generation for unaligned packed loads of single precision data
16525 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16526 if (x86_sse_unaligned_move_optimal)
16529 if (x86_sse_partial_reg_dependency == true)
16541 Code generation for unaligned packed loads of double precision data
16542 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16543 if (x86_sse_unaligned_move_optimal)
16546 if (x86_sse_split_regs == true)
16559 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16567 && GET_MODE_SIZE (mode
) == 32)
16569 switch (GET_MODE_CLASS (mode
))
16571 case MODE_VECTOR_INT
:
16573 op0
= gen_lowpart (V32QImode
, op0
);
16574 op1
= gen_lowpart (V32QImode
, op1
);
16577 case MODE_VECTOR_FLOAT
:
16578 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16582 gcc_unreachable ();
16590 /* ??? If we have typed data, then it would appear that using
16591 movdqu is the only way to get unaligned data loaded with
16593 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16595 op0
= gen_lowpart (V16QImode
, op0
);
16596 op1
= gen_lowpart (V16QImode
, op1
);
16597 /* We will eventually emit movups based on insn attributes. */
16598 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16600 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16605 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16606 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16607 || optimize_insn_for_size_p ())
16609 /* We will eventually emit movups based on insn attributes. */
16610 emit_insn (gen_sse2_loadupd (op0
, op1
));
16614 /* When SSE registers are split into halves, we can avoid
16615 writing to the top half twice. */
16616 if (TARGET_SSE_SPLIT_REGS
)
16618 emit_clobber (op0
);
16623 /* ??? Not sure about the best option for the Intel chips.
16624 The following would seem to satisfy; the register is
16625 entirely cleared, breaking the dependency chain. We
16626 then store to the upper half, with a dependency depth
16627 of one. A rumor has it that Intel recommends two movsd
16628 followed by an unpacklpd, but this is unconfirmed. And
16629 given that the dependency depth of the unpacklpd would
16630 still be one, I'm not sure why this would be better. */
16631 zero
= CONST0_RTX (V2DFmode
);
16634 m
= adjust_address (op1
, DFmode
, 0);
16635 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16636 m
= adjust_address (op1
, DFmode
, 8);
16637 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16642 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16643 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16644 || optimize_insn_for_size_p ())
16646 op0
= gen_lowpart (V4SFmode
, op0
);
16647 op1
= gen_lowpart (V4SFmode
, op1
);
16648 emit_insn (gen_sse_loadups (op0
, op1
));
16652 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16653 emit_move_insn (op0
, CONST0_RTX (mode
));
16655 emit_clobber (op0
);
16657 if (mode
!= V4SFmode
)
16658 op0
= gen_lowpart (V4SFmode
, op0
);
16660 m
= adjust_address (op1
, V2SFmode
, 0);
16661 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16662 m
= adjust_address (op1
, V2SFmode
, 8);
16663 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16666 else if (MEM_P (op0
))
16668 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16670 op0
= gen_lowpart (V16QImode
, op0
);
16671 op1
= gen_lowpart (V16QImode
, op1
);
16672 /* We will eventually emit movups based on insn attributes. */
16673 emit_insn (gen_sse2_storedqu (op0
, op1
));
16675 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16678 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16679 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16680 || optimize_insn_for_size_p ())
16681 /* We will eventually emit movups based on insn attributes. */
16682 emit_insn (gen_sse2_storeupd (op0
, op1
));
16685 m
= adjust_address (op0
, DFmode
, 0);
16686 emit_insn (gen_sse2_storelpd (m
, op1
));
16687 m
= adjust_address (op0
, DFmode
, 8);
16688 emit_insn (gen_sse2_storehpd (m
, op1
));
16693 if (mode
!= V4SFmode
)
16694 op1
= gen_lowpart (V4SFmode
, op1
);
16697 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16698 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16699 || optimize_insn_for_size_p ())
16701 op0
= gen_lowpart (V4SFmode
, op0
);
16702 emit_insn (gen_sse_storeups (op0
, op1
));
16706 m
= adjust_address (op0
, V2SFmode
, 0);
16707 emit_insn (gen_sse_storelps (m
, op1
));
16708 m
= adjust_address (op0
, V2SFmode
, 8);
16709 emit_insn (gen_sse_storehps (m
, op1
));
16714 gcc_unreachable ();
16717 /* Expand a push in MODE. This is some mode for which we do not support
16718 proper push instructions, at least from the registers that we expect
16719 the value to live in. */
16722 ix86_expand_push (enum machine_mode mode
, rtx x
)
16726 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16727 GEN_INT (-GET_MODE_SIZE (mode
)),
16728 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16729 if (tmp
!= stack_pointer_rtx
)
16730 emit_move_insn (stack_pointer_rtx
, tmp
);
16732 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16734 /* When we push an operand onto stack, it has to be aligned at least
16735 at the function argument boundary. However since we don't have
16736 the argument type, we can't determine the actual argument
16738 emit_move_insn (tmp
, x
);
16741 /* Helper function of ix86_fixup_binary_operands to canonicalize
16742 operand order. Returns true if the operands should be swapped. */
16745 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16748 rtx dst
= operands
[0];
16749 rtx src1
= operands
[1];
16750 rtx src2
= operands
[2];
16752 /* If the operation is not commutative, we can't do anything. */
16753 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16756 /* Highest priority is that src1 should match dst. */
16757 if (rtx_equal_p (dst
, src1
))
16759 if (rtx_equal_p (dst
, src2
))
16762 /* Next highest priority is that immediate constants come second. */
16763 if (immediate_operand (src2
, mode
))
16765 if (immediate_operand (src1
, mode
))
16768 /* Lowest priority is that memory references should come second. */
16778 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16779 destination to use for the operation. If different from the true
16780 destination in operands[0], a copy operation will be required. */
16783 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16786 rtx dst
= operands
[0];
16787 rtx src1
= operands
[1];
16788 rtx src2
= operands
[2];
16790 /* Canonicalize operand order. */
16791 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16795 /* It is invalid to swap operands of different modes. */
16796 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16803 /* Both source operands cannot be in memory. */
16804 if (MEM_P (src1
) && MEM_P (src2
))
16806 /* Optimization: Only read from memory once. */
16807 if (rtx_equal_p (src1
, src2
))
16809 src2
= force_reg (mode
, src2
);
16813 src2
= force_reg (mode
, src2
);
16816 /* If the destination is memory, and we do not have matching source
16817 operands, do things in registers. */
16818 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16819 dst
= gen_reg_rtx (mode
);
16821 /* Source 1 cannot be a constant. */
16822 if (CONSTANT_P (src1
))
16823 src1
= force_reg (mode
, src1
);
16825 /* Source 1 cannot be a non-matching memory. */
16826 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16827 src1
= force_reg (mode
, src1
);
16829 /* Improve address combine. */
16831 && GET_MODE_CLASS (mode
) == MODE_INT
16833 src2
= force_reg (mode
, src2
);
16835 operands
[1] = src1
;
16836 operands
[2] = src2
;
16840 /* Similarly, but assume that the destination has already been
16841 set up properly. */
16844 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16845 enum machine_mode mode
, rtx operands
[])
16847 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16848 gcc_assert (dst
== operands
[0]);
16851 /* Attempt to expand a binary operator. Make the expansion closer to the
16852 actual machine, then just general_operand, which will allow 3 separate
16853 memory references (one output, two input) in a single insn. */
16856 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16859 rtx src1
, src2
, dst
, op
, clob
;
16861 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16862 src1
= operands
[1];
16863 src2
= operands
[2];
16865 /* Emit the instruction. */
16867 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16868 if (reload_in_progress
)
16870 /* Reload doesn't know about the flags register, and doesn't know that
16871 it doesn't want to clobber it. We can only do this with PLUS. */
16872 gcc_assert (code
== PLUS
);
16875 else if (reload_completed
16877 && !rtx_equal_p (dst
, src1
))
16879 /* This is going to be an LEA; avoid splitting it later. */
16884 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16885 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16888 /* Fix up the destination if needed. */
16889 if (dst
!= operands
[0])
16890 emit_move_insn (operands
[0], dst
);
16893 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16894 the given OPERANDS. */
16897 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16900 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16901 if (GET_CODE (operands
[1]) == SUBREG
)
16906 else if (GET_CODE (operands
[2]) == SUBREG
)
16911 /* Optimize (__m128i) d | (__m128i) e and similar code
16912 when d and e are float vectors into float vector logical
16913 insn. In C/C++ without using intrinsics there is no other way
16914 to express vector logical operation on float vectors than
16915 to cast them temporarily to integer vectors. */
16917 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16918 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16919 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16920 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16921 && SUBREG_BYTE (op1
) == 0
16922 && (GET_CODE (op2
) == CONST_VECTOR
16923 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16924 && SUBREG_BYTE (op2
) == 0))
16925 && can_create_pseudo_p ())
16928 switch (GET_MODE (SUBREG_REG (op1
)))
16934 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16935 if (GET_CODE (op2
) == CONST_VECTOR
)
16937 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16938 op2
= force_reg (GET_MODE (dst
), op2
);
16943 op2
= SUBREG_REG (operands
[2]);
16944 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16945 op2
= force_reg (GET_MODE (dst
), op2
);
16947 op1
= SUBREG_REG (op1
);
16948 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16949 op1
= force_reg (GET_MODE (dst
), op1
);
16950 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16951 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16953 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
16959 if (!nonimmediate_operand (operands
[1], mode
))
16960 operands
[1] = force_reg (mode
, operands
[1]);
16961 if (!nonimmediate_operand (operands
[2], mode
))
16962 operands
[2] = force_reg (mode
, operands
[2]);
16963 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16964 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16965 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16969 /* Return TRUE or FALSE depending on whether the binary operator meets the
16970 appropriate constraints. */
16973 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16976 rtx dst
= operands
[0];
16977 rtx src1
= operands
[1];
16978 rtx src2
= operands
[2];
16980 /* Both source operands cannot be in memory. */
16981 if (MEM_P (src1
) && MEM_P (src2
))
16984 /* Canonicalize operand order for commutative operators. */
16985 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16992 /* If the destination is memory, we must have a matching source operand. */
16993 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16996 /* Source 1 cannot be a constant. */
16997 if (CONSTANT_P (src1
))
17000 /* Source 1 cannot be a non-matching memory. */
17001 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17002 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17003 return (code
== AND
17006 || (TARGET_64BIT
&& mode
== DImode
))
17007 && satisfies_constraint_L (src2
));
17012 /* Attempt to expand a unary operator. Make the expansion closer to the
17013 actual machine, then just general_operand, which will allow 2 separate
17014 memory references (one output, one input) in a single insn. */
17017 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17020 int matching_memory
;
17021 rtx src
, dst
, op
, clob
;
17026 /* If the destination is memory, and we do not have matching source
17027 operands, do things in registers. */
17028 matching_memory
= 0;
17031 if (rtx_equal_p (dst
, src
))
17032 matching_memory
= 1;
17034 dst
= gen_reg_rtx (mode
);
17037 /* When source operand is memory, destination must match. */
17038 if (MEM_P (src
) && !matching_memory
)
17039 src
= force_reg (mode
, src
);
17041 /* Emit the instruction. */
17043 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17044 if (reload_in_progress
|| code
== NOT
)
17046 /* Reload doesn't know about the flags register, and doesn't know that
17047 it doesn't want to clobber it. */
17048 gcc_assert (code
== NOT
);
17053 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17054 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17057 /* Fix up the destination if needed. */
17058 if (dst
!= operands
[0])
17059 emit_move_insn (operands
[0], dst
);
17062 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17063 divisor are within the range [0-255]. */
17066 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17069 rtx end_label
, qimode_label
;
17070 rtx insn
, div
, mod
;
17071 rtx scratch
, tmp0
, tmp1
, tmp2
;
17072 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17073 rtx (*gen_zero_extend
) (rtx
, rtx
);
17074 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17079 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17080 gen_test_ccno_1
= gen_testsi_ccno_1
;
17081 gen_zero_extend
= gen_zero_extendqisi2
;
17084 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17085 gen_test_ccno_1
= gen_testdi_ccno_1
;
17086 gen_zero_extend
= gen_zero_extendqidi2
;
17089 gcc_unreachable ();
17092 end_label
= gen_label_rtx ();
17093 qimode_label
= gen_label_rtx ();
17095 scratch
= gen_reg_rtx (mode
);
17097 /* Use 8bit unsigned divimod if dividend and divisor are within
17098 the range [0-255]. */
17099 emit_move_insn (scratch
, operands
[2]);
17100 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17101 scratch
, 1, OPTAB_DIRECT
);
17102 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17103 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17104 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17105 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17106 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17108 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
17109 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17110 JUMP_LABEL (insn
) = qimode_label
;
17112 /* Generate original signed/unsigned divimod. */
17113 div
= gen_divmod4_1 (operands
[0], operands
[1],
17114 operands
[2], operands
[3]);
17117 /* Branch to the end. */
17118 emit_jump_insn (gen_jump (end_label
));
17121 /* Generate 8bit unsigned divide. */
17122 emit_label (qimode_label
);
17123 /* Don't use operands[0] for result of 8bit divide since not all
17124 registers support QImode ZERO_EXTRACT. */
17125 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17126 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17127 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17128 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
17132 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17133 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17137 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17138 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17141 /* Extract remainder from AH. */
17142 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17143 if (REG_P (operands
[1]))
17144 insn
= emit_move_insn (operands
[1], tmp1
);
17147 /* Need a new scratch register since the old one has result
17149 scratch
= gen_reg_rtx (mode
);
17150 emit_move_insn (scratch
, tmp1
);
17151 insn
= emit_move_insn (operands
[1], scratch
);
17153 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17155 /* Zero extend quotient from AL. */
17156 tmp1
= gen_lowpart (QImode
, tmp0
);
17157 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17158 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17160 emit_label (end_label
);
17163 #define LEA_MAX_STALL (3)
17164 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17166 /* Increase given DISTANCE in half-cycles according to
17167 dependencies between PREV and NEXT instructions.
17168 Add 1 half-cycle if there is no dependency and
17169 go to next cycle if there is some dependecy. */
17171 static unsigned int
17172 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
17177 if (!prev
|| !next
)
17178 return distance
+ (distance
& 1) + 2;
17180 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17181 return distance
+ 1;
17183 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17184 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17185 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17186 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17187 return distance
+ (distance
& 1) + 2;
17189 return distance
+ 1;
17192 /* Function checks if instruction INSN defines register number
17193 REGNO1 or REGNO2. */
17196 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17201 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17202 if (DF_REF_REG_DEF_P (*def_rec
)
17203 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17204 && (regno1
== DF_REF_REGNO (*def_rec
)
17205 || regno2
== DF_REF_REGNO (*def_rec
)))
17213 /* Function checks if instruction INSN uses register number
17214 REGNO as a part of address expression. */
17217 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17221 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17222 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17228 /* Search backward for non-agu definition of register number REGNO1
17229 or register number REGNO2 in basic block starting from instruction
17230 START up to head of basic block or instruction INSN.
17232 Function puts true value into *FOUND var if definition was found
17233 and false otherwise.
17235 Distance in half-cycles between START and found instruction or head
17236 of BB is added to DISTANCE and returned. */
17239 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17240 rtx insn
, int distance
,
17241 rtx start
, bool *found
)
17243 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17251 && distance
< LEA_SEARCH_THRESHOLD
)
17253 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17255 distance
= increase_distance (prev
, next
, distance
);
17256 if (insn_defines_reg (regno1
, regno2
, prev
))
17258 if (recog_memoized (prev
) < 0
17259 || get_attr_type (prev
) != TYPE_LEA
)
17268 if (prev
== BB_HEAD (bb
))
17271 prev
= PREV_INSN (prev
);
17277 /* Search backward for non-agu definition of register number REGNO1
17278 or register number REGNO2 in INSN's basic block until
17279 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17280 2. Reach neighbour BBs boundary, or
17281 3. Reach agu definition.
17282 Returns the distance between the non-agu definition point and INSN.
17283 If no definition point, returns -1. */
17286 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17289 basic_block bb
= BLOCK_FOR_INSN (insn
);
17291 bool found
= false;
17293 if (insn
!= BB_HEAD (bb
))
17294 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17295 distance
, PREV_INSN (insn
),
17298 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17302 bool simple_loop
= false;
17304 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17307 simple_loop
= true;
17312 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17314 BB_END (bb
), &found
);
17317 int shortest_dist
= -1;
17318 bool found_in_bb
= false;
17320 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17323 = distance_non_agu_define_in_bb (regno1
, regno2
,
17329 if (shortest_dist
< 0)
17330 shortest_dist
= bb_dist
;
17331 else if (bb_dist
> 0)
17332 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17338 distance
= shortest_dist
;
17342 /* get_attr_type may modify recog data. We want to make sure
17343 that recog data is valid for instruction INSN, on which
17344 distance_non_agu_define is called. INSN is unchanged here. */
17345 extract_insn_cached (insn
);
17350 return distance
>> 1;
17353 /* Return the distance in half-cycles between INSN and the next
17354 insn that uses register number REGNO in memory address added
17355 to DISTANCE. Return -1 if REGNO0 is set.
17357 Put true value into *FOUND if register usage was found and
17359 Put true value into *REDEFINED if register redefinition was
17360 found and false otherwise. */
17363 distance_agu_use_in_bb (unsigned int regno
,
17364 rtx insn
, int distance
, rtx start
,
17365 bool *found
, bool *redefined
)
17367 basic_block bb
= NULL
;
17372 *redefined
= false;
17374 if (start
!= NULL_RTX
)
17376 bb
= BLOCK_FOR_INSN (start
);
17377 if (start
!= BB_HEAD (bb
))
17378 /* If insn and start belong to the same bb, set prev to insn,
17379 so the call to increase_distance will increase the distance
17380 between insns by 1. */
17386 && distance
< LEA_SEARCH_THRESHOLD
)
17388 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17390 distance
= increase_distance(prev
, next
, distance
);
17391 if (insn_uses_reg_mem (regno
, next
))
17393 /* Return DISTANCE if OP0 is used in memory
17394 address in NEXT. */
17399 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17401 /* Return -1 if OP0 is set in NEXT. */
17409 if (next
== BB_END (bb
))
17412 next
= NEXT_INSN (next
);
17418 /* Return the distance between INSN and the next insn that uses
17419 register number REGNO0 in memory address. Return -1 if no such
17420 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17423 distance_agu_use (unsigned int regno0
, rtx insn
)
17425 basic_block bb
= BLOCK_FOR_INSN (insn
);
17427 bool found
= false;
17428 bool redefined
= false;
17430 if (insn
!= BB_END (bb
))
17431 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17433 &found
, &redefined
);
17435 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17439 bool simple_loop
= false;
17441 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17444 simple_loop
= true;
17449 distance
= distance_agu_use_in_bb (regno0
, insn
,
17450 distance
, BB_HEAD (bb
),
17451 &found
, &redefined
);
17454 int shortest_dist
= -1;
17455 bool found_in_bb
= false;
17456 bool redefined_in_bb
= false;
17458 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17461 = distance_agu_use_in_bb (regno0
, insn
,
17462 distance
, BB_HEAD (e
->dest
),
17463 &found_in_bb
, &redefined_in_bb
);
17466 if (shortest_dist
< 0)
17467 shortest_dist
= bb_dist
;
17468 else if (bb_dist
> 0)
17469 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17475 distance
= shortest_dist
;
17479 if (!found
|| redefined
)
17482 return distance
>> 1;
17485 /* Define this macro to tune LEA priority vs ADD, it take effect when
17486 there is a dilemma of choicing LEA or ADD
17487 Negative value: ADD is more preferred than LEA
17489 Positive value: LEA is more preferred than ADD*/
17490 #define IX86_LEA_PRIORITY 0
17492 /* Return true if usage of lea INSN has performance advantage
17493 over a sequence of instructions. Instructions sequence has
17494 SPLIT_COST cycles higher latency than lea latency. */
17497 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17498 unsigned int regno2
, int split_cost
, bool has_scale
)
17500 int dist_define
, dist_use
;
17502 /* For Silvermont if using a 2-source or 3-source LEA for
17503 non-destructive destination purposes, or due to wanting
17504 ability to use SCALE, the use of LEA is justified. */
17505 if (ix86_tune
== PROCESSOR_SLM
)
17509 if (split_cost
< 1)
17511 if (regno0
== regno1
|| regno0
== regno2
)
17516 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17517 dist_use
= distance_agu_use (regno0
, insn
);
17519 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17521 /* If there is no non AGU operand definition, no AGU
17522 operand usage and split cost is 0 then both lea
17523 and non lea variants have same priority. Currently
17524 we prefer lea for 64 bit code and non lea on 32 bit
17526 if (dist_use
< 0 && split_cost
== 0)
17527 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17532 /* With longer definitions distance lea is more preferable.
17533 Here we change it to take into account splitting cost and
17535 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17537 /* If there is no use in memory addess then we just check
17538 that split cost exceeds AGU stall. */
17540 return dist_define
> LEA_MAX_STALL
;
17542 /* If this insn has both backward non-agu dependence and forward
17543 agu dependence, the one with short distance takes effect. */
17544 return dist_define
>= dist_use
;
17547 /* Return true if it is legal to clobber flags by INSN and
17548 false otherwise. */
17551 ix86_ok_to_clobber_flags (rtx insn
)
17553 basic_block bb
= BLOCK_FOR_INSN (insn
);
17559 if (NONDEBUG_INSN_P (insn
))
17561 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17562 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17565 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17569 if (insn
== BB_END (bb
))
17572 insn
= NEXT_INSN (insn
);
17575 live
= df_get_live_out(bb
);
17576 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17579 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17580 move and add to avoid AGU stalls. */
17583 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17585 unsigned int regno0
, regno1
, regno2
;
17587 /* Check if we need to optimize. */
17588 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17591 /* Check it is correct to split here. */
17592 if (!ix86_ok_to_clobber_flags(insn
))
17595 regno0
= true_regnum (operands
[0]);
17596 regno1
= true_regnum (operands
[1]);
17597 regno2
= true_regnum (operands
[2]);
17599 /* We need to split only adds with non destructive
17600 destination operand. */
17601 if (regno0
== regno1
|| regno0
== regno2
)
17604 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17607 /* Return true if we should emit lea instruction instead of mov
17611 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17613 unsigned int regno0
, regno1
;
17615 /* Check if we need to optimize. */
17616 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17619 /* Use lea for reg to reg moves only. */
17620 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17623 regno0
= true_regnum (operands
[0]);
17624 regno1
= true_regnum (operands
[1]);
17626 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
17629 /* Return true if we need to split lea into a sequence of
17630 instructions to avoid AGU stalls. */
17633 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17635 unsigned int regno0
, regno1
, regno2
;
17637 struct ix86_address parts
;
17640 /* Check we need to optimize. */
17641 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17644 /* Check it is correct to split here. */
17645 if (!ix86_ok_to_clobber_flags(insn
))
17648 ok
= ix86_decompose_address (operands
[1], &parts
);
17651 /* There should be at least two components in the address. */
17652 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17653 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17656 /* We should not split into add if non legitimate pic
17657 operand is used as displacement. */
17658 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17661 regno0
= true_regnum (operands
[0]) ;
17662 regno1
= INVALID_REGNUM
;
17663 regno2
= INVALID_REGNUM
;
17666 regno1
= true_regnum (parts
.base
);
17668 regno2
= true_regnum (parts
.index
);
17672 /* Compute how many cycles we will add to execution time
17673 if split lea into a sequence of instructions. */
17674 if (parts
.base
|| parts
.index
)
17676 /* Have to use mov instruction if non desctructive
17677 destination form is used. */
17678 if (regno1
!= regno0
&& regno2
!= regno0
)
17681 /* Have to add index to base if both exist. */
17682 if (parts
.base
&& parts
.index
)
17685 /* Have to use shift and adds if scale is 2 or greater. */
17686 if (parts
.scale
> 1)
17688 if (regno0
!= regno1
)
17690 else if (regno2
== regno0
)
17693 split_cost
+= parts
.scale
;
17696 /* Have to use add instruction with immediate if
17697 disp is non zero. */
17698 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17701 /* Subtract the price of lea. */
17705 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
17709 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17710 matches destination. RTX includes clobber of FLAGS_REG. */
17713 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17718 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17719 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17721 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17724 /* Return true if regno1 def is nearest to the insn. */
17727 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17730 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17734 while (prev
&& prev
!= start
)
17736 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17738 prev
= PREV_INSN (prev
);
17741 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17743 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17745 prev
= PREV_INSN (prev
);
17748 /* None of the regs is defined in the bb. */
17752 /* Split lea instructions into a sequence of instructions
17753 which are executed on ALU to avoid AGU stalls.
17754 It is assumed that it is allowed to clobber flags register
17755 at lea position. */
17758 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17760 unsigned int regno0
, regno1
, regno2
;
17761 struct ix86_address parts
;
17765 ok
= ix86_decompose_address (operands
[1], &parts
);
17768 target
= gen_lowpart (mode
, operands
[0]);
17770 regno0
= true_regnum (target
);
17771 regno1
= INVALID_REGNUM
;
17772 regno2
= INVALID_REGNUM
;
17776 parts
.base
= gen_lowpart (mode
, parts
.base
);
17777 regno1
= true_regnum (parts
.base
);
17782 parts
.index
= gen_lowpart (mode
, parts
.index
);
17783 regno2
= true_regnum (parts
.index
);
17787 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17789 if (parts
.scale
> 1)
17791 /* Case r1 = r1 + ... */
17792 if (regno1
== regno0
)
17794 /* If we have a case r1 = r1 + C * r1 then we
17795 should use multiplication which is very
17796 expensive. Assume cost model is wrong if we
17797 have such case here. */
17798 gcc_assert (regno2
!= regno0
);
17800 for (adds
= parts
.scale
; adds
> 0; adds
--)
17801 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17805 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17806 if (regno0
!= regno2
)
17807 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17809 /* Use shift for scaling. */
17810 ix86_emit_binop (ASHIFT
, mode
, target
,
17811 GEN_INT (exact_log2 (parts
.scale
)));
17814 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17816 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17817 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17820 else if (!parts
.base
&& !parts
.index
)
17822 gcc_assert(parts
.disp
);
17823 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17829 if (regno0
!= regno2
)
17830 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17832 else if (!parts
.index
)
17834 if (regno0
!= regno1
)
17835 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17839 if (regno0
== regno1
)
17841 else if (regno0
== regno2
)
17847 /* Find better operand for SET instruction, depending
17848 on which definition is farther from the insn. */
17849 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17850 tmp
= parts
.index
, tmp1
= parts
.base
;
17852 tmp
= parts
.base
, tmp1
= parts
.index
;
17854 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17856 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17857 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17859 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17863 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17866 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17867 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17871 /* Return true if it is ok to optimize an ADD operation to LEA
17872 operation to avoid flag register consumation. For most processors,
17873 ADD is faster than LEA. For the processors like ATOM, if the
17874 destination register of LEA holds an actual address which will be
17875 used soon, LEA is better and otherwise ADD is better. */
17878 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17880 unsigned int regno0
= true_regnum (operands
[0]);
17881 unsigned int regno1
= true_regnum (operands
[1]);
17882 unsigned int regno2
= true_regnum (operands
[2]);
17884 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17885 if (regno0
!= regno1
&& regno0
!= regno2
)
17888 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17891 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
17894 /* Return true if destination reg of SET_BODY is shift count of
17898 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17904 /* Retrieve destination of SET_BODY. */
17905 switch (GET_CODE (set_body
))
17908 set_dest
= SET_DEST (set_body
);
17909 if (!set_dest
|| !REG_P (set_dest
))
17913 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17914 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17922 /* Retrieve shift count of USE_BODY. */
17923 switch (GET_CODE (use_body
))
17926 shift_rtx
= XEXP (use_body
, 1);
17929 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17930 if (ix86_dep_by_shift_count_body (set_body
,
17931 XVECEXP (use_body
, 0, i
)))
17939 && (GET_CODE (shift_rtx
) == ASHIFT
17940 || GET_CODE (shift_rtx
) == LSHIFTRT
17941 || GET_CODE (shift_rtx
) == ASHIFTRT
17942 || GET_CODE (shift_rtx
) == ROTATE
17943 || GET_CODE (shift_rtx
) == ROTATERT
))
17945 rtx shift_count
= XEXP (shift_rtx
, 1);
17947 /* Return true if shift count is dest of SET_BODY. */
17948 if (REG_P (shift_count
))
17950 /* Add check since it can be invoked before register
17951 allocation in pre-reload schedule. */
17952 if (reload_completed
17953 && true_regnum (set_dest
) == true_regnum (shift_count
))
17955 else if (REGNO(set_dest
) == REGNO(shift_count
))
17963 /* Return true if destination reg of SET_INSN is shift count of
17967 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17969 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17970 PATTERN (use_insn
));
17973 /* Return TRUE or FALSE depending on whether the unary operator meets the
17974 appropriate constraints. */
17977 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17978 enum machine_mode mode ATTRIBUTE_UNUSED
,
17981 /* If one of operands is memory, source and destination must match. */
17982 if ((MEM_P (operands
[0])
17983 || MEM_P (operands
[1]))
17984 && ! rtx_equal_p (operands
[0], operands
[1]))
17989 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17990 are ok, keeping in mind the possible movddup alternative. */
17993 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17995 if (MEM_P (operands
[0]))
17996 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17997 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17998 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18002 /* Post-reload splitter for converting an SF or DFmode value in an
18003 SSE register into an unsigned SImode. */
18006 ix86_split_convert_uns_si_sse (rtx operands
[])
18008 enum machine_mode vecmode
;
18009 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18011 large
= operands
[1];
18012 zero_or_two31
= operands
[2];
18013 input
= operands
[3];
18014 two31
= operands
[4];
18015 vecmode
= GET_MODE (large
);
18016 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18018 /* Load up the value into the low element. We must ensure that the other
18019 elements are valid floats -- zero is the easiest such value. */
18022 if (vecmode
== V4SFmode
)
18023 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18025 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18029 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18030 emit_move_insn (value
, CONST0_RTX (vecmode
));
18031 if (vecmode
== V4SFmode
)
18032 emit_insn (gen_sse_movss (value
, value
, input
));
18034 emit_insn (gen_sse2_movsd (value
, value
, input
));
18037 emit_move_insn (large
, two31
);
18038 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18040 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18041 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18043 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18044 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18046 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18047 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18049 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18050 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18052 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18053 if (vecmode
== V4SFmode
)
18054 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18056 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18059 emit_insn (gen_xorv4si3 (value
, value
, large
));
18062 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18063 Expects the 64-bit DImode to be supplied in a pair of integral
18064 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18065 -mfpmath=sse, !optimize_size only. */
18068 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18070 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18071 rtx int_xmm
, fp_xmm
;
18072 rtx biases
, exponents
;
18075 int_xmm
= gen_reg_rtx (V4SImode
);
18076 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18077 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18078 else if (TARGET_SSE_SPLIT_REGS
)
18080 emit_clobber (int_xmm
);
18081 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18085 x
= gen_reg_rtx (V2DImode
);
18086 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18087 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18090 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18091 gen_rtvec (4, GEN_INT (0x43300000UL
),
18092 GEN_INT (0x45300000UL
),
18093 const0_rtx
, const0_rtx
));
18094 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18096 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18097 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18099 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18100 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18101 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18102 (0x1.0p84 + double(fp_value_hi_xmm)).
18103 Note these exponents differ by 32. */
18105 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18107 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18108 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18109 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18110 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18111 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18112 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18113 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18114 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18115 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18117 /* Add the upper and lower DFmode values together. */
18119 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18122 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18123 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18124 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18127 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18130 /* Not used, but eases macroization of patterns. */
18132 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18133 rtx input ATTRIBUTE_UNUSED
)
18135 gcc_unreachable ();
18138 /* Convert an unsigned SImode value into a DFmode. Only currently used
18139 for SSE, but applicable anywhere. */
18142 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18144 REAL_VALUE_TYPE TWO31r
;
18147 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18148 NULL
, 1, OPTAB_DIRECT
);
18150 fp
= gen_reg_rtx (DFmode
);
18151 emit_insn (gen_floatsidf2 (fp
, x
));
18153 real_ldexp (&TWO31r
, &dconst1
, 31);
18154 x
= const_double_from_real_value (TWO31r
, DFmode
);
18156 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18158 emit_move_insn (target
, x
);
18161 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18162 32-bit mode; otherwise we have a direct convert instruction. */
18165 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18167 REAL_VALUE_TYPE TWO32r
;
18168 rtx fp_lo
, fp_hi
, x
;
18170 fp_lo
= gen_reg_rtx (DFmode
);
18171 fp_hi
= gen_reg_rtx (DFmode
);
18173 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18175 real_ldexp (&TWO32r
, &dconst1
, 32);
18176 x
= const_double_from_real_value (TWO32r
, DFmode
);
18177 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18179 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18181 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18184 emit_move_insn (target
, x
);
18187 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18188 For x86_32, -mfpmath=sse, !optimize_size only. */
18190 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18192 REAL_VALUE_TYPE ONE16r
;
18193 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18195 real_ldexp (&ONE16r
, &dconst1
, 16);
18196 x
= const_double_from_real_value (ONE16r
, SFmode
);
18197 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18198 NULL
, 0, OPTAB_DIRECT
);
18199 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18200 NULL
, 0, OPTAB_DIRECT
);
18201 fp_hi
= gen_reg_rtx (SFmode
);
18202 fp_lo
= gen_reg_rtx (SFmode
);
18203 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18204 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18205 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18207 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18209 if (!rtx_equal_p (target
, fp_hi
))
18210 emit_move_insn (target
, fp_hi
);
18213 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18214 a vector of unsigned ints VAL to vector of floats TARGET. */
18217 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18220 REAL_VALUE_TYPE TWO16r
;
18221 enum machine_mode intmode
= GET_MODE (val
);
18222 enum machine_mode fltmode
= GET_MODE (target
);
18223 rtx (*cvt
) (rtx
, rtx
);
18225 if (intmode
== V4SImode
)
18226 cvt
= gen_floatv4siv4sf2
;
18228 cvt
= gen_floatv8siv8sf2
;
18229 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18230 tmp
[0] = force_reg (intmode
, tmp
[0]);
18231 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18233 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18234 NULL_RTX
, 1, OPTAB_DIRECT
);
18235 tmp
[3] = gen_reg_rtx (fltmode
);
18236 emit_insn (cvt (tmp
[3], tmp
[1]));
18237 tmp
[4] = gen_reg_rtx (fltmode
);
18238 emit_insn (cvt (tmp
[4], tmp
[2]));
18239 real_ldexp (&TWO16r
, &dconst1
, 16);
18240 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18241 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18242 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18244 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18246 if (tmp
[7] != target
)
18247 emit_move_insn (target
, tmp
[7]);
18250 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18251 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18252 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18253 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18256 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18258 REAL_VALUE_TYPE TWO31r
;
18259 rtx two31r
, tmp
[4];
18260 enum machine_mode mode
= GET_MODE (val
);
18261 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18262 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18263 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18266 for (i
= 0; i
< 3; i
++)
18267 tmp
[i
] = gen_reg_rtx (mode
);
18268 real_ldexp (&TWO31r
, &dconst1
, 31);
18269 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18270 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18271 two31r
= force_reg (mode
, two31r
);
18274 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18275 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18276 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18277 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18278 default: gcc_unreachable ();
18280 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18281 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18282 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18284 if (intmode
== V4SImode
|| TARGET_AVX2
)
18285 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18286 gen_lowpart (intmode
, tmp
[0]),
18287 GEN_INT (31), NULL_RTX
, 0,
18291 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18292 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18293 *xorp
= expand_simple_binop (intmode
, AND
,
18294 gen_lowpart (intmode
, tmp
[0]),
18295 two31
, NULL_RTX
, 0,
18298 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18302 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18303 then replicate the value for all elements of the vector
18307 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18311 enum machine_mode scalar_mode
;
18328 n_elt
= GET_MODE_NUNITS (mode
);
18329 v
= rtvec_alloc (n_elt
);
18330 scalar_mode
= GET_MODE_INNER (mode
);
18332 RTVEC_ELT (v
, 0) = value
;
18334 for (i
= 1; i
< n_elt
; ++i
)
18335 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18337 return gen_rtx_CONST_VECTOR (mode
, v
);
18340 gcc_unreachable ();
18344 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18345 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18346 for an SSE register. If VECT is true, then replicate the mask for
18347 all elements of the vector register. If INVERT is true, then create
18348 a mask excluding the sign bit. */
18351 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18353 enum machine_mode vec_mode
, imode
;
18354 HOST_WIDE_INT hi
, lo
;
18359 /* Find the sign bit, sign extended to 2*HWI. */
18367 mode
= GET_MODE_INNER (mode
);
18369 lo
= 0x80000000, hi
= lo
< 0;
18377 mode
= GET_MODE_INNER (mode
);
18379 if (HOST_BITS_PER_WIDE_INT
>= 64)
18380 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18382 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18387 vec_mode
= VOIDmode
;
18388 if (HOST_BITS_PER_WIDE_INT
>= 64)
18391 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18398 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18402 lo
= ~lo
, hi
= ~hi
;
18408 mask
= immed_double_const (lo
, hi
, imode
);
18410 vec
= gen_rtvec (2, v
, mask
);
18411 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18412 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18419 gcc_unreachable ();
18423 lo
= ~lo
, hi
= ~hi
;
18425 /* Force this value into the low part of a fp vector constant. */
18426 mask
= immed_double_const (lo
, hi
, imode
);
18427 mask
= gen_lowpart (mode
, mask
);
18429 if (vec_mode
== VOIDmode
)
18430 return force_reg (mode
, mask
);
18432 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18433 return force_reg (vec_mode
, v
);
18436 /* Generate code for floating point ABS or NEG. */
18439 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18442 rtx mask
, set
, dst
, src
;
18443 bool use_sse
= false;
18444 bool vector_mode
= VECTOR_MODE_P (mode
);
18445 enum machine_mode vmode
= mode
;
18449 else if (mode
== TFmode
)
18451 else if (TARGET_SSE_MATH
)
18453 use_sse
= SSE_FLOAT_MODE_P (mode
);
18454 if (mode
== SFmode
)
18456 else if (mode
== DFmode
)
18460 /* NEG and ABS performed with SSE use bitwise mask operations.
18461 Create the appropriate mask now. */
18463 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18470 set
= gen_rtx_fmt_e (code
, mode
, src
);
18471 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18478 use
= gen_rtx_USE (VOIDmode
, mask
);
18480 par
= gen_rtvec (2, set
, use
);
18483 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18484 par
= gen_rtvec (3, set
, use
, clob
);
18486 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18492 /* Expand a copysign operation. Special case operand 0 being a constant. */
18495 ix86_expand_copysign (rtx operands
[])
18497 enum machine_mode mode
, vmode
;
18498 rtx dest
, op0
, op1
, mask
, nmask
;
18500 dest
= operands
[0];
18504 mode
= GET_MODE (dest
);
18506 if (mode
== SFmode
)
18508 else if (mode
== DFmode
)
18513 if (GET_CODE (op0
) == CONST_DOUBLE
)
18515 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18517 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18518 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18520 if (mode
== SFmode
|| mode
== DFmode
)
18522 if (op0
== CONST0_RTX (mode
))
18523 op0
= CONST0_RTX (vmode
);
18526 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18528 op0
= force_reg (vmode
, v
);
18531 else if (op0
!= CONST0_RTX (mode
))
18532 op0
= force_reg (mode
, op0
);
18534 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18536 if (mode
== SFmode
)
18537 copysign_insn
= gen_copysignsf3_const
;
18538 else if (mode
== DFmode
)
18539 copysign_insn
= gen_copysigndf3_const
;
18541 copysign_insn
= gen_copysigntf3_const
;
18543 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18547 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18549 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18550 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18552 if (mode
== SFmode
)
18553 copysign_insn
= gen_copysignsf3_var
;
18554 else if (mode
== DFmode
)
18555 copysign_insn
= gen_copysigndf3_var
;
18557 copysign_insn
= gen_copysigntf3_var
;
18559 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18563 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18564 be a constant, and so has already been expanded into a vector constant. */
18567 ix86_split_copysign_const (rtx operands
[])
18569 enum machine_mode mode
, vmode
;
18570 rtx dest
, op0
, mask
, x
;
18572 dest
= operands
[0];
18574 mask
= operands
[3];
18576 mode
= GET_MODE (dest
);
18577 vmode
= GET_MODE (mask
);
18579 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18580 x
= gen_rtx_AND (vmode
, dest
, mask
);
18581 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18583 if (op0
!= CONST0_RTX (vmode
))
18585 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18586 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18590 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18591 so we have to do two masks. */
18594 ix86_split_copysign_var (rtx operands
[])
18596 enum machine_mode mode
, vmode
;
18597 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18599 dest
= operands
[0];
18600 scratch
= operands
[1];
18603 nmask
= operands
[4];
18604 mask
= operands
[5];
18606 mode
= GET_MODE (dest
);
18607 vmode
= GET_MODE (mask
);
18609 if (rtx_equal_p (op0
, op1
))
18611 /* Shouldn't happen often (it's useless, obviously), but when it does
18612 we'd generate incorrect code if we continue below. */
18613 emit_move_insn (dest
, op0
);
18617 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18619 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18621 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18622 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18625 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18626 x
= gen_rtx_NOT (vmode
, dest
);
18627 x
= gen_rtx_AND (vmode
, x
, op0
);
18628 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18632 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18634 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18636 else /* alternative 2,4 */
18638 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18639 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18640 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18642 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18644 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18646 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18647 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18649 else /* alternative 3,4 */
18651 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18653 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18654 x
= gen_rtx_AND (vmode
, dest
, op0
);
18656 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18659 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18660 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18663 /* Return TRUE or FALSE depending on whether the first SET in INSN
18664 has source and destination with matching CC modes, and that the
18665 CC mode is at least as constrained as REQ_MODE. */
18668 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18671 enum machine_mode set_mode
;
18673 set
= PATTERN (insn
);
18674 if (GET_CODE (set
) == PARALLEL
)
18675 set
= XVECEXP (set
, 0, 0);
18676 gcc_assert (GET_CODE (set
) == SET
);
18677 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18679 set_mode
= GET_MODE (SET_DEST (set
));
18683 if (req_mode
!= CCNOmode
18684 && (req_mode
!= CCmode
18685 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18689 if (req_mode
== CCGCmode
)
18693 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18697 if (req_mode
== CCZmode
)
18707 if (set_mode
!= req_mode
)
18712 gcc_unreachable ();
18715 return GET_MODE (SET_SRC (set
)) == set_mode
;
18718 /* Generate insn patterns to do an integer compare of OPERANDS. */
18721 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18723 enum machine_mode cmpmode
;
18726 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18727 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18729 /* This is very simple, but making the interface the same as in the
18730 FP case makes the rest of the code easier. */
18731 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18732 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18734 /* Return the test that should be put into the flags user, i.e.
18735 the bcc, scc, or cmov instruction. */
18736 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18739 /* Figure out whether to use ordered or unordered fp comparisons.
18740 Return the appropriate mode to use. */
18743 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18745 /* ??? In order to make all comparisons reversible, we do all comparisons
18746 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18747 all forms trapping and nontrapping comparisons, we can make inequality
18748 comparisons trapping again, since it results in better code when using
18749 FCOM based compares. */
18750 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18754 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18756 enum machine_mode mode
= GET_MODE (op0
);
18758 if (SCALAR_FLOAT_MODE_P (mode
))
18760 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18761 return ix86_fp_compare_mode (code
);
18766 /* Only zero flag is needed. */
18767 case EQ
: /* ZF=0 */
18768 case NE
: /* ZF!=0 */
18770 /* Codes needing carry flag. */
18771 case GEU
: /* CF=0 */
18772 case LTU
: /* CF=1 */
18773 /* Detect overflow checks. They need just the carry flag. */
18774 if (GET_CODE (op0
) == PLUS
18775 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18779 case GTU
: /* CF=0 & ZF=0 */
18780 case LEU
: /* CF=1 | ZF=1 */
18781 /* Detect overflow checks. They need just the carry flag. */
18782 if (GET_CODE (op0
) == MINUS
18783 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18787 /* Codes possibly doable only with sign flag when
18788 comparing against zero. */
18789 case GE
: /* SF=OF or SF=0 */
18790 case LT
: /* SF<>OF or SF=1 */
18791 if (op1
== const0_rtx
)
18794 /* For other cases Carry flag is not required. */
18796 /* Codes doable only with sign flag when comparing
18797 against zero, but we miss jump instruction for it
18798 so we need to use relational tests against overflow
18799 that thus needs to be zero. */
18800 case GT
: /* ZF=0 & SF=OF */
18801 case LE
: /* ZF=1 | SF<>OF */
18802 if (op1
== const0_rtx
)
18806 /* strcmp pattern do (use flags) and combine may ask us for proper
18811 gcc_unreachable ();
18815 /* Return the fixed registers used for condition codes. */
18818 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18825 /* If two condition code modes are compatible, return a condition code
18826 mode which is compatible with both. Otherwise, return
18829 static enum machine_mode
18830 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18835 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18838 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18839 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18842 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18844 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18850 gcc_unreachable ();
18880 /* These are only compatible with themselves, which we already
18887 /* Return a comparison we can do and that it is equivalent to
18888 swap_condition (code) apart possibly from orderedness.
18889 But, never change orderedness if TARGET_IEEE_FP, returning
18890 UNKNOWN in that case if necessary. */
18892 static enum rtx_code
18893 ix86_fp_swap_condition (enum rtx_code code
)
18897 case GT
: /* GTU - CF=0 & ZF=0 */
18898 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18899 case GE
: /* GEU - CF=0 */
18900 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18901 case UNLT
: /* LTU - CF=1 */
18902 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18903 case UNLE
: /* LEU - CF=1 | ZF=1 */
18904 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18906 return swap_condition (code
);
18910 /* Return cost of comparison CODE using the best strategy for performance.
18911 All following functions do use number of instructions as a cost metrics.
18912 In future this should be tweaked to compute bytes for optimize_size and
18913 take into account performance of various instructions on various CPUs. */
18916 ix86_fp_comparison_cost (enum rtx_code code
)
18920 /* The cost of code using bit-twiddling on %ah. */
18937 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18941 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18944 gcc_unreachable ();
18947 switch (ix86_fp_comparison_strategy (code
))
18949 case IX86_FPCMP_COMI
:
18950 return arith_cost
> 4 ? 3 : 2;
18951 case IX86_FPCMP_SAHF
:
18952 return arith_cost
> 4 ? 4 : 3;
18958 /* Return strategy to use for floating-point. We assume that fcomi is always
18959 preferrable where available, since that is also true when looking at size
18960 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18962 enum ix86_fpcmp_strategy
18963 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18965 /* Do fcomi/sahf based test when profitable. */
18968 return IX86_FPCMP_COMI
;
18970 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
18971 return IX86_FPCMP_SAHF
;
18973 return IX86_FPCMP_ARITH
;
18976 /* Swap, force into registers, or otherwise massage the two operands
18977 to a fp comparison. The operands are updated in place; the new
18978 comparison code is returned. */
18980 static enum rtx_code
18981 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18983 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18984 rtx op0
= *pop0
, op1
= *pop1
;
18985 enum machine_mode op_mode
= GET_MODE (op0
);
18986 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18988 /* All of the unordered compare instructions only work on registers.
18989 The same is true of the fcomi compare instructions. The XFmode
18990 compare instructions require registers except when comparing
18991 against zero or when converting operand 1 from fixed point to
18995 && (fpcmp_mode
== CCFPUmode
18996 || (op_mode
== XFmode
18997 && ! (standard_80387_constant_p (op0
) == 1
18998 || standard_80387_constant_p (op1
) == 1)
18999 && GET_CODE (op1
) != FLOAT
)
19000 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19002 op0
= force_reg (op_mode
, op0
);
19003 op1
= force_reg (op_mode
, op1
);
19007 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19008 things around if they appear profitable, otherwise force op0
19009 into a register. */
19011 if (standard_80387_constant_p (op0
) == 0
19013 && ! (standard_80387_constant_p (op1
) == 0
19016 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19017 if (new_code
!= UNKNOWN
)
19020 tmp
= op0
, op0
= op1
, op1
= tmp
;
19026 op0
= force_reg (op_mode
, op0
);
19028 if (CONSTANT_P (op1
))
19030 int tmp
= standard_80387_constant_p (op1
);
19032 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19036 op1
= force_reg (op_mode
, op1
);
19039 op1
= force_reg (op_mode
, op1
);
19043 /* Try to rearrange the comparison to make it cheaper. */
19044 if (ix86_fp_comparison_cost (code
)
19045 > ix86_fp_comparison_cost (swap_condition (code
))
19046 && (REG_P (op1
) || can_create_pseudo_p ()))
19049 tmp
= op0
, op0
= op1
, op1
= tmp
;
19050 code
= swap_condition (code
);
19052 op0
= force_reg (op_mode
, op0
);
19060 /* Convert comparison codes we use to represent FP comparison to integer
19061 code that will result in proper branch. Return UNKNOWN if no such code
19065 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19094 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19097 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19099 enum machine_mode fpcmp_mode
, intcmp_mode
;
19102 fpcmp_mode
= ix86_fp_compare_mode (code
);
19103 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19105 /* Do fcomi/sahf based test when profitable. */
19106 switch (ix86_fp_comparison_strategy (code
))
19108 case IX86_FPCMP_COMI
:
19109 intcmp_mode
= fpcmp_mode
;
19110 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19111 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19116 case IX86_FPCMP_SAHF
:
19117 intcmp_mode
= fpcmp_mode
;
19118 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19119 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19123 scratch
= gen_reg_rtx (HImode
);
19124 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19125 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19128 case IX86_FPCMP_ARITH
:
19129 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19130 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19131 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19133 scratch
= gen_reg_rtx (HImode
);
19134 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19136 /* In the unordered case, we have to check C2 for NaN's, which
19137 doesn't happen to work out to anything nice combination-wise.
19138 So do some bit twiddling on the value we've got in AH to come
19139 up with an appropriate set of condition codes. */
19141 intcmp_mode
= CCNOmode
;
19146 if (code
== GT
|| !TARGET_IEEE_FP
)
19148 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19153 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19154 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19155 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19156 intcmp_mode
= CCmode
;
19162 if (code
== LT
&& TARGET_IEEE_FP
)
19164 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19165 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19166 intcmp_mode
= CCmode
;
19171 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19177 if (code
== GE
|| !TARGET_IEEE_FP
)
19179 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19184 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19185 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19191 if (code
== LE
&& TARGET_IEEE_FP
)
19193 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19194 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19195 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19196 intcmp_mode
= CCmode
;
19201 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19207 if (code
== EQ
&& TARGET_IEEE_FP
)
19209 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19210 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19211 intcmp_mode
= CCmode
;
19216 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19222 if (code
== NE
&& TARGET_IEEE_FP
)
19224 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19225 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19231 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19237 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19241 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19246 gcc_unreachable ();
19254 /* Return the test that should be put into the flags user, i.e.
19255 the bcc, scc, or cmov instruction. */
19256 return gen_rtx_fmt_ee (code
, VOIDmode
,
19257 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
19262 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19266 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19267 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19269 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19271 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19272 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19275 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19281 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19283 enum machine_mode mode
= GET_MODE (op0
);
19295 tmp
= ix86_expand_compare (code
, op0
, op1
);
19296 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19297 gen_rtx_LABEL_REF (VOIDmode
, label
),
19299 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19306 /* Expand DImode branch into multiple compare+branch. */
19308 rtx lo
[2], hi
[2], label2
;
19309 enum rtx_code code1
, code2
, code3
;
19310 enum machine_mode submode
;
19312 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19314 tmp
= op0
, op0
= op1
, op1
= tmp
;
19315 code
= swap_condition (code
);
19318 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19319 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19321 submode
= mode
== DImode
? SImode
: DImode
;
19323 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19324 avoid two branches. This costs one extra insn, so disable when
19325 optimizing for size. */
19327 if ((code
== EQ
|| code
== NE
)
19328 && (!optimize_insn_for_size_p ()
19329 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19334 if (hi
[1] != const0_rtx
)
19335 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19336 NULL_RTX
, 0, OPTAB_WIDEN
);
19339 if (lo
[1] != const0_rtx
)
19340 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19341 NULL_RTX
, 0, OPTAB_WIDEN
);
19343 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19344 NULL_RTX
, 0, OPTAB_WIDEN
);
19346 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19350 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19351 op1 is a constant and the low word is zero, then we can just
19352 examine the high word. Similarly for low word -1 and
19353 less-or-equal-than or greater-than. */
19355 if (CONST_INT_P (hi
[1]))
19358 case LT
: case LTU
: case GE
: case GEU
:
19359 if (lo
[1] == const0_rtx
)
19361 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19365 case LE
: case LEU
: case GT
: case GTU
:
19366 if (lo
[1] == constm1_rtx
)
19368 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19376 /* Otherwise, we need two or three jumps. */
19378 label2
= gen_label_rtx ();
19381 code2
= swap_condition (code
);
19382 code3
= unsigned_condition (code
);
19386 case LT
: case GT
: case LTU
: case GTU
:
19389 case LE
: code1
= LT
; code2
= GT
; break;
19390 case GE
: code1
= GT
; code2
= LT
; break;
19391 case LEU
: code1
= LTU
; code2
= GTU
; break;
19392 case GEU
: code1
= GTU
; code2
= LTU
; break;
19394 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19395 case NE
: code2
= UNKNOWN
; break;
19398 gcc_unreachable ();
19403 * if (hi(a) < hi(b)) goto true;
19404 * if (hi(a) > hi(b)) goto false;
19405 * if (lo(a) < lo(b)) goto true;
19409 if (code1
!= UNKNOWN
)
19410 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19411 if (code2
!= UNKNOWN
)
19412 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19414 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19416 if (code2
!= UNKNOWN
)
19417 emit_label (label2
);
19422 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19427 /* Split branch based on floating point condition. */
19429 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19430 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19435 if (target2
!= pc_rtx
)
19438 code
= reverse_condition_maybe_unordered (code
);
19443 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19446 /* Remove pushed operand from stack. */
19448 ix86_free_from_memory (GET_MODE (pushed
));
19450 i
= emit_jump_insn (gen_rtx_SET
19452 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19453 condition
, target1
, target2
)));
19454 if (split_branch_probability
>= 0)
19455 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19459 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19463 gcc_assert (GET_MODE (dest
) == QImode
);
19465 ret
= ix86_expand_compare (code
, op0
, op1
);
19466 PUT_MODE (ret
, QImode
);
19467 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19470 /* Expand comparison setting or clearing carry flag. Return true when
19471 successful and set pop for the operation. */
19473 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19475 enum machine_mode mode
=
19476 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19478 /* Do not handle double-mode compares that go through special path. */
19479 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19482 if (SCALAR_FLOAT_MODE_P (mode
))
19484 rtx compare_op
, compare_seq
;
19486 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19488 /* Shortcut: following common codes never translate
19489 into carry flag compares. */
19490 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19491 || code
== ORDERED
|| code
== UNORDERED
)
19494 /* These comparisons require zero flag; swap operands so they won't. */
19495 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19496 && !TARGET_IEEE_FP
)
19501 code
= swap_condition (code
);
19504 /* Try to expand the comparison and verify that we end up with
19505 carry flag based comparison. This fails to be true only when
19506 we decide to expand comparison using arithmetic that is not
19507 too common scenario. */
19509 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19510 compare_seq
= get_insns ();
19513 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19514 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19515 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19517 code
= GET_CODE (compare_op
);
19519 if (code
!= LTU
&& code
!= GEU
)
19522 emit_insn (compare_seq
);
19527 if (!INTEGRAL_MODE_P (mode
))
19536 /* Convert a==0 into (unsigned)a<1. */
19539 if (op1
!= const0_rtx
)
19542 code
= (code
== EQ
? LTU
: GEU
);
19545 /* Convert a>b into b<a or a>=b-1. */
19548 if (CONST_INT_P (op1
))
19550 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19551 /* Bail out on overflow. We still can swap operands but that
19552 would force loading of the constant into register. */
19553 if (op1
== const0_rtx
19554 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19556 code
= (code
== GTU
? GEU
: LTU
);
19563 code
= (code
== GTU
? LTU
: GEU
);
19567 /* Convert a>=0 into (unsigned)a<0x80000000. */
19570 if (mode
== DImode
|| op1
!= const0_rtx
)
19572 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19573 code
= (code
== LT
? GEU
: LTU
);
19577 if (mode
== DImode
|| op1
!= constm1_rtx
)
19579 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19580 code
= (code
== LE
? GEU
: LTU
);
19586 /* Swapping operands may cause constant to appear as first operand. */
19587 if (!nonimmediate_operand (op0
, VOIDmode
))
19589 if (!can_create_pseudo_p ())
19591 op0
= force_reg (mode
, op0
);
19593 *pop
= ix86_expand_compare (code
, op0
, op1
);
19594 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19599 ix86_expand_int_movcc (rtx operands
[])
19601 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19602 rtx compare_seq
, compare_op
;
19603 enum machine_mode mode
= GET_MODE (operands
[0]);
19604 bool sign_bit_compare_p
= false;
19605 rtx op0
= XEXP (operands
[1], 0);
19606 rtx op1
= XEXP (operands
[1], 1);
19608 if (GET_MODE (op0
) == TImode
19609 || (GET_MODE (op0
) == DImode
19614 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19615 compare_seq
= get_insns ();
19618 compare_code
= GET_CODE (compare_op
);
19620 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19621 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19622 sign_bit_compare_p
= true;
19624 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19625 HImode insns, we'd be swallowed in word prefix ops. */
19627 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19628 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19629 && CONST_INT_P (operands
[2])
19630 && CONST_INT_P (operands
[3]))
19632 rtx out
= operands
[0];
19633 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19634 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19635 HOST_WIDE_INT diff
;
19638 /* Sign bit compares are better done using shifts than we do by using
19640 if (sign_bit_compare_p
19641 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19643 /* Detect overlap between destination and compare sources. */
19646 if (!sign_bit_compare_p
)
19649 bool fpcmp
= false;
19651 compare_code
= GET_CODE (compare_op
);
19653 flags
= XEXP (compare_op
, 0);
19655 if (GET_MODE (flags
) == CCFPmode
19656 || GET_MODE (flags
) == CCFPUmode
)
19660 = ix86_fp_compare_code_to_integer (compare_code
);
19663 /* To simplify rest of code, restrict to the GEU case. */
19664 if (compare_code
== LTU
)
19666 HOST_WIDE_INT tmp
= ct
;
19669 compare_code
= reverse_condition (compare_code
);
19670 code
= reverse_condition (code
);
19675 PUT_CODE (compare_op
,
19676 reverse_condition_maybe_unordered
19677 (GET_CODE (compare_op
)));
19679 PUT_CODE (compare_op
,
19680 reverse_condition (GET_CODE (compare_op
)));
19684 if (reg_overlap_mentioned_p (out
, op0
)
19685 || reg_overlap_mentioned_p (out
, op1
))
19686 tmp
= gen_reg_rtx (mode
);
19688 if (mode
== DImode
)
19689 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19691 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19692 flags
, compare_op
));
19696 if (code
== GT
|| code
== GE
)
19697 code
= reverse_condition (code
);
19700 HOST_WIDE_INT tmp
= ct
;
19705 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19718 tmp
= expand_simple_binop (mode
, PLUS
,
19720 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19731 tmp
= expand_simple_binop (mode
, IOR
,
19733 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19735 else if (diff
== -1 && ct
)
19745 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19747 tmp
= expand_simple_binop (mode
, PLUS
,
19748 copy_rtx (tmp
), GEN_INT (cf
),
19749 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19757 * andl cf - ct, dest
19767 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19770 tmp
= expand_simple_binop (mode
, AND
,
19772 gen_int_mode (cf
- ct
, mode
),
19773 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19775 tmp
= expand_simple_binop (mode
, PLUS
,
19776 copy_rtx (tmp
), GEN_INT (ct
),
19777 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19780 if (!rtx_equal_p (tmp
, out
))
19781 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19788 enum machine_mode cmp_mode
= GET_MODE (op0
);
19791 tmp
= ct
, ct
= cf
, cf
= tmp
;
19794 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19796 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19798 /* We may be reversing unordered compare to normal compare, that
19799 is not valid in general (we may convert non-trapping condition
19800 to trapping one), however on i386 we currently emit all
19801 comparisons unordered. */
19802 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19803 code
= reverse_condition_maybe_unordered (code
);
19807 compare_code
= reverse_condition (compare_code
);
19808 code
= reverse_condition (code
);
19812 compare_code
= UNKNOWN
;
19813 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19814 && CONST_INT_P (op1
))
19816 if (op1
== const0_rtx
19817 && (code
== LT
|| code
== GE
))
19818 compare_code
= code
;
19819 else if (op1
== constm1_rtx
)
19823 else if (code
== GT
)
19828 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19829 if (compare_code
!= UNKNOWN
19830 && GET_MODE (op0
) == GET_MODE (out
)
19831 && (cf
== -1 || ct
== -1))
19833 /* If lea code below could be used, only optimize
19834 if it results in a 2 insn sequence. */
19836 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19837 || diff
== 3 || diff
== 5 || diff
== 9)
19838 || (compare_code
== LT
&& ct
== -1)
19839 || (compare_code
== GE
&& cf
== -1))
19842 * notl op1 (if necessary)
19850 code
= reverse_condition (code
);
19853 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19855 out
= expand_simple_binop (mode
, IOR
,
19857 out
, 1, OPTAB_DIRECT
);
19858 if (out
!= operands
[0])
19859 emit_move_insn (operands
[0], out
);
19866 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19867 || diff
== 3 || diff
== 5 || diff
== 9)
19868 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19870 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19876 * lea cf(dest*(ct-cf)),dest
19880 * This also catches the degenerate setcc-only case.
19886 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19889 /* On x86_64 the lea instruction operates on Pmode, so we need
19890 to get arithmetics done in proper mode to match. */
19892 tmp
= copy_rtx (out
);
19896 out1
= copy_rtx (out
);
19897 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19901 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19907 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19910 if (!rtx_equal_p (tmp
, out
))
19913 out
= force_operand (tmp
, copy_rtx (out
));
19915 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19917 if (!rtx_equal_p (out
, operands
[0]))
19918 emit_move_insn (operands
[0], copy_rtx (out
));
19924 * General case: Jumpful:
19925 * xorl dest,dest cmpl op1, op2
19926 * cmpl op1, op2 movl ct, dest
19927 * setcc dest jcc 1f
19928 * decl dest movl cf, dest
19929 * andl (cf-ct),dest 1:
19932 * Size 20. Size 14.
19934 * This is reasonably steep, but branch mispredict costs are
19935 * high on modern cpus, so consider failing only if optimizing
19939 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19940 && BRANCH_COST (optimize_insn_for_speed_p (),
19945 enum machine_mode cmp_mode
= GET_MODE (op0
);
19950 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19952 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19954 /* We may be reversing unordered compare to normal compare,
19955 that is not valid in general (we may convert non-trapping
19956 condition to trapping one), however on i386 we currently
19957 emit all comparisons unordered. */
19958 code
= reverse_condition_maybe_unordered (code
);
19962 code
= reverse_condition (code
);
19963 if (compare_code
!= UNKNOWN
)
19964 compare_code
= reverse_condition (compare_code
);
19968 if (compare_code
!= UNKNOWN
)
19970 /* notl op1 (if needed)
19975 For x < 0 (resp. x <= -1) there will be no notl,
19976 so if possible swap the constants to get rid of the
19978 True/false will be -1/0 while code below (store flag
19979 followed by decrement) is 0/-1, so the constants need
19980 to be exchanged once more. */
19982 if (compare_code
== GE
|| !cf
)
19984 code
= reverse_condition (code
);
19989 HOST_WIDE_INT tmp
= cf
;
19994 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19998 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20000 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20002 copy_rtx (out
), 1, OPTAB_DIRECT
);
20005 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20006 gen_int_mode (cf
- ct
, mode
),
20007 copy_rtx (out
), 1, OPTAB_DIRECT
);
20009 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20010 copy_rtx (out
), 1, OPTAB_DIRECT
);
20011 if (!rtx_equal_p (out
, operands
[0]))
20012 emit_move_insn (operands
[0], copy_rtx (out
));
20018 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20020 /* Try a few things more with specific constants and a variable. */
20023 rtx var
, orig_out
, out
, tmp
;
20025 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20028 /* If one of the two operands is an interesting constant, load a
20029 constant with the above and mask it in with a logical operation. */
20031 if (CONST_INT_P (operands
[2]))
20034 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20035 operands
[3] = constm1_rtx
, op
= and_optab
;
20036 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20037 operands
[3] = const0_rtx
, op
= ior_optab
;
20041 else if (CONST_INT_P (operands
[3]))
20044 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20045 operands
[2] = constm1_rtx
, op
= and_optab
;
20046 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20047 operands
[2] = const0_rtx
, op
= ior_optab
;
20054 orig_out
= operands
[0];
20055 tmp
= gen_reg_rtx (mode
);
20058 /* Recurse to get the constant loaded. */
20059 if (ix86_expand_int_movcc (operands
) == 0)
20062 /* Mask in the interesting variable. */
20063 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20065 if (!rtx_equal_p (out
, orig_out
))
20066 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20072 * For comparison with above,
20082 if (! nonimmediate_operand (operands
[2], mode
))
20083 operands
[2] = force_reg (mode
, operands
[2]);
20084 if (! nonimmediate_operand (operands
[3], mode
))
20085 operands
[3] = force_reg (mode
, operands
[3]);
20087 if (! register_operand (operands
[2], VOIDmode
)
20089 || ! register_operand (operands
[3], VOIDmode
)))
20090 operands
[2] = force_reg (mode
, operands
[2]);
20093 && ! register_operand (operands
[3], VOIDmode
))
20094 operands
[3] = force_reg (mode
, operands
[3]);
20096 emit_insn (compare_seq
);
20097 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20098 gen_rtx_IF_THEN_ELSE (mode
,
20099 compare_op
, operands
[2],
20104 /* Swap, force into registers, or otherwise massage the two operands
20105 to an sse comparison with a mask result. Thus we differ a bit from
20106 ix86_prepare_fp_compare_args which expects to produce a flags result.
20108 The DEST operand exists to help determine whether to commute commutative
20109 operators. The POP0/POP1 operands are updated in place. The new
20110 comparison code is returned, or UNKNOWN if not implementable. */
20112 static enum rtx_code
20113 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20114 rtx
*pop0
, rtx
*pop1
)
20122 /* AVX supports all the needed comparisons. */
20125 /* We have no LTGT as an operator. We could implement it with
20126 NE & ORDERED, but this requires an extra temporary. It's
20127 not clear that it's worth it. */
20134 /* These are supported directly. */
20141 /* AVX has 3 operand comparisons, no need to swap anything. */
20144 /* For commutative operators, try to canonicalize the destination
20145 operand to be first in the comparison - this helps reload to
20146 avoid extra moves. */
20147 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20155 /* These are not supported directly before AVX, and furthermore
20156 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20157 comparison operands to transform into something that is
20162 code
= swap_condition (code
);
20166 gcc_unreachable ();
20172 /* Detect conditional moves that exactly match min/max operational
20173 semantics. Note that this is IEEE safe, as long as we don't
20174 interchange the operands.
20176 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20177 and TRUE if the operation is successful and instructions are emitted. */
20180 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20181 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20183 enum machine_mode mode
;
20189 else if (code
== UNGE
)
20192 if_true
= if_false
;
20198 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20200 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20205 mode
= GET_MODE (dest
);
20207 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20208 but MODE may be a vector mode and thus not appropriate. */
20209 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
20211 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20214 if_true
= force_reg (mode
, if_true
);
20215 v
= gen_rtvec (2, if_true
, if_false
);
20216 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
20220 code
= is_min
? SMIN
: SMAX
;
20221 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20224 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
20228 /* Expand an sse vector comparison. Return the register with the result. */
20231 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20232 rtx op_true
, rtx op_false
)
20234 enum machine_mode mode
= GET_MODE (dest
);
20235 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20238 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20239 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20240 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
20243 || reg_overlap_mentioned_p (dest
, op_true
)
20244 || reg_overlap_mentioned_p (dest
, op_false
))
20245 dest
= gen_reg_rtx (mode
);
20247 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20248 if (cmp_mode
!= mode
)
20250 x
= force_reg (cmp_mode
, x
);
20251 convert_move (dest
, x
, false);
20254 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20259 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20260 operations. This is used for both scalar and vector conditional moves. */
20263 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20265 enum machine_mode mode
= GET_MODE (dest
);
20268 if (vector_all_ones_operand (op_true
, mode
)
20269 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20271 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20273 else if (op_false
== CONST0_RTX (mode
))
20275 op_true
= force_reg (mode
, op_true
);
20276 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20277 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20279 else if (op_true
== CONST0_RTX (mode
))
20281 op_false
= force_reg (mode
, op_false
);
20282 x
= gen_rtx_NOT (mode
, cmp
);
20283 x
= gen_rtx_AND (mode
, x
, op_false
);
20284 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20286 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20288 op_false
= force_reg (mode
, op_false
);
20289 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20290 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20292 else if (TARGET_XOP
)
20294 op_true
= force_reg (mode
, op_true
);
20296 if (!nonimmediate_operand (op_false
, mode
))
20297 op_false
= force_reg (mode
, op_false
);
20299 emit_insn (gen_rtx_SET (mode
, dest
,
20300 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20306 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20308 if (!nonimmediate_operand (op_true
, mode
))
20309 op_true
= force_reg (mode
, op_true
);
20311 op_false
= force_reg (mode
, op_false
);
20317 gen
= gen_sse4_1_blendvps
;
20321 gen
= gen_sse4_1_blendvpd
;
20329 gen
= gen_sse4_1_pblendvb
;
20330 dest
= gen_lowpart (V16QImode
, dest
);
20331 op_false
= gen_lowpart (V16QImode
, op_false
);
20332 op_true
= gen_lowpart (V16QImode
, op_true
);
20333 cmp
= gen_lowpart (V16QImode
, cmp
);
20338 gen
= gen_avx_blendvps256
;
20342 gen
= gen_avx_blendvpd256
;
20350 gen
= gen_avx2_pblendvb
;
20351 dest
= gen_lowpart (V32QImode
, dest
);
20352 op_false
= gen_lowpart (V32QImode
, op_false
);
20353 op_true
= gen_lowpart (V32QImode
, op_true
);
20354 cmp
= gen_lowpart (V32QImode
, cmp
);
20362 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20365 op_true
= force_reg (mode
, op_true
);
20367 t2
= gen_reg_rtx (mode
);
20369 t3
= gen_reg_rtx (mode
);
20373 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20374 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20376 x
= gen_rtx_NOT (mode
, cmp
);
20377 x
= gen_rtx_AND (mode
, x
, op_false
);
20378 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20380 x
= gen_rtx_IOR (mode
, t3
, t2
);
20381 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20386 /* Expand a floating-point conditional move. Return true if successful. */
20389 ix86_expand_fp_movcc (rtx operands
[])
20391 enum machine_mode mode
= GET_MODE (operands
[0]);
20392 enum rtx_code code
= GET_CODE (operands
[1]);
20393 rtx tmp
, compare_op
;
20394 rtx op0
= XEXP (operands
[1], 0);
20395 rtx op1
= XEXP (operands
[1], 1);
20397 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20399 enum machine_mode cmode
;
20401 /* Since we've no cmove for sse registers, don't force bad register
20402 allocation just to gain access to it. Deny movcc when the
20403 comparison mode doesn't match the move mode. */
20404 cmode
= GET_MODE (op0
);
20405 if (cmode
== VOIDmode
)
20406 cmode
= GET_MODE (op1
);
20410 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20411 if (code
== UNKNOWN
)
20414 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20415 operands
[2], operands
[3]))
20418 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20419 operands
[2], operands
[3]);
20420 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20424 if (GET_MODE (op0
) == TImode
20425 || (GET_MODE (op0
) == DImode
20429 /* The floating point conditional move instructions don't directly
20430 support conditions resulting from a signed integer comparison. */
20432 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20433 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20435 tmp
= gen_reg_rtx (QImode
);
20436 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20438 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20441 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20442 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20443 operands
[2], operands
[3])));
20448 /* Expand a floating-point vector conditional move; a vcond operation
20449 rather than a movcc operation. */
20452 ix86_expand_fp_vcond (rtx operands
[])
20454 enum rtx_code code
= GET_CODE (operands
[3]);
20457 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20458 &operands
[4], &operands
[5]);
20459 if (code
== UNKNOWN
)
20462 switch (GET_CODE (operands
[3]))
20465 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20466 operands
[5], operands
[0], operands
[0]);
20467 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20468 operands
[5], operands
[1], operands
[2]);
20472 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20473 operands
[5], operands
[0], operands
[0]);
20474 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20475 operands
[5], operands
[1], operands
[2]);
20479 gcc_unreachable ();
20481 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20483 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20487 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20488 operands
[5], operands
[1], operands
[2]))
20491 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20492 operands
[1], operands
[2]);
20493 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20497 /* Expand a signed/unsigned integral vector conditional move. */
20500 ix86_expand_int_vcond (rtx operands
[])
20502 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20503 enum machine_mode mode
= GET_MODE (operands
[4]);
20504 enum rtx_code code
= GET_CODE (operands
[3]);
20505 bool negate
= false;
20508 cop0
= operands
[4];
20509 cop1
= operands
[5];
20511 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20512 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20513 if ((code
== LT
|| code
== GE
)
20514 && data_mode
== mode
20515 && cop1
== CONST0_RTX (mode
)
20516 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20517 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20518 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20519 && (GET_MODE_SIZE (data_mode
) == 16
20520 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20522 rtx negop
= operands
[2 - (code
== LT
)];
20523 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20524 if (negop
== CONST1_RTX (data_mode
))
20526 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20527 operands
[0], 1, OPTAB_DIRECT
);
20528 if (res
!= operands
[0])
20529 emit_move_insn (operands
[0], res
);
20532 else if (GET_MODE_INNER (data_mode
) != DImode
20533 && vector_all_ones_operand (negop
, data_mode
))
20535 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20536 operands
[0], 0, OPTAB_DIRECT
);
20537 if (res
!= operands
[0])
20538 emit_move_insn (operands
[0], res
);
20543 if (!nonimmediate_operand (cop1
, mode
))
20544 cop1
= force_reg (mode
, cop1
);
20545 if (!general_operand (operands
[1], data_mode
))
20546 operands
[1] = force_reg (data_mode
, operands
[1]);
20547 if (!general_operand (operands
[2], data_mode
))
20548 operands
[2] = force_reg (data_mode
, operands
[2]);
20550 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20552 && (mode
== V16QImode
|| mode
== V8HImode
20553 || mode
== V4SImode
|| mode
== V2DImode
))
20557 /* Canonicalize the comparison to EQ, GT, GTU. */
20568 code
= reverse_condition (code
);
20574 code
= reverse_condition (code
);
20580 code
= swap_condition (code
);
20581 x
= cop0
, cop0
= cop1
, cop1
= x
;
20585 gcc_unreachable ();
20588 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20589 if (mode
== V2DImode
)
20594 /* SSE4.1 supports EQ. */
20595 if (!TARGET_SSE4_1
)
20601 /* SSE4.2 supports GT/GTU. */
20602 if (!TARGET_SSE4_2
)
20607 gcc_unreachable ();
20611 /* Unsigned parallel compare is not supported by the hardware.
20612 Play some tricks to turn this into a signed comparison
20616 cop0
= force_reg (mode
, cop0
);
20626 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20630 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20631 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20632 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20633 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20635 gcc_unreachable ();
20637 /* Subtract (-(INT MAX) - 1) from both operands to make
20639 mask
= ix86_build_signbit_mask (mode
, true, false);
20640 t1
= gen_reg_rtx (mode
);
20641 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20643 t2
= gen_reg_rtx (mode
);
20644 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20656 /* Perform a parallel unsigned saturating subtraction. */
20657 x
= gen_reg_rtx (mode
);
20658 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20659 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20662 cop1
= CONST0_RTX (mode
);
20668 gcc_unreachable ();
20673 /* Allow the comparison to be done in one mode, but the movcc to
20674 happen in another mode. */
20675 if (data_mode
== mode
)
20677 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20678 operands
[1+negate
], operands
[2-negate
]);
20682 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20683 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20685 operands
[1+negate
], operands
[2-negate
]);
20686 x
= gen_lowpart (data_mode
, x
);
20689 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20690 operands
[2-negate
]);
20694 /* Expand a variable vector permutation. */
20697 ix86_expand_vec_perm (rtx operands
[])
20699 rtx target
= operands
[0];
20700 rtx op0
= operands
[1];
20701 rtx op1
= operands
[2];
20702 rtx mask
= operands
[3];
20703 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20704 enum machine_mode mode
= GET_MODE (op0
);
20705 enum machine_mode maskmode
= GET_MODE (mask
);
20707 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20709 /* Number of elements in the vector. */
20710 w
= GET_MODE_NUNITS (mode
);
20711 e
= GET_MODE_UNIT_SIZE (mode
);
20712 gcc_assert (w
<= 32);
20716 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20718 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20719 an constant shuffle operand. With a tiny bit of effort we can
20720 use VPERMD instead. A re-interpretation stall for V4DFmode is
20721 unfortunate but there's no avoiding it.
20722 Similarly for V16HImode we don't have instructions for variable
20723 shuffling, while for V32QImode we can use after preparing suitable
20724 masks vpshufb; vpshufb; vpermq; vpor. */
20726 if (mode
== V16HImode
)
20728 maskmode
= mode
= V32QImode
;
20734 maskmode
= mode
= V8SImode
;
20738 t1
= gen_reg_rtx (maskmode
);
20740 /* Replicate the low bits of the V4DImode mask into V8SImode:
20742 t1 = { A A B B C C D D }. */
20743 for (i
= 0; i
< w
/ 2; ++i
)
20744 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20745 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20746 vt
= force_reg (maskmode
, vt
);
20747 mask
= gen_lowpart (maskmode
, mask
);
20748 if (maskmode
== V8SImode
)
20749 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20751 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20753 /* Multiply the shuffle indicies by two. */
20754 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20757 /* Add one to the odd shuffle indicies:
20758 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20759 for (i
= 0; i
< w
/ 2; ++i
)
20761 vec
[i
* 2] = const0_rtx
;
20762 vec
[i
* 2 + 1] = const1_rtx
;
20764 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20765 vt
= validize_mem (force_const_mem (maskmode
, vt
));
20766 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20769 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20770 operands
[3] = mask
= t1
;
20771 target
= gen_lowpart (mode
, target
);
20772 op0
= gen_lowpart (mode
, op0
);
20773 op1
= gen_lowpart (mode
, op1
);
20779 /* The VPERMD and VPERMPS instructions already properly ignore
20780 the high bits of the shuffle elements. No need for us to
20781 perform an AND ourselves. */
20782 if (one_operand_shuffle
)
20783 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20786 t1
= gen_reg_rtx (V8SImode
);
20787 t2
= gen_reg_rtx (V8SImode
);
20788 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20789 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20795 mask
= gen_lowpart (V8SFmode
, mask
);
20796 if (one_operand_shuffle
)
20797 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20800 t1
= gen_reg_rtx (V8SFmode
);
20801 t2
= gen_reg_rtx (V8SFmode
);
20802 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20803 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20809 /* By combining the two 128-bit input vectors into one 256-bit
20810 input vector, we can use VPERMD and VPERMPS for the full
20811 two-operand shuffle. */
20812 t1
= gen_reg_rtx (V8SImode
);
20813 t2
= gen_reg_rtx (V8SImode
);
20814 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20815 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20816 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20817 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20821 t1
= gen_reg_rtx (V8SFmode
);
20822 t2
= gen_reg_rtx (V8SImode
);
20823 mask
= gen_lowpart (V4SImode
, mask
);
20824 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20825 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20826 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20827 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20831 t1
= gen_reg_rtx (V32QImode
);
20832 t2
= gen_reg_rtx (V32QImode
);
20833 t3
= gen_reg_rtx (V32QImode
);
20834 vt2
= GEN_INT (128);
20835 for (i
= 0; i
< 32; i
++)
20837 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20838 vt
= force_reg (V32QImode
, vt
);
20839 for (i
= 0; i
< 32; i
++)
20840 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20841 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20842 vt2
= force_reg (V32QImode
, vt2
);
20843 /* From mask create two adjusted masks, which contain the same
20844 bits as mask in the low 7 bits of each vector element.
20845 The first mask will have the most significant bit clear
20846 if it requests element from the same 128-bit lane
20847 and MSB set if it requests element from the other 128-bit lane.
20848 The second mask will have the opposite values of the MSB,
20849 and additionally will have its 128-bit lanes swapped.
20850 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20851 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20852 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20853 stands for other 12 bytes. */
20854 /* The bit whether element is from the same lane or the other
20855 lane is bit 4, so shift it up by 3 to the MSB position. */
20856 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20857 gen_lowpart (V4DImode
, mask
),
20859 /* Clear MSB bits from the mask just in case it had them set. */
20860 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20861 /* After this t1 will have MSB set for elements from other lane. */
20862 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20863 /* Clear bits other than MSB. */
20864 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20865 /* Or in the lower bits from mask into t3. */
20866 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20867 /* And invert MSB bits in t1, so MSB is set for elements from the same
20869 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20870 /* Swap 128-bit lanes in t3. */
20871 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20872 gen_lowpart (V4DImode
, t3
),
20873 const2_rtx
, GEN_INT (3),
20874 const0_rtx
, const1_rtx
));
20875 /* And or in the lower bits from mask into t1. */
20876 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20877 if (one_operand_shuffle
)
20879 /* Each of these shuffles will put 0s in places where
20880 element from the other 128-bit lane is needed, otherwise
20881 will shuffle in the requested value. */
20882 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20883 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20884 /* For t3 the 128-bit lanes are swapped again. */
20885 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20886 gen_lowpart (V4DImode
, t3
),
20887 const2_rtx
, GEN_INT (3),
20888 const0_rtx
, const1_rtx
));
20889 /* And oring both together leads to the result. */
20890 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20894 t4
= gen_reg_rtx (V32QImode
);
20895 /* Similarly to the above one_operand_shuffle code,
20896 just for repeated twice for each operand. merge_two:
20897 code will merge the two results together. */
20898 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20899 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20900 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20901 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20902 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20903 gen_lowpart (V4DImode
, t4
),
20904 const2_rtx
, GEN_INT (3),
20905 const0_rtx
, const1_rtx
));
20906 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20907 gen_lowpart (V4DImode
, t3
),
20908 const2_rtx
, GEN_INT (3),
20909 const0_rtx
, const1_rtx
));
20910 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20911 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20917 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20924 /* The XOP VPPERM insn supports three inputs. By ignoring the
20925 one_operand_shuffle special case, we avoid creating another
20926 set of constant vectors in memory. */
20927 one_operand_shuffle
= false;
20929 /* mask = mask & {2*w-1, ...} */
20930 vt
= GEN_INT (2*w
- 1);
20934 /* mask = mask & {w-1, ...} */
20935 vt
= GEN_INT (w
- 1);
20938 for (i
= 0; i
< w
; i
++)
20940 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20941 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20942 NULL_RTX
, 0, OPTAB_DIRECT
);
20944 /* For non-QImode operations, convert the word permutation control
20945 into a byte permutation control. */
20946 if (mode
!= V16QImode
)
20948 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20949 GEN_INT (exact_log2 (e
)),
20950 NULL_RTX
, 0, OPTAB_DIRECT
);
20952 /* Convert mask to vector of chars. */
20953 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20955 /* Replicate each of the input bytes into byte positions:
20956 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20957 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20958 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20959 for (i
= 0; i
< 16; ++i
)
20960 vec
[i
] = GEN_INT (i
/e
* e
);
20961 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20962 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
20964 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20966 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20968 /* Convert it into the byte positions by doing
20969 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20970 for (i
= 0; i
< 16; ++i
)
20971 vec
[i
] = GEN_INT (i
% e
);
20972 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20973 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
20974 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20977 /* The actual shuffle operations all operate on V16QImode. */
20978 op0
= gen_lowpart (V16QImode
, op0
);
20979 op1
= gen_lowpart (V16QImode
, op1
);
20980 target
= gen_lowpart (V16QImode
, target
);
20984 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20986 else if (one_operand_shuffle
)
20988 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20995 /* Shuffle the two input vectors independently. */
20996 t1
= gen_reg_rtx (V16QImode
);
20997 t2
= gen_reg_rtx (V16QImode
);
20998 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20999 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21002 /* Then merge them together. The key is whether any given control
21003 element contained a bit set that indicates the second word. */
21004 mask
= operands
[3];
21006 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21008 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21009 more shuffle to convert the V2DI input mask into a V4SI
21010 input mask. At which point the masking that expand_int_vcond
21011 will work as desired. */
21012 rtx t3
= gen_reg_rtx (V4SImode
);
21013 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21014 const0_rtx
, const0_rtx
,
21015 const2_rtx
, const2_rtx
));
21017 maskmode
= V4SImode
;
21021 for (i
= 0; i
< w
; i
++)
21023 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21024 vt
= force_reg (maskmode
, vt
);
21025 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21026 NULL_RTX
, 0, OPTAB_DIRECT
);
21028 xops
[0] = gen_lowpart (mode
, operands
[0]);
21029 xops
[1] = gen_lowpart (mode
, t2
);
21030 xops
[2] = gen_lowpart (mode
, t1
);
21031 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21034 ok
= ix86_expand_int_vcond (xops
);
21039 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21040 true if we should do zero extension, else sign extension. HIGH_P is
21041 true if we want the N/2 high elements, else the low elements. */
21044 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21046 enum machine_mode imode
= GET_MODE (src
);
21051 rtx (*unpack
)(rtx
, rtx
);
21052 rtx (*extract
)(rtx
, rtx
) = NULL
;
21053 enum machine_mode halfmode
= BLKmode
;
21059 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21061 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21062 halfmode
= V16QImode
;
21064 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21068 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21070 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21071 halfmode
= V8HImode
;
21073 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21077 unpack
= gen_avx2_zero_extendv4siv4di2
;
21079 unpack
= gen_avx2_sign_extendv4siv4di2
;
21080 halfmode
= V4SImode
;
21082 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21086 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21088 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21092 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21094 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21098 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21100 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21103 gcc_unreachable ();
21106 if (GET_MODE_SIZE (imode
) == 32)
21108 tmp
= gen_reg_rtx (halfmode
);
21109 emit_insn (extract (tmp
, src
));
21113 /* Shift higher 8 bytes to lower 8 bytes. */
21114 tmp
= gen_reg_rtx (imode
);
21115 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
21116 gen_lowpart (V1TImode
, src
),
21122 emit_insn (unpack (dest
, tmp
));
21126 rtx (*unpack
)(rtx
, rtx
, rtx
);
21132 unpack
= gen_vec_interleave_highv16qi
;
21134 unpack
= gen_vec_interleave_lowv16qi
;
21138 unpack
= gen_vec_interleave_highv8hi
;
21140 unpack
= gen_vec_interleave_lowv8hi
;
21144 unpack
= gen_vec_interleave_highv4si
;
21146 unpack
= gen_vec_interleave_lowv4si
;
21149 gcc_unreachable ();
21153 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21155 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21156 src
, pc_rtx
, pc_rtx
);
21158 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
21162 /* Expand conditional increment or decrement using adb/sbb instructions.
21163 The default case using setcc followed by the conditional move can be
21164 done by generic code. */
21166 ix86_expand_int_addcc (rtx operands
[])
21168 enum rtx_code code
= GET_CODE (operands
[1]);
21170 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21172 rtx val
= const0_rtx
;
21173 bool fpcmp
= false;
21174 enum machine_mode mode
;
21175 rtx op0
= XEXP (operands
[1], 0);
21176 rtx op1
= XEXP (operands
[1], 1);
21178 if (operands
[3] != const1_rtx
21179 && operands
[3] != constm1_rtx
)
21181 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21183 code
= GET_CODE (compare_op
);
21185 flags
= XEXP (compare_op
, 0);
21187 if (GET_MODE (flags
) == CCFPmode
21188 || GET_MODE (flags
) == CCFPUmode
)
21191 code
= ix86_fp_compare_code_to_integer (code
);
21198 PUT_CODE (compare_op
,
21199 reverse_condition_maybe_unordered
21200 (GET_CODE (compare_op
)));
21202 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21205 mode
= GET_MODE (operands
[0]);
21207 /* Construct either adc or sbb insn. */
21208 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21213 insn
= gen_subqi3_carry
;
21216 insn
= gen_subhi3_carry
;
21219 insn
= gen_subsi3_carry
;
21222 insn
= gen_subdi3_carry
;
21225 gcc_unreachable ();
21233 insn
= gen_addqi3_carry
;
21236 insn
= gen_addhi3_carry
;
21239 insn
= gen_addsi3_carry
;
21242 insn
= gen_adddi3_carry
;
21245 gcc_unreachable ();
21248 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21254 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21255 but works for floating pointer parameters and nonoffsetable memories.
21256 For pushes, it returns just stack offsets; the values will be saved
21257 in the right order. Maximally three parts are generated. */
21260 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21265 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21267 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21269 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21270 gcc_assert (size
>= 2 && size
<= 4);
21272 /* Optimize constant pool reference to immediates. This is used by fp
21273 moves, that force all constants to memory to allow combining. */
21274 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21276 rtx tmp
= maybe_get_pool_constant (operand
);
21281 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21283 /* The only non-offsetable memories we handle are pushes. */
21284 int ok
= push_operand (operand
, VOIDmode
);
21288 operand
= copy_rtx (operand
);
21289 PUT_MODE (operand
, word_mode
);
21290 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21294 if (GET_CODE (operand
) == CONST_VECTOR
)
21296 enum machine_mode imode
= int_mode_for_mode (mode
);
21297 /* Caution: if we looked through a constant pool memory above,
21298 the operand may actually have a different mode now. That's
21299 ok, since we want to pun this all the way back to an integer. */
21300 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21301 gcc_assert (operand
!= NULL
);
21307 if (mode
== DImode
)
21308 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21313 if (REG_P (operand
))
21315 gcc_assert (reload_completed
);
21316 for (i
= 0; i
< size
; i
++)
21317 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21319 else if (offsettable_memref_p (operand
))
21321 operand
= adjust_address (operand
, SImode
, 0);
21322 parts
[0] = operand
;
21323 for (i
= 1; i
< size
; i
++)
21324 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21326 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21331 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21335 real_to_target (l
, &r
, mode
);
21336 parts
[3] = gen_int_mode (l
[3], SImode
);
21337 parts
[2] = gen_int_mode (l
[2], SImode
);
21340 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21341 long double may not be 80-bit. */
21342 real_to_target (l
, &r
, mode
);
21343 parts
[2] = gen_int_mode (l
[2], SImode
);
21346 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21349 gcc_unreachable ();
21351 parts
[1] = gen_int_mode (l
[1], SImode
);
21352 parts
[0] = gen_int_mode (l
[0], SImode
);
21355 gcc_unreachable ();
21360 if (mode
== TImode
)
21361 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21362 if (mode
== XFmode
|| mode
== TFmode
)
21364 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21365 if (REG_P (operand
))
21367 gcc_assert (reload_completed
);
21368 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21369 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21371 else if (offsettable_memref_p (operand
))
21373 operand
= adjust_address (operand
, DImode
, 0);
21374 parts
[0] = operand
;
21375 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21377 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21382 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21383 real_to_target (l
, &r
, mode
);
21385 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21386 if (HOST_BITS_PER_WIDE_INT
>= 64)
21389 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21390 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21393 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21395 if (upper_mode
== SImode
)
21396 parts
[1] = gen_int_mode (l
[2], SImode
);
21397 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21400 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21401 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21404 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21407 gcc_unreachable ();
21414 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21415 Return false when normal moves are needed; true when all required
21416 insns have been emitted. Operands 2-4 contain the input values
21417 int the correct order; operands 5-7 contain the output values. */
21420 ix86_split_long_move (rtx operands
[])
21425 int collisions
= 0;
21426 enum machine_mode mode
= GET_MODE (operands
[0]);
21427 bool collisionparts
[4];
21429 /* The DFmode expanders may ask us to move double.
21430 For 64bit target this is single move. By hiding the fact
21431 here we simplify i386.md splitters. */
21432 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21434 /* Optimize constant pool reference to immediates. This is used by
21435 fp moves, that force all constants to memory to allow combining. */
21437 if (MEM_P (operands
[1])
21438 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21439 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21440 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21441 if (push_operand (operands
[0], VOIDmode
))
21443 operands
[0] = copy_rtx (operands
[0]);
21444 PUT_MODE (operands
[0], word_mode
);
21447 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21448 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21449 emit_move_insn (operands
[0], operands
[1]);
21453 /* The only non-offsettable memory we handle is push. */
21454 if (push_operand (operands
[0], VOIDmode
))
21457 gcc_assert (!MEM_P (operands
[0])
21458 || offsettable_memref_p (operands
[0]));
21460 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21461 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21463 /* When emitting push, take care for source operands on the stack. */
21464 if (push
&& MEM_P (operands
[1])
21465 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21467 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21469 /* Compensate for the stack decrement by 4. */
21470 if (!TARGET_64BIT
&& nparts
== 3
21471 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21472 src_base
= plus_constant (Pmode
, src_base
, 4);
21474 /* src_base refers to the stack pointer and is
21475 automatically decreased by emitted push. */
21476 for (i
= 0; i
< nparts
; i
++)
21477 part
[1][i
] = change_address (part
[1][i
],
21478 GET_MODE (part
[1][i
]), src_base
);
21481 /* We need to do copy in the right order in case an address register
21482 of the source overlaps the destination. */
21483 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21487 for (i
= 0; i
< nparts
; i
++)
21490 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21491 if (collisionparts
[i
])
21495 /* Collision in the middle part can be handled by reordering. */
21496 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21498 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21499 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21501 else if (collisions
== 1
21503 && (collisionparts
[1] || collisionparts
[2]))
21505 if (collisionparts
[1])
21507 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21508 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21512 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21513 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21517 /* If there are more collisions, we can't handle it by reordering.
21518 Do an lea to the last part and use only one colliding move. */
21519 else if (collisions
> 1)
21525 base
= part
[0][nparts
- 1];
21527 /* Handle the case when the last part isn't valid for lea.
21528 Happens in 64-bit mode storing the 12-byte XFmode. */
21529 if (GET_MODE (base
) != Pmode
)
21530 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21532 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21533 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21534 for (i
= 1; i
< nparts
; i
++)
21536 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21537 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21548 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21549 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21550 stack_pointer_rtx
, GEN_INT (-4)));
21551 emit_move_insn (part
[0][2], part
[1][2]);
21553 else if (nparts
== 4)
21555 emit_move_insn (part
[0][3], part
[1][3]);
21556 emit_move_insn (part
[0][2], part
[1][2]);
21561 /* In 64bit mode we don't have 32bit push available. In case this is
21562 register, it is OK - we will just use larger counterpart. We also
21563 retype memory - these comes from attempt to avoid REX prefix on
21564 moving of second half of TFmode value. */
21565 if (GET_MODE (part
[1][1]) == SImode
)
21567 switch (GET_CODE (part
[1][1]))
21570 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21574 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21578 gcc_unreachable ();
21581 if (GET_MODE (part
[1][0]) == SImode
)
21582 part
[1][0] = part
[1][1];
21585 emit_move_insn (part
[0][1], part
[1][1]);
21586 emit_move_insn (part
[0][0], part
[1][0]);
21590 /* Choose correct order to not overwrite the source before it is copied. */
21591 if ((REG_P (part
[0][0])
21592 && REG_P (part
[1][1])
21593 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21595 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21597 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21599 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21601 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21603 operands
[2 + i
] = part
[0][j
];
21604 operands
[6 + i
] = part
[1][j
];
21609 for (i
= 0; i
< nparts
; i
++)
21611 operands
[2 + i
] = part
[0][i
];
21612 operands
[6 + i
] = part
[1][i
];
21616 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21617 if (optimize_insn_for_size_p ())
21619 for (j
= 0; j
< nparts
- 1; j
++)
21620 if (CONST_INT_P (operands
[6 + j
])
21621 && operands
[6 + j
] != const0_rtx
21622 && REG_P (operands
[2 + j
]))
21623 for (i
= j
; i
< nparts
- 1; i
++)
21624 if (CONST_INT_P (operands
[7 + i
])
21625 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21626 operands
[7 + i
] = operands
[2 + j
];
21629 for (i
= 0; i
< nparts
; i
++)
21630 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21635 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21636 left shift by a constant, either using a single shift or
21637 a sequence of add instructions. */
21640 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21642 rtx (*insn
)(rtx
, rtx
, rtx
);
21645 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21646 && !optimize_insn_for_size_p ()))
21648 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21649 while (count
-- > 0)
21650 emit_insn (insn (operand
, operand
, operand
));
21654 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21655 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21660 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21662 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21663 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21664 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21666 rtx low
[2], high
[2];
21669 if (CONST_INT_P (operands
[2]))
21671 split_double_mode (mode
, operands
, 2, low
, high
);
21672 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21674 if (count
>= half_width
)
21676 emit_move_insn (high
[0], low
[1]);
21677 emit_move_insn (low
[0], const0_rtx
);
21679 if (count
> half_width
)
21680 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21684 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21686 if (!rtx_equal_p (operands
[0], operands
[1]))
21687 emit_move_insn (operands
[0], operands
[1]);
21689 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21690 ix86_expand_ashl_const (low
[0], count
, mode
);
21695 split_double_mode (mode
, operands
, 1, low
, high
);
21697 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21699 if (operands
[1] == const1_rtx
)
21701 /* Assuming we've chosen a QImode capable registers, then 1 << N
21702 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21703 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21705 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21707 ix86_expand_clear (low
[0]);
21708 ix86_expand_clear (high
[0]);
21709 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21711 d
= gen_lowpart (QImode
, low
[0]);
21712 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21713 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21714 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21716 d
= gen_lowpart (QImode
, high
[0]);
21717 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21718 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21719 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21722 /* Otherwise, we can get the same results by manually performing
21723 a bit extract operation on bit 5/6, and then performing the two
21724 shifts. The two methods of getting 0/1 into low/high are exactly
21725 the same size. Avoiding the shift in the bit extract case helps
21726 pentium4 a bit; no one else seems to care much either way. */
21729 enum machine_mode half_mode
;
21730 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21731 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21732 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21733 HOST_WIDE_INT bits
;
21736 if (mode
== DImode
)
21738 half_mode
= SImode
;
21739 gen_lshr3
= gen_lshrsi3
;
21740 gen_and3
= gen_andsi3
;
21741 gen_xor3
= gen_xorsi3
;
21746 half_mode
= DImode
;
21747 gen_lshr3
= gen_lshrdi3
;
21748 gen_and3
= gen_anddi3
;
21749 gen_xor3
= gen_xordi3
;
21753 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21754 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21756 x
= gen_lowpart (half_mode
, operands
[2]);
21757 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21759 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21760 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21761 emit_move_insn (low
[0], high
[0]);
21762 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21765 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21766 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21770 if (operands
[1] == constm1_rtx
)
21772 /* For -1 << N, we can avoid the shld instruction, because we
21773 know that we're shifting 0...31/63 ones into a -1. */
21774 emit_move_insn (low
[0], constm1_rtx
);
21775 if (optimize_insn_for_size_p ())
21776 emit_move_insn (high
[0], low
[0]);
21778 emit_move_insn (high
[0], constm1_rtx
);
21782 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21784 if (!rtx_equal_p (operands
[0], operands
[1]))
21785 emit_move_insn (operands
[0], operands
[1]);
21787 split_double_mode (mode
, operands
, 1, low
, high
);
21788 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21791 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21793 if (TARGET_CMOVE
&& scratch
)
21795 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21796 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21798 ix86_expand_clear (scratch
);
21799 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21803 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21804 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21806 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21811 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21813 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21814 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21815 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21816 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21818 rtx low
[2], high
[2];
21821 if (CONST_INT_P (operands
[2]))
21823 split_double_mode (mode
, operands
, 2, low
, high
);
21824 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21826 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21828 emit_move_insn (high
[0], high
[1]);
21829 emit_insn (gen_ashr3 (high
[0], high
[0],
21830 GEN_INT (half_width
- 1)));
21831 emit_move_insn (low
[0], high
[0]);
21834 else if (count
>= half_width
)
21836 emit_move_insn (low
[0], high
[1]);
21837 emit_move_insn (high
[0], low
[0]);
21838 emit_insn (gen_ashr3 (high
[0], high
[0],
21839 GEN_INT (half_width
- 1)));
21841 if (count
> half_width
)
21842 emit_insn (gen_ashr3 (low
[0], low
[0],
21843 GEN_INT (count
- half_width
)));
21847 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21849 if (!rtx_equal_p (operands
[0], operands
[1]))
21850 emit_move_insn (operands
[0], operands
[1]);
21852 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21853 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21858 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21860 if (!rtx_equal_p (operands
[0], operands
[1]))
21861 emit_move_insn (operands
[0], operands
[1]);
21863 split_double_mode (mode
, operands
, 1, low
, high
);
21865 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21866 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21868 if (TARGET_CMOVE
&& scratch
)
21870 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21871 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21873 emit_move_insn (scratch
, high
[0]);
21874 emit_insn (gen_ashr3 (scratch
, scratch
,
21875 GEN_INT (half_width
- 1)));
21876 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21881 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21882 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21884 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21890 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21892 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21893 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21894 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21895 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21897 rtx low
[2], high
[2];
21900 if (CONST_INT_P (operands
[2]))
21902 split_double_mode (mode
, operands
, 2, low
, high
);
21903 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21905 if (count
>= half_width
)
21907 emit_move_insn (low
[0], high
[1]);
21908 ix86_expand_clear (high
[0]);
21910 if (count
> half_width
)
21911 emit_insn (gen_lshr3 (low
[0], low
[0],
21912 GEN_INT (count
- half_width
)));
21916 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21918 if (!rtx_equal_p (operands
[0], operands
[1]))
21919 emit_move_insn (operands
[0], operands
[1]);
21921 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21922 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21927 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21929 if (!rtx_equal_p (operands
[0], operands
[1]))
21930 emit_move_insn (operands
[0], operands
[1]);
21932 split_double_mode (mode
, operands
, 1, low
, high
);
21934 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21935 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21937 if (TARGET_CMOVE
&& scratch
)
21939 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21940 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21942 ix86_expand_clear (scratch
);
21943 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21948 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21949 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21951 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21956 /* Predict just emitted jump instruction to be taken with probability PROB. */
21958 predict_jump (int prob
)
21960 rtx insn
= get_last_insn ();
21961 gcc_assert (JUMP_P (insn
));
21962 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21965 /* Helper function for the string operations below. Dest VARIABLE whether
21966 it is aligned to VALUE bytes. If true, jump to the label. */
21968 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21970 rtx label
= gen_label_rtx ();
21971 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21972 if (GET_MODE (variable
) == DImode
)
21973 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21975 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21976 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21979 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21981 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21985 /* Adjust COUNTER by the VALUE. */
21987 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21989 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21990 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21992 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21995 /* Zero extend possibly SImode EXP to Pmode register. */
21997 ix86_zero_extend_to_Pmode (rtx exp
)
21999 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22002 /* Divide COUNTREG by SCALE. */
22004 scale_counter (rtx countreg
, int scale
)
22010 if (CONST_INT_P (countreg
))
22011 return GEN_INT (INTVAL (countreg
) / scale
);
22012 gcc_assert (REG_P (countreg
));
22014 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22015 GEN_INT (exact_log2 (scale
)),
22016 NULL
, 1, OPTAB_DIRECT
);
22020 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22021 DImode for constant loop counts. */
22023 static enum machine_mode
22024 counter_mode (rtx count_exp
)
22026 if (GET_MODE (count_exp
) != VOIDmode
)
22027 return GET_MODE (count_exp
);
22028 if (!CONST_INT_P (count_exp
))
22030 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22035 /* When SRCPTR is non-NULL, output simple loop to move memory
22036 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
22037 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
22038 equivalent loop to set memory by VALUE (supposed to be in MODE).
22040 The size is rounded down to whole number of chunk size moved at once.
22041 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22045 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22046 rtx destptr
, rtx srcptr
, rtx value
,
22047 rtx count
, enum machine_mode mode
, int unroll
,
22050 rtx out_label
, top_label
, iter
, tmp
;
22051 enum machine_mode iter_mode
= counter_mode (count
);
22052 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22053 rtx piece_size
= GEN_INT (piece_size_n
);
22054 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22058 top_label
= gen_label_rtx ();
22059 out_label
= gen_label_rtx ();
22060 iter
= gen_reg_rtx (iter_mode
);
22062 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22063 NULL
, 1, OPTAB_DIRECT
);
22064 /* Those two should combine. */
22065 if (piece_size
== const1_rtx
)
22067 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22069 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22071 emit_move_insn (iter
, const0_rtx
);
22073 emit_label (top_label
);
22075 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22077 /* This assert could be relaxed - in this case we'll need to compute
22078 smallest power of two, containing in PIECE_SIZE_N and pass it to
22080 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22081 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22082 destmem
= adjust_address (destmem
, mode
, 0);
22086 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22087 srcmem
= adjust_address (srcmem
, mode
, 0);
22089 /* When unrolling for chips that reorder memory reads and writes,
22090 we can save registers by using single temporary.
22091 Also using 4 temporaries is overkill in 32bit mode. */
22092 if (!TARGET_64BIT
&& 0)
22094 for (i
= 0; i
< unroll
; i
++)
22099 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22101 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22103 emit_move_insn (destmem
, srcmem
);
22109 gcc_assert (unroll
<= 4);
22110 for (i
= 0; i
< unroll
; i
++)
22112 tmpreg
[i
] = gen_reg_rtx (mode
);
22116 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22118 emit_move_insn (tmpreg
[i
], srcmem
);
22120 for (i
= 0; i
< unroll
; i
++)
22125 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22127 emit_move_insn (destmem
, tmpreg
[i
]);
22132 for (i
= 0; i
< unroll
; i
++)
22136 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22137 emit_move_insn (destmem
, value
);
22140 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22141 true, OPTAB_LIB_WIDEN
);
22143 emit_move_insn (iter
, tmp
);
22145 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22147 if (expected_size
!= -1)
22149 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22150 if (expected_size
== 0)
22152 else if (expected_size
> REG_BR_PROB_BASE
)
22153 predict_jump (REG_BR_PROB_BASE
- 1);
22155 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22158 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22159 iter
= ix86_zero_extend_to_Pmode (iter
);
22160 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22161 true, OPTAB_LIB_WIDEN
);
22162 if (tmp
!= destptr
)
22163 emit_move_insn (destptr
, tmp
);
22166 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22167 true, OPTAB_LIB_WIDEN
);
22169 emit_move_insn (srcptr
, tmp
);
22171 emit_label (out_label
);
22174 /* Output "rep; mov" instruction.
22175 Arguments have same meaning as for previous function */
22177 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
22178 rtx destptr
, rtx srcptr
,
22180 enum machine_mode mode
)
22185 HOST_WIDE_INT rounded_count
;
22187 /* If the size is known, it is shorter to use rep movs. */
22188 if (mode
== QImode
&& CONST_INT_P (count
)
22189 && !(INTVAL (count
) & 3))
22192 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22193 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22194 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22195 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22196 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22197 if (mode
!= QImode
)
22199 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22200 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22201 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22202 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22203 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22204 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22208 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22209 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22211 if (CONST_INT_P (count
))
22213 rounded_count
= (INTVAL (count
)
22214 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22215 destmem
= shallow_copy_rtx (destmem
);
22216 srcmem
= shallow_copy_rtx (srcmem
);
22217 set_mem_size (destmem
, rounded_count
);
22218 set_mem_size (srcmem
, rounded_count
);
22222 if (MEM_SIZE_KNOWN_P (destmem
))
22223 clear_mem_size (destmem
);
22224 if (MEM_SIZE_KNOWN_P (srcmem
))
22225 clear_mem_size (srcmem
);
22227 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22231 /* Output "rep; stos" instruction.
22232 Arguments have same meaning as for previous function */
22234 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
22235 rtx count
, enum machine_mode mode
,
22240 HOST_WIDE_INT rounded_count
;
22242 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22243 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22244 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22245 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22246 if (mode
!= QImode
)
22248 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22249 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22250 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22253 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22254 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
22256 rounded_count
= (INTVAL (count
)
22257 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22258 destmem
= shallow_copy_rtx (destmem
);
22259 set_mem_size (destmem
, rounded_count
);
22261 else if (MEM_SIZE_KNOWN_P (destmem
))
22262 clear_mem_size (destmem
);
22263 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22266 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22268 SRC is passed by pointer to be updated on return.
22269 Return value is updated DST. */
22271 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22272 HOST_WIDE_INT size_to_move
)
22274 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22275 enum insn_code code
;
22276 enum machine_mode move_mode
;
22279 /* Find the widest mode in which we could perform moves.
22280 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22281 it until move of such size is supported. */
22282 piece_size
= 1 << floor_log2 (size_to_move
);
22283 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22284 code
= optab_handler (mov_optab
, move_mode
);
22285 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22288 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22289 code
= optab_handler (mov_optab
, move_mode
);
22292 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22293 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22294 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22296 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22297 move_mode
= mode_for_vector (word_mode
, nunits
);
22298 code
= optab_handler (mov_optab
, move_mode
);
22299 if (code
== CODE_FOR_nothing
)
22301 move_mode
= word_mode
;
22302 piece_size
= GET_MODE_SIZE (move_mode
);
22303 code
= optab_handler (mov_optab
, move_mode
);
22306 gcc_assert (code
!= CODE_FOR_nothing
);
22308 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22309 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22311 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22312 gcc_assert (size_to_move
% piece_size
== 0);
22313 adjust
= GEN_INT (piece_size
);
22314 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22316 /* We move from memory to memory, so we'll need to do it via
22317 a temporary register. */
22318 tempreg
= gen_reg_rtx (move_mode
);
22319 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22320 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22322 emit_move_insn (destptr
,
22323 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22324 emit_move_insn (srcptr
,
22325 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22327 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22329 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22333 /* Update DST and SRC rtx. */
22338 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22340 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22341 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22344 if (CONST_INT_P (count
))
22346 HOST_WIDE_INT countval
= INTVAL (count
);
22347 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22350 /* For now MAX_SIZE should be a power of 2. This assert could be
22351 relaxed, but it'll require a bit more complicated epilogue
22353 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22354 for (i
= max_size
; i
>= 1; i
>>= 1)
22356 if (epilogue_size
& i
)
22357 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22363 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22364 count
, 1, OPTAB_DIRECT
);
22365 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22366 count
, QImode
, 1, 4);
22370 /* When there are stringops, we can cheaply increase dest and src pointers.
22371 Otherwise we save code size by maintaining offset (zero is readily
22372 available from preceding rep operation) and using x86 addressing modes.
22374 if (TARGET_SINGLE_STRINGOP
)
22378 rtx label
= ix86_expand_aligntest (count
, 4, true);
22379 src
= change_address (srcmem
, SImode
, srcptr
);
22380 dest
= change_address (destmem
, SImode
, destptr
);
22381 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22382 emit_label (label
);
22383 LABEL_NUSES (label
) = 1;
22387 rtx label
= ix86_expand_aligntest (count
, 2, true);
22388 src
= change_address (srcmem
, HImode
, srcptr
);
22389 dest
= change_address (destmem
, HImode
, destptr
);
22390 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22391 emit_label (label
);
22392 LABEL_NUSES (label
) = 1;
22396 rtx label
= ix86_expand_aligntest (count
, 1, true);
22397 src
= change_address (srcmem
, QImode
, srcptr
);
22398 dest
= change_address (destmem
, QImode
, destptr
);
22399 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22400 emit_label (label
);
22401 LABEL_NUSES (label
) = 1;
22406 rtx offset
= force_reg (Pmode
, const0_rtx
);
22411 rtx label
= ix86_expand_aligntest (count
, 4, true);
22412 src
= change_address (srcmem
, SImode
, srcptr
);
22413 dest
= change_address (destmem
, SImode
, destptr
);
22414 emit_move_insn (dest
, src
);
22415 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22416 true, OPTAB_LIB_WIDEN
);
22418 emit_move_insn (offset
, tmp
);
22419 emit_label (label
);
22420 LABEL_NUSES (label
) = 1;
22424 rtx label
= ix86_expand_aligntest (count
, 2, true);
22425 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22426 src
= change_address (srcmem
, HImode
, tmp
);
22427 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22428 dest
= change_address (destmem
, HImode
, tmp
);
22429 emit_move_insn (dest
, src
);
22430 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22431 true, OPTAB_LIB_WIDEN
);
22433 emit_move_insn (offset
, tmp
);
22434 emit_label (label
);
22435 LABEL_NUSES (label
) = 1;
22439 rtx label
= ix86_expand_aligntest (count
, 1, true);
22440 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22441 src
= change_address (srcmem
, QImode
, tmp
);
22442 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22443 dest
= change_address (destmem
, QImode
, tmp
);
22444 emit_move_insn (dest
, src
);
22445 emit_label (label
);
22446 LABEL_NUSES (label
) = 1;
22451 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22453 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22454 rtx count
, int max_size
)
22457 expand_simple_binop (counter_mode (count
), AND
, count
,
22458 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22459 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22460 gen_lowpart (QImode
, value
), count
, QImode
,
22464 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22466 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22470 if (CONST_INT_P (count
))
22472 HOST_WIDE_INT countval
= INTVAL (count
);
22475 if ((countval
& 0x10) && max_size
> 16)
22479 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22480 emit_insn (gen_strset (destptr
, dest
, value
));
22481 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22482 emit_insn (gen_strset (destptr
, dest
, value
));
22485 gcc_unreachable ();
22488 if ((countval
& 0x08) && max_size
> 8)
22492 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22493 emit_insn (gen_strset (destptr
, dest
, value
));
22497 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22498 emit_insn (gen_strset (destptr
, dest
, value
));
22499 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22500 emit_insn (gen_strset (destptr
, dest
, value
));
22504 if ((countval
& 0x04) && max_size
> 4)
22506 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22507 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22510 if ((countval
& 0x02) && max_size
> 2)
22512 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22513 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22516 if ((countval
& 0x01) && max_size
> 1)
22518 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22519 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22526 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22531 rtx label
= ix86_expand_aligntest (count
, 16, true);
22534 dest
= change_address (destmem
, DImode
, destptr
);
22535 emit_insn (gen_strset (destptr
, dest
, value
));
22536 emit_insn (gen_strset (destptr
, dest
, value
));
22540 dest
= change_address (destmem
, SImode
, destptr
);
22541 emit_insn (gen_strset (destptr
, dest
, value
));
22542 emit_insn (gen_strset (destptr
, dest
, value
));
22543 emit_insn (gen_strset (destptr
, dest
, value
));
22544 emit_insn (gen_strset (destptr
, dest
, value
));
22546 emit_label (label
);
22547 LABEL_NUSES (label
) = 1;
22551 rtx label
= ix86_expand_aligntest (count
, 8, true);
22554 dest
= change_address (destmem
, DImode
, destptr
);
22555 emit_insn (gen_strset (destptr
, dest
, value
));
22559 dest
= change_address (destmem
, SImode
, destptr
);
22560 emit_insn (gen_strset (destptr
, dest
, value
));
22561 emit_insn (gen_strset (destptr
, dest
, value
));
22563 emit_label (label
);
22564 LABEL_NUSES (label
) = 1;
22568 rtx label
= ix86_expand_aligntest (count
, 4, true);
22569 dest
= change_address (destmem
, SImode
, destptr
);
22570 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22571 emit_label (label
);
22572 LABEL_NUSES (label
) = 1;
22576 rtx label
= ix86_expand_aligntest (count
, 2, true);
22577 dest
= change_address (destmem
, HImode
, destptr
);
22578 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22579 emit_label (label
);
22580 LABEL_NUSES (label
) = 1;
22584 rtx label
= ix86_expand_aligntest (count
, 1, true);
22585 dest
= change_address (destmem
, QImode
, destptr
);
22586 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22587 emit_label (label
);
22588 LABEL_NUSES (label
) = 1;
22592 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22594 Return value is updated DESTMEM. */
22596 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22597 rtx destptr
, rtx srcptr
, rtx count
,
22598 int align
, int desired_alignment
)
22601 for (i
= 1; i
< desired_alignment
; i
<<= 1)
22605 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
22606 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22607 ix86_adjust_counter (count
, i
);
22608 emit_label (label
);
22609 LABEL_NUSES (label
) = 1;
22610 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
22616 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22617 ALIGN_BYTES is how many bytes need to be copied.
22618 The function updates DST and SRC, namely, it sets proper alignment.
22619 DST is returned via return value, SRC is updated via pointer SRCP. */
22621 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22622 int desired_align
, int align_bytes
)
22625 rtx orig_dst
= dst
;
22626 rtx orig_src
= src
;
22627 int piece_size
= 1;
22628 int copied_bytes
= 0;
22629 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22630 if (src_align_bytes
>= 0)
22631 src_align_bytes
= desired_align
- src_align_bytes
;
22633 for (piece_size
= 1;
22634 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
22637 if (align_bytes
& piece_size
)
22639 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
22640 copied_bytes
+= piece_size
;
22644 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22645 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22646 if (src_align_bytes
>= 0)
22648 unsigned int src_align
;
22649 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
22651 if ((src_align_bytes
& (src_align
- 1))
22652 == (align_bytes
& (src_align
- 1)))
22655 if (src_align
> (unsigned int) desired_align
)
22656 src_align
= desired_align
;
22657 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22658 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22660 if (MEM_SIZE_KNOWN_P (orig_dst
))
22661 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22662 if (MEM_SIZE_KNOWN_P (orig_src
))
22663 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22668 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22669 DESIRED_ALIGNMENT. */
22671 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22672 int align
, int desired_alignment
)
22674 if (align
<= 1 && desired_alignment
> 1)
22676 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22677 destmem
= change_address (destmem
, QImode
, destptr
);
22678 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22679 ix86_adjust_counter (count
, 1);
22680 emit_label (label
);
22681 LABEL_NUSES (label
) = 1;
22683 if (align
<= 2 && desired_alignment
> 2)
22685 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22686 destmem
= change_address (destmem
, HImode
, destptr
);
22687 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22688 ix86_adjust_counter (count
, 2);
22689 emit_label (label
);
22690 LABEL_NUSES (label
) = 1;
22692 if (align
<= 4 && desired_alignment
> 4)
22694 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22695 destmem
= change_address (destmem
, SImode
, destptr
);
22696 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22697 ix86_adjust_counter (count
, 4);
22698 emit_label (label
);
22699 LABEL_NUSES (label
) = 1;
22701 gcc_assert (desired_alignment
<= 8);
22704 /* Set enough from DST to align DST known to by aligned by ALIGN to
22705 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22707 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22708 int desired_align
, int align_bytes
)
22711 rtx orig_dst
= dst
;
22712 if (align_bytes
& 1)
22714 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22716 emit_insn (gen_strset (destreg
, dst
,
22717 gen_lowpart (QImode
, value
)));
22719 if (align_bytes
& 2)
22721 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22722 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22723 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22725 emit_insn (gen_strset (destreg
, dst
,
22726 gen_lowpart (HImode
, value
)));
22728 if (align_bytes
& 4)
22730 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22731 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22732 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22734 emit_insn (gen_strset (destreg
, dst
,
22735 gen_lowpart (SImode
, value
)));
22737 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22738 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22739 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22740 if (MEM_SIZE_KNOWN_P (orig_dst
))
22741 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22745 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22746 static enum stringop_alg
22747 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22748 int *dynamic_check
, bool *noalign
)
22750 const struct stringop_algs
* algs
;
22751 bool optimize_for_speed
;
22752 /* Algorithms using the rep prefix want at least edi and ecx;
22753 additionally, memset wants eax and memcpy wants esi. Don't
22754 consider such algorithms if the user has appropriated those
22755 registers for their own purposes. */
22756 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22758 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22761 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22762 || (alg != rep_prefix_1_byte \
22763 && alg != rep_prefix_4_byte \
22764 && alg != rep_prefix_8_byte))
22765 const struct processor_costs
*cost
;
22767 /* Even if the string operation call is cold, we still might spend a lot
22768 of time processing large blocks. */
22769 if (optimize_function_for_size_p (cfun
)
22770 || (optimize_insn_for_size_p ()
22771 && expected_size
!= -1 && expected_size
< 256))
22772 optimize_for_speed
= false;
22774 optimize_for_speed
= true;
22776 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22778 *dynamic_check
= -1;
22780 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22782 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22783 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22784 return ix86_stringop_alg
;
22785 /* rep; movq or rep; movl is the smallest variant. */
22786 else if (!optimize_for_speed
)
22788 if (!count
|| (count
& 3))
22789 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22791 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22793 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22795 else if (expected_size
!= -1 && expected_size
< 4)
22796 return loop_1_byte
;
22797 else if (expected_size
!= -1)
22800 enum stringop_alg alg
= libcall
;
22801 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22803 /* We get here if the algorithms that were not libcall-based
22804 were rep-prefix based and we are unable to use rep prefixes
22805 based on global register usage. Break out of the loop and
22806 use the heuristic below. */
22807 if (algs
->size
[i
].max
== 0)
22809 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22811 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22813 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22815 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22816 last non-libcall inline algorithm. */
22817 if (TARGET_INLINE_ALL_STRINGOPS
)
22819 /* When the current size is best to be copied by a libcall,
22820 but we are still forced to inline, run the heuristic below
22821 that will pick code for medium sized blocks. */
22822 if (alg
!= libcall
)
22826 else if (ALG_USABLE_P (candidate
))
22828 *noalign
= algs
->size
[i
].noalign
;
22833 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22835 /* When asked to inline the call anyway, try to pick meaningful choice.
22836 We look for maximal size of block that is faster to copy by hand and
22837 take blocks of at most of that size guessing that average size will
22838 be roughly half of the block.
22840 If this turns out to be bad, we might simply specify the preferred
22841 choice in ix86_costs. */
22842 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22843 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22846 enum stringop_alg alg
;
22848 bool any_alg_usable_p
= true;
22850 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22852 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22853 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22855 if (candidate
!= libcall
&& candidate
22856 && ALG_USABLE_P (candidate
))
22857 max
= algs
->size
[i
].max
;
22859 /* If there aren't any usable algorithms, then recursing on
22860 smaller sizes isn't going to find anything. Just return the
22861 simple byte-at-a-time copy loop. */
22862 if (!any_alg_usable_p
)
22864 /* Pick something reasonable. */
22865 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22866 *dynamic_check
= 128;
22867 return loop_1_byte
;
22871 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
22872 gcc_assert (*dynamic_check
== -1);
22873 gcc_assert (alg
!= libcall
);
22874 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22875 *dynamic_check
= max
;
22878 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22879 #undef ALG_USABLE_P
22882 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22883 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22885 decide_alignment (int align
,
22886 enum stringop_alg alg
,
22888 enum machine_mode move_mode
)
22890 int desired_align
= 0;
22892 gcc_assert (alg
!= no_stringop
);
22894 if (alg
== libcall
)
22896 if (move_mode
== VOIDmode
)
22899 desired_align
= GET_MODE_SIZE (move_mode
);
22900 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22901 copying whole cacheline at once. */
22902 if (TARGET_PENTIUMPRO
22903 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
22908 if (desired_align
< align
)
22909 desired_align
= align
;
22910 if (expected_size
!= -1 && expected_size
< 4)
22911 desired_align
= align
;
22913 return desired_align
;
22916 /* Expand string move (memcpy) operation. Use i386 string operations
22917 when profitable. expand_setmem contains similar code. The code
22918 depends upon architecture, block size and alignment, but always has
22919 the same overall structure:
22921 1) Prologue guard: Conditional that jumps up to epilogues for small
22922 blocks that can be handled by epilogue alone. This is faster
22923 but also needed for correctness, since prologue assume the block
22924 is larger than the desired alignment.
22926 Optional dynamic check for size and libcall for large
22927 blocks is emitted here too, with -minline-stringops-dynamically.
22929 2) Prologue: copy first few bytes in order to get destination
22930 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22931 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22932 copied. We emit either a jump tree on power of two sized
22933 blocks, or a byte loop.
22935 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22936 with specified algorithm.
22938 4) Epilogue: code copying tail of the block that is too small to be
22939 handled by main body (or up to size guarded by prologue guard). */
22942 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22943 rtx expected_align_exp
, rtx expected_size_exp
)
22949 rtx jump_around_label
= NULL
;
22950 HOST_WIDE_INT align
= 1;
22951 unsigned HOST_WIDE_INT count
= 0;
22952 HOST_WIDE_INT expected_size
= -1;
22953 int size_needed
= 0, epilogue_size_needed
;
22954 int desired_align
= 0, align_bytes
= 0;
22955 enum stringop_alg alg
;
22957 bool need_zero_guard
= false;
22959 enum machine_mode move_mode
= VOIDmode
;
22960 int unroll_factor
= 1;
22962 if (CONST_INT_P (align_exp
))
22963 align
= INTVAL (align_exp
);
22964 /* i386 can do misaligned access on reasonably increased cost. */
22965 if (CONST_INT_P (expected_align_exp
)
22966 && INTVAL (expected_align_exp
) > align
)
22967 align
= INTVAL (expected_align_exp
);
22968 /* ALIGN is the minimum of destination and source alignment, but we care here
22969 just about destination alignment. */
22970 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22971 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22973 if (CONST_INT_P (count_exp
))
22974 count
= expected_size
= INTVAL (count_exp
);
22975 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22976 expected_size
= INTVAL (expected_size_exp
);
22978 /* Make sure we don't need to care about overflow later on. */
22979 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22982 /* Step 0: Decide on preferred algorithm, desired alignment and
22983 size of chunks to be copied by main loop. */
22984 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
22985 if (alg
== libcall
)
22987 gcc_assert (alg
!= no_stringop
);
22990 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22991 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22992 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22995 move_mode
= word_mode
;
23001 gcc_unreachable ();
23003 need_zero_guard
= true;
23004 move_mode
= QImode
;
23007 need_zero_guard
= true;
23009 case unrolled_loop
:
23010 need_zero_guard
= true;
23011 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23014 need_zero_guard
= true;
23016 /* Find the widest supported mode. */
23017 move_mode
= word_mode
;
23018 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23019 != CODE_FOR_nothing
)
23020 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23022 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23023 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23024 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23026 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23027 move_mode
= mode_for_vector (word_mode
, nunits
);
23028 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23029 move_mode
= word_mode
;
23031 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23033 case rep_prefix_8_byte
:
23034 move_mode
= DImode
;
23036 case rep_prefix_4_byte
:
23037 move_mode
= SImode
;
23039 case rep_prefix_1_byte
:
23040 move_mode
= QImode
;
23043 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23044 epilogue_size_needed
= size_needed
;
23046 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23047 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23048 align
= desired_align
;
23050 /* Step 1: Prologue guard. */
23052 /* Alignment code needs count to be in register. */
23053 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23055 if (INTVAL (count_exp
) > desired_align
23056 && INTVAL (count_exp
) > size_needed
)
23059 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23060 if (align_bytes
<= 0)
23063 align_bytes
= desired_align
- align_bytes
;
23065 if (align_bytes
== 0)
23066 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23068 gcc_assert (desired_align
>= 1 && align
>= 1);
23070 /* Ensure that alignment prologue won't copy past end of block. */
23071 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23073 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23074 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
23075 Make sure it is power of 2. */
23076 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23080 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23082 /* If main algorithm works on QImode, no epilogue is needed.
23083 For small sizes just don't align anything. */
23084 if (size_needed
== 1)
23085 desired_align
= align
;
23092 label
= gen_label_rtx ();
23093 emit_cmp_and_jump_insns (count_exp
,
23094 GEN_INT (epilogue_size_needed
),
23095 LTU
, 0, counter_mode (count_exp
), 1, label
);
23096 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
23097 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23099 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23103 /* Emit code to decide on runtime whether library call or inline should be
23105 if (dynamic_check
!= -1)
23107 if (CONST_INT_P (count_exp
))
23109 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
23111 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23112 count_exp
= const0_rtx
;
23118 rtx hot_label
= gen_label_rtx ();
23119 jump_around_label
= gen_label_rtx ();
23120 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23121 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
23122 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23123 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23124 emit_jump (jump_around_label
);
23125 emit_label (hot_label
);
23129 /* Step 2: Alignment prologue. */
23131 if (desired_align
> align
)
23133 if (align_bytes
== 0)
23135 /* Except for the first move in epilogue, we no longer know
23136 constant offset in aliasing info. It don't seems to worth
23137 the pain to maintain it for the first move, so throw away
23139 src
= change_address (src
, BLKmode
, srcreg
);
23140 dst
= change_address (dst
, BLKmode
, destreg
);
23141 dst
= expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
23146 /* If we know how many bytes need to be stored before dst is
23147 sufficiently aligned, maintain aliasing info accurately. */
23148 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
23149 desired_align
, align_bytes
);
23150 count_exp
= plus_constant (counter_mode (count_exp
),
23151 count_exp
, -align_bytes
);
23152 count
-= align_bytes
;
23154 if (need_zero_guard
23155 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23156 || (align_bytes
== 0
23157 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23158 + desired_align
- align
))))
23160 /* It is possible that we copied enough so the main loop will not
23162 gcc_assert (size_needed
> 1);
23163 if (label
== NULL_RTX
)
23164 label
= gen_label_rtx ();
23165 emit_cmp_and_jump_insns (count_exp
,
23166 GEN_INT (size_needed
),
23167 LTU
, 0, counter_mode (count_exp
), 1, label
);
23168 if (expected_size
== -1
23169 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23170 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23172 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23175 if (label
&& size_needed
== 1)
23177 emit_label (label
);
23178 LABEL_NUSES (label
) = 1;
23180 epilogue_size_needed
= 1;
23182 else if (label
== NULL_RTX
)
23183 epilogue_size_needed
= size_needed
;
23185 /* Step 3: Main loop. */
23192 gcc_unreachable ();
23195 case unrolled_loop
:
23197 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
23198 count_exp
, move_mode
, unroll_factor
,
23201 case rep_prefix_8_byte
:
23202 case rep_prefix_4_byte
:
23203 case rep_prefix_1_byte
:
23204 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
23208 /* Adjust properly the offset of src and dest memory for aliasing. */
23209 if (CONST_INT_P (count_exp
))
23211 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
23212 (count
/ size_needed
) * size_needed
);
23213 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23214 (count
/ size_needed
) * size_needed
);
23218 src
= change_address (src
, BLKmode
, srcreg
);
23219 dst
= change_address (dst
, BLKmode
, destreg
);
23222 /* Step 4: Epilogue to copy the remaining bytes. */
23226 /* When the main loop is done, COUNT_EXP might hold original count,
23227 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23228 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23229 bytes. Compensate if needed. */
23231 if (size_needed
< epilogue_size_needed
)
23234 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23235 GEN_INT (size_needed
- 1), count_exp
, 1,
23237 if (tmp
!= count_exp
)
23238 emit_move_insn (count_exp
, tmp
);
23240 emit_label (label
);
23241 LABEL_NUSES (label
) = 1;
23244 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23245 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
23246 epilogue_size_needed
);
23247 if (jump_around_label
)
23248 emit_label (jump_around_label
);
23252 /* Helper function for memcpy. For QImode value 0xXY produce
23253 0xXYXYXYXY of wide specified by MODE. This is essentially
23254 a * 0x10101010, but we can do slightly better than
23255 synth_mult by unwinding the sequence by hand on CPUs with
23258 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23260 enum machine_mode valmode
= GET_MODE (val
);
23262 int nops
= mode
== DImode
? 3 : 2;
23264 gcc_assert (mode
== SImode
|| mode
== DImode
);
23265 if (val
== const0_rtx
)
23266 return copy_to_mode_reg (mode
, const0_rtx
);
23267 if (CONST_INT_P (val
))
23269 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23273 if (mode
== DImode
)
23274 v
|= (v
<< 16) << 16;
23275 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23278 if (valmode
== VOIDmode
)
23280 if (valmode
!= QImode
)
23281 val
= gen_lowpart (QImode
, val
);
23282 if (mode
== QImode
)
23284 if (!TARGET_PARTIAL_REG_STALL
)
23286 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23287 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23288 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23289 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23291 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23292 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23293 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23298 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23300 if (!TARGET_PARTIAL_REG_STALL
)
23301 if (mode
== SImode
)
23302 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23304 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23307 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23308 NULL
, 1, OPTAB_DIRECT
);
23310 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23312 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23313 NULL
, 1, OPTAB_DIRECT
);
23314 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23315 if (mode
== SImode
)
23317 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23318 NULL
, 1, OPTAB_DIRECT
);
23319 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23324 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23325 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23326 alignment from ALIGN to DESIRED_ALIGN. */
23328 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23333 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23334 promoted_val
= promote_duplicated_reg (DImode
, val
);
23335 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23336 promoted_val
= promote_duplicated_reg (SImode
, val
);
23337 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23338 promoted_val
= promote_duplicated_reg (HImode
, val
);
23340 promoted_val
= val
;
23342 return promoted_val
;
23345 /* Expand string clear operation (bzero). Use i386 string operations when
23346 profitable. See expand_movmem comment for explanation of individual
23347 steps performed. */
23349 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23350 rtx expected_align_exp
, rtx expected_size_exp
)
23355 rtx jump_around_label
= NULL
;
23356 HOST_WIDE_INT align
= 1;
23357 unsigned HOST_WIDE_INT count
= 0;
23358 HOST_WIDE_INT expected_size
= -1;
23359 int size_needed
= 0, epilogue_size_needed
;
23360 int desired_align
= 0, align_bytes
= 0;
23361 enum stringop_alg alg
;
23362 rtx promoted_val
= NULL
;
23363 bool force_loopy_epilogue
= false;
23365 bool need_zero_guard
= false;
23367 enum machine_mode move_mode
= VOIDmode
;
23370 if (CONST_INT_P (align_exp
))
23371 align
= INTVAL (align_exp
);
23372 /* i386 can do misaligned access on reasonably increased cost. */
23373 if (CONST_INT_P (expected_align_exp
)
23374 && INTVAL (expected_align_exp
) > align
)
23375 align
= INTVAL (expected_align_exp
);
23376 if (CONST_INT_P (count_exp
))
23377 count
= expected_size
= INTVAL (count_exp
);
23378 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23379 expected_size
= INTVAL (expected_size_exp
);
23381 /* Make sure we don't need to care about overflow later on. */
23382 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23385 /* Step 0: Decide on preferred algorithm, desired alignment and
23386 size of chunks to be copied by main loop. */
23388 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23389 if (alg
== libcall
)
23391 gcc_assert (alg
!= no_stringop
);
23394 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23395 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23397 move_mode
= word_mode
;
23404 gcc_unreachable ();
23406 need_zero_guard
= true;
23409 case unrolled_loop
:
23410 need_zero_guard
= true;
23413 case rep_prefix_8_byte
:
23414 move_mode
= DImode
;
23416 case rep_prefix_4_byte
:
23417 move_mode
= SImode
;
23419 case rep_prefix_1_byte
:
23420 move_mode
= QImode
;
23423 need_zero_guard
= true;
23424 move_mode
= QImode
;
23427 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23428 epilogue_size_needed
= size_needed
;
23430 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23431 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23432 align
= desired_align
;
23434 /* Step 1: Prologue guard. */
23436 /* Alignment code needs count to be in register. */
23437 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23439 if (INTVAL (count_exp
) > desired_align
23440 && INTVAL (count_exp
) > size_needed
)
23443 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23444 if (align_bytes
<= 0)
23447 align_bytes
= desired_align
- align_bytes
;
23449 if (align_bytes
== 0)
23451 enum machine_mode mode
= SImode
;
23452 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23454 count_exp
= force_reg (mode
, count_exp
);
23457 /* Do the cheap promotion to allow better CSE across the
23458 main loop and epilogue (ie one load of the big constant in the
23459 front of all code. */
23460 if (CONST_INT_P (val_exp
))
23461 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23462 desired_align
, align
);
23463 /* Ensure that alignment prologue won't copy past end of block. */
23464 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23466 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23467 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23468 Make sure it is power of 2. */
23469 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23471 /* To improve performance of small blocks, we jump around the VAL
23472 promoting mode. This mean that if the promoted VAL is not constant,
23473 we might not use it in the epilogue and have to use byte
23475 if (epilogue_size_needed
> 2 && !promoted_val
)
23476 force_loopy_epilogue
= true;
23479 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23481 /* If main algorithm works on QImode, no epilogue is needed.
23482 For small sizes just don't align anything. */
23483 if (size_needed
== 1)
23484 desired_align
= align
;
23491 label
= gen_label_rtx ();
23492 emit_cmp_and_jump_insns (count_exp
,
23493 GEN_INT (epilogue_size_needed
),
23494 LTU
, 0, counter_mode (count_exp
), 1, label
);
23495 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23496 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23498 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23501 if (dynamic_check
!= -1)
23503 rtx hot_label
= gen_label_rtx ();
23504 jump_around_label
= gen_label_rtx ();
23505 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23506 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23507 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23508 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23509 emit_jump (jump_around_label
);
23510 emit_label (hot_label
);
23513 /* Step 2: Alignment prologue. */
23515 /* Do the expensive promotion once we branched off the small blocks. */
23517 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23518 desired_align
, align
);
23519 gcc_assert (desired_align
>= 1 && align
>= 1);
23521 if (desired_align
> align
)
23523 if (align_bytes
== 0)
23525 /* Except for the first move in epilogue, we no longer know
23526 constant offset in aliasing info. It don't seems to worth
23527 the pain to maintain it for the first move, so throw away
23529 dst
= change_address (dst
, BLKmode
, destreg
);
23530 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23535 /* If we know how many bytes need to be stored before dst is
23536 sufficiently aligned, maintain aliasing info accurately. */
23537 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23538 desired_align
, align_bytes
);
23539 count_exp
= plus_constant (counter_mode (count_exp
),
23540 count_exp
, -align_bytes
);
23541 count
-= align_bytes
;
23543 if (need_zero_guard
23544 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23545 || (align_bytes
== 0
23546 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23547 + desired_align
- align
))))
23549 /* It is possible that we copied enough so the main loop will not
23551 gcc_assert (size_needed
> 1);
23552 if (label
== NULL_RTX
)
23553 label
= gen_label_rtx ();
23554 emit_cmp_and_jump_insns (count_exp
,
23555 GEN_INT (size_needed
),
23556 LTU
, 0, counter_mode (count_exp
), 1, label
);
23557 if (expected_size
== -1
23558 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23559 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23561 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23564 if (label
&& size_needed
== 1)
23566 emit_label (label
);
23567 LABEL_NUSES (label
) = 1;
23569 promoted_val
= val_exp
;
23570 epilogue_size_needed
= 1;
23572 else if (label
== NULL_RTX
)
23573 epilogue_size_needed
= size_needed
;
23575 /* Step 3: Main loop. */
23582 gcc_unreachable ();
23586 case unrolled_loop
:
23587 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23588 count_exp
, move_mode
, unroll_factor
,
23591 case rep_prefix_8_byte
:
23592 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23595 case rep_prefix_4_byte
:
23596 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23599 case rep_prefix_1_byte
:
23600 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23604 /* Adjust properly the offset of src and dest memory for aliasing. */
23605 if (CONST_INT_P (count_exp
))
23606 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23607 (count
/ size_needed
) * size_needed
);
23609 dst
= change_address (dst
, BLKmode
, destreg
);
23611 /* Step 4: Epilogue to copy the remaining bytes. */
23615 /* When the main loop is done, COUNT_EXP might hold original count,
23616 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23617 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23618 bytes. Compensate if needed. */
23620 if (size_needed
< epilogue_size_needed
)
23623 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23624 GEN_INT (size_needed
- 1), count_exp
, 1,
23626 if (tmp
!= count_exp
)
23627 emit_move_insn (count_exp
, tmp
);
23629 emit_label (label
);
23630 LABEL_NUSES (label
) = 1;
23633 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23635 if (force_loopy_epilogue
)
23636 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23637 epilogue_size_needed
);
23639 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23640 epilogue_size_needed
);
23642 if (jump_around_label
)
23643 emit_label (jump_around_label
);
23647 /* Expand the appropriate insns for doing strlen if not just doing
23650 out = result, initialized with the start address
23651 align_rtx = alignment of the address.
23652 scratch = scratch register, initialized with the startaddress when
23653 not aligned, otherwise undefined
23655 This is just the body. It needs the initializations mentioned above and
23656 some address computing at the end. These things are done in i386.md. */
23659 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23663 rtx align_2_label
= NULL_RTX
;
23664 rtx align_3_label
= NULL_RTX
;
23665 rtx align_4_label
= gen_label_rtx ();
23666 rtx end_0_label
= gen_label_rtx ();
23668 rtx tmpreg
= gen_reg_rtx (SImode
);
23669 rtx scratch
= gen_reg_rtx (SImode
);
23673 if (CONST_INT_P (align_rtx
))
23674 align
= INTVAL (align_rtx
);
23676 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23678 /* Is there a known alignment and is it less than 4? */
23681 rtx scratch1
= gen_reg_rtx (Pmode
);
23682 emit_move_insn (scratch1
, out
);
23683 /* Is there a known alignment and is it not 2? */
23686 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23687 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23689 /* Leave just the 3 lower bits. */
23690 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23691 NULL_RTX
, 0, OPTAB_WIDEN
);
23693 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23694 Pmode
, 1, align_4_label
);
23695 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23696 Pmode
, 1, align_2_label
);
23697 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23698 Pmode
, 1, align_3_label
);
23702 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23703 check if is aligned to 4 - byte. */
23705 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23706 NULL_RTX
, 0, OPTAB_WIDEN
);
23708 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23709 Pmode
, 1, align_4_label
);
23712 mem
= change_address (src
, QImode
, out
);
23714 /* Now compare the bytes. */
23716 /* Compare the first n unaligned byte on a byte per byte basis. */
23717 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23718 QImode
, 1, end_0_label
);
23720 /* Increment the address. */
23721 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23723 /* Not needed with an alignment of 2 */
23726 emit_label (align_2_label
);
23728 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23731 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23733 emit_label (align_3_label
);
23736 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23739 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23742 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23743 align this loop. It gives only huge programs, but does not help to
23745 emit_label (align_4_label
);
23747 mem
= change_address (src
, SImode
, out
);
23748 emit_move_insn (scratch
, mem
);
23749 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23751 /* This formula yields a nonzero result iff one of the bytes is zero.
23752 This saves three branches inside loop and many cycles. */
23754 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23755 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23756 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23757 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23758 gen_int_mode (0x80808080, SImode
)));
23759 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23764 rtx reg
= gen_reg_rtx (SImode
);
23765 rtx reg2
= gen_reg_rtx (Pmode
);
23766 emit_move_insn (reg
, tmpreg
);
23767 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23769 /* If zero is not in the first two bytes, move two bytes forward. */
23770 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23771 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23772 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23773 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23774 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23777 /* Emit lea manually to avoid clobbering of flags. */
23778 emit_insn (gen_rtx_SET (SImode
, reg2
,
23779 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23781 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23782 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23783 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23784 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23790 rtx end_2_label
= gen_label_rtx ();
23791 /* Is zero in the first two bytes? */
23793 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23794 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23795 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23796 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23797 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23799 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23800 JUMP_LABEL (tmp
) = end_2_label
;
23802 /* Not in the first two. Move two bytes forward. */
23803 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23804 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23806 emit_label (end_2_label
);
23810 /* Avoid branch in fixing the byte. */
23811 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23812 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23813 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23814 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23815 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23817 emit_label (end_0_label
);
23820 /* Expand strlen. */
23823 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23825 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23827 /* The generic case of strlen expander is long. Avoid it's
23828 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23830 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23831 && !TARGET_INLINE_ALL_STRINGOPS
23832 && !optimize_insn_for_size_p ()
23833 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23836 addr
= force_reg (Pmode
, XEXP (src
, 0));
23837 scratch1
= gen_reg_rtx (Pmode
);
23839 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23840 && !optimize_insn_for_size_p ())
23842 /* Well it seems that some optimizer does not combine a call like
23843 foo(strlen(bar), strlen(bar));
23844 when the move and the subtraction is done here. It does calculate
23845 the length just once when these instructions are done inside of
23846 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23847 often used and I use one fewer register for the lifetime of
23848 output_strlen_unroll() this is better. */
23850 emit_move_insn (out
, addr
);
23852 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23854 /* strlensi_unroll_1 returns the address of the zero at the end of
23855 the string, like memchr(), so compute the length by subtracting
23856 the start address. */
23857 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23863 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23864 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23867 scratch2
= gen_reg_rtx (Pmode
);
23868 scratch3
= gen_reg_rtx (Pmode
);
23869 scratch4
= force_reg (Pmode
, constm1_rtx
);
23871 emit_move_insn (scratch3
, addr
);
23872 eoschar
= force_reg (QImode
, eoschar
);
23874 src
= replace_equiv_address_nv (src
, scratch3
);
23876 /* If .md starts supporting :P, this can be done in .md. */
23877 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23878 scratch4
), UNSPEC_SCAS
);
23879 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23880 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23881 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23886 /* For given symbol (function) construct code to compute address of it's PLT
23887 entry in large x86-64 PIC model. */
23889 construct_plt_address (rtx symbol
)
23893 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23894 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
23895 gcc_assert (Pmode
== DImode
);
23897 tmp
= gen_reg_rtx (Pmode
);
23898 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23900 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23901 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23906 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23908 rtx pop
, bool sibcall
)
23910 unsigned int const cregs_size
23911 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
23912 rtx vec
[3 + cregs_size
];
23913 rtx use
= NULL
, call
;
23914 unsigned int vec_len
= 0;
23916 if (pop
== const0_rtx
)
23918 gcc_assert (!TARGET_64BIT
|| !pop
);
23920 if (TARGET_MACHO
&& !TARGET_64BIT
)
23923 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23924 fnaddr
= machopic_indirect_call_target (fnaddr
);
23929 /* Static functions and indirect calls don't need the pic register. */
23932 || (ix86_cmodel
== CM_LARGE_PIC
23933 && DEFAULT_ABI
!= MS_ABI
))
23934 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23935 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23936 use_reg (&use
, pic_offset_table_rtx
);
23939 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23941 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23942 emit_move_insn (al
, callarg2
);
23943 use_reg (&use
, al
);
23946 if (ix86_cmodel
== CM_LARGE_PIC
23949 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23950 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23951 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23953 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23954 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23956 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
23957 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23960 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23962 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23963 vec
[vec_len
++] = call
;
23967 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23968 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23969 vec
[vec_len
++] = pop
;
23972 if (TARGET_64BIT_MS_ABI
23973 && (!callarg2
|| INTVAL (callarg2
) != -2))
23977 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23978 UNSPEC_MS_TO_SYSV_CALL
);
23980 for (i
= 0; i
< cregs_size
; i
++)
23982 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
23983 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
23986 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
23991 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23992 call
= emit_call_insn (call
);
23994 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23999 /* Output the assembly for a call instruction. */
24002 ix86_output_call_insn (rtx insn
, rtx call_op
)
24004 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24005 bool seh_nop_p
= false;
24008 if (SIBLING_CALL_P (insn
))
24012 /* SEH epilogue detection requires the indirect branch case
24013 to include REX.W. */
24014 else if (TARGET_SEH
)
24015 xasm
= "rex.W jmp %A0";
24019 output_asm_insn (xasm
, &call_op
);
24023 /* SEH unwinding can require an extra nop to be emitted in several
24024 circumstances. Determine if we have one of those. */
24029 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24031 /* If we get to another real insn, we don't need the nop. */
24035 /* If we get to the epilogue note, prevent a catch region from
24036 being adjacent to the standard epilogue sequence. If non-
24037 call-exceptions, we'll have done this during epilogue emission. */
24038 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24039 && !flag_non_call_exceptions
24040 && !can_throw_internal (insn
))
24047 /* If we didn't find a real insn following the call, prevent the
24048 unwinder from looking into the next function. */
24054 xasm
= "call\t%P0";
24056 xasm
= "call\t%A0";
24058 output_asm_insn (xasm
, &call_op
);
24066 /* Clear stack slot assignments remembered from previous functions.
24067 This is called from INIT_EXPANDERS once before RTL is emitted for each
24070 static struct machine_function
*
24071 ix86_init_machine_status (void)
24073 struct machine_function
*f
;
24075 f
= ggc_alloc_cleared_machine_function ();
24076 f
->use_fast_prologue_epilogue_nregs
= -1;
24077 f
->call_abi
= ix86_abi
;
24082 /* Return a MEM corresponding to a stack slot with mode MODE.
24083 Allocate a new slot if necessary.
24085 The RTL for a function can have several slots available: N is
24086 which slot to use. */
24089 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24091 struct stack_local_entry
*s
;
24093 gcc_assert (n
< MAX_386_STACK_LOCALS
);
24095 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24096 if (s
->mode
== mode
&& s
->n
== n
)
24097 return validize_mem (copy_rtx (s
->rtl
));
24099 s
= ggc_alloc_stack_local_entry ();
24102 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24104 s
->next
= ix86_stack_locals
;
24105 ix86_stack_locals
= s
;
24106 return validize_mem (s
->rtl
);
24110 ix86_instantiate_decls (void)
24112 struct stack_local_entry
*s
;
24114 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24115 if (s
->rtl
!= NULL_RTX
)
24116 instantiate_decl_rtl (s
->rtl
);
24119 /* Calculate the length of the memory address in the instruction encoding.
24120 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24121 or other prefixes. We never generate addr32 prefix for LEA insn. */
24124 memory_address_length (rtx addr
, bool lea
)
24126 struct ix86_address parts
;
24127 rtx base
, index
, disp
;
24131 if (GET_CODE (addr
) == PRE_DEC
24132 || GET_CODE (addr
) == POST_INC
24133 || GET_CODE (addr
) == PRE_MODIFY
24134 || GET_CODE (addr
) == POST_MODIFY
)
24137 ok
= ix86_decompose_address (addr
, &parts
);
24140 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24142 /* If this is not LEA instruction, add the length of addr32 prefix. */
24143 if (TARGET_64BIT
&& !lea
24144 && (SImode_address_operand (addr
, VOIDmode
)
24145 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24146 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24150 index
= parts
.index
;
24153 if (base
&& GET_CODE (base
) == SUBREG
)
24154 base
= SUBREG_REG (base
);
24155 if (index
&& GET_CODE (index
) == SUBREG
)
24156 index
= SUBREG_REG (index
);
24158 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24159 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24162 - esp as the base always wants an index,
24163 - ebp as the base always wants a displacement,
24164 - r12 as the base always wants an index,
24165 - r13 as the base always wants a displacement. */
24167 /* Register Indirect. */
24168 if (base
&& !index
&& !disp
)
24170 /* esp (for its index) and ebp (for its displacement) need
24171 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24173 if (base
== arg_pointer_rtx
24174 || base
== frame_pointer_rtx
24175 || REGNO (base
) == SP_REG
24176 || REGNO (base
) == BP_REG
24177 || REGNO (base
) == R12_REG
24178 || REGNO (base
) == R13_REG
)
24182 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24183 is not disp32, but disp32(%rip), so for disp32
24184 SIB byte is needed, unless print_operand_address
24185 optimizes it into disp32(%rip) or (%rip) is implied
24187 else if (disp
&& !base
&& !index
)
24194 if (GET_CODE (disp
) == CONST
)
24195 symbol
= XEXP (disp
, 0);
24196 if (GET_CODE (symbol
) == PLUS
24197 && CONST_INT_P (XEXP (symbol
, 1)))
24198 symbol
= XEXP (symbol
, 0);
24200 if (GET_CODE (symbol
) != LABEL_REF
24201 && (GET_CODE (symbol
) != SYMBOL_REF
24202 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
24203 && (GET_CODE (symbol
) != UNSPEC
24204 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
24205 && XINT (symbol
, 1) != UNSPEC_PCREL
24206 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
24212 /* Find the length of the displacement constant. */
24215 if (base
&& satisfies_constraint_K (disp
))
24220 /* ebp always wants a displacement. Similarly r13. */
24221 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24224 /* An index requires the two-byte modrm form.... */
24226 /* ...like esp (or r12), which always wants an index. */
24227 || base
== arg_pointer_rtx
24228 || base
== frame_pointer_rtx
24229 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24236 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24237 is set, expect that insn have 8bit immediate alternative. */
24239 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24243 extract_insn_cached (insn
);
24244 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24245 if (CONSTANT_P (recog_data
.operand
[i
]))
24247 enum attr_mode mode
= get_attr_mode (insn
);
24250 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24252 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
24259 ival
= trunc_int_for_mode (ival
, HImode
);
24262 ival
= trunc_int_for_mode (ival
, SImode
);
24267 if (IN_RANGE (ival
, -128, 127))
24284 /* Immediates for DImode instructions are encoded
24285 as 32bit sign extended values. */
24290 fatal_insn ("unknown insn mode", insn
);
24296 /* Compute default value for "length_address" attribute. */
24298 ix86_attr_length_address_default (rtx insn
)
24302 if (get_attr_type (insn
) == TYPE_LEA
)
24304 rtx set
= PATTERN (insn
), addr
;
24306 if (GET_CODE (set
) == PARALLEL
)
24307 set
= XVECEXP (set
, 0, 0);
24309 gcc_assert (GET_CODE (set
) == SET
);
24311 addr
= SET_SRC (set
);
24313 return memory_address_length (addr
, true);
24316 extract_insn_cached (insn
);
24317 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24318 if (MEM_P (recog_data
.operand
[i
]))
24320 constrain_operands_cached (reload_completed
);
24321 if (which_alternative
!= -1)
24323 const char *constraints
= recog_data
.constraints
[i
];
24324 int alt
= which_alternative
;
24326 while (*constraints
== '=' || *constraints
== '+')
24329 while (*constraints
++ != ',')
24331 /* Skip ignored operands. */
24332 if (*constraints
== 'X')
24335 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24340 /* Compute default value for "length_vex" attribute. It includes
24341 2 or 3 byte VEX prefix and 1 opcode byte. */
24344 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24348 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24349 byte VEX prefix. */
24350 if (!has_0f_opcode
|| has_vex_w
)
24353 /* We can always use 2 byte VEX prefix in 32bit. */
24357 extract_insn_cached (insn
);
24359 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24360 if (REG_P (recog_data
.operand
[i
]))
24362 /* REX.W bit uses 3 byte VEX prefix. */
24363 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24364 && GENERAL_REG_P (recog_data
.operand
[i
]))
24369 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24370 if (MEM_P (recog_data
.operand
[i
])
24371 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24378 /* Return the maximum number of instructions a cpu can issue. */
24381 ix86_issue_rate (void)
24385 case PROCESSOR_PENTIUM
:
24386 case PROCESSOR_ATOM
:
24387 case PROCESSOR_SLM
:
24389 case PROCESSOR_BTVER2
:
24392 case PROCESSOR_PENTIUMPRO
:
24393 case PROCESSOR_PENTIUM4
:
24394 case PROCESSOR_CORE2
:
24395 case PROCESSOR_COREI7
:
24396 case PROCESSOR_HASWELL
:
24397 case PROCESSOR_ATHLON
:
24399 case PROCESSOR_AMDFAM10
:
24400 case PROCESSOR_NOCONA
:
24401 case PROCESSOR_GENERIC
:
24402 case PROCESSOR_BDVER1
:
24403 case PROCESSOR_BDVER2
:
24404 case PROCESSOR_BDVER3
:
24405 case PROCESSOR_BTVER1
:
24413 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24414 by DEP_INSN and nothing set by DEP_INSN. */
24417 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24421 /* Simplify the test for uninteresting insns. */
24422 if (insn_type
!= TYPE_SETCC
24423 && insn_type
!= TYPE_ICMOV
24424 && insn_type
!= TYPE_FCMOV
24425 && insn_type
!= TYPE_IBR
)
24428 if ((set
= single_set (dep_insn
)) != 0)
24430 set
= SET_DEST (set
);
24433 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24434 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24435 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24436 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24438 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24439 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24444 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24447 /* This test is true if the dependent insn reads the flags but
24448 not any other potentially set register. */
24449 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24452 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24458 /* Return true iff USE_INSN has a memory address with operands set by
24462 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24465 extract_insn_cached (use_insn
);
24466 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24467 if (MEM_P (recog_data
.operand
[i
]))
24469 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24470 return modified_in_p (addr
, set_insn
) != 0;
24475 /* Helper function for exact_store_load_dependency.
24476 Return true if addr is found in insn. */
24478 exact_dependency_1 (rtx addr
, rtx insn
)
24480 enum rtx_code code
;
24481 const char *format_ptr
;
24484 code
= GET_CODE (insn
);
24488 if (rtx_equal_p (addr
, insn
))
24503 format_ptr
= GET_RTX_FORMAT (code
);
24504 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
24506 switch (*format_ptr
++)
24509 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
24513 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
24514 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
24522 /* Return true if there exists exact dependency for store & load, i.e.
24523 the same memory address is used in them. */
24525 exact_store_load_dependency (rtx store
, rtx load
)
24529 set1
= single_set (store
);
24532 if (!MEM_P (SET_DEST (set1
)))
24534 set2
= single_set (load
);
24537 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
24543 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24545 enum attr_type insn_type
, dep_insn_type
;
24546 enum attr_memory memory
;
24548 int dep_insn_code_number
;
24550 /* Anti and output dependencies have zero cost on all CPUs. */
24551 if (REG_NOTE_KIND (link
) != 0)
24554 dep_insn_code_number
= recog_memoized (dep_insn
);
24556 /* If we can't recognize the insns, we can't really do anything. */
24557 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24560 insn_type
= get_attr_type (insn
);
24561 dep_insn_type
= get_attr_type (dep_insn
);
24565 case PROCESSOR_PENTIUM
:
24566 /* Address Generation Interlock adds a cycle of latency. */
24567 if (insn_type
== TYPE_LEA
)
24569 rtx addr
= PATTERN (insn
);
24571 if (GET_CODE (addr
) == PARALLEL
)
24572 addr
= XVECEXP (addr
, 0, 0);
24574 gcc_assert (GET_CODE (addr
) == SET
);
24576 addr
= SET_SRC (addr
);
24577 if (modified_in_p (addr
, dep_insn
))
24580 else if (ix86_agi_dependent (dep_insn
, insn
))
24583 /* ??? Compares pair with jump/setcc. */
24584 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24587 /* Floating point stores require value to be ready one cycle earlier. */
24588 if (insn_type
== TYPE_FMOV
24589 && get_attr_memory (insn
) == MEMORY_STORE
24590 && !ix86_agi_dependent (dep_insn
, insn
))
24594 case PROCESSOR_PENTIUMPRO
:
24595 memory
= get_attr_memory (insn
);
24597 /* INT->FP conversion is expensive. */
24598 if (get_attr_fp_int_src (dep_insn
))
24601 /* There is one cycle extra latency between an FP op and a store. */
24602 if (insn_type
== TYPE_FMOV
24603 && (set
= single_set (dep_insn
)) != NULL_RTX
24604 && (set2
= single_set (insn
)) != NULL_RTX
24605 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24606 && MEM_P (SET_DEST (set2
)))
24609 /* Show ability of reorder buffer to hide latency of load by executing
24610 in parallel with previous instruction in case
24611 previous instruction is not needed to compute the address. */
24612 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24613 && !ix86_agi_dependent (dep_insn
, insn
))
24615 /* Claim moves to take one cycle, as core can issue one load
24616 at time and the next load can start cycle later. */
24617 if (dep_insn_type
== TYPE_IMOV
24618 || dep_insn_type
== TYPE_FMOV
)
24626 memory
= get_attr_memory (insn
);
24628 /* The esp dependency is resolved before the instruction is really
24630 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24631 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24634 /* INT->FP conversion is expensive. */
24635 if (get_attr_fp_int_src (dep_insn
))
24638 /* Show ability of reorder buffer to hide latency of load by executing
24639 in parallel with previous instruction in case
24640 previous instruction is not needed to compute the address. */
24641 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24642 && !ix86_agi_dependent (dep_insn
, insn
))
24644 /* Claim moves to take one cycle, as core can issue one load
24645 at time and the next load can start cycle later. */
24646 if (dep_insn_type
== TYPE_IMOV
24647 || dep_insn_type
== TYPE_FMOV
)
24656 case PROCESSOR_ATHLON
:
24658 case PROCESSOR_AMDFAM10
:
24659 case PROCESSOR_BDVER1
:
24660 case PROCESSOR_BDVER2
:
24661 case PROCESSOR_BDVER3
:
24662 case PROCESSOR_BTVER1
:
24663 case PROCESSOR_BTVER2
:
24664 case PROCESSOR_ATOM
:
24665 case PROCESSOR_GENERIC
:
24666 memory
= get_attr_memory (insn
);
24668 /* Show ability of reorder buffer to hide latency of load by executing
24669 in parallel with previous instruction in case
24670 previous instruction is not needed to compute the address. */
24671 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24672 && !ix86_agi_dependent (dep_insn
, insn
))
24674 enum attr_unit unit
= get_attr_unit (insn
);
24677 /* Because of the difference between the length of integer and
24678 floating unit pipeline preparation stages, the memory operands
24679 for floating point are cheaper.
24681 ??? For Athlon it the difference is most probably 2. */
24682 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24685 loadcost
= TARGET_ATHLON
? 2 : 0;
24687 if (cost
>= loadcost
)
24694 case PROCESSOR_SLM
:
24695 if (!reload_completed
)
24698 /* Increase cost of integer loads. */
24699 memory
= get_attr_memory (dep_insn
);
24700 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24702 enum attr_unit unit
= get_attr_unit (dep_insn
);
24703 if (unit
== UNIT_INTEGER
&& cost
== 1)
24705 if (memory
== MEMORY_LOAD
)
24709 /* Increase cost of ld/st for short int types only
24710 because of store forwarding issue. */
24711 rtx set
= single_set (dep_insn
);
24712 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
24713 || GET_MODE (SET_DEST (set
)) == HImode
))
24715 /* Increase cost of store/load insn if exact
24716 dependence exists and it is load insn. */
24717 enum attr_memory insn_memory
= get_attr_memory (insn
);
24718 if (insn_memory
== MEMORY_LOAD
24719 && exact_store_load_dependency (dep_insn
, insn
))
24733 /* How many alternative schedules to try. This should be as wide as the
24734 scheduling freedom in the DFA, but no wider. Making this value too
24735 large results extra work for the scheduler. */
24738 ia32_multipass_dfa_lookahead (void)
24742 case PROCESSOR_PENTIUM
:
24745 case PROCESSOR_PENTIUMPRO
:
24749 case PROCESSOR_CORE2
:
24750 case PROCESSOR_COREI7
:
24751 case PROCESSOR_HASWELL
:
24752 case PROCESSOR_ATOM
:
24753 case PROCESSOR_SLM
:
24754 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24755 as many instructions can be executed on a cycle, i.e.,
24756 issue_rate. I wonder why tuning for many CPUs does not do this. */
24757 if (reload_completed
)
24758 return ix86_issue_rate ();
24759 /* Don't use lookahead for pre-reload schedule to save compile time. */
24767 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24768 execution. It is applied if
24769 (1) IMUL instruction is on the top of list;
24770 (2) There exists the only producer of independent IMUL instruction in
24772 Return index of IMUL producer if it was found and -1 otherwise. */
24774 do_reorder_for_imul (rtx
*ready
, int n_ready
)
24776 rtx insn
, set
, insn1
, insn2
;
24777 sd_iterator_def sd_it
;
24782 if (ix86_tune
!= PROCESSOR_ATOM
)
24785 /* Check that IMUL instruction is on the top of ready list. */
24786 insn
= ready
[n_ready
- 1];
24787 set
= single_set (insn
);
24790 if (!(GET_CODE (SET_SRC (set
)) == MULT
24791 && GET_MODE (SET_SRC (set
)) == SImode
))
24794 /* Search for producer of independent IMUL instruction. */
24795 for (i
= n_ready
- 2; i
>= 0; i
--)
24798 if (!NONDEBUG_INSN_P (insn
))
24800 /* Skip IMUL instruction. */
24801 insn2
= PATTERN (insn
);
24802 if (GET_CODE (insn2
) == PARALLEL
)
24803 insn2
= XVECEXP (insn2
, 0, 0);
24804 if (GET_CODE (insn2
) == SET
24805 && GET_CODE (SET_SRC (insn2
)) == MULT
24806 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24809 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24812 con
= DEP_CON (dep
);
24813 if (!NONDEBUG_INSN_P (con
))
24815 insn1
= PATTERN (con
);
24816 if (GET_CODE (insn1
) == PARALLEL
)
24817 insn1
= XVECEXP (insn1
, 0, 0);
24819 if (GET_CODE (insn1
) == SET
24820 && GET_CODE (SET_SRC (insn1
)) == MULT
24821 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24823 sd_iterator_def sd_it1
;
24825 /* Check if there is no other dependee for IMUL. */
24827 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24830 pro
= DEP_PRO (dep1
);
24831 if (!NONDEBUG_INSN_P (pro
))
24846 /* Try to find the best candidate on the top of ready list if two insns
24847 have the same priority - candidate is best if its dependees were
24848 scheduled earlier. Applied for Silvermont only.
24849 Return true if top 2 insns must be interchanged. */
24851 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
24853 rtx top
= ready
[n_ready
- 1];
24854 rtx next
= ready
[n_ready
- 2];
24856 sd_iterator_def sd_it
;
24860 #define INSN_TICK(INSN) (HID (INSN)->tick)
24862 if (ix86_tune
!= PROCESSOR_SLM
)
24865 if (!NONDEBUG_INSN_P (top
))
24867 if (!NONJUMP_INSN_P (top
))
24869 if (!NONDEBUG_INSN_P (next
))
24871 if (!NONJUMP_INSN_P (next
))
24873 set
= single_set (top
);
24876 set
= single_set (next
);
24880 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
24882 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
24884 /* Determine winner more precise. */
24885 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
24888 pro
= DEP_PRO (dep
);
24889 if (!NONDEBUG_INSN_P (pro
))
24891 if (INSN_TICK (pro
) > clock1
)
24892 clock1
= INSN_TICK (pro
);
24894 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
24897 pro
= DEP_PRO (dep
);
24898 if (!NONDEBUG_INSN_P (pro
))
24900 if (INSN_TICK (pro
) > clock2
)
24901 clock2
= INSN_TICK (pro
);
24904 if (clock1
== clock2
)
24906 /* Determine winner - load must win. */
24907 enum attr_memory memory1
, memory2
;
24908 memory1
= get_attr_memory (top
);
24909 memory2
= get_attr_memory (next
);
24910 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
24913 return (bool) (clock2
< clock1
);
24919 /* Perform possible reodering of ready list for Atom/Silvermont only.
24920 Return issue rate. */
24922 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24925 int issue_rate
= -1;
24926 int n_ready
= *pn_ready
;
24931 /* Set up issue rate. */
24932 issue_rate
= ix86_issue_rate ();
24934 /* Do reodering for Atom/SLM only. */
24935 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
24938 /* Nothing to do if ready list contains only 1 instruction. */
24942 /* Do reodering for post-reload scheduler only. */
24943 if (!reload_completed
)
24946 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
24948 if (sched_verbose
> 1)
24949 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
24950 INSN_UID (ready
[index
]));
24952 /* Put IMUL producer (ready[index]) at the top of ready list. */
24953 insn
= ready
[index
];
24954 for (i
= index
; i
< n_ready
- 1; i
++)
24955 ready
[i
] = ready
[i
+ 1];
24956 ready
[n_ready
- 1] = insn
;
24959 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
24961 if (sched_verbose
> 1)
24962 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
24963 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
24964 /* Swap 2 top elements of ready list. */
24965 insn
= ready
[n_ready
- 1];
24966 ready
[n_ready
- 1] = ready
[n_ready
- 2];
24967 ready
[n_ready
- 2] = insn
;
24973 ix86_class_likely_spilled_p (reg_class_t
);
24975 /* Returns true if lhs of insn is HW function argument register and set up
24976 is_spilled to true if it is likely spilled HW register. */
24978 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24982 if (!NONDEBUG_INSN_P (insn
))
24984 /* Call instructions are not movable, ignore it. */
24987 insn
= PATTERN (insn
);
24988 if (GET_CODE (insn
) == PARALLEL
)
24989 insn
= XVECEXP (insn
, 0, 0);
24990 if (GET_CODE (insn
) != SET
)
24992 dst
= SET_DEST (insn
);
24993 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24994 && ix86_function_arg_regno_p (REGNO (dst
)))
24996 /* Is it likely spilled HW register? */
24997 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24998 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24999 *is_spilled
= true;
25005 /* Add output dependencies for chain of function adjacent arguments if only
25006 there is a move to likely spilled HW register. Return first argument
25007 if at least one dependence was added or NULL otherwise. */
25009 add_parameter_dependencies (rtx call
, rtx head
)
25013 rtx first_arg
= NULL
;
25014 bool is_spilled
= false;
25016 head
= PREV_INSN (head
);
25018 /* Find nearest to call argument passing instruction. */
25021 last
= PREV_INSN (last
);
25024 if (!NONDEBUG_INSN_P (last
))
25026 if (insn_is_function_arg (last
, &is_spilled
))
25034 insn
= PREV_INSN (last
);
25035 if (!INSN_P (insn
))
25039 if (!NONDEBUG_INSN_P (insn
))
25044 if (insn_is_function_arg (insn
, &is_spilled
))
25046 /* Add output depdendence between two function arguments if chain
25047 of output arguments contains likely spilled HW registers. */
25049 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25050 first_arg
= last
= insn
;
25060 /* Add output or anti dependency from insn to first_arg to restrict its code
25063 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25068 set
= single_set (insn
);
25071 tmp
= SET_DEST (set
);
25074 /* Add output dependency to the first function argument. */
25075 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25078 /* Add anti dependency. */
25079 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25082 /* Avoid cross block motion of function argument through adding dependency
25083 from the first non-jump instruction in bb. */
25085 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25087 rtx insn
= BB_END (bb
);
25091 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25093 rtx set
= single_set (insn
);
25096 avoid_func_arg_motion (arg
, insn
);
25100 if (insn
== BB_HEAD (bb
))
25102 insn
= PREV_INSN (insn
);
25106 /* Hook for pre-reload schedule - avoid motion of function arguments
25107 passed in likely spilled HW registers. */
25109 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25112 rtx first_arg
= NULL
;
25113 if (reload_completed
)
25115 while (head
!= tail
&& DEBUG_INSN_P (head
))
25116 head
= NEXT_INSN (head
);
25117 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25118 if (INSN_P (insn
) && CALL_P (insn
))
25120 first_arg
= add_parameter_dependencies (insn
, head
);
25123 /* Add dependee for first argument to predecessors if only
25124 region contains more than one block. */
25125 basic_block bb
= BLOCK_FOR_INSN (insn
);
25126 int rgn
= CONTAINING_RGN (bb
->index
);
25127 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25128 /* Skip trivial regions and region head blocks that can have
25129 predecessors outside of region. */
25130 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25134 /* Assume that region is SCC, i.e. all immediate predecessors
25135 of non-head block are in the same region. */
25136 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25138 /* Avoid creating of loop-carried dependencies through
25139 using topological odering in region. */
25140 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25141 add_dependee_for_func_arg (first_arg
, e
->src
);
25149 else if (first_arg
)
25150 avoid_func_arg_motion (first_arg
, insn
);
25153 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25154 HW registers to maximum, to schedule them at soon as possible. These are
25155 moves from function argument registers at the top of the function entry
25156 and moves from function return value registers after call. */
25158 ix86_adjust_priority (rtx insn
, int priority
)
25162 if (reload_completed
)
25165 if (!NONDEBUG_INSN_P (insn
))
25168 set
= single_set (insn
);
25171 rtx tmp
= SET_SRC (set
);
25173 && HARD_REGISTER_P (tmp
)
25174 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25175 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25176 return current_sched_info
->sched_max_insns_priority
;
25182 /* Model decoder of Core 2/i7.
25183 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25184 track the instruction fetch block boundaries and make sure that long
25185 (9+ bytes) instructions are assigned to D0. */
25187 /* Maximum length of an insn that can be handled by
25188 a secondary decoder unit. '8' for Core 2/i7. */
25189 static int core2i7_secondary_decoder_max_insn_size
;
25191 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25192 '16' for Core 2/i7. */
25193 static int core2i7_ifetch_block_size
;
25195 /* Maximum number of instructions decoder can handle per cycle.
25196 '6' for Core 2/i7. */
25197 static int core2i7_ifetch_block_max_insns
;
25199 typedef struct ix86_first_cycle_multipass_data_
*
25200 ix86_first_cycle_multipass_data_t
;
25201 typedef const struct ix86_first_cycle_multipass_data_
*
25202 const_ix86_first_cycle_multipass_data_t
;
25204 /* A variable to store target state across calls to max_issue within
25206 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25207 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25209 /* Initialize DATA. */
25211 core2i7_first_cycle_multipass_init (void *_data
)
25213 ix86_first_cycle_multipass_data_t data
25214 = (ix86_first_cycle_multipass_data_t
) _data
;
25216 data
->ifetch_block_len
= 0;
25217 data
->ifetch_block_n_insns
= 0;
25218 data
->ready_try_change
= NULL
;
25219 data
->ready_try_change_size
= 0;
25222 /* Advancing the cycle; reset ifetch block counts. */
25224 core2i7_dfa_post_advance_cycle (void)
25226 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
25228 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25230 data
->ifetch_block_len
= 0;
25231 data
->ifetch_block_n_insns
= 0;
25234 static int min_insn_size (rtx
);
25236 /* Filter out insns from ready_try that the core will not be able to issue
25237 on current cycle due to decoder. */
25239 core2i7_first_cycle_multipass_filter_ready_try
25240 (const_ix86_first_cycle_multipass_data_t data
,
25241 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
25248 if (ready_try
[n_ready
])
25251 insn
= get_ready_element (n_ready
);
25252 insn_size
= min_insn_size (insn
);
25254 if (/* If this is a too long an insn for a secondary decoder ... */
25255 (!first_cycle_insn_p
25256 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
25257 /* ... or it would not fit into the ifetch block ... */
25258 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
25259 /* ... or the decoder is full already ... */
25260 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
25261 /* ... mask the insn out. */
25263 ready_try
[n_ready
] = 1;
25265 if (data
->ready_try_change
)
25266 bitmap_set_bit (data
->ready_try_change
, n_ready
);
25271 /* Prepare for a new round of multipass lookahead scheduling. */
25273 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25274 bool first_cycle_insn_p
)
25276 ix86_first_cycle_multipass_data_t data
25277 = (ix86_first_cycle_multipass_data_t
) _data
;
25278 const_ix86_first_cycle_multipass_data_t prev_data
25279 = ix86_first_cycle_multipass_data
;
25281 /* Restore the state from the end of the previous round. */
25282 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25283 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25285 /* Filter instructions that cannot be issued on current cycle due to
25286 decoder restrictions. */
25287 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25288 first_cycle_insn_p
);
25291 /* INSN is being issued in current solution. Account for its impact on
25292 the decoder model. */
25294 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
25295 rtx insn
, const void *_prev_data
)
25297 ix86_first_cycle_multipass_data_t data
25298 = (ix86_first_cycle_multipass_data_t
) _data
;
25299 const_ix86_first_cycle_multipass_data_t prev_data
25300 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
25302 int insn_size
= min_insn_size (insn
);
25304 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
25305 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
25306 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
25307 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25309 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
25310 if (!data
->ready_try_change
)
25312 data
->ready_try_change
= sbitmap_alloc (n_ready
);
25313 data
->ready_try_change_size
= n_ready
;
25315 else if (data
->ready_try_change_size
< n_ready
)
25317 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
25319 data
->ready_try_change_size
= n_ready
;
25321 bitmap_clear (data
->ready_try_change
);
25323 /* Filter out insns from ready_try that the core will not be able to issue
25324 on current cycle due to decoder. */
25325 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25329 /* Revert the effect on ready_try. */
25331 core2i7_first_cycle_multipass_backtrack (const void *_data
,
25333 int n_ready ATTRIBUTE_UNUSED
)
25335 const_ix86_first_cycle_multipass_data_t data
25336 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25337 unsigned int i
= 0;
25338 sbitmap_iterator sbi
;
25340 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
25341 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
25347 /* Save the result of multipass lookahead scheduling for the next round. */
25349 core2i7_first_cycle_multipass_end (const void *_data
)
25351 const_ix86_first_cycle_multipass_data_t data
25352 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25353 ix86_first_cycle_multipass_data_t next_data
25354 = ix86_first_cycle_multipass_data
;
25358 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
25359 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
25363 /* Deallocate target data. */
25365 core2i7_first_cycle_multipass_fini (void *_data
)
25367 ix86_first_cycle_multipass_data_t data
25368 = (ix86_first_cycle_multipass_data_t
) _data
;
25370 if (data
->ready_try_change
)
25372 sbitmap_free (data
->ready_try_change
);
25373 data
->ready_try_change
= NULL
;
25374 data
->ready_try_change_size
= 0;
25378 /* Prepare for scheduling pass. */
25380 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
25381 int verbose ATTRIBUTE_UNUSED
,
25382 int max_uid ATTRIBUTE_UNUSED
)
25384 /* Install scheduling hooks for current CPU. Some of these hooks are used
25385 in time-critical parts of the scheduler, so we only set them up when
25386 they are actually used. */
25389 case PROCESSOR_CORE2
:
25390 case PROCESSOR_COREI7
:
25391 case PROCESSOR_HASWELL
:
25392 /* Do not perform multipass scheduling for pre-reload schedule
25393 to save compile time. */
25394 if (reload_completed
)
25396 targetm
.sched
.dfa_post_advance_cycle
25397 = core2i7_dfa_post_advance_cycle
;
25398 targetm
.sched
.first_cycle_multipass_init
25399 = core2i7_first_cycle_multipass_init
;
25400 targetm
.sched
.first_cycle_multipass_begin
25401 = core2i7_first_cycle_multipass_begin
;
25402 targetm
.sched
.first_cycle_multipass_issue
25403 = core2i7_first_cycle_multipass_issue
;
25404 targetm
.sched
.first_cycle_multipass_backtrack
25405 = core2i7_first_cycle_multipass_backtrack
;
25406 targetm
.sched
.first_cycle_multipass_end
25407 = core2i7_first_cycle_multipass_end
;
25408 targetm
.sched
.first_cycle_multipass_fini
25409 = core2i7_first_cycle_multipass_fini
;
25411 /* Set decoder parameters. */
25412 core2i7_secondary_decoder_max_insn_size
= 8;
25413 core2i7_ifetch_block_size
= 16;
25414 core2i7_ifetch_block_max_insns
= 6;
25417 /* ... Fall through ... */
25419 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
25420 targetm
.sched
.first_cycle_multipass_init
= NULL
;
25421 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
25422 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
25423 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
25424 targetm
.sched
.first_cycle_multipass_end
= NULL
;
25425 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
25431 /* Compute the alignment given to a constant that is being placed in memory.
25432 EXP is the constant and ALIGN is the alignment that the object would
25434 The value of this function is used instead of that alignment to align
25438 ix86_constant_alignment (tree exp
, int align
)
25440 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
25441 || TREE_CODE (exp
) == INTEGER_CST
)
25443 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
25445 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
25448 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
25449 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
25450 return BITS_PER_WORD
;
25455 /* Compute the alignment for a static variable.
25456 TYPE is the data type, and ALIGN is the alignment that
25457 the object would ordinarily have. The value of this function is used
25458 instead of that alignment to align the object. */
25461 ix86_data_alignment (tree type
, int align
, bool opt
)
25463 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
25466 && AGGREGATE_TYPE_P (type
)
25467 && TYPE_SIZE (type
)
25468 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25469 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
25470 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
25471 && align
< max_align
)
25474 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25475 to 16byte boundary. */
25478 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
25479 && TYPE_SIZE (type
)
25480 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25481 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
25482 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25489 if (TREE_CODE (type
) == ARRAY_TYPE
)
25491 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25493 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25496 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25499 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25501 if ((TYPE_MODE (type
) == XCmode
25502 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25505 else if ((TREE_CODE (type
) == RECORD_TYPE
25506 || TREE_CODE (type
) == UNION_TYPE
25507 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25508 && TYPE_FIELDS (type
))
25510 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25512 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25515 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25516 || TREE_CODE (type
) == INTEGER_TYPE
)
25518 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25520 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25527 /* Compute the alignment for a local variable or a stack slot. EXP is
25528 the data type or decl itself, MODE is the widest mode available and
25529 ALIGN is the alignment that the object would ordinarily have. The
25530 value of this macro is used instead of that alignment to align the
25534 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25535 unsigned int align
)
25539 if (exp
&& DECL_P (exp
))
25541 type
= TREE_TYPE (exp
);
25550 /* Don't do dynamic stack realignment for long long objects with
25551 -mpreferred-stack-boundary=2. */
25554 && ix86_preferred_stack_boundary
< 64
25555 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25556 && (!type
|| !TYPE_USER_ALIGN (type
))
25557 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25560 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25561 register in MODE. We will return the largest alignment of XF
25565 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25566 align
= GET_MODE_ALIGNMENT (DFmode
);
25570 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25571 to 16byte boundary. Exact wording is:
25573 An array uses the same alignment as its elements, except that a local or
25574 global array variable of length at least 16 bytes or
25575 a C99 variable-length array variable always has alignment of at least 16 bytes.
25577 This was added to allow use of aligned SSE instructions at arrays. This
25578 rule is meant for static storage (where compiler can not do the analysis
25579 by itself). We follow it for automatic variables only when convenient.
25580 We fully control everything in the function compiled and functions from
25581 other unit can not rely on the alignment.
25583 Exclude va_list type. It is the common case of local array where
25584 we can not benefit from the alignment.
25586 TODO: Probably one should optimize for size only when var is not escaping. */
25587 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25590 if (AGGREGATE_TYPE_P (type
)
25591 && (va_list_type_node
== NULL_TREE
25592 || (TYPE_MAIN_VARIANT (type
)
25593 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25594 && TYPE_SIZE (type
)
25595 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25596 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25597 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25600 if (TREE_CODE (type
) == ARRAY_TYPE
)
25602 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25604 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25607 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25609 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25611 if ((TYPE_MODE (type
) == XCmode
25612 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25615 else if ((TREE_CODE (type
) == RECORD_TYPE
25616 || TREE_CODE (type
) == UNION_TYPE
25617 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25618 && TYPE_FIELDS (type
))
25620 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25622 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25625 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25626 || TREE_CODE (type
) == INTEGER_TYPE
)
25629 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25631 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25637 /* Compute the minimum required alignment for dynamic stack realignment
25638 purposes for a local variable, parameter or a stack slot. EXP is
25639 the data type or decl itself, MODE is its mode and ALIGN is the
25640 alignment that the object would ordinarily have. */
25643 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25644 unsigned int align
)
25648 if (exp
&& DECL_P (exp
))
25650 type
= TREE_TYPE (exp
);
25659 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25662 /* Don't do dynamic stack realignment for long long objects with
25663 -mpreferred-stack-boundary=2. */
25664 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25665 && (!type
|| !TYPE_USER_ALIGN (type
))
25666 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25672 /* Find a location for the static chain incoming to a nested function.
25673 This is a register, unless all free registers are used by arguments. */
25676 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25680 if (!DECL_STATIC_CHAIN (fndecl
))
25685 /* We always use R10 in 64-bit mode. */
25693 /* By default in 32-bit mode we use ECX to pass the static chain. */
25696 fntype
= TREE_TYPE (fndecl
);
25697 ccvt
= ix86_get_callcvt (fntype
);
25698 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25700 /* Fastcall functions use ecx/edx for arguments, which leaves
25701 us with EAX for the static chain.
25702 Thiscall functions use ecx for arguments, which also
25703 leaves us with EAX for the static chain. */
25706 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25708 /* Thiscall functions use ecx for arguments, which leaves
25709 us with EAX and EDX for the static chain.
25710 We are using for abi-compatibility EAX. */
25713 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25715 /* For regparm 3, we have no free call-clobbered registers in
25716 which to store the static chain. In order to implement this,
25717 we have the trampoline push the static chain to the stack.
25718 However, we can't push a value below the return address when
25719 we call the nested function directly, so we have to use an
25720 alternate entry point. For this we use ESI, and have the
25721 alternate entry point push ESI, so that things appear the
25722 same once we're executing the nested function. */
25725 if (fndecl
== current_function_decl
)
25726 ix86_static_chain_on_stack
= true;
25727 return gen_frame_mem (SImode
,
25728 plus_constant (Pmode
,
25729 arg_pointer_rtx
, -8));
25735 return gen_rtx_REG (Pmode
, regno
);
25738 /* Emit RTL insns to initialize the variable parts of a trampoline.
25739 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25740 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25741 to be passed to the target function. */
25744 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25750 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25756 /* Load the function address to r11. Try to load address using
25757 the shorter movl instead of movabs. We may want to support
25758 movq for kernel mode, but kernel does not use trampolines at
25759 the moment. FNADDR is a 32bit address and may not be in
25760 DImode when ptr_mode == SImode. Always use movl in this
25762 if (ptr_mode
== SImode
25763 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25765 fnaddr
= copy_addr_to_reg (fnaddr
);
25767 mem
= adjust_address (m_tramp
, HImode
, offset
);
25768 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25770 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25771 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25776 mem
= adjust_address (m_tramp
, HImode
, offset
);
25777 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25779 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25780 emit_move_insn (mem
, fnaddr
);
25784 /* Load static chain using movabs to r10. Use the shorter movl
25785 instead of movabs when ptr_mode == SImode. */
25786 if (ptr_mode
== SImode
)
25797 mem
= adjust_address (m_tramp
, HImode
, offset
);
25798 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25800 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25801 emit_move_insn (mem
, chain_value
);
25804 /* Jump to r11; the last (unused) byte is a nop, only there to
25805 pad the write out to a single 32-bit store. */
25806 mem
= adjust_address (m_tramp
, SImode
, offset
);
25807 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25814 /* Depending on the static chain location, either load a register
25815 with a constant, or push the constant to the stack. All of the
25816 instructions are the same size. */
25817 chain
= ix86_static_chain (fndecl
, true);
25820 switch (REGNO (chain
))
25823 opcode
= 0xb8; break;
25825 opcode
= 0xb9; break;
25827 gcc_unreachable ();
25833 mem
= adjust_address (m_tramp
, QImode
, offset
);
25834 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25836 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25837 emit_move_insn (mem
, chain_value
);
25840 mem
= adjust_address (m_tramp
, QImode
, offset
);
25841 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25843 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25845 /* Compute offset from the end of the jmp to the target function.
25846 In the case in which the trampoline stores the static chain on
25847 the stack, we need to skip the first insn which pushes the
25848 (call-saved) register static chain; this push is 1 byte. */
25850 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25851 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25852 offset
- (MEM_P (chain
) ? 1 : 0)),
25853 NULL_RTX
, 1, OPTAB_DIRECT
);
25854 emit_move_insn (mem
, disp
);
25857 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25859 #ifdef HAVE_ENABLE_EXECUTE_STACK
25860 #ifdef CHECK_EXECUTE_STACK_ENABLED
25861 if (CHECK_EXECUTE_STACK_ENABLED
)
25863 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25864 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25868 /* The following file contains several enumerations and data structures
25869 built from the definitions in i386-builtin-types.def. */
25871 #include "i386-builtin-types.inc"
25873 /* Table for the ix86 builtin non-function types. */
25874 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25876 /* Retrieve an element from the above table, building some of
25877 the types lazily. */
25880 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25882 unsigned int index
;
25885 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25887 type
= ix86_builtin_type_tab
[(int) tcode
];
25891 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25892 if (tcode
<= IX86_BT_LAST_VECT
)
25894 enum machine_mode mode
;
25896 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25897 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25898 mode
= ix86_builtin_type_vect_mode
[index
];
25900 type
= build_vector_type_for_mode (itype
, mode
);
25906 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25907 if (tcode
<= IX86_BT_LAST_PTR
)
25908 quals
= TYPE_UNQUALIFIED
;
25910 quals
= TYPE_QUAL_CONST
;
25912 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25913 if (quals
!= TYPE_UNQUALIFIED
)
25914 itype
= build_qualified_type (itype
, quals
);
25916 type
= build_pointer_type (itype
);
25919 ix86_builtin_type_tab
[(int) tcode
] = type
;
25923 /* Table for the ix86 builtin function types. */
25924 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25926 /* Retrieve an element from the above table, building some of
25927 the types lazily. */
25930 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25934 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25936 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25940 if (tcode
<= IX86_BT_LAST_FUNC
)
25942 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25943 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25944 tree rtype
, atype
, args
= void_list_node
;
25947 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25948 for (i
= after
- 1; i
> start
; --i
)
25950 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25951 args
= tree_cons (NULL
, atype
, args
);
25954 type
= build_function_type (rtype
, args
);
25958 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25959 enum ix86_builtin_func_type icode
;
25961 icode
= ix86_builtin_func_alias_base
[index
];
25962 type
= ix86_get_builtin_func_type (icode
);
25965 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25970 /* Codes for all the SSE/MMX builtins. */
25973 IX86_BUILTIN_ADDPS
,
25974 IX86_BUILTIN_ADDSS
,
25975 IX86_BUILTIN_DIVPS
,
25976 IX86_BUILTIN_DIVSS
,
25977 IX86_BUILTIN_MULPS
,
25978 IX86_BUILTIN_MULSS
,
25979 IX86_BUILTIN_SUBPS
,
25980 IX86_BUILTIN_SUBSS
,
25982 IX86_BUILTIN_CMPEQPS
,
25983 IX86_BUILTIN_CMPLTPS
,
25984 IX86_BUILTIN_CMPLEPS
,
25985 IX86_BUILTIN_CMPGTPS
,
25986 IX86_BUILTIN_CMPGEPS
,
25987 IX86_BUILTIN_CMPNEQPS
,
25988 IX86_BUILTIN_CMPNLTPS
,
25989 IX86_BUILTIN_CMPNLEPS
,
25990 IX86_BUILTIN_CMPNGTPS
,
25991 IX86_BUILTIN_CMPNGEPS
,
25992 IX86_BUILTIN_CMPORDPS
,
25993 IX86_BUILTIN_CMPUNORDPS
,
25994 IX86_BUILTIN_CMPEQSS
,
25995 IX86_BUILTIN_CMPLTSS
,
25996 IX86_BUILTIN_CMPLESS
,
25997 IX86_BUILTIN_CMPNEQSS
,
25998 IX86_BUILTIN_CMPNLTSS
,
25999 IX86_BUILTIN_CMPNLESS
,
26000 IX86_BUILTIN_CMPORDSS
,
26001 IX86_BUILTIN_CMPUNORDSS
,
26003 IX86_BUILTIN_COMIEQSS
,
26004 IX86_BUILTIN_COMILTSS
,
26005 IX86_BUILTIN_COMILESS
,
26006 IX86_BUILTIN_COMIGTSS
,
26007 IX86_BUILTIN_COMIGESS
,
26008 IX86_BUILTIN_COMINEQSS
,
26009 IX86_BUILTIN_UCOMIEQSS
,
26010 IX86_BUILTIN_UCOMILTSS
,
26011 IX86_BUILTIN_UCOMILESS
,
26012 IX86_BUILTIN_UCOMIGTSS
,
26013 IX86_BUILTIN_UCOMIGESS
,
26014 IX86_BUILTIN_UCOMINEQSS
,
26016 IX86_BUILTIN_CVTPI2PS
,
26017 IX86_BUILTIN_CVTPS2PI
,
26018 IX86_BUILTIN_CVTSI2SS
,
26019 IX86_BUILTIN_CVTSI642SS
,
26020 IX86_BUILTIN_CVTSS2SI
,
26021 IX86_BUILTIN_CVTSS2SI64
,
26022 IX86_BUILTIN_CVTTPS2PI
,
26023 IX86_BUILTIN_CVTTSS2SI
,
26024 IX86_BUILTIN_CVTTSS2SI64
,
26026 IX86_BUILTIN_MAXPS
,
26027 IX86_BUILTIN_MAXSS
,
26028 IX86_BUILTIN_MINPS
,
26029 IX86_BUILTIN_MINSS
,
26031 IX86_BUILTIN_LOADUPS
,
26032 IX86_BUILTIN_STOREUPS
,
26033 IX86_BUILTIN_MOVSS
,
26035 IX86_BUILTIN_MOVHLPS
,
26036 IX86_BUILTIN_MOVLHPS
,
26037 IX86_BUILTIN_LOADHPS
,
26038 IX86_BUILTIN_LOADLPS
,
26039 IX86_BUILTIN_STOREHPS
,
26040 IX86_BUILTIN_STORELPS
,
26042 IX86_BUILTIN_MASKMOVQ
,
26043 IX86_BUILTIN_MOVMSKPS
,
26044 IX86_BUILTIN_PMOVMSKB
,
26046 IX86_BUILTIN_MOVNTPS
,
26047 IX86_BUILTIN_MOVNTQ
,
26049 IX86_BUILTIN_LOADDQU
,
26050 IX86_BUILTIN_STOREDQU
,
26052 IX86_BUILTIN_PACKSSWB
,
26053 IX86_BUILTIN_PACKSSDW
,
26054 IX86_BUILTIN_PACKUSWB
,
26056 IX86_BUILTIN_PADDB
,
26057 IX86_BUILTIN_PADDW
,
26058 IX86_BUILTIN_PADDD
,
26059 IX86_BUILTIN_PADDQ
,
26060 IX86_BUILTIN_PADDSB
,
26061 IX86_BUILTIN_PADDSW
,
26062 IX86_BUILTIN_PADDUSB
,
26063 IX86_BUILTIN_PADDUSW
,
26064 IX86_BUILTIN_PSUBB
,
26065 IX86_BUILTIN_PSUBW
,
26066 IX86_BUILTIN_PSUBD
,
26067 IX86_BUILTIN_PSUBQ
,
26068 IX86_BUILTIN_PSUBSB
,
26069 IX86_BUILTIN_PSUBSW
,
26070 IX86_BUILTIN_PSUBUSB
,
26071 IX86_BUILTIN_PSUBUSW
,
26074 IX86_BUILTIN_PANDN
,
26078 IX86_BUILTIN_PAVGB
,
26079 IX86_BUILTIN_PAVGW
,
26081 IX86_BUILTIN_PCMPEQB
,
26082 IX86_BUILTIN_PCMPEQW
,
26083 IX86_BUILTIN_PCMPEQD
,
26084 IX86_BUILTIN_PCMPGTB
,
26085 IX86_BUILTIN_PCMPGTW
,
26086 IX86_BUILTIN_PCMPGTD
,
26088 IX86_BUILTIN_PMADDWD
,
26090 IX86_BUILTIN_PMAXSW
,
26091 IX86_BUILTIN_PMAXUB
,
26092 IX86_BUILTIN_PMINSW
,
26093 IX86_BUILTIN_PMINUB
,
26095 IX86_BUILTIN_PMULHUW
,
26096 IX86_BUILTIN_PMULHW
,
26097 IX86_BUILTIN_PMULLW
,
26099 IX86_BUILTIN_PSADBW
,
26100 IX86_BUILTIN_PSHUFW
,
26102 IX86_BUILTIN_PSLLW
,
26103 IX86_BUILTIN_PSLLD
,
26104 IX86_BUILTIN_PSLLQ
,
26105 IX86_BUILTIN_PSRAW
,
26106 IX86_BUILTIN_PSRAD
,
26107 IX86_BUILTIN_PSRLW
,
26108 IX86_BUILTIN_PSRLD
,
26109 IX86_BUILTIN_PSRLQ
,
26110 IX86_BUILTIN_PSLLWI
,
26111 IX86_BUILTIN_PSLLDI
,
26112 IX86_BUILTIN_PSLLQI
,
26113 IX86_BUILTIN_PSRAWI
,
26114 IX86_BUILTIN_PSRADI
,
26115 IX86_BUILTIN_PSRLWI
,
26116 IX86_BUILTIN_PSRLDI
,
26117 IX86_BUILTIN_PSRLQI
,
26119 IX86_BUILTIN_PUNPCKHBW
,
26120 IX86_BUILTIN_PUNPCKHWD
,
26121 IX86_BUILTIN_PUNPCKHDQ
,
26122 IX86_BUILTIN_PUNPCKLBW
,
26123 IX86_BUILTIN_PUNPCKLWD
,
26124 IX86_BUILTIN_PUNPCKLDQ
,
26126 IX86_BUILTIN_SHUFPS
,
26128 IX86_BUILTIN_RCPPS
,
26129 IX86_BUILTIN_RCPSS
,
26130 IX86_BUILTIN_RSQRTPS
,
26131 IX86_BUILTIN_RSQRTPS_NR
,
26132 IX86_BUILTIN_RSQRTSS
,
26133 IX86_BUILTIN_RSQRTF
,
26134 IX86_BUILTIN_SQRTPS
,
26135 IX86_BUILTIN_SQRTPS_NR
,
26136 IX86_BUILTIN_SQRTSS
,
26138 IX86_BUILTIN_UNPCKHPS
,
26139 IX86_BUILTIN_UNPCKLPS
,
26141 IX86_BUILTIN_ANDPS
,
26142 IX86_BUILTIN_ANDNPS
,
26144 IX86_BUILTIN_XORPS
,
26147 IX86_BUILTIN_LDMXCSR
,
26148 IX86_BUILTIN_STMXCSR
,
26149 IX86_BUILTIN_SFENCE
,
26151 IX86_BUILTIN_FXSAVE
,
26152 IX86_BUILTIN_FXRSTOR
,
26153 IX86_BUILTIN_FXSAVE64
,
26154 IX86_BUILTIN_FXRSTOR64
,
26156 IX86_BUILTIN_XSAVE
,
26157 IX86_BUILTIN_XRSTOR
,
26158 IX86_BUILTIN_XSAVE64
,
26159 IX86_BUILTIN_XRSTOR64
,
26161 IX86_BUILTIN_XSAVEOPT
,
26162 IX86_BUILTIN_XSAVEOPT64
,
26164 /* 3DNow! Original */
26165 IX86_BUILTIN_FEMMS
,
26166 IX86_BUILTIN_PAVGUSB
,
26167 IX86_BUILTIN_PF2ID
,
26168 IX86_BUILTIN_PFACC
,
26169 IX86_BUILTIN_PFADD
,
26170 IX86_BUILTIN_PFCMPEQ
,
26171 IX86_BUILTIN_PFCMPGE
,
26172 IX86_BUILTIN_PFCMPGT
,
26173 IX86_BUILTIN_PFMAX
,
26174 IX86_BUILTIN_PFMIN
,
26175 IX86_BUILTIN_PFMUL
,
26176 IX86_BUILTIN_PFRCP
,
26177 IX86_BUILTIN_PFRCPIT1
,
26178 IX86_BUILTIN_PFRCPIT2
,
26179 IX86_BUILTIN_PFRSQIT1
,
26180 IX86_BUILTIN_PFRSQRT
,
26181 IX86_BUILTIN_PFSUB
,
26182 IX86_BUILTIN_PFSUBR
,
26183 IX86_BUILTIN_PI2FD
,
26184 IX86_BUILTIN_PMULHRW
,
26186 /* 3DNow! Athlon Extensions */
26187 IX86_BUILTIN_PF2IW
,
26188 IX86_BUILTIN_PFNACC
,
26189 IX86_BUILTIN_PFPNACC
,
26190 IX86_BUILTIN_PI2FW
,
26191 IX86_BUILTIN_PSWAPDSI
,
26192 IX86_BUILTIN_PSWAPDSF
,
26195 IX86_BUILTIN_ADDPD
,
26196 IX86_BUILTIN_ADDSD
,
26197 IX86_BUILTIN_DIVPD
,
26198 IX86_BUILTIN_DIVSD
,
26199 IX86_BUILTIN_MULPD
,
26200 IX86_BUILTIN_MULSD
,
26201 IX86_BUILTIN_SUBPD
,
26202 IX86_BUILTIN_SUBSD
,
26204 IX86_BUILTIN_CMPEQPD
,
26205 IX86_BUILTIN_CMPLTPD
,
26206 IX86_BUILTIN_CMPLEPD
,
26207 IX86_BUILTIN_CMPGTPD
,
26208 IX86_BUILTIN_CMPGEPD
,
26209 IX86_BUILTIN_CMPNEQPD
,
26210 IX86_BUILTIN_CMPNLTPD
,
26211 IX86_BUILTIN_CMPNLEPD
,
26212 IX86_BUILTIN_CMPNGTPD
,
26213 IX86_BUILTIN_CMPNGEPD
,
26214 IX86_BUILTIN_CMPORDPD
,
26215 IX86_BUILTIN_CMPUNORDPD
,
26216 IX86_BUILTIN_CMPEQSD
,
26217 IX86_BUILTIN_CMPLTSD
,
26218 IX86_BUILTIN_CMPLESD
,
26219 IX86_BUILTIN_CMPNEQSD
,
26220 IX86_BUILTIN_CMPNLTSD
,
26221 IX86_BUILTIN_CMPNLESD
,
26222 IX86_BUILTIN_CMPORDSD
,
26223 IX86_BUILTIN_CMPUNORDSD
,
26225 IX86_BUILTIN_COMIEQSD
,
26226 IX86_BUILTIN_COMILTSD
,
26227 IX86_BUILTIN_COMILESD
,
26228 IX86_BUILTIN_COMIGTSD
,
26229 IX86_BUILTIN_COMIGESD
,
26230 IX86_BUILTIN_COMINEQSD
,
26231 IX86_BUILTIN_UCOMIEQSD
,
26232 IX86_BUILTIN_UCOMILTSD
,
26233 IX86_BUILTIN_UCOMILESD
,
26234 IX86_BUILTIN_UCOMIGTSD
,
26235 IX86_BUILTIN_UCOMIGESD
,
26236 IX86_BUILTIN_UCOMINEQSD
,
26238 IX86_BUILTIN_MAXPD
,
26239 IX86_BUILTIN_MAXSD
,
26240 IX86_BUILTIN_MINPD
,
26241 IX86_BUILTIN_MINSD
,
26243 IX86_BUILTIN_ANDPD
,
26244 IX86_BUILTIN_ANDNPD
,
26246 IX86_BUILTIN_XORPD
,
26248 IX86_BUILTIN_SQRTPD
,
26249 IX86_BUILTIN_SQRTSD
,
26251 IX86_BUILTIN_UNPCKHPD
,
26252 IX86_BUILTIN_UNPCKLPD
,
26254 IX86_BUILTIN_SHUFPD
,
26256 IX86_BUILTIN_LOADUPD
,
26257 IX86_BUILTIN_STOREUPD
,
26258 IX86_BUILTIN_MOVSD
,
26260 IX86_BUILTIN_LOADHPD
,
26261 IX86_BUILTIN_LOADLPD
,
26263 IX86_BUILTIN_CVTDQ2PD
,
26264 IX86_BUILTIN_CVTDQ2PS
,
26266 IX86_BUILTIN_CVTPD2DQ
,
26267 IX86_BUILTIN_CVTPD2PI
,
26268 IX86_BUILTIN_CVTPD2PS
,
26269 IX86_BUILTIN_CVTTPD2DQ
,
26270 IX86_BUILTIN_CVTTPD2PI
,
26272 IX86_BUILTIN_CVTPI2PD
,
26273 IX86_BUILTIN_CVTSI2SD
,
26274 IX86_BUILTIN_CVTSI642SD
,
26276 IX86_BUILTIN_CVTSD2SI
,
26277 IX86_BUILTIN_CVTSD2SI64
,
26278 IX86_BUILTIN_CVTSD2SS
,
26279 IX86_BUILTIN_CVTSS2SD
,
26280 IX86_BUILTIN_CVTTSD2SI
,
26281 IX86_BUILTIN_CVTTSD2SI64
,
26283 IX86_BUILTIN_CVTPS2DQ
,
26284 IX86_BUILTIN_CVTPS2PD
,
26285 IX86_BUILTIN_CVTTPS2DQ
,
26287 IX86_BUILTIN_MOVNTI
,
26288 IX86_BUILTIN_MOVNTI64
,
26289 IX86_BUILTIN_MOVNTPD
,
26290 IX86_BUILTIN_MOVNTDQ
,
26292 IX86_BUILTIN_MOVQ128
,
26295 IX86_BUILTIN_MASKMOVDQU
,
26296 IX86_BUILTIN_MOVMSKPD
,
26297 IX86_BUILTIN_PMOVMSKB128
,
26299 IX86_BUILTIN_PACKSSWB128
,
26300 IX86_BUILTIN_PACKSSDW128
,
26301 IX86_BUILTIN_PACKUSWB128
,
26303 IX86_BUILTIN_PADDB128
,
26304 IX86_BUILTIN_PADDW128
,
26305 IX86_BUILTIN_PADDD128
,
26306 IX86_BUILTIN_PADDQ128
,
26307 IX86_BUILTIN_PADDSB128
,
26308 IX86_BUILTIN_PADDSW128
,
26309 IX86_BUILTIN_PADDUSB128
,
26310 IX86_BUILTIN_PADDUSW128
,
26311 IX86_BUILTIN_PSUBB128
,
26312 IX86_BUILTIN_PSUBW128
,
26313 IX86_BUILTIN_PSUBD128
,
26314 IX86_BUILTIN_PSUBQ128
,
26315 IX86_BUILTIN_PSUBSB128
,
26316 IX86_BUILTIN_PSUBSW128
,
26317 IX86_BUILTIN_PSUBUSB128
,
26318 IX86_BUILTIN_PSUBUSW128
,
26320 IX86_BUILTIN_PAND128
,
26321 IX86_BUILTIN_PANDN128
,
26322 IX86_BUILTIN_POR128
,
26323 IX86_BUILTIN_PXOR128
,
26325 IX86_BUILTIN_PAVGB128
,
26326 IX86_BUILTIN_PAVGW128
,
26328 IX86_BUILTIN_PCMPEQB128
,
26329 IX86_BUILTIN_PCMPEQW128
,
26330 IX86_BUILTIN_PCMPEQD128
,
26331 IX86_BUILTIN_PCMPGTB128
,
26332 IX86_BUILTIN_PCMPGTW128
,
26333 IX86_BUILTIN_PCMPGTD128
,
26335 IX86_BUILTIN_PMADDWD128
,
26337 IX86_BUILTIN_PMAXSW128
,
26338 IX86_BUILTIN_PMAXUB128
,
26339 IX86_BUILTIN_PMINSW128
,
26340 IX86_BUILTIN_PMINUB128
,
26342 IX86_BUILTIN_PMULUDQ
,
26343 IX86_BUILTIN_PMULUDQ128
,
26344 IX86_BUILTIN_PMULHUW128
,
26345 IX86_BUILTIN_PMULHW128
,
26346 IX86_BUILTIN_PMULLW128
,
26348 IX86_BUILTIN_PSADBW128
,
26349 IX86_BUILTIN_PSHUFHW
,
26350 IX86_BUILTIN_PSHUFLW
,
26351 IX86_BUILTIN_PSHUFD
,
26353 IX86_BUILTIN_PSLLDQI128
,
26354 IX86_BUILTIN_PSLLWI128
,
26355 IX86_BUILTIN_PSLLDI128
,
26356 IX86_BUILTIN_PSLLQI128
,
26357 IX86_BUILTIN_PSRAWI128
,
26358 IX86_BUILTIN_PSRADI128
,
26359 IX86_BUILTIN_PSRLDQI128
,
26360 IX86_BUILTIN_PSRLWI128
,
26361 IX86_BUILTIN_PSRLDI128
,
26362 IX86_BUILTIN_PSRLQI128
,
26364 IX86_BUILTIN_PSLLDQ128
,
26365 IX86_BUILTIN_PSLLW128
,
26366 IX86_BUILTIN_PSLLD128
,
26367 IX86_BUILTIN_PSLLQ128
,
26368 IX86_BUILTIN_PSRAW128
,
26369 IX86_BUILTIN_PSRAD128
,
26370 IX86_BUILTIN_PSRLW128
,
26371 IX86_BUILTIN_PSRLD128
,
26372 IX86_BUILTIN_PSRLQ128
,
26374 IX86_BUILTIN_PUNPCKHBW128
,
26375 IX86_BUILTIN_PUNPCKHWD128
,
26376 IX86_BUILTIN_PUNPCKHDQ128
,
26377 IX86_BUILTIN_PUNPCKHQDQ128
,
26378 IX86_BUILTIN_PUNPCKLBW128
,
26379 IX86_BUILTIN_PUNPCKLWD128
,
26380 IX86_BUILTIN_PUNPCKLDQ128
,
26381 IX86_BUILTIN_PUNPCKLQDQ128
,
26383 IX86_BUILTIN_CLFLUSH
,
26384 IX86_BUILTIN_MFENCE
,
26385 IX86_BUILTIN_LFENCE
,
26386 IX86_BUILTIN_PAUSE
,
26388 IX86_BUILTIN_BSRSI
,
26389 IX86_BUILTIN_BSRDI
,
26390 IX86_BUILTIN_RDPMC
,
26391 IX86_BUILTIN_RDTSC
,
26392 IX86_BUILTIN_RDTSCP
,
26393 IX86_BUILTIN_ROLQI
,
26394 IX86_BUILTIN_ROLHI
,
26395 IX86_BUILTIN_RORQI
,
26396 IX86_BUILTIN_RORHI
,
26399 IX86_BUILTIN_ADDSUBPS
,
26400 IX86_BUILTIN_HADDPS
,
26401 IX86_BUILTIN_HSUBPS
,
26402 IX86_BUILTIN_MOVSHDUP
,
26403 IX86_BUILTIN_MOVSLDUP
,
26404 IX86_BUILTIN_ADDSUBPD
,
26405 IX86_BUILTIN_HADDPD
,
26406 IX86_BUILTIN_HSUBPD
,
26407 IX86_BUILTIN_LDDQU
,
26409 IX86_BUILTIN_MONITOR
,
26410 IX86_BUILTIN_MWAIT
,
26413 IX86_BUILTIN_PHADDW
,
26414 IX86_BUILTIN_PHADDD
,
26415 IX86_BUILTIN_PHADDSW
,
26416 IX86_BUILTIN_PHSUBW
,
26417 IX86_BUILTIN_PHSUBD
,
26418 IX86_BUILTIN_PHSUBSW
,
26419 IX86_BUILTIN_PMADDUBSW
,
26420 IX86_BUILTIN_PMULHRSW
,
26421 IX86_BUILTIN_PSHUFB
,
26422 IX86_BUILTIN_PSIGNB
,
26423 IX86_BUILTIN_PSIGNW
,
26424 IX86_BUILTIN_PSIGND
,
26425 IX86_BUILTIN_PALIGNR
,
26426 IX86_BUILTIN_PABSB
,
26427 IX86_BUILTIN_PABSW
,
26428 IX86_BUILTIN_PABSD
,
26430 IX86_BUILTIN_PHADDW128
,
26431 IX86_BUILTIN_PHADDD128
,
26432 IX86_BUILTIN_PHADDSW128
,
26433 IX86_BUILTIN_PHSUBW128
,
26434 IX86_BUILTIN_PHSUBD128
,
26435 IX86_BUILTIN_PHSUBSW128
,
26436 IX86_BUILTIN_PMADDUBSW128
,
26437 IX86_BUILTIN_PMULHRSW128
,
26438 IX86_BUILTIN_PSHUFB128
,
26439 IX86_BUILTIN_PSIGNB128
,
26440 IX86_BUILTIN_PSIGNW128
,
26441 IX86_BUILTIN_PSIGND128
,
26442 IX86_BUILTIN_PALIGNR128
,
26443 IX86_BUILTIN_PABSB128
,
26444 IX86_BUILTIN_PABSW128
,
26445 IX86_BUILTIN_PABSD128
,
26447 /* AMDFAM10 - SSE4A New Instructions. */
26448 IX86_BUILTIN_MOVNTSD
,
26449 IX86_BUILTIN_MOVNTSS
,
26450 IX86_BUILTIN_EXTRQI
,
26451 IX86_BUILTIN_EXTRQ
,
26452 IX86_BUILTIN_INSERTQI
,
26453 IX86_BUILTIN_INSERTQ
,
26456 IX86_BUILTIN_BLENDPD
,
26457 IX86_BUILTIN_BLENDPS
,
26458 IX86_BUILTIN_BLENDVPD
,
26459 IX86_BUILTIN_BLENDVPS
,
26460 IX86_BUILTIN_PBLENDVB128
,
26461 IX86_BUILTIN_PBLENDW128
,
26466 IX86_BUILTIN_INSERTPS128
,
26468 IX86_BUILTIN_MOVNTDQA
,
26469 IX86_BUILTIN_MPSADBW128
,
26470 IX86_BUILTIN_PACKUSDW128
,
26471 IX86_BUILTIN_PCMPEQQ
,
26472 IX86_BUILTIN_PHMINPOSUW128
,
26474 IX86_BUILTIN_PMAXSB128
,
26475 IX86_BUILTIN_PMAXSD128
,
26476 IX86_BUILTIN_PMAXUD128
,
26477 IX86_BUILTIN_PMAXUW128
,
26479 IX86_BUILTIN_PMINSB128
,
26480 IX86_BUILTIN_PMINSD128
,
26481 IX86_BUILTIN_PMINUD128
,
26482 IX86_BUILTIN_PMINUW128
,
26484 IX86_BUILTIN_PMOVSXBW128
,
26485 IX86_BUILTIN_PMOVSXBD128
,
26486 IX86_BUILTIN_PMOVSXBQ128
,
26487 IX86_BUILTIN_PMOVSXWD128
,
26488 IX86_BUILTIN_PMOVSXWQ128
,
26489 IX86_BUILTIN_PMOVSXDQ128
,
26491 IX86_BUILTIN_PMOVZXBW128
,
26492 IX86_BUILTIN_PMOVZXBD128
,
26493 IX86_BUILTIN_PMOVZXBQ128
,
26494 IX86_BUILTIN_PMOVZXWD128
,
26495 IX86_BUILTIN_PMOVZXWQ128
,
26496 IX86_BUILTIN_PMOVZXDQ128
,
26498 IX86_BUILTIN_PMULDQ128
,
26499 IX86_BUILTIN_PMULLD128
,
26501 IX86_BUILTIN_ROUNDSD
,
26502 IX86_BUILTIN_ROUNDSS
,
26504 IX86_BUILTIN_ROUNDPD
,
26505 IX86_BUILTIN_ROUNDPS
,
26507 IX86_BUILTIN_FLOORPD
,
26508 IX86_BUILTIN_CEILPD
,
26509 IX86_BUILTIN_TRUNCPD
,
26510 IX86_BUILTIN_RINTPD
,
26511 IX86_BUILTIN_ROUNDPD_AZ
,
26513 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26514 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26515 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26517 IX86_BUILTIN_FLOORPS
,
26518 IX86_BUILTIN_CEILPS
,
26519 IX86_BUILTIN_TRUNCPS
,
26520 IX86_BUILTIN_RINTPS
,
26521 IX86_BUILTIN_ROUNDPS_AZ
,
26523 IX86_BUILTIN_FLOORPS_SFIX
,
26524 IX86_BUILTIN_CEILPS_SFIX
,
26525 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26527 IX86_BUILTIN_PTESTZ
,
26528 IX86_BUILTIN_PTESTC
,
26529 IX86_BUILTIN_PTESTNZC
,
26531 IX86_BUILTIN_VEC_INIT_V2SI
,
26532 IX86_BUILTIN_VEC_INIT_V4HI
,
26533 IX86_BUILTIN_VEC_INIT_V8QI
,
26534 IX86_BUILTIN_VEC_EXT_V2DF
,
26535 IX86_BUILTIN_VEC_EXT_V2DI
,
26536 IX86_BUILTIN_VEC_EXT_V4SF
,
26537 IX86_BUILTIN_VEC_EXT_V4SI
,
26538 IX86_BUILTIN_VEC_EXT_V8HI
,
26539 IX86_BUILTIN_VEC_EXT_V2SI
,
26540 IX86_BUILTIN_VEC_EXT_V4HI
,
26541 IX86_BUILTIN_VEC_EXT_V16QI
,
26542 IX86_BUILTIN_VEC_SET_V2DI
,
26543 IX86_BUILTIN_VEC_SET_V4SF
,
26544 IX86_BUILTIN_VEC_SET_V4SI
,
26545 IX86_BUILTIN_VEC_SET_V8HI
,
26546 IX86_BUILTIN_VEC_SET_V4HI
,
26547 IX86_BUILTIN_VEC_SET_V16QI
,
26549 IX86_BUILTIN_VEC_PACK_SFIX
,
26550 IX86_BUILTIN_VEC_PACK_SFIX256
,
26553 IX86_BUILTIN_CRC32QI
,
26554 IX86_BUILTIN_CRC32HI
,
26555 IX86_BUILTIN_CRC32SI
,
26556 IX86_BUILTIN_CRC32DI
,
26558 IX86_BUILTIN_PCMPESTRI128
,
26559 IX86_BUILTIN_PCMPESTRM128
,
26560 IX86_BUILTIN_PCMPESTRA128
,
26561 IX86_BUILTIN_PCMPESTRC128
,
26562 IX86_BUILTIN_PCMPESTRO128
,
26563 IX86_BUILTIN_PCMPESTRS128
,
26564 IX86_BUILTIN_PCMPESTRZ128
,
26565 IX86_BUILTIN_PCMPISTRI128
,
26566 IX86_BUILTIN_PCMPISTRM128
,
26567 IX86_BUILTIN_PCMPISTRA128
,
26568 IX86_BUILTIN_PCMPISTRC128
,
26569 IX86_BUILTIN_PCMPISTRO128
,
26570 IX86_BUILTIN_PCMPISTRS128
,
26571 IX86_BUILTIN_PCMPISTRZ128
,
26573 IX86_BUILTIN_PCMPGTQ
,
26575 /* AES instructions */
26576 IX86_BUILTIN_AESENC128
,
26577 IX86_BUILTIN_AESENCLAST128
,
26578 IX86_BUILTIN_AESDEC128
,
26579 IX86_BUILTIN_AESDECLAST128
,
26580 IX86_BUILTIN_AESIMC128
,
26581 IX86_BUILTIN_AESKEYGENASSIST128
,
26583 /* PCLMUL instruction */
26584 IX86_BUILTIN_PCLMULQDQ128
,
26587 IX86_BUILTIN_ADDPD256
,
26588 IX86_BUILTIN_ADDPS256
,
26589 IX86_BUILTIN_ADDSUBPD256
,
26590 IX86_BUILTIN_ADDSUBPS256
,
26591 IX86_BUILTIN_ANDPD256
,
26592 IX86_BUILTIN_ANDPS256
,
26593 IX86_BUILTIN_ANDNPD256
,
26594 IX86_BUILTIN_ANDNPS256
,
26595 IX86_BUILTIN_BLENDPD256
,
26596 IX86_BUILTIN_BLENDPS256
,
26597 IX86_BUILTIN_BLENDVPD256
,
26598 IX86_BUILTIN_BLENDVPS256
,
26599 IX86_BUILTIN_DIVPD256
,
26600 IX86_BUILTIN_DIVPS256
,
26601 IX86_BUILTIN_DPPS256
,
26602 IX86_BUILTIN_HADDPD256
,
26603 IX86_BUILTIN_HADDPS256
,
26604 IX86_BUILTIN_HSUBPD256
,
26605 IX86_BUILTIN_HSUBPS256
,
26606 IX86_BUILTIN_MAXPD256
,
26607 IX86_BUILTIN_MAXPS256
,
26608 IX86_BUILTIN_MINPD256
,
26609 IX86_BUILTIN_MINPS256
,
26610 IX86_BUILTIN_MULPD256
,
26611 IX86_BUILTIN_MULPS256
,
26612 IX86_BUILTIN_ORPD256
,
26613 IX86_BUILTIN_ORPS256
,
26614 IX86_BUILTIN_SHUFPD256
,
26615 IX86_BUILTIN_SHUFPS256
,
26616 IX86_BUILTIN_SUBPD256
,
26617 IX86_BUILTIN_SUBPS256
,
26618 IX86_BUILTIN_XORPD256
,
26619 IX86_BUILTIN_XORPS256
,
26620 IX86_BUILTIN_CMPSD
,
26621 IX86_BUILTIN_CMPSS
,
26622 IX86_BUILTIN_CMPPD
,
26623 IX86_BUILTIN_CMPPS
,
26624 IX86_BUILTIN_CMPPD256
,
26625 IX86_BUILTIN_CMPPS256
,
26626 IX86_BUILTIN_CVTDQ2PD256
,
26627 IX86_BUILTIN_CVTDQ2PS256
,
26628 IX86_BUILTIN_CVTPD2PS256
,
26629 IX86_BUILTIN_CVTPS2DQ256
,
26630 IX86_BUILTIN_CVTPS2PD256
,
26631 IX86_BUILTIN_CVTTPD2DQ256
,
26632 IX86_BUILTIN_CVTPD2DQ256
,
26633 IX86_BUILTIN_CVTTPS2DQ256
,
26634 IX86_BUILTIN_EXTRACTF128PD256
,
26635 IX86_BUILTIN_EXTRACTF128PS256
,
26636 IX86_BUILTIN_EXTRACTF128SI256
,
26637 IX86_BUILTIN_VZEROALL
,
26638 IX86_BUILTIN_VZEROUPPER
,
26639 IX86_BUILTIN_VPERMILVARPD
,
26640 IX86_BUILTIN_VPERMILVARPS
,
26641 IX86_BUILTIN_VPERMILVARPD256
,
26642 IX86_BUILTIN_VPERMILVARPS256
,
26643 IX86_BUILTIN_VPERMILPD
,
26644 IX86_BUILTIN_VPERMILPS
,
26645 IX86_BUILTIN_VPERMILPD256
,
26646 IX86_BUILTIN_VPERMILPS256
,
26647 IX86_BUILTIN_VPERMIL2PD
,
26648 IX86_BUILTIN_VPERMIL2PS
,
26649 IX86_BUILTIN_VPERMIL2PD256
,
26650 IX86_BUILTIN_VPERMIL2PS256
,
26651 IX86_BUILTIN_VPERM2F128PD256
,
26652 IX86_BUILTIN_VPERM2F128PS256
,
26653 IX86_BUILTIN_VPERM2F128SI256
,
26654 IX86_BUILTIN_VBROADCASTSS
,
26655 IX86_BUILTIN_VBROADCASTSD256
,
26656 IX86_BUILTIN_VBROADCASTSS256
,
26657 IX86_BUILTIN_VBROADCASTPD256
,
26658 IX86_BUILTIN_VBROADCASTPS256
,
26659 IX86_BUILTIN_VINSERTF128PD256
,
26660 IX86_BUILTIN_VINSERTF128PS256
,
26661 IX86_BUILTIN_VINSERTF128SI256
,
26662 IX86_BUILTIN_LOADUPD256
,
26663 IX86_BUILTIN_LOADUPS256
,
26664 IX86_BUILTIN_STOREUPD256
,
26665 IX86_BUILTIN_STOREUPS256
,
26666 IX86_BUILTIN_LDDQU256
,
26667 IX86_BUILTIN_MOVNTDQ256
,
26668 IX86_BUILTIN_MOVNTPD256
,
26669 IX86_BUILTIN_MOVNTPS256
,
26670 IX86_BUILTIN_LOADDQU256
,
26671 IX86_BUILTIN_STOREDQU256
,
26672 IX86_BUILTIN_MASKLOADPD
,
26673 IX86_BUILTIN_MASKLOADPS
,
26674 IX86_BUILTIN_MASKSTOREPD
,
26675 IX86_BUILTIN_MASKSTOREPS
,
26676 IX86_BUILTIN_MASKLOADPD256
,
26677 IX86_BUILTIN_MASKLOADPS256
,
26678 IX86_BUILTIN_MASKSTOREPD256
,
26679 IX86_BUILTIN_MASKSTOREPS256
,
26680 IX86_BUILTIN_MOVSHDUP256
,
26681 IX86_BUILTIN_MOVSLDUP256
,
26682 IX86_BUILTIN_MOVDDUP256
,
26684 IX86_BUILTIN_SQRTPD256
,
26685 IX86_BUILTIN_SQRTPS256
,
26686 IX86_BUILTIN_SQRTPS_NR256
,
26687 IX86_BUILTIN_RSQRTPS256
,
26688 IX86_BUILTIN_RSQRTPS_NR256
,
26690 IX86_BUILTIN_RCPPS256
,
26692 IX86_BUILTIN_ROUNDPD256
,
26693 IX86_BUILTIN_ROUNDPS256
,
26695 IX86_BUILTIN_FLOORPD256
,
26696 IX86_BUILTIN_CEILPD256
,
26697 IX86_BUILTIN_TRUNCPD256
,
26698 IX86_BUILTIN_RINTPD256
,
26699 IX86_BUILTIN_ROUNDPD_AZ256
,
26701 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26702 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26703 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26705 IX86_BUILTIN_FLOORPS256
,
26706 IX86_BUILTIN_CEILPS256
,
26707 IX86_BUILTIN_TRUNCPS256
,
26708 IX86_BUILTIN_RINTPS256
,
26709 IX86_BUILTIN_ROUNDPS_AZ256
,
26711 IX86_BUILTIN_FLOORPS_SFIX256
,
26712 IX86_BUILTIN_CEILPS_SFIX256
,
26713 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26715 IX86_BUILTIN_UNPCKHPD256
,
26716 IX86_BUILTIN_UNPCKLPD256
,
26717 IX86_BUILTIN_UNPCKHPS256
,
26718 IX86_BUILTIN_UNPCKLPS256
,
26720 IX86_BUILTIN_SI256_SI
,
26721 IX86_BUILTIN_PS256_PS
,
26722 IX86_BUILTIN_PD256_PD
,
26723 IX86_BUILTIN_SI_SI256
,
26724 IX86_BUILTIN_PS_PS256
,
26725 IX86_BUILTIN_PD_PD256
,
26727 IX86_BUILTIN_VTESTZPD
,
26728 IX86_BUILTIN_VTESTCPD
,
26729 IX86_BUILTIN_VTESTNZCPD
,
26730 IX86_BUILTIN_VTESTZPS
,
26731 IX86_BUILTIN_VTESTCPS
,
26732 IX86_BUILTIN_VTESTNZCPS
,
26733 IX86_BUILTIN_VTESTZPD256
,
26734 IX86_BUILTIN_VTESTCPD256
,
26735 IX86_BUILTIN_VTESTNZCPD256
,
26736 IX86_BUILTIN_VTESTZPS256
,
26737 IX86_BUILTIN_VTESTCPS256
,
26738 IX86_BUILTIN_VTESTNZCPS256
,
26739 IX86_BUILTIN_PTESTZ256
,
26740 IX86_BUILTIN_PTESTC256
,
26741 IX86_BUILTIN_PTESTNZC256
,
26743 IX86_BUILTIN_MOVMSKPD256
,
26744 IX86_BUILTIN_MOVMSKPS256
,
26747 IX86_BUILTIN_MPSADBW256
,
26748 IX86_BUILTIN_PABSB256
,
26749 IX86_BUILTIN_PABSW256
,
26750 IX86_BUILTIN_PABSD256
,
26751 IX86_BUILTIN_PACKSSDW256
,
26752 IX86_BUILTIN_PACKSSWB256
,
26753 IX86_BUILTIN_PACKUSDW256
,
26754 IX86_BUILTIN_PACKUSWB256
,
26755 IX86_BUILTIN_PADDB256
,
26756 IX86_BUILTIN_PADDW256
,
26757 IX86_BUILTIN_PADDD256
,
26758 IX86_BUILTIN_PADDQ256
,
26759 IX86_BUILTIN_PADDSB256
,
26760 IX86_BUILTIN_PADDSW256
,
26761 IX86_BUILTIN_PADDUSB256
,
26762 IX86_BUILTIN_PADDUSW256
,
26763 IX86_BUILTIN_PALIGNR256
,
26764 IX86_BUILTIN_AND256I
,
26765 IX86_BUILTIN_ANDNOT256I
,
26766 IX86_BUILTIN_PAVGB256
,
26767 IX86_BUILTIN_PAVGW256
,
26768 IX86_BUILTIN_PBLENDVB256
,
26769 IX86_BUILTIN_PBLENDVW256
,
26770 IX86_BUILTIN_PCMPEQB256
,
26771 IX86_BUILTIN_PCMPEQW256
,
26772 IX86_BUILTIN_PCMPEQD256
,
26773 IX86_BUILTIN_PCMPEQQ256
,
26774 IX86_BUILTIN_PCMPGTB256
,
26775 IX86_BUILTIN_PCMPGTW256
,
26776 IX86_BUILTIN_PCMPGTD256
,
26777 IX86_BUILTIN_PCMPGTQ256
,
26778 IX86_BUILTIN_PHADDW256
,
26779 IX86_BUILTIN_PHADDD256
,
26780 IX86_BUILTIN_PHADDSW256
,
26781 IX86_BUILTIN_PHSUBW256
,
26782 IX86_BUILTIN_PHSUBD256
,
26783 IX86_BUILTIN_PHSUBSW256
,
26784 IX86_BUILTIN_PMADDUBSW256
,
26785 IX86_BUILTIN_PMADDWD256
,
26786 IX86_BUILTIN_PMAXSB256
,
26787 IX86_BUILTIN_PMAXSW256
,
26788 IX86_BUILTIN_PMAXSD256
,
26789 IX86_BUILTIN_PMAXUB256
,
26790 IX86_BUILTIN_PMAXUW256
,
26791 IX86_BUILTIN_PMAXUD256
,
26792 IX86_BUILTIN_PMINSB256
,
26793 IX86_BUILTIN_PMINSW256
,
26794 IX86_BUILTIN_PMINSD256
,
26795 IX86_BUILTIN_PMINUB256
,
26796 IX86_BUILTIN_PMINUW256
,
26797 IX86_BUILTIN_PMINUD256
,
26798 IX86_BUILTIN_PMOVMSKB256
,
26799 IX86_BUILTIN_PMOVSXBW256
,
26800 IX86_BUILTIN_PMOVSXBD256
,
26801 IX86_BUILTIN_PMOVSXBQ256
,
26802 IX86_BUILTIN_PMOVSXWD256
,
26803 IX86_BUILTIN_PMOVSXWQ256
,
26804 IX86_BUILTIN_PMOVSXDQ256
,
26805 IX86_BUILTIN_PMOVZXBW256
,
26806 IX86_BUILTIN_PMOVZXBD256
,
26807 IX86_BUILTIN_PMOVZXBQ256
,
26808 IX86_BUILTIN_PMOVZXWD256
,
26809 IX86_BUILTIN_PMOVZXWQ256
,
26810 IX86_BUILTIN_PMOVZXDQ256
,
26811 IX86_BUILTIN_PMULDQ256
,
26812 IX86_BUILTIN_PMULHRSW256
,
26813 IX86_BUILTIN_PMULHUW256
,
26814 IX86_BUILTIN_PMULHW256
,
26815 IX86_BUILTIN_PMULLW256
,
26816 IX86_BUILTIN_PMULLD256
,
26817 IX86_BUILTIN_PMULUDQ256
,
26818 IX86_BUILTIN_POR256
,
26819 IX86_BUILTIN_PSADBW256
,
26820 IX86_BUILTIN_PSHUFB256
,
26821 IX86_BUILTIN_PSHUFD256
,
26822 IX86_BUILTIN_PSHUFHW256
,
26823 IX86_BUILTIN_PSHUFLW256
,
26824 IX86_BUILTIN_PSIGNB256
,
26825 IX86_BUILTIN_PSIGNW256
,
26826 IX86_BUILTIN_PSIGND256
,
26827 IX86_BUILTIN_PSLLDQI256
,
26828 IX86_BUILTIN_PSLLWI256
,
26829 IX86_BUILTIN_PSLLW256
,
26830 IX86_BUILTIN_PSLLDI256
,
26831 IX86_BUILTIN_PSLLD256
,
26832 IX86_BUILTIN_PSLLQI256
,
26833 IX86_BUILTIN_PSLLQ256
,
26834 IX86_BUILTIN_PSRAWI256
,
26835 IX86_BUILTIN_PSRAW256
,
26836 IX86_BUILTIN_PSRADI256
,
26837 IX86_BUILTIN_PSRAD256
,
26838 IX86_BUILTIN_PSRLDQI256
,
26839 IX86_BUILTIN_PSRLWI256
,
26840 IX86_BUILTIN_PSRLW256
,
26841 IX86_BUILTIN_PSRLDI256
,
26842 IX86_BUILTIN_PSRLD256
,
26843 IX86_BUILTIN_PSRLQI256
,
26844 IX86_BUILTIN_PSRLQ256
,
26845 IX86_BUILTIN_PSUBB256
,
26846 IX86_BUILTIN_PSUBW256
,
26847 IX86_BUILTIN_PSUBD256
,
26848 IX86_BUILTIN_PSUBQ256
,
26849 IX86_BUILTIN_PSUBSB256
,
26850 IX86_BUILTIN_PSUBSW256
,
26851 IX86_BUILTIN_PSUBUSB256
,
26852 IX86_BUILTIN_PSUBUSW256
,
26853 IX86_BUILTIN_PUNPCKHBW256
,
26854 IX86_BUILTIN_PUNPCKHWD256
,
26855 IX86_BUILTIN_PUNPCKHDQ256
,
26856 IX86_BUILTIN_PUNPCKHQDQ256
,
26857 IX86_BUILTIN_PUNPCKLBW256
,
26858 IX86_BUILTIN_PUNPCKLWD256
,
26859 IX86_BUILTIN_PUNPCKLDQ256
,
26860 IX86_BUILTIN_PUNPCKLQDQ256
,
26861 IX86_BUILTIN_PXOR256
,
26862 IX86_BUILTIN_MOVNTDQA256
,
26863 IX86_BUILTIN_VBROADCASTSS_PS
,
26864 IX86_BUILTIN_VBROADCASTSS_PS256
,
26865 IX86_BUILTIN_VBROADCASTSD_PD256
,
26866 IX86_BUILTIN_VBROADCASTSI256
,
26867 IX86_BUILTIN_PBLENDD256
,
26868 IX86_BUILTIN_PBLENDD128
,
26869 IX86_BUILTIN_PBROADCASTB256
,
26870 IX86_BUILTIN_PBROADCASTW256
,
26871 IX86_BUILTIN_PBROADCASTD256
,
26872 IX86_BUILTIN_PBROADCASTQ256
,
26873 IX86_BUILTIN_PBROADCASTB128
,
26874 IX86_BUILTIN_PBROADCASTW128
,
26875 IX86_BUILTIN_PBROADCASTD128
,
26876 IX86_BUILTIN_PBROADCASTQ128
,
26877 IX86_BUILTIN_VPERMVARSI256
,
26878 IX86_BUILTIN_VPERMDF256
,
26879 IX86_BUILTIN_VPERMVARSF256
,
26880 IX86_BUILTIN_VPERMDI256
,
26881 IX86_BUILTIN_VPERMTI256
,
26882 IX86_BUILTIN_VEXTRACT128I256
,
26883 IX86_BUILTIN_VINSERT128I256
,
26884 IX86_BUILTIN_MASKLOADD
,
26885 IX86_BUILTIN_MASKLOADQ
,
26886 IX86_BUILTIN_MASKLOADD256
,
26887 IX86_BUILTIN_MASKLOADQ256
,
26888 IX86_BUILTIN_MASKSTORED
,
26889 IX86_BUILTIN_MASKSTOREQ
,
26890 IX86_BUILTIN_MASKSTORED256
,
26891 IX86_BUILTIN_MASKSTOREQ256
,
26892 IX86_BUILTIN_PSLLVV4DI
,
26893 IX86_BUILTIN_PSLLVV2DI
,
26894 IX86_BUILTIN_PSLLVV8SI
,
26895 IX86_BUILTIN_PSLLVV4SI
,
26896 IX86_BUILTIN_PSRAVV8SI
,
26897 IX86_BUILTIN_PSRAVV4SI
,
26898 IX86_BUILTIN_PSRLVV4DI
,
26899 IX86_BUILTIN_PSRLVV2DI
,
26900 IX86_BUILTIN_PSRLVV8SI
,
26901 IX86_BUILTIN_PSRLVV4SI
,
26903 IX86_BUILTIN_GATHERSIV2DF
,
26904 IX86_BUILTIN_GATHERSIV4DF
,
26905 IX86_BUILTIN_GATHERDIV2DF
,
26906 IX86_BUILTIN_GATHERDIV4DF
,
26907 IX86_BUILTIN_GATHERSIV4SF
,
26908 IX86_BUILTIN_GATHERSIV8SF
,
26909 IX86_BUILTIN_GATHERDIV4SF
,
26910 IX86_BUILTIN_GATHERDIV8SF
,
26911 IX86_BUILTIN_GATHERSIV2DI
,
26912 IX86_BUILTIN_GATHERSIV4DI
,
26913 IX86_BUILTIN_GATHERDIV2DI
,
26914 IX86_BUILTIN_GATHERDIV4DI
,
26915 IX86_BUILTIN_GATHERSIV4SI
,
26916 IX86_BUILTIN_GATHERSIV8SI
,
26917 IX86_BUILTIN_GATHERDIV4SI
,
26918 IX86_BUILTIN_GATHERDIV8SI
,
26920 /* Alternate 4 element gather for the vectorizer where
26921 all operands are 32-byte wide. */
26922 IX86_BUILTIN_GATHERALTSIV4DF
,
26923 IX86_BUILTIN_GATHERALTDIV8SF
,
26924 IX86_BUILTIN_GATHERALTSIV4DI
,
26925 IX86_BUILTIN_GATHERALTDIV8SI
,
26927 /* TFmode support builtins. */
26929 IX86_BUILTIN_HUGE_VALQ
,
26930 IX86_BUILTIN_FABSQ
,
26931 IX86_BUILTIN_COPYSIGNQ
,
26933 /* Vectorizer support builtins. */
26934 IX86_BUILTIN_CPYSGNPS
,
26935 IX86_BUILTIN_CPYSGNPD
,
26936 IX86_BUILTIN_CPYSGNPS256
,
26937 IX86_BUILTIN_CPYSGNPD256
,
26939 /* FMA4 instructions. */
26940 IX86_BUILTIN_VFMADDSS
,
26941 IX86_BUILTIN_VFMADDSD
,
26942 IX86_BUILTIN_VFMADDPS
,
26943 IX86_BUILTIN_VFMADDPD
,
26944 IX86_BUILTIN_VFMADDPS256
,
26945 IX86_BUILTIN_VFMADDPD256
,
26946 IX86_BUILTIN_VFMADDSUBPS
,
26947 IX86_BUILTIN_VFMADDSUBPD
,
26948 IX86_BUILTIN_VFMADDSUBPS256
,
26949 IX86_BUILTIN_VFMADDSUBPD256
,
26951 /* FMA3 instructions. */
26952 IX86_BUILTIN_VFMADDSS3
,
26953 IX86_BUILTIN_VFMADDSD3
,
26955 /* XOP instructions. */
26956 IX86_BUILTIN_VPCMOV
,
26957 IX86_BUILTIN_VPCMOV_V2DI
,
26958 IX86_BUILTIN_VPCMOV_V4SI
,
26959 IX86_BUILTIN_VPCMOV_V8HI
,
26960 IX86_BUILTIN_VPCMOV_V16QI
,
26961 IX86_BUILTIN_VPCMOV_V4SF
,
26962 IX86_BUILTIN_VPCMOV_V2DF
,
26963 IX86_BUILTIN_VPCMOV256
,
26964 IX86_BUILTIN_VPCMOV_V4DI256
,
26965 IX86_BUILTIN_VPCMOV_V8SI256
,
26966 IX86_BUILTIN_VPCMOV_V16HI256
,
26967 IX86_BUILTIN_VPCMOV_V32QI256
,
26968 IX86_BUILTIN_VPCMOV_V8SF256
,
26969 IX86_BUILTIN_VPCMOV_V4DF256
,
26971 IX86_BUILTIN_VPPERM
,
26973 IX86_BUILTIN_VPMACSSWW
,
26974 IX86_BUILTIN_VPMACSWW
,
26975 IX86_BUILTIN_VPMACSSWD
,
26976 IX86_BUILTIN_VPMACSWD
,
26977 IX86_BUILTIN_VPMACSSDD
,
26978 IX86_BUILTIN_VPMACSDD
,
26979 IX86_BUILTIN_VPMACSSDQL
,
26980 IX86_BUILTIN_VPMACSSDQH
,
26981 IX86_BUILTIN_VPMACSDQL
,
26982 IX86_BUILTIN_VPMACSDQH
,
26983 IX86_BUILTIN_VPMADCSSWD
,
26984 IX86_BUILTIN_VPMADCSWD
,
26986 IX86_BUILTIN_VPHADDBW
,
26987 IX86_BUILTIN_VPHADDBD
,
26988 IX86_BUILTIN_VPHADDBQ
,
26989 IX86_BUILTIN_VPHADDWD
,
26990 IX86_BUILTIN_VPHADDWQ
,
26991 IX86_BUILTIN_VPHADDDQ
,
26992 IX86_BUILTIN_VPHADDUBW
,
26993 IX86_BUILTIN_VPHADDUBD
,
26994 IX86_BUILTIN_VPHADDUBQ
,
26995 IX86_BUILTIN_VPHADDUWD
,
26996 IX86_BUILTIN_VPHADDUWQ
,
26997 IX86_BUILTIN_VPHADDUDQ
,
26998 IX86_BUILTIN_VPHSUBBW
,
26999 IX86_BUILTIN_VPHSUBWD
,
27000 IX86_BUILTIN_VPHSUBDQ
,
27002 IX86_BUILTIN_VPROTB
,
27003 IX86_BUILTIN_VPROTW
,
27004 IX86_BUILTIN_VPROTD
,
27005 IX86_BUILTIN_VPROTQ
,
27006 IX86_BUILTIN_VPROTB_IMM
,
27007 IX86_BUILTIN_VPROTW_IMM
,
27008 IX86_BUILTIN_VPROTD_IMM
,
27009 IX86_BUILTIN_VPROTQ_IMM
,
27011 IX86_BUILTIN_VPSHLB
,
27012 IX86_BUILTIN_VPSHLW
,
27013 IX86_BUILTIN_VPSHLD
,
27014 IX86_BUILTIN_VPSHLQ
,
27015 IX86_BUILTIN_VPSHAB
,
27016 IX86_BUILTIN_VPSHAW
,
27017 IX86_BUILTIN_VPSHAD
,
27018 IX86_BUILTIN_VPSHAQ
,
27020 IX86_BUILTIN_VFRCZSS
,
27021 IX86_BUILTIN_VFRCZSD
,
27022 IX86_BUILTIN_VFRCZPS
,
27023 IX86_BUILTIN_VFRCZPD
,
27024 IX86_BUILTIN_VFRCZPS256
,
27025 IX86_BUILTIN_VFRCZPD256
,
27027 IX86_BUILTIN_VPCOMEQUB
,
27028 IX86_BUILTIN_VPCOMNEUB
,
27029 IX86_BUILTIN_VPCOMLTUB
,
27030 IX86_BUILTIN_VPCOMLEUB
,
27031 IX86_BUILTIN_VPCOMGTUB
,
27032 IX86_BUILTIN_VPCOMGEUB
,
27033 IX86_BUILTIN_VPCOMFALSEUB
,
27034 IX86_BUILTIN_VPCOMTRUEUB
,
27036 IX86_BUILTIN_VPCOMEQUW
,
27037 IX86_BUILTIN_VPCOMNEUW
,
27038 IX86_BUILTIN_VPCOMLTUW
,
27039 IX86_BUILTIN_VPCOMLEUW
,
27040 IX86_BUILTIN_VPCOMGTUW
,
27041 IX86_BUILTIN_VPCOMGEUW
,
27042 IX86_BUILTIN_VPCOMFALSEUW
,
27043 IX86_BUILTIN_VPCOMTRUEUW
,
27045 IX86_BUILTIN_VPCOMEQUD
,
27046 IX86_BUILTIN_VPCOMNEUD
,
27047 IX86_BUILTIN_VPCOMLTUD
,
27048 IX86_BUILTIN_VPCOMLEUD
,
27049 IX86_BUILTIN_VPCOMGTUD
,
27050 IX86_BUILTIN_VPCOMGEUD
,
27051 IX86_BUILTIN_VPCOMFALSEUD
,
27052 IX86_BUILTIN_VPCOMTRUEUD
,
27054 IX86_BUILTIN_VPCOMEQUQ
,
27055 IX86_BUILTIN_VPCOMNEUQ
,
27056 IX86_BUILTIN_VPCOMLTUQ
,
27057 IX86_BUILTIN_VPCOMLEUQ
,
27058 IX86_BUILTIN_VPCOMGTUQ
,
27059 IX86_BUILTIN_VPCOMGEUQ
,
27060 IX86_BUILTIN_VPCOMFALSEUQ
,
27061 IX86_BUILTIN_VPCOMTRUEUQ
,
27063 IX86_BUILTIN_VPCOMEQB
,
27064 IX86_BUILTIN_VPCOMNEB
,
27065 IX86_BUILTIN_VPCOMLTB
,
27066 IX86_BUILTIN_VPCOMLEB
,
27067 IX86_BUILTIN_VPCOMGTB
,
27068 IX86_BUILTIN_VPCOMGEB
,
27069 IX86_BUILTIN_VPCOMFALSEB
,
27070 IX86_BUILTIN_VPCOMTRUEB
,
27072 IX86_BUILTIN_VPCOMEQW
,
27073 IX86_BUILTIN_VPCOMNEW
,
27074 IX86_BUILTIN_VPCOMLTW
,
27075 IX86_BUILTIN_VPCOMLEW
,
27076 IX86_BUILTIN_VPCOMGTW
,
27077 IX86_BUILTIN_VPCOMGEW
,
27078 IX86_BUILTIN_VPCOMFALSEW
,
27079 IX86_BUILTIN_VPCOMTRUEW
,
27081 IX86_BUILTIN_VPCOMEQD
,
27082 IX86_BUILTIN_VPCOMNED
,
27083 IX86_BUILTIN_VPCOMLTD
,
27084 IX86_BUILTIN_VPCOMLED
,
27085 IX86_BUILTIN_VPCOMGTD
,
27086 IX86_BUILTIN_VPCOMGED
,
27087 IX86_BUILTIN_VPCOMFALSED
,
27088 IX86_BUILTIN_VPCOMTRUED
,
27090 IX86_BUILTIN_VPCOMEQQ
,
27091 IX86_BUILTIN_VPCOMNEQ
,
27092 IX86_BUILTIN_VPCOMLTQ
,
27093 IX86_BUILTIN_VPCOMLEQ
,
27094 IX86_BUILTIN_VPCOMGTQ
,
27095 IX86_BUILTIN_VPCOMGEQ
,
27096 IX86_BUILTIN_VPCOMFALSEQ
,
27097 IX86_BUILTIN_VPCOMTRUEQ
,
27099 /* LWP instructions. */
27100 IX86_BUILTIN_LLWPCB
,
27101 IX86_BUILTIN_SLWPCB
,
27102 IX86_BUILTIN_LWPVAL32
,
27103 IX86_BUILTIN_LWPVAL64
,
27104 IX86_BUILTIN_LWPINS32
,
27105 IX86_BUILTIN_LWPINS64
,
27110 IX86_BUILTIN_XBEGIN
,
27112 IX86_BUILTIN_XABORT
,
27113 IX86_BUILTIN_XTEST
,
27115 /* BMI instructions. */
27116 IX86_BUILTIN_BEXTR32
,
27117 IX86_BUILTIN_BEXTR64
,
27120 /* TBM instructions. */
27121 IX86_BUILTIN_BEXTRI32
,
27122 IX86_BUILTIN_BEXTRI64
,
27124 /* BMI2 instructions. */
27125 IX86_BUILTIN_BZHI32
,
27126 IX86_BUILTIN_BZHI64
,
27127 IX86_BUILTIN_PDEP32
,
27128 IX86_BUILTIN_PDEP64
,
27129 IX86_BUILTIN_PEXT32
,
27130 IX86_BUILTIN_PEXT64
,
27132 /* ADX instructions. */
27133 IX86_BUILTIN_ADDCARRYX32
,
27134 IX86_BUILTIN_ADDCARRYX64
,
27136 /* FSGSBASE instructions. */
27137 IX86_BUILTIN_RDFSBASE32
,
27138 IX86_BUILTIN_RDFSBASE64
,
27139 IX86_BUILTIN_RDGSBASE32
,
27140 IX86_BUILTIN_RDGSBASE64
,
27141 IX86_BUILTIN_WRFSBASE32
,
27142 IX86_BUILTIN_WRFSBASE64
,
27143 IX86_BUILTIN_WRGSBASE32
,
27144 IX86_BUILTIN_WRGSBASE64
,
27146 /* RDRND instructions. */
27147 IX86_BUILTIN_RDRAND16_STEP
,
27148 IX86_BUILTIN_RDRAND32_STEP
,
27149 IX86_BUILTIN_RDRAND64_STEP
,
27151 /* RDSEED instructions. */
27152 IX86_BUILTIN_RDSEED16_STEP
,
27153 IX86_BUILTIN_RDSEED32_STEP
,
27154 IX86_BUILTIN_RDSEED64_STEP
,
27156 /* F16C instructions. */
27157 IX86_BUILTIN_CVTPH2PS
,
27158 IX86_BUILTIN_CVTPH2PS256
,
27159 IX86_BUILTIN_CVTPS2PH
,
27160 IX86_BUILTIN_CVTPS2PH256
,
27162 /* CFString built-in for darwin */
27163 IX86_BUILTIN_CFSTRING
,
27165 /* Builtins to get CPU type and supported features. */
27166 IX86_BUILTIN_CPU_INIT
,
27167 IX86_BUILTIN_CPU_IS
,
27168 IX86_BUILTIN_CPU_SUPPORTS
,
27173 /* Table for the ix86 builtin decls. */
27174 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27176 /* Table of all of the builtin functions that are possible with different ISA's
27177 but are waiting to be built until a function is declared to use that
27179 struct builtin_isa
{
27180 const char *name
; /* function name */
27181 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27182 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27183 bool const_p
; /* true if the declaration is constant */
27184 bool set_and_not_built_p
;
27187 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27190 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27191 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27192 function decl in the ix86_builtins array. Returns the function decl or
27193 NULL_TREE, if the builtin was not added.
27195 If the front end has a special hook for builtin functions, delay adding
27196 builtin functions that aren't in the current ISA until the ISA is changed
27197 with function specific optimization. Doing so, can save about 300K for the
27198 default compiler. When the builtin is expanded, check at that time whether
27201 If the front end doesn't have a special hook, record all builtins, even if
27202 it isn't an instruction set in the current ISA in case the user uses
27203 function specific options for a different ISA, so that we don't get scope
27204 errors if a builtin is added in the middle of a function scope. */
27207 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27208 enum ix86_builtin_func_type tcode
,
27209 enum ix86_builtins code
)
27211 tree decl
= NULL_TREE
;
27213 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
27215 ix86_builtins_isa
[(int) code
].isa
= mask
;
27217 mask
&= ~OPTION_MASK_ISA_64BIT
;
27219 || (mask
& ix86_isa_flags
) != 0
27220 || (lang_hooks
.builtin_function
27221 == lang_hooks
.builtin_function_ext_scope
))
27224 tree type
= ix86_get_builtin_func_type (tcode
);
27225 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
27227 ix86_builtins
[(int) code
] = decl
;
27228 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
27232 ix86_builtins
[(int) code
] = NULL_TREE
;
27233 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
27234 ix86_builtins_isa
[(int) code
].name
= name
;
27235 ix86_builtins_isa
[(int) code
].const_p
= false;
27236 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
27243 /* Like def_builtin, but also marks the function decl "const". */
27246 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
27247 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
27249 tree decl
= def_builtin (mask
, name
, tcode
, code
);
27251 TREE_READONLY (decl
) = 1;
27253 ix86_builtins_isa
[(int) code
].const_p
= true;
27258 /* Add any new builtin functions for a given ISA that may not have been
27259 declared. This saves a bit of space compared to adding all of the
27260 declarations to the tree, even if we didn't use them. */
27263 ix86_add_new_builtins (HOST_WIDE_INT isa
)
27267 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
27269 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
27270 && ix86_builtins_isa
[i
].set_and_not_built_p
)
27274 /* Don't define the builtin again. */
27275 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
27277 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
27278 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
27279 type
, i
, BUILT_IN_MD
, NULL
,
27282 ix86_builtins
[i
] = decl
;
27283 if (ix86_builtins_isa
[i
].const_p
)
27284 TREE_READONLY (decl
) = 1;
27289 /* Bits for builtin_description.flag. */
27291 /* Set when we don't support the comparison natively, and should
27292 swap_comparison in order to support it. */
27293 #define BUILTIN_DESC_SWAP_OPERANDS 1
27295 struct builtin_description
27297 const HOST_WIDE_INT mask
;
27298 const enum insn_code icode
;
27299 const char *const name
;
27300 const enum ix86_builtins code
;
27301 const enum rtx_code comparison
;
27305 static const struct builtin_description bdesc_comi
[] =
27307 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
27308 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
27309 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
27310 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
27311 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
27312 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
27313 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
27314 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
27315 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
27316 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
27317 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
27318 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
27319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
27320 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
27321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
27322 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
27323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
27324 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
27325 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
27326 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
27327 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
27328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
27329 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
27330 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
27333 static const struct builtin_description bdesc_pcmpestr
[] =
27336 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
27337 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
27338 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
27339 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
27340 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
27341 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
27342 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
27345 static const struct builtin_description bdesc_pcmpistr
[] =
27348 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
27349 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
27350 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
27351 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
27352 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
27353 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
27354 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
27357 /* Special builtins with variable number of arguments. */
27358 static const struct builtin_description bdesc_special_args
[] =
27360 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27361 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
27362 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27365 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27368 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27370 /* FXSR, XSAVE and XSAVEOPT */
27371 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27372 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27373 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27374 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27375 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27377 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27378 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27379 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27380 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27381 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27384 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27385 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27386 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27388 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27389 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27390 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27391 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27393 /* SSE or 3DNow!A */
27394 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27395 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
27398 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27399 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27400 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27401 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
27402 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27403 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
27404 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
27405 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
27406 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
27407 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27409 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27410 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27413 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27416 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
27419 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27420 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27423 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27424 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27426 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27427 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27428 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27429 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
27430 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
27432 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27433 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27434 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27435 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27436 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27437 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
27438 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27440 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
27441 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27442 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27444 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
27445 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
27446 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
27447 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
27448 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
27449 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
27450 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
27451 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
27454 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
27455 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
27456 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
27457 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
27458 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
27459 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
27460 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
27461 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
27462 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
27464 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27465 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
27466 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
27467 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
27468 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
27469 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
27472 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27473 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27474 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27475 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27476 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27477 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27478 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27479 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27482 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27483 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27484 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
27487 /* Builtins with variable number of arguments. */
27488 static const struct builtin_description bdesc_args
[] =
27490 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
27491 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
27492 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
27493 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27494 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27495 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27496 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27499 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27500 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27501 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27502 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27503 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27504 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27506 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27507 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27508 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27509 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27510 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27511 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27512 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27513 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27515 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27516 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27518 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27519 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27520 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27521 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27523 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27524 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27525 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27526 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27527 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27528 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27530 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27531 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27532 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27533 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27534 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27535 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27537 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27538 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27539 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27541 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27543 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27544 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27545 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27546 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27547 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27548 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27550 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27551 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27552 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27553 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27554 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27555 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27557 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27558 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27559 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27560 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27563 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27564 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27565 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27566 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27568 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27569 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27570 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27571 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27572 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27573 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27574 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27575 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27576 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27577 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27578 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27579 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27580 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27581 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27582 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27585 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27586 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27587 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27588 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27589 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27590 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27593 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27594 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27595 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27596 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27597 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27598 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27599 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27600 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27601 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27602 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27603 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27604 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27606 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27608 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27609 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27610 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27611 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27612 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27613 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27615 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27617 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27618 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27619 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27620 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27621 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27622 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27623 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27624 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27625 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27626 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27627 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27628 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27629 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27630 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27631 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27632 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27633 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27634 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27635 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27636 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27638 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27639 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27640 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27641 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27643 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27644 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27645 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27646 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27648 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27650 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27651 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27652 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27653 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27654 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27656 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27657 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27658 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27660 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27662 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27663 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27664 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27666 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27667 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27669 /* SSE MMX or 3Dnow!A */
27670 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27671 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27672 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27674 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27675 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27676 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27677 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27679 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27680 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27682 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27685 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27687 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27688 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27689 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27690 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27691 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27693 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27694 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27695 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27696 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27697 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27699 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27701 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27702 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27703 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27704 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27706 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27707 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27708 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27710 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27711 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27712 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27713 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27714 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27715 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27716 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27717 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27719 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27720 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27721 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27722 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27723 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27724 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27725 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27726 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27727 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27728 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27729 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27730 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27731 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27732 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27733 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27734 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27735 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27736 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27737 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27738 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27740 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27741 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27742 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27743 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27745 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27746 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27747 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27748 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27750 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27752 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27753 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27754 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27756 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27758 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27759 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27760 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27761 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27762 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27763 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27764 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27765 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27767 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27768 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27769 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27770 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27771 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27772 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27773 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27774 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27782 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27790 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27791 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27792 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27794 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27795 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27796 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27797 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27799 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27800 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27801 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27802 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27803 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27804 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27805 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27818 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27821 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27831 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27833 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27834 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27835 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27836 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27837 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27838 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27839 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27841 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27842 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27843 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27844 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27846 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27847 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27848 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27850 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27852 /* MOVQ xmm,xmm (and the corresponding V2DImode operation used by
   CODE_FOR_sse2_movq128) is an SSE2 instruction, so this builtin must
   be gated on SSE2, not plain SSE.  */
{ OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27859 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27860 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27862 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27863 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27864 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27865 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27866 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27867 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27870 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27871 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27872 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27873 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27874 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27875 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27877 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27878 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27879 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27880 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27881 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27882 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27883 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27884 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27885 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27886 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27887 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27888 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27889 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27890 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27891 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27892 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27893 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27894 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27895 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27896 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27897 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27898 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27899 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27900 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27903 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27904 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27907 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27908 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27909 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27910 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27911 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27912 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27913 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27914 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27915 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27916 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27918 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27919 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27920 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27921 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27922 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27923 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27924 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27925 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27926 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27927 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27928 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27929 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27930 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27932 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27933 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27934 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27935 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27936 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27937 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27938 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27939 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27940 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27941 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27942 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27943 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27946 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27947 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27948 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27949 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27951 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27952 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27953 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27954 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27956 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27957 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27959 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27960 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27962 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27963 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27964 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27965 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27967 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27968 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27970 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27971 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27973 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27974 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27975 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27978 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27979 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27980 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27981 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27982 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27985 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27986 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27987 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27988 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27991 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27992 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27994 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27995 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27996 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27997 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28000 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28003 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28004 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28005 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28006 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28007 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28008 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28009 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28010 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28011 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28012 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28013 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28014 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28015 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28016 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28017 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28018 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28019 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28020 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28021 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28022 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28023 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28024 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28025 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28026 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28027 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28028 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28030 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28031 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28032 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28033 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28035 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28036 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28037 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28038 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28039 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28040 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28041 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28042 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28043 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28044 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28045 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28046 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28047 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28048 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28049 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28050 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28051 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28052 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28053 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28054 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28055 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28056 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28057 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28058 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28059 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28060 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28061 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28062 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28063 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28064 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28065 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28066 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28067 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28068 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28070 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28071 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28072 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28074 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28075 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28076 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28077 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28078 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28080 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28082 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28083 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28085 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28086 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28087 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28088 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28090 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28091 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28093 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28094 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28096 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28097 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28098 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28099 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28101 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28102 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28104 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28105 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28107 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28108 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28109 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28110 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28112 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28113 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28114 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28115 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28116 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28117 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28119 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28120 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28121 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28122 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28123 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28124 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28125 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28126 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28127 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28128 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28129 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28130 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28131 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28132 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28133 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28135 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28136 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28138 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28139 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28141 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28144 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28145 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28146 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28147 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28148 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28149 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28150 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28151 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28152 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28153 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28154 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28155 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28156 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28157 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28158 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28159 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28160 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28161 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28162 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28163 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28164 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28165 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28166 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28167 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28168 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28169 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28170 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28171 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28172 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28173 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28174 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28175 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28176 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28177 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28178 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28179 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28180 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28181 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28182 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28183 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28184 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28185 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28186 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28187 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28188 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28189 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28190 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28191 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28192 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28193 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28194 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28195 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28196 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28197 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28198 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28199 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28200 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28201 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28202 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28203 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28204 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28205 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28206 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28207 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28208 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28209 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28210 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28211 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28212 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28213 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28214 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28215 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28216 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28217 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28218 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
28219 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28220 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28221 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28222 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28223 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28224 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28225 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28226 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28227 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28228 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28229 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28230 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28231 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28232 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28233 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28234 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28235 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28236 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28237 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28238 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28239 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28240 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28241 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28242 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28243 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28244 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28245 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28246 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28247 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28248 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28249 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28250 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28251 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28252 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28253 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28254 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28255 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28256 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28257 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28258 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28259 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28260 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28261 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28262 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28263 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
28264 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28265 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
28266 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
28267 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28268 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28269 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28270 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28271 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28272 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28273 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28274 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28275 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28276 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
28277 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
28278 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
28279 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
28280 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28281 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28282 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28283 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28284 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28285 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28286 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28287 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28288 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28289 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28291 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28294 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28295 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28296 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28299 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28300 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28303 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
28304 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
28305 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
28306 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
28309 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28310 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28311 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28312 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28313 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28314 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  Shorthand aliases mapping the multi-argument builtin
   classes used by bdesc_multi_arg below onto the generic ix86 function
   type enumerators.  Naming scheme: MULTI_ARG_<nargs>_<element kind>,
   where a trailing "2" denotes the 256-bit variant, _IMM an immediate
   shift/rotate count, _CMP a comparison-code operand, and _TF a
   table-form (condition) operand.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
28371 static const struct builtin_description bdesc_multi_arg
[] =
28373 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
28374 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
28375 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28376 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
28377 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
28378 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28380 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
28381 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
28382 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28383 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
28384 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
28385 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28387 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
28388 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
28389 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28390 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
28391 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
28392 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28393 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
28394 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
28395 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28396 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
28397 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
28398 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28400 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
28401 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
28402 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28403 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
28404 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
28405 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28406 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
28407 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
28408 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28409 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
28410 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
28411 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28413 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28414 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28415 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28416 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28417 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
28418 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
28419 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
28421 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28422 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28423 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
28424 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
28425 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
28426 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28427 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28429 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
28431 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28432 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28433 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28434 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28435 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28436 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28437 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28438 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28439 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28440 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28441 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28442 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28444 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28445 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28446 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28447 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28448 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
28449 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
28450 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
28451 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
28452 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28453 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28454 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28455 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28456 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28457 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28458 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28459 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28461 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
28462 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
28463 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
28464 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
28465 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
28466 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
28468 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28469 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28470 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28471 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28472 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28473 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28474 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28475 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28476 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28477 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28478 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28479 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28480 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28481 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28482 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28484 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28485 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28486 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28487 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
28488 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
28489 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
28490 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
28492 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28493 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28494 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28495 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
28496 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
28497 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
28498 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
28500 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28501 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28502 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28503 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
28504 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
28505 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
28506 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
28508 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28509 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28510 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28511 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28512 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28513 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28514 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28516 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28517 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28518 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28519 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28520 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28521 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28522 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28524 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28525 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28526 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28527 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28528 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28529 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28530 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28532 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28533 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28534 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28535 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28536 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28537 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28538 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28540 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28541 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28542 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28543 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28544 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28545 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28546 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28548 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28549 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28550 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28551 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28552 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28553 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28554 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28555 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28557 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28558 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28559 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28560 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28561 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28562 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28563 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28564 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28566 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28567 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28568 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28569 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28573 /* TM vector builtins. */
/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
28577 static const struct builtin_description bdesc_tm
[] =
28579 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28580 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28581 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28582 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28583 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28584 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28585 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28587 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28588 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28589 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28590 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28591 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28592 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28593 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28595 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28596 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28597 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28598 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28599 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28600 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28601 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28603 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28604 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28605 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28608 /* TM callbacks. */
28610 /* Return the builtin decl needed to load a vector of TYPE. */
28613 ix86_builtin_tm_load (tree type
)
28615 if (TREE_CODE (type
) == VECTOR_TYPE
)
28617 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28620 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28622 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28624 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28630 /* Return the builtin decl needed to store a vector of TYPE. */
28633 ix86_builtin_tm_store (tree type
)
28635 if (TREE_CODE (type
) == VECTOR_TYPE
)
28637 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28640 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28642 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28644 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28650 /* Initialize the transactional memory vector load/store builtins. */
28653 ix86_init_tm_builtins (void)
28655 enum ix86_builtin_func_type ftype
;
28656 const struct builtin_description
*d
;
28659 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28660 tree attrs_log
, attrs_type_log
;
28665 /* If there are no builtins defined, we must be compiling in a
28666 language without trans-mem support. */
28667 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28670 /* Use whatever attributes a normal TM load has. */
28671 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28672 attrs_load
= DECL_ATTRIBUTES (decl
);
28673 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28674 /* Use whatever attributes a normal TM store has. */
28675 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28676 attrs_store
= DECL_ATTRIBUTES (decl
);
28677 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28678 /* Use whatever attributes a normal TM log has. */
28679 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28680 attrs_log
= DECL_ATTRIBUTES (decl
);
28681 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28683 for (i
= 0, d
= bdesc_tm
;
28684 i
< ARRAY_SIZE (bdesc_tm
);
28687 if ((d
->mask
& ix86_isa_flags
) != 0
28688 || (lang_hooks
.builtin_function
28689 == lang_hooks
.builtin_function_ext_scope
))
28691 tree type
, attrs
, attrs_type
;
28692 enum built_in_function code
= (enum built_in_function
) d
->code
;
28694 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28695 type
= ix86_get_builtin_func_type (ftype
);
28697 if (BUILTIN_TM_LOAD_P (code
))
28699 attrs
= attrs_load
;
28700 attrs_type
= attrs_type_load
;
28702 else if (BUILTIN_TM_STORE_P (code
))
28704 attrs
= attrs_store
;
28705 attrs_type
= attrs_type_store
;
28710 attrs_type
= attrs_type_log
;
28712 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28713 /* The builtin without the prefix for
28714 calling it directly. */
28715 d
->name
+ strlen ("__builtin_"),
28717 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28718 set the TYPE_ATTRIBUTES. */
28719 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28721 set_builtin_decl (code
, decl
, false);
28726 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28727 in the current target ISA to allow the user to compile particular modules
28728 with different target specific options that differ from the command line
28731 ix86_init_mmx_sse_builtins (void)
28733 const struct builtin_description
* d
;
28734 enum ix86_builtin_func_type ftype
;
28737 /* Add all special builtins with variable number of operands. */
28738 for (i
= 0, d
= bdesc_special_args
;
28739 i
< ARRAY_SIZE (bdesc_special_args
);
28745 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28746 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28749 /* Add all builtins with variable number of operands. */
28750 for (i
= 0, d
= bdesc_args
;
28751 i
< ARRAY_SIZE (bdesc_args
);
28757 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28758 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28761 /* pcmpestr[im] insns. */
28762 for (i
= 0, d
= bdesc_pcmpestr
;
28763 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28766 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28767 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28769 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28770 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28773 /* pcmpistr[im] insns. */
28774 for (i
= 0, d
= bdesc_pcmpistr
;
28775 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28778 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28779 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28781 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28782 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28785 /* comi/ucomi insns. */
28786 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28788 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28789 ftype
= INT_FTYPE_V2DF_V2DF
;
28791 ftype
= INT_FTYPE_V4SF_V4SF
;
28792 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28796 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28797 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28798 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28799 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28801 /* SSE or 3DNow!A */
28802 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28803 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28804 IX86_BUILTIN_MASKMOVQ
);
28807 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28808 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28810 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28811 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28812 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28813 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28816 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28817 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28818 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28819 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28822 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28823 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28824 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28825 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28826 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28827 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28828 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28829 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28830 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28831 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28832 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28833 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28836 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28837 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28840 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28841 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28842 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28843 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28844 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28845 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28846 IX86_BUILTIN_RDRAND64_STEP
);
28849 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28850 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28851 IX86_BUILTIN_GATHERSIV2DF
);
28853 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28854 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28855 IX86_BUILTIN_GATHERSIV4DF
);
28857 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28858 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28859 IX86_BUILTIN_GATHERDIV2DF
);
28861 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28862 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28863 IX86_BUILTIN_GATHERDIV4DF
);
28865 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28866 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28867 IX86_BUILTIN_GATHERSIV4SF
);
28869 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28870 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28871 IX86_BUILTIN_GATHERSIV8SF
);
28873 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28874 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28875 IX86_BUILTIN_GATHERDIV4SF
);
28877 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28878 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28879 IX86_BUILTIN_GATHERDIV8SF
);
28881 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28882 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28883 IX86_BUILTIN_GATHERSIV2DI
);
28885 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28886 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28887 IX86_BUILTIN_GATHERSIV4DI
);
28889 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28890 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28891 IX86_BUILTIN_GATHERDIV2DI
);
28893 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28894 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28895 IX86_BUILTIN_GATHERDIV4DI
);
28897 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28898 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28899 IX86_BUILTIN_GATHERSIV4SI
);
28901 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28902 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28903 IX86_BUILTIN_GATHERSIV8SI
);
28905 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28906 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28907 IX86_BUILTIN_GATHERDIV4SI
);
28909 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28910 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28911 IX86_BUILTIN_GATHERDIV8SI
);
28913 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28914 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28915 IX86_BUILTIN_GATHERALTSIV4DF
);
28917 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28918 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28919 IX86_BUILTIN_GATHERALTDIV8SF
);
28921 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28922 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28923 IX86_BUILTIN_GATHERALTSIV4DI
);
28925 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28926 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28927 IX86_BUILTIN_GATHERALTDIV8SI
);
28930 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28931 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28933 /* MMX access to the vec_init patterns. */
28934 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28935 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28937 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28938 V4HI_FTYPE_HI_HI_HI_HI
,
28939 IX86_BUILTIN_VEC_INIT_V4HI
);
28941 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28942 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28943 IX86_BUILTIN_VEC_INIT_V8QI
);
28945 /* Access to the vec_extract patterns. */
28946 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28947 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28948 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28949 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28950 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28951 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28952 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28953 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28954 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28955 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28957 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28958 "__builtin_ia32_vec_ext_v4hi",
28959 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28961 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28962 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28964 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28965 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28967 /* Access to the vec_set patterns. */
28968 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28969 "__builtin_ia32_vec_set_v2di",
28970 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28972 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28973 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28975 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28976 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28978 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28979 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28981 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28982 "__builtin_ia32_vec_set_v4hi",
28983 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28985 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28986 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28989 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28990 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28991 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28992 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28993 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28994 "__builtin_ia32_rdseed_di_step",
28995 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28998 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28999 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29000 def_builtin (OPTION_MASK_ISA_64BIT
,
29001 "__builtin_ia32_addcarryx_u64",
29002 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29003 IX86_BUILTIN_ADDCARRYX64
);
29005 /* Add FMA4 multi-arg argument instructions */
29006 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29011 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29012 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29016 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
29017 to return a pointer to VERSION_DECL if the outcome of the expression
29018 formed by PREDICATE_CHAIN is true. This function will be called during
29019 version dispatch to decide which function version to execute. It returns
29020 the basic block at the end, to which more conditions can be added. */
29023 add_condition_to_bb (tree function_decl
, tree version_decl
,
29024 tree predicate_chain
, basic_block new_bb
)
29026 gimple return_stmt
;
29027 tree convert_expr
, result_var
;
29028 gimple convert_stmt
;
29029 gimple call_cond_stmt
;
29030 gimple if_else_stmt
;
29032 basic_block bb1
, bb2
, bb3
;
29035 tree cond_var
, and_expr_var
= NULL_TREE
;
29038 tree predicate_decl
, predicate_arg
;
29040 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
29042 gcc_assert (new_bb
!= NULL
);
29043 gseq
= bb_seq (new_bb
);
29046 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
29047 build_fold_addr_expr (version_decl
));
29048 result_var
= create_tmp_var (ptr_type_node
, NULL
);
29049 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
29050 return_stmt
= gimple_build_return (result_var
);
29052 if (predicate_chain
== NULL_TREE
)
29054 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29055 gimple_seq_add_stmt (&gseq
, return_stmt
);
29056 set_bb_seq (new_bb
, gseq
);
29057 gimple_set_bb (convert_stmt
, new_bb
);
29058 gimple_set_bb (return_stmt
, new_bb
);
29063 while (predicate_chain
!= NULL
)
29065 cond_var
= create_tmp_var (integer_type_node
, NULL
);
29066 predicate_decl
= TREE_PURPOSE (predicate_chain
);
29067 predicate_arg
= TREE_VALUE (predicate_chain
);
29068 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
29069 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
29071 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
29072 gimple_set_bb (call_cond_stmt
, new_bb
);
29073 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
29075 predicate_chain
= TREE_CHAIN (predicate_chain
);
29077 if (and_expr_var
== NULL
)
29078 and_expr_var
= cond_var
;
29081 gimple assign_stmt
;
29082 /* Use MIN_EXPR to check if any integer is zero?.
29083 and_expr_var = min_expr <cond_var, and_expr_var> */
29084 assign_stmt
= gimple_build_assign (and_expr_var
,
29085 build2 (MIN_EXPR
, integer_type_node
,
29086 cond_var
, and_expr_var
));
29088 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
29089 gimple_set_bb (assign_stmt
, new_bb
);
29090 gimple_seq_add_stmt (&gseq
, assign_stmt
);
29094 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
29096 NULL_TREE
, NULL_TREE
);
29097 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
29098 gimple_set_bb (if_else_stmt
, new_bb
);
29099 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
29101 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29102 gimple_seq_add_stmt (&gseq
, return_stmt
);
29103 set_bb_seq (new_bb
, gseq
);
29106 e12
= split_block (bb1
, if_else_stmt
);
29108 e12
->flags
&= ~EDGE_FALLTHRU
;
29109 e12
->flags
|= EDGE_TRUE_VALUE
;
29111 e23
= split_block (bb2
, return_stmt
);
29113 gimple_set_bb (convert_stmt
, bb2
);
29114 gimple_set_bb (return_stmt
, bb2
);
29117 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
29120 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
29127 /* This parses the attribute arguments to target in DECL and determines
29128 the right builtin to use to match the platform specification.
29129 It returns the priority value for this version decl. If PREDICATE_LIST
29130 is not NULL, it stores the list of cpu features that need to be checked
29131 before dispatching this function. */
29133 static unsigned int
29134 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
29137 struct cl_target_option cur_target
;
29139 struct cl_target_option
*new_target
;
29140 const char *arg_str
= NULL
;
29141 const char *attrs_str
= NULL
;
29142 char *tok_str
= NULL
;
29145 /* Priority of i386 features, greater value is higher priority. This is
29146 used to decide the order in which function dispatch must happen. For
29147 instance, a version specialized for SSE4.2 should be checked for dispatch
29148 before a version for SSE3, as SSE4.2 implies SSE3. */
29149 enum feature_priority
29170 enum feature_priority priority
= P_ZERO
;
29172 /* These are the target attribute strings for which a dispatcher is
29173 available, from fold_builtin_cpu. */
29175 static struct _feature_list
29177 const char *const name
;
29178 const enum feature_priority priority
;
29180 const feature_list
[] =
29186 {"ssse3", P_SSSE3
},
29187 {"sse4.1", P_SSE4_1
},
29188 {"sse4.2", P_SSE4_2
},
29189 {"popcnt", P_POPCNT
},
29195 static unsigned int NUM_FEATURES
29196 = sizeof (feature_list
) / sizeof (struct _feature_list
);
29200 tree predicate_chain
= NULL_TREE
;
29201 tree predicate_decl
, predicate_arg
;
29203 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29204 gcc_assert (attrs
!= NULL
);
29206 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
29208 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
29209 attrs_str
= TREE_STRING_POINTER (attrs
);
29211 /* Return priority zero for default function. */
29212 if (strcmp (attrs_str
, "default") == 0)
29215 /* Handle arch= if specified. For priority, set it to be 1 more than
29216 the best instruction set the processor can handle. For instance, if
29217 there is a version for atom and a version for ssse3 (the highest ISA
29218 priority for atom), the atom version must be checked for dispatch
29219 before the ssse3 version. */
29220 if (strstr (attrs_str
, "arch=") != NULL
)
29222 cl_target_option_save (&cur_target
, &global_options
);
29223 target_node
= ix86_valid_target_attribute_tree (attrs
);
29225 gcc_assert (target_node
);
29226 new_target
= TREE_TARGET_OPTION (target_node
);
29227 gcc_assert (new_target
);
29229 if (new_target
->arch_specified
&& new_target
->arch
> 0)
29231 switch (new_target
->arch
)
29233 case PROCESSOR_CORE2
:
29235 priority
= P_PROC_SSSE3
;
29237 case PROCESSOR_COREI7
:
29238 arg_str
= "corei7";
29239 priority
= P_PROC_SSE4_2
;
29241 case PROCESSOR_ATOM
:
29243 priority
= P_PROC_SSSE3
;
29245 case PROCESSOR_AMDFAM10
:
29246 arg_str
= "amdfam10h";
29247 priority
= P_PROC_SSE4_a
;
29249 case PROCESSOR_BDVER1
:
29250 arg_str
= "bdver1";
29251 priority
= P_PROC_FMA
;
29253 case PROCESSOR_BDVER2
:
29254 arg_str
= "bdver2";
29255 priority
= P_PROC_FMA
;
29260 cl_target_option_restore (&global_options
, &cur_target
);
29262 if (predicate_list
&& arg_str
== NULL
)
29264 error_at (DECL_SOURCE_LOCATION (decl
),
29265 "No dispatcher found for the versioning attributes");
29269 if (predicate_list
)
29271 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
29272 /* For a C string literal the length includes the trailing NULL. */
29273 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
29274 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
29279 /* Process feature name. */
29280 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
29281 strcpy (tok_str
, attrs_str
);
29282 token
= strtok (tok_str
, ",");
29283 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
29285 while (token
!= NULL
)
29287 /* Do not process "arch=" */
29288 if (strncmp (token
, "arch=", 5) == 0)
29290 token
= strtok (NULL
, ",");
29293 for (i
= 0; i
< NUM_FEATURES
; ++i
)
29295 if (strcmp (token
, feature_list
[i
].name
) == 0)
29297 if (predicate_list
)
29299 predicate_arg
= build_string_literal (
29300 strlen (feature_list
[i
].name
) + 1,
29301 feature_list
[i
].name
);
29302 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
29305 /* Find the maximum priority feature. */
29306 if (feature_list
[i
].priority
> priority
)
29307 priority
= feature_list
[i
].priority
;
29312 if (predicate_list
&& i
== NUM_FEATURES
)
29314 error_at (DECL_SOURCE_LOCATION (decl
),
29315 "No dispatcher found for %s", token
);
29318 token
= strtok (NULL
, ",");
29322 if (predicate_list
&& predicate_chain
== NULL_TREE
)
29324 error_at (DECL_SOURCE_LOCATION (decl
),
29325 "No dispatcher found for the versioning attributes : %s",
29329 else if (predicate_list
)
29331 predicate_chain
= nreverse (predicate_chain
);
29332 *predicate_list
= predicate_chain
;
29338 /* This compares the priority of target features in function DECL1
29339 and DECL2. It returns positive value if DECL1 is higher priority,
29340 negative value if DECL2 is higher priority and 0 if they are the
29344 ix86_compare_version_priority (tree decl1
, tree decl2
)
29346 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
29347 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
29349 return (int)priority1
- (int)priority2
;
29352 /* V1 and V2 point to function versions with different priorities
29353 based on the target ISA. This function compares their priorities. */
29356 feature_compare (const void *v1
, const void *v2
)
29358 typedef struct _function_version_info
29361 tree predicate_chain
;
29362 unsigned int dispatch_priority
;
29363 } function_version_info
;
29365 const function_version_info c1
= *(const function_version_info
*)v1
;
29366 const function_version_info c2
= *(const function_version_info
*)v2
;
29367 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
29370 /* This function generates the dispatch function for
29371 multi-versioned functions. DISPATCH_DECL is the function which will
29372 contain the dispatch logic. FNDECLS are the function choices for
29373 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
29374 in DISPATCH_DECL in which the dispatch code is generated. */
29377 dispatch_function_versions (tree dispatch_decl
,
29379 basic_block
*empty_bb
)
29382 gimple ifunc_cpu_init_stmt
;
29386 vec
<tree
> *fndecls
;
29387 unsigned int num_versions
= 0;
29388 unsigned int actual_versions
= 0;
29391 struct _function_version_info
29394 tree predicate_chain
;
29395 unsigned int dispatch_priority
;
29396 }*function_version_info
;
29398 gcc_assert (dispatch_decl
!= NULL
29399 && fndecls_p
!= NULL
29400 && empty_bb
!= NULL
);
29402 /*fndecls_p is actually a vector. */
29403 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
29405 /* At least one more version other than the default. */
29406 num_versions
= fndecls
->length ();
29407 gcc_assert (num_versions
>= 2);
29409 function_version_info
= (struct _function_version_info
*)
29410 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
29412 /* The first version in the vector is the default decl. */
29413 default_decl
= (*fndecls
)[0];
29415 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
29417 gseq
= bb_seq (*empty_bb
);
29418 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
29419 constructors, so explicity call __builtin_cpu_init here. */
29420 ifunc_cpu_init_stmt
= gimple_build_call_vec (
29421 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
29422 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
29423 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
29424 set_bb_seq (*empty_bb
, gseq
);
29429 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
29431 tree version_decl
= ele
;
29432 tree predicate_chain
= NULL_TREE
;
29433 unsigned int priority
;
29434 /* Get attribute string, parse it and find the right predicate decl.
29435 The predicate function could be a lengthy combination of many
29436 features, like arch-type and various isa-variants. */
29437 priority
= get_builtin_code_for_version (version_decl
,
29440 if (predicate_chain
== NULL_TREE
)
29443 function_version_info
[actual_versions
].version_decl
= version_decl
;
29444 function_version_info
[actual_versions
].predicate_chain
29446 function_version_info
[actual_versions
].dispatch_priority
= priority
;
29450 /* Sort the versions according to descending order of dispatch priority. The
29451 priority is based on the ISA. This is not a perfect solution. There
29452 could still be ambiguity. If more than one function version is suitable
29453 to execute, which one should be dispatched? In future, allow the user
29454 to specify a dispatch priority next to the version. */
29455 qsort (function_version_info
, actual_versions
,
29456 sizeof (struct _function_version_info
), feature_compare
);
29458 for (i
= 0; i
< actual_versions
; ++i
)
29459 *empty_bb
= add_condition_to_bb (dispatch_decl
,
29460 function_version_info
[i
].version_decl
,
29461 function_version_info
[i
].predicate_chain
,
29464 /* dispatch default version at the end. */
29465 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
29468 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  V1 and V2 point to elements of a
   (char *) array; compares the strings they point to.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *c1 = *(char *const*)v1;
  const char *c2 = *(char *const*)v2;
  return strcmp (c1, c2);
}
29483 /* ARGLIST is the argument to target attribute. This function tokenizes
29484 the comma separated arguments, sorts them and returns a string which
29485 is a unique identifier for the comma separated arguments. It also
29486 replaces non-identifier characters "=,-" with "_". */
29489 sorted_attr_string (tree arglist
)
29492 size_t str_len_sum
= 0;
29493 char **args
= NULL
;
29494 char *attr_str
, *ret_str
;
29496 unsigned int argnum
= 1;
29499 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29501 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29502 size_t len
= strlen (str
);
29503 str_len_sum
+= len
+ 1;
29504 if (arg
!= arglist
)
29506 for (i
= 0; i
< strlen (str
); i
++)
29511 attr_str
= XNEWVEC (char, str_len_sum
);
29513 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29515 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29516 size_t len
= strlen (str
);
29517 memcpy (attr_str
+ str_len_sum
, str
, len
);
29518 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
29519 str_len_sum
+= len
+ 1;
29522 /* Replace "=,-" with "_". */
29523 for (i
= 0; i
< strlen (attr_str
); i
++)
29524 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
29530 args
= XNEWVEC (char *, argnum
);
29533 attr
= strtok (attr_str
, ",");
29534 while (attr
!= NULL
)
29538 attr
= strtok (NULL
, ",");
29541 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
29543 ret_str
= XNEWVEC (char, str_len_sum
);
29545 for (i
= 0; i
< argnum
; i
++)
29547 size_t len
= strlen (args
[i
]);
29548 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
29549 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
29550 str_len_sum
+= len
+ 1;
29554 XDELETEVEC (attr_str
);
29558 /* This function changes the assembler name for functions that are
29559 versions. If DECL is a function version and has a "target"
29560 attribute, it appends the attribute string to its assembler name. */
29563 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29566 const char *orig_name
, *version_string
;
29567 char *attr_str
, *assembler_name
;
29569 if (DECL_DECLARED_INLINE_P (decl
)
29570 && lookup_attribute ("gnu_inline",
29571 DECL_ATTRIBUTES (decl
)))
29572 error_at (DECL_SOURCE_LOCATION (decl
),
29573 "Function versions cannot be marked as gnu_inline,"
29574 " bodies have to be generated");
29576 if (DECL_VIRTUAL_P (decl
)
29577 || DECL_VINDEX (decl
))
29578 sorry ("Virtual function multiversioning not supported");
29580 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29582 /* target attribute string cannot be NULL. */
29583 gcc_assert (version_attr
!= NULL_TREE
);
29585 orig_name
= IDENTIFIER_POINTER (id
);
29587 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29589 if (strcmp (version_string
, "default") == 0)
29592 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
29593 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
29595 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29597 /* Allow assembler name to be modified if already set. */
29598 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29599 SET_DECL_RTL (decl
, NULL
);
29601 tree ret
= get_identifier (assembler_name
);
29602 XDELETEVEC (attr_str
);
29603 XDELETEVEC (assembler_name
);
29607 /* This function returns true if FN1 and FN2 are versions of the same function,
29608 that is, the target strings of the function decls are different. This assumes
29609 that FN1 and FN2 have the same signature. */
29612 ix86_function_versions (tree fn1
, tree fn2
)
29615 char *target1
, *target2
;
29618 if (TREE_CODE (fn1
) != FUNCTION_DECL
29619 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29622 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29623 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29625 /* At least one function decl should have the target attribute specified. */
29626 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29629 /* Diagnose missing target attribute if one of the decls is already
29630 multi-versioned. */
29631 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
29633 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
29635 if (attr2
!= NULL_TREE
)
29642 error_at (DECL_SOURCE_LOCATION (fn2
),
29643 "missing %<target%> attribute for multi-versioned %D",
29645 inform (DECL_SOURCE_LOCATION (fn1
),
29646 "previous declaration of %D", fn1
);
29647 /* Prevent diagnosing of the same error multiple times. */
29648 DECL_ATTRIBUTES (fn2
)
29649 = tree_cons (get_identifier ("target"),
29650 copy_node (TREE_VALUE (attr1
)),
29651 DECL_ATTRIBUTES (fn2
));
29656 target1
= sorted_attr_string (TREE_VALUE (attr1
));
29657 target2
= sorted_attr_string (TREE_VALUE (attr2
));
29659 /* The sorted target strings must be different for fn1 and fn2
29661 if (strcmp (target1
, target2
) == 0)
29666 XDELETEVEC (target1
);
29667 XDELETEVEC (target2
);
29673 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29675 /* For function version, add the target suffix to the assembler name. */
29676 if (TREE_CODE (decl
) == FUNCTION_DECL
29677 && DECL_FUNCTION_VERSIONED (decl
))
29678 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29679 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29680 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29686 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29687 is true, append the full path name of the source file. */
29690 make_name (tree decl
, const char *suffix
, bool make_unique
)
29692 char *global_var_name
;
29695 const char *unique_name
= NULL
;
29697 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29699 /* Get a unique name that can be used globally without any chances
29700 of collision at link time. */
29702 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
29704 name_len
= strlen (name
) + strlen (suffix
) + 2;
29707 name_len
+= strlen (unique_name
) + 1;
29708 global_var_name
= XNEWVEC (char, name_len
);
29710 /* Use '.' to concatenate names as it is demangler friendly. */
29712 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
29715 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29717 return global_var_name
;
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  /* Non-public versions get a file-unique ifunc name to avoid link
     time collisions, since the ifunc itself must be visible.  */
  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* This will be of type IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
29760 /* Returns true if decl is multi-versioned and DECL is the default function,
29761 that is it is not tagged with target specific optimization. */
29764 is_function_default_version (const tree decl
)
29766 if (TREE_CODE (decl
) != FUNCTION_DECL
29767 || !DECL_FUNCTION_VERSIONED (decl
))
29769 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29771 attr
= TREE_VALUE (TREE_VALUE (attr
));
29772 return (TREE_CODE (attr
) == STRING_CST
29773 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
29776 /* Make a dispatcher declaration for the multi-versioned function DECL.
29777 Calls to DECL function will be replaced with calls to the dispatcher
29778 by the front-end. Returns the decl of the dispatcher function. */
29781 ix86_get_function_versions_dispatcher (void *decl
)
29783 tree fn
= (tree
) decl
;
29784 struct cgraph_node
*node
= NULL
;
29785 struct cgraph_node
*default_node
= NULL
;
29786 struct cgraph_function_version_info
*node_v
= NULL
;
29787 struct cgraph_function_version_info
*first_v
= NULL
;
29789 tree dispatch_decl
= NULL
;
29791 struct cgraph_function_version_info
*default_version_info
= NULL
;
29793 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29795 node
= cgraph_get_node (fn
);
29796 gcc_assert (node
!= NULL
);
29798 node_v
= get_cgraph_node_version (node
);
29799 gcc_assert (node_v
!= NULL
);
29801 if (node_v
->dispatcher_resolver
!= NULL
)
29802 return node_v
->dispatcher_resolver
;
29804 /* Find the default version and make it the first node. */
29806 /* Go to the beginning of the chain. */
29807 while (first_v
->prev
!= NULL
)
29808 first_v
= first_v
->prev
;
29809 default_version_info
= first_v
;
29810 while (default_version_info
!= NULL
)
29812 if (is_function_default_version
29813 (default_version_info
->this_node
->symbol
.decl
))
29815 default_version_info
= default_version_info
->next
;
29818 /* If there is no default node, just return NULL. */
29819 if (default_version_info
== NULL
)
29822 /* Make default info the first node. */
29823 if (first_v
!= default_version_info
)
29825 default_version_info
->prev
->next
= default_version_info
->next
;
29826 if (default_version_info
->next
)
29827 default_version_info
->next
->prev
= default_version_info
->prev
;
29828 first_v
->prev
= default_version_info
;
29829 default_version_info
->next
= first_v
;
29830 default_version_info
->prev
= NULL
;
29833 default_node
= default_version_info
->this_node
;
29835 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29836 if (targetm
.has_ifunc_p ())
29838 struct cgraph_function_version_info
*it_v
= NULL
;
29839 struct cgraph_node
*dispatcher_node
= NULL
;
29840 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29842 /* Right now, the dispatching is done via ifunc. */
29843 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29845 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29846 gcc_assert (dispatcher_node
!= NULL
);
29847 dispatcher_node
->dispatcher_function
= 1;
29848 dispatcher_version_info
29849 = insert_new_cgraph_node_version (dispatcher_node
);
29850 dispatcher_version_info
->next
= default_version_info
;
29851 dispatcher_node
->symbol
.definition
= 1;
29853 /* Set the dispatcher for all the versions. */
29854 it_v
= default_version_info
;
29855 while (it_v
!= NULL
)
29857 it_v
->dispatcher_resolver
= dispatch_decl
;
29864 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29865 "multiversioning needs ifunc which is not supported "
29869 return dispatch_decl
;
29872 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
29876 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29879 tree attr_arg_name
;
29883 attr_name
= get_identifier (name
);
29884 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
29885 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
29886 attr
= tree_cons (attr_name
, attr_args
, chain
);
29890 /* Make the resolver function decl to dispatch the versions of
29891 a multi-versioned function, DEFAULT_DECL. Create an
29892 empty basic block in the resolver and store the pointer in
29893 EMPTY_BB. Return the decl of the resolver function. */
29896 make_resolver_func (const tree default_decl
,
29897 const tree dispatch_decl
,
29898 basic_block
*empty_bb
)
29900 char *resolver_name
;
29901 tree decl
, type
, decl_name
, t
;
29902 bool is_uniq
= false;
29904 /* IFUNC's have to be globally visible. So, if the default_decl is
29905 not, then the name of the IFUNC should be made unique. */
29906 if (TREE_PUBLIC (default_decl
) == 0)
29909 /* Append the filename to the resolver function if the versions are
29910 not externally visible. This is because the resolver function has
29911 to be externally visible for the loader to find it. So, appending
29912 the filename will prevent conflicts with a resolver function from
29913 another module which is based on the same version name. */
29914 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29916 /* The resolver function should return a (void *). */
29917 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29919 decl
= build_fn_decl (resolver_name
, type
);
29920 decl_name
= get_identifier (resolver_name
);
29921 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29923 DECL_NAME (decl
) = decl_name
;
29924 TREE_USED (decl
) = 1;
29925 DECL_ARTIFICIAL (decl
) = 1;
29926 DECL_IGNORED_P (decl
) = 0;
29927 /* IFUNC resolvers have to be externally visible. */
29928 TREE_PUBLIC (decl
) = 1;
29929 DECL_UNINLINABLE (decl
) = 1;
29931 /* Resolver is not external, body is generated. */
29932 DECL_EXTERNAL (decl
) = 0;
29933 DECL_EXTERNAL (dispatch_decl
) = 0;
29935 DECL_CONTEXT (decl
) = NULL_TREE
;
29936 DECL_INITIAL (decl
) = make_node (BLOCK
);
29937 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29939 if (DECL_COMDAT_GROUP (default_decl
)
29940 || TREE_PUBLIC (default_decl
))
29942 /* In this case, each translation unit with a call to this
29943 versioned function will put out a resolver. Ensure it
29944 is comdat to keep just one copy. */
29945 DECL_COMDAT (decl
) = 1;
29946 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29948 /* Build result decl and add to function_decl. */
29949 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29950 DECL_ARTIFICIAL (t
) = 1;
29951 DECL_IGNORED_P (t
) = 1;
29952 DECL_RESULT (decl
) = t
;
29954 gimplify_function_tree (decl
);
29955 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29956 *empty_bb
= init_lowered_empty_function (decl
, false);
29958 cgraph_add_new_function (decl
, true);
29959 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29963 gcc_assert (dispatch_decl
!= NULL
);
29964 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29965 DECL_ATTRIBUTES (dispatch_decl
)
29966 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29968 /* Create the alias for dispatch to resolver here. */
29969 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29970 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29971 XDELETEVEC (resolver_name
);
29975 /* Generate the dispatching code body to dispatch multi-versioned function
29976 DECL. The target hook is called to process the "target" attributes and
29977 provide the code to dispatch the right function at run-time. NODE points
29978 to the dispatcher decl whose body will be created. */
29981 ix86_generate_version_dispatcher_body (void *node_p
)
29983 tree resolver_decl
;
29984 basic_block empty_bb
;
29985 vec
<tree
> fn_ver_vec
= vNULL
;
29986 tree default_ver_decl
;
29987 struct cgraph_node
*versn
;
29988 struct cgraph_node
*node
;
29990 struct cgraph_function_version_info
*node_version_info
= NULL
;
29991 struct cgraph_function_version_info
*versn_info
= NULL
;
29993 node
= (cgraph_node
*)node_p
;
29995 node_version_info
= get_cgraph_node_version (node
);
29996 gcc_assert (node
->dispatcher_function
29997 && node_version_info
!= NULL
);
29999 if (node_version_info
->dispatcher_resolver
)
30000 return node_version_info
->dispatcher_resolver
;
30002 /* The first version in the chain corresponds to the default version. */
30003 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
30005 /* node is going to be an alias, so remove the finalized bit. */
30006 node
->symbol
.definition
= false;
30008 resolver_decl
= make_resolver_func (default_ver_decl
,
30009 node
->symbol
.decl
, &empty_bb
);
30011 node_version_info
->dispatcher_resolver
= resolver_decl
;
30013 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30015 fn_ver_vec
.create (2);
30017 for (versn_info
= node_version_info
->next
; versn_info
;
30018 versn_info
= versn_info
->next
)
30020 versn
= versn_info
->this_node
;
30021 /* Check for virtual functions here again, as by this time it should
30022 have been determined if this function needs a vtable index or
30023 not. This happens for methods in derived classes that override
30024 virtual methods in base classes but are not explicitly marked as
30026 if (DECL_VINDEX (versn
->symbol
.decl
))
30027 sorry ("Virtual function multiversioning not supported");
30029 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
30032 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30033 fn_ver_vec
.release ();
30034 rebuild_cgraph_edges ();
30036 return resolver_decl
;
30038 /* This builds the processor_model struct type defined in
30039 libgcc/config/i386/cpuinfo.c */
30042 build_processor_model_struct (void)
30044 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30046 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30048 tree type
= make_node (RECORD_TYPE
);
30050 /* The first 3 fields are unsigned int. */
30051 for (i
= 0; i
< 3; ++i
)
30053 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30054 get_identifier (field_name
[i
]), unsigned_type_node
);
30055 if (field_chain
!= NULL_TREE
)
30056 DECL_CHAIN (field
) = field_chain
;
30057 field_chain
= field
;
30060 /* The last field is an array of unsigned integers of size one. */
30061 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30062 get_identifier (field_name
[3]),
30063 build_array_type (unsigned_type_node
,
30064 build_index_type (size_one_node
)));
30065 if (field_chain
!= NULL_TREE
)
30066 DECL_CHAIN (field
) = field_chain
;
30067 field_chain
= field
;
30069 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
30073 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30076 make_var_decl (tree type
, const char *name
)
30080 new_decl
= build_decl (UNKNOWN_LOCATION
,
30082 get_identifier(name
),
30085 DECL_EXTERNAL (new_decl
) = 1;
30086 TREE_STATIC (new_decl
) = 1;
30087 TREE_PUBLIC (new_decl
) = 1;
30088 DECL_INITIAL (new_decl
) = 0;
30089 DECL_ARTIFICIAL (new_decl
) = 0;
30090 DECL_PRESERVE_P (new_decl
) = 1;
30092 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30093 assemble_variable (new_decl
, 0, 0, 0);
30098 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30099 into an integer defined in libgcc/config/i386/cpuinfo.c */
30102 fold_builtin_cpu (tree fndecl
, tree
*args
)
30105 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30106 DECL_FUNCTION_CODE (fndecl
);
30107 tree param_string_cst
= NULL
;
30109 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30110 enum processor_features
30126 /* These are the values for vendor types and cpu types and subtypes
30127 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30128 the corresponding start value. */
30129 enum processor_model
30140 M_CPU_SUBTYPE_START
,
30141 M_INTEL_COREI7_NEHALEM
,
30142 M_INTEL_COREI7_WESTMERE
,
30143 M_INTEL_COREI7_SANDYBRIDGE
,
30144 M_AMDFAM10H_BARCELONA
,
30145 M_AMDFAM10H_SHANGHAI
,
30146 M_AMDFAM10H_ISTANBUL
,
30147 M_AMDFAM15H_BDVER1
,
30148 M_AMDFAM15H_BDVER2
,
30152 static struct _arch_names_table
30154 const char *const name
;
30155 const enum processor_model model
;
30157 const arch_names_table
[] =
30160 {"intel", M_INTEL
},
30161 {"atom", M_INTEL_ATOM
},
30162 {"slm", M_INTEL_SLM
},
30163 {"core2", M_INTEL_CORE2
},
30164 {"corei7", M_INTEL_COREI7
},
30165 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30166 {"westmere", M_INTEL_COREI7_WESTMERE
},
30167 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30168 {"amdfam10h", M_AMDFAM10H
},
30169 {"barcelona", M_AMDFAM10H_BARCELONA
},
30170 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30171 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30172 {"amdfam15h", M_AMDFAM15H
},
30173 {"bdver1", M_AMDFAM15H_BDVER1
},
30174 {"bdver2", M_AMDFAM15H_BDVER2
},
30175 {"bdver3", M_AMDFAM15H_BDVER3
},
30178 static struct _isa_names_table
30180 const char *const name
;
30181 const enum processor_features feature
;
30183 const isa_names_table
[] =
30187 {"popcnt", F_POPCNT
},
30191 {"ssse3", F_SSSE3
},
30192 {"sse4.1", F_SSE4_1
},
30193 {"sse4.2", F_SSE4_2
},
30198 tree __processor_model_type
= build_processor_model_struct ();
30199 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
30203 varpool_add_new_variable (__cpu_model_var
);
30205 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
30207 param_string_cst
= *args
;
30208 while (param_string_cst
30209 && TREE_CODE (param_string_cst
) != STRING_CST
)
30211 /* *args must be a expr that can contain other EXPRS leading to a
30213 if (!EXPR_P (param_string_cst
))
30215 error ("Parameter to builtin must be a string constant or literal");
30216 return integer_zero_node
;
30218 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
30221 gcc_assert (param_string_cst
);
30223 if (fn_code
== IX86_BUILTIN_CPU_IS
)
30229 unsigned int field_val
= 0;
30230 unsigned int NUM_ARCH_NAMES
30231 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
30233 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
30234 if (strcmp (arch_names_table
[i
].name
,
30235 TREE_STRING_POINTER (param_string_cst
)) == 0)
30238 if (i
== NUM_ARCH_NAMES
)
30240 error ("Parameter to builtin not valid: %s",
30241 TREE_STRING_POINTER (param_string_cst
));
30242 return integer_zero_node
;
30245 field
= TYPE_FIELDS (__processor_model_type
);
30246 field_val
= arch_names_table
[i
].model
;
30248 /* CPU types are stored in the next field. */
30249 if (field_val
> M_CPU_TYPE_START
30250 && field_val
< M_CPU_SUBTYPE_START
)
30252 field
= DECL_CHAIN (field
);
30253 field_val
-= M_CPU_TYPE_START
;
30256 /* CPU subtypes are stored in the next field. */
30257 if (field_val
> M_CPU_SUBTYPE_START
)
30259 field
= DECL_CHAIN ( DECL_CHAIN (field
));
30260 field_val
-= M_CPU_SUBTYPE_START
;
30263 /* Get the appropriate field in __cpu_model. */
30264 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30267 /* Check the value. */
30268 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
30269 build_int_cstu (unsigned_type_node
, field_val
));
30270 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30272 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30279 unsigned int field_val
= 0;
30280 unsigned int NUM_ISA_NAMES
30281 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
30283 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
30284 if (strcmp (isa_names_table
[i
].name
,
30285 TREE_STRING_POINTER (param_string_cst
)) == 0)
30288 if (i
== NUM_ISA_NAMES
)
30290 error ("Parameter to builtin not valid: %s",
30291 TREE_STRING_POINTER (param_string_cst
));
30292 return integer_zero_node
;
30295 field
= TYPE_FIELDS (__processor_model_type
);
30296 /* Get the last field, which is __cpu_features. */
30297 while (DECL_CHAIN (field
))
30298 field
= DECL_CHAIN (field
);
30300 /* Get the appropriate field: __cpu_model.__cpu_features */
30301 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30304 /* Access the 0th element of __cpu_features array. */
30305 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
30306 integer_zero_node
, NULL_TREE
, NULL_TREE
);
30308 field_val
= (1 << isa_names_table
[i
].feature
);
30309 /* Return __cpu_model.__cpu_features[0] & field_val */
30310 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
30311 build_int_cstu (unsigned_type_node
, field_val
));
30312 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30314 gcc_unreachable ();
30318 ix86_fold_builtin (tree fndecl
, int n_args
,
30319 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
30321 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30323 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30324 DECL_FUNCTION_CODE (fndecl
);
30325 if (fn_code
== IX86_BUILTIN_CPU_IS
30326 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30328 gcc_assert (n_args
== 1);
30329 return fold_builtin_cpu (fndecl
, args
);
30333 #ifdef SUBTARGET_FOLD_BUILTIN
30334 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
30340 /* Make builtins to detect cpu type and features supported. NAME is
30341 the builtin name, CODE is the builtin code, and FTYPE is the function
30342 type of the builtin. */
30345 make_cpu_type_builtin (const char* name
, int code
,
30346 enum ix86_builtin_func_type ftype
, bool is_const
)
30351 type
= ix86_get_builtin_func_type (ftype
);
30352 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
30354 gcc_assert (decl
!= NULL_TREE
);
30355 ix86_builtins
[(int) code
] = decl
;
30356 TREE_READONLY (decl
) = is_const
;
30359 /* Make builtins to get CPU type and features supported. The created
30362 __builtin_cpu_init (), to detect cpu type and features,
30363 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
30364 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
30368 ix86_init_platform_type_builtins (void)
30370 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
30371 INT_FTYPE_VOID
, false);
30372 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
30373 INT_FTYPE_PCCHAR
, true);
30374 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
30375 INT_FTYPE_PCCHAR
, true);
30378 /* Internal method for ix86_init_builtins. */
30381 ix86_init_builtins_va_builtins_abi (void)
30383 tree ms_va_ref
, sysv_va_ref
;
30384 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
30385 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
30386 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
30387 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
30391 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
30392 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
30393 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
30395 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
30398 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30399 fnvoid_va_start_ms
=
30400 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30401 fnvoid_va_end_sysv
=
30402 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
30403 fnvoid_va_start_sysv
=
30404 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
30406 fnvoid_va_copy_ms
=
30407 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
30409 fnvoid_va_copy_sysv
=
30410 build_function_type_list (void_type_node
, sysv_va_ref
,
30411 sysv_va_ref
, NULL_TREE
);
30413 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
30414 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30415 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
30416 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30417 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
30418 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30419 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
30420 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30421 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
30422 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30423 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
30424 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30428 ix86_init_builtin_types (void)
30430 tree float128_type_node
, float80_type_node
;
30432 /* The __float80 type. */
30433 float80_type_node
= long_double_type_node
;
30434 if (TYPE_MODE (float80_type_node
) != XFmode
)
30436 /* The __float80 type. */
30437 float80_type_node
= make_node (REAL_TYPE
);
30439 TYPE_PRECISION (float80_type_node
) = 80;
30440 layout_type (float80_type_node
);
30442 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
30444 /* The __float128 type. */
30445 float128_type_node
= make_node (REAL_TYPE
);
30446 TYPE_PRECISION (float128_type_node
) = 128;
30447 layout_type (float128_type_node
);
30448 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
30450 /* This macro is built by i386-builtin-types.awk. */
30451 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
30455 ix86_init_builtins (void)
30459 ix86_init_builtin_types ();
30461 /* Builtins to get CPU type and features. */
30462 ix86_init_platform_type_builtins ();
30464 /* TFmode support builtins. */
30465 def_builtin_const (0, "__builtin_infq",
30466 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
30467 def_builtin_const (0, "__builtin_huge_valq",
30468 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
30470 /* We will expand them to normal call if SSE isn't available since
30471 they are used by libgcc. */
30472 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
30473 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
30474 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
30475 TREE_READONLY (t
) = 1;
30476 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
30478 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
30479 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
30480 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
30481 TREE_READONLY (t
) = 1;
30482 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
30484 ix86_init_tm_builtins ();
30485 ix86_init_mmx_sse_builtins ();
30488 ix86_init_builtins_va_builtins_abi ();
30490 #ifdef SUBTARGET_INIT_BUILTINS
30491 SUBTARGET_INIT_BUILTINS
;
30495 /* Return the ix86 builtin for CODE. */
30498 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
30500 if (code
>= IX86_BUILTIN_MAX
)
30501 return error_mark_node
;
30503 return ix86_builtins
[code
];
30506 /* Errors in the source file can cause expand_expr to return const0_rtx
30507 where we expect a vector. To avoid crashing, use one of the vector
30508 clear instructions. */
30510 safe_vector_operand (rtx x
, enum machine_mode mode
)
30512 if (x
== const0_rtx
)
30513 x
= CONST0_RTX (mode
);
30517 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30520 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30523 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30524 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30525 rtx op0
= expand_normal (arg0
);
30526 rtx op1
= expand_normal (arg1
);
30527 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30528 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30529 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30531 if (VECTOR_MODE_P (mode0
))
30532 op0
= safe_vector_operand (op0
, mode0
);
30533 if (VECTOR_MODE_P (mode1
))
30534 op1
= safe_vector_operand (op1
, mode1
);
30536 if (optimize
|| !target
30537 || GET_MODE (target
) != tmode
30538 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30539 target
= gen_reg_rtx (tmode
);
30541 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
30543 rtx x
= gen_reg_rtx (V4SImode
);
30544 emit_insn (gen_sse2_loadd (x
, op1
));
30545 op1
= gen_lowpart (TImode
, x
);
30548 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30549 op0
= copy_to_mode_reg (mode0
, op0
);
30550 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30551 op1
= copy_to_mode_reg (mode1
, op1
);
30553 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30562 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30565 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30566 enum ix86_builtin_func_type m_type
,
30567 enum rtx_code sub_code
)
30572 bool comparison_p
= false;
30574 bool last_arg_constant
= false;
30575 int num_memory
= 0;
30578 enum machine_mode mode
;
30581 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30585 case MULTI_ARG_4_DF2_DI_I
:
30586 case MULTI_ARG_4_DF2_DI_I1
:
30587 case MULTI_ARG_4_SF2_SI_I
:
30588 case MULTI_ARG_4_SF2_SI_I1
:
30590 last_arg_constant
= true;
30593 case MULTI_ARG_3_SF
:
30594 case MULTI_ARG_3_DF
:
30595 case MULTI_ARG_3_SF2
:
30596 case MULTI_ARG_3_DF2
:
30597 case MULTI_ARG_3_DI
:
30598 case MULTI_ARG_3_SI
:
30599 case MULTI_ARG_3_SI_DI
:
30600 case MULTI_ARG_3_HI
:
30601 case MULTI_ARG_3_HI_SI
:
30602 case MULTI_ARG_3_QI
:
30603 case MULTI_ARG_3_DI2
:
30604 case MULTI_ARG_3_SI2
:
30605 case MULTI_ARG_3_HI2
:
30606 case MULTI_ARG_3_QI2
:
30610 case MULTI_ARG_2_SF
:
30611 case MULTI_ARG_2_DF
:
30612 case MULTI_ARG_2_DI
:
30613 case MULTI_ARG_2_SI
:
30614 case MULTI_ARG_2_HI
:
30615 case MULTI_ARG_2_QI
:
30619 case MULTI_ARG_2_DI_IMM
:
30620 case MULTI_ARG_2_SI_IMM
:
30621 case MULTI_ARG_2_HI_IMM
:
30622 case MULTI_ARG_2_QI_IMM
:
30624 last_arg_constant
= true;
30627 case MULTI_ARG_1_SF
:
30628 case MULTI_ARG_1_DF
:
30629 case MULTI_ARG_1_SF2
:
30630 case MULTI_ARG_1_DF2
:
30631 case MULTI_ARG_1_DI
:
30632 case MULTI_ARG_1_SI
:
30633 case MULTI_ARG_1_HI
:
30634 case MULTI_ARG_1_QI
:
30635 case MULTI_ARG_1_SI_DI
:
30636 case MULTI_ARG_1_HI_DI
:
30637 case MULTI_ARG_1_HI_SI
:
30638 case MULTI_ARG_1_QI_DI
:
30639 case MULTI_ARG_1_QI_SI
:
30640 case MULTI_ARG_1_QI_HI
:
30644 case MULTI_ARG_2_DI_CMP
:
30645 case MULTI_ARG_2_SI_CMP
:
30646 case MULTI_ARG_2_HI_CMP
:
30647 case MULTI_ARG_2_QI_CMP
:
30649 comparison_p
= true;
30652 case MULTI_ARG_2_SF_TF
:
30653 case MULTI_ARG_2_DF_TF
:
30654 case MULTI_ARG_2_DI_TF
:
30655 case MULTI_ARG_2_SI_TF
:
30656 case MULTI_ARG_2_HI_TF
:
30657 case MULTI_ARG_2_QI_TF
:
30663 gcc_unreachable ();
30666 if (optimize
|| !target
30667 || GET_MODE (target
) != tmode
30668 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30669 target
= gen_reg_rtx (tmode
);
30671 gcc_assert (nargs
<= 4);
30673 for (i
= 0; i
< nargs
; i
++)
30675 tree arg
= CALL_EXPR_ARG (exp
, i
);
30676 rtx op
= expand_normal (arg
);
30677 int adjust
= (comparison_p
) ? 1 : 0;
30678 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30680 if (last_arg_constant
&& i
== nargs
- 1)
30682 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30684 enum insn_code new_icode
= icode
;
30687 case CODE_FOR_xop_vpermil2v2df3
:
30688 case CODE_FOR_xop_vpermil2v4sf3
:
30689 case CODE_FOR_xop_vpermil2v4df3
:
30690 case CODE_FOR_xop_vpermil2v8sf3
:
30691 error ("the last argument must be a 2-bit immediate");
30692 return gen_reg_rtx (tmode
);
30693 case CODE_FOR_xop_rotlv2di3
:
30694 new_icode
= CODE_FOR_rotlv2di3
;
30696 case CODE_FOR_xop_rotlv4si3
:
30697 new_icode
= CODE_FOR_rotlv4si3
;
30699 case CODE_FOR_xop_rotlv8hi3
:
30700 new_icode
= CODE_FOR_rotlv8hi3
;
30702 case CODE_FOR_xop_rotlv16qi3
:
30703 new_icode
= CODE_FOR_rotlv16qi3
;
30705 if (CONST_INT_P (op
))
30707 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30708 op
= GEN_INT (INTVAL (op
) & mask
);
30709 gcc_checking_assert
30710 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30714 gcc_checking_assert
30716 && insn_data
[new_icode
].operand
[0].mode
== tmode
30717 && insn_data
[new_icode
].operand
[1].mode
== tmode
30718 && insn_data
[new_icode
].operand
[2].mode
== mode
30719 && insn_data
[new_icode
].operand
[0].predicate
30720 == insn_data
[icode
].operand
[0].predicate
30721 && insn_data
[new_icode
].operand
[1].predicate
30722 == insn_data
[icode
].operand
[1].predicate
);
30728 gcc_unreachable ();
30735 if (VECTOR_MODE_P (mode
))
30736 op
= safe_vector_operand (op
, mode
);
30738 /* If we aren't optimizing, only allow one memory operand to be
30740 if (memory_operand (op
, mode
))
30743 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30746 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30748 op
= force_reg (mode
, op
);
30752 args
[i
].mode
= mode
;
30758 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30763 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30764 GEN_INT ((int)sub_code
));
30765 else if (! comparison_p
)
30766 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30769 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30773 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30778 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30782 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30786 gcc_unreachable ();
30796 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30797 insns with vec_merge. */
30800 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30804 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30805 rtx op1
, op0
= expand_normal (arg0
);
30806 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30807 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30809 if (optimize
|| !target
30810 || GET_MODE (target
) != tmode
30811 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30812 target
= gen_reg_rtx (tmode
);
30814 if (VECTOR_MODE_P (mode0
))
30815 op0
= safe_vector_operand (op0
, mode0
);
30817 if ((optimize
&& !register_operand (op0
, mode0
))
30818 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30819 op0
= copy_to_mode_reg (mode0
, op0
);
30822 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30823 op1
= copy_to_mode_reg (mode0
, op1
);
30825 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30832 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30835 ix86_expand_sse_compare (const struct builtin_description
*d
,
30836 tree exp
, rtx target
, bool swap
)
30839 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30840 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30841 rtx op0
= expand_normal (arg0
);
30842 rtx op1
= expand_normal (arg1
);
30844 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30845 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30846 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30847 enum rtx_code comparison
= d
->comparison
;
30849 if (VECTOR_MODE_P (mode0
))
30850 op0
= safe_vector_operand (op0
, mode0
);
30851 if (VECTOR_MODE_P (mode1
))
30852 op1
= safe_vector_operand (op1
, mode1
);
30854 /* Swap operands if we have a comparison that isn't available in
30858 rtx tmp
= gen_reg_rtx (mode1
);
30859 emit_move_insn (tmp
, op1
);
30864 if (optimize
|| !target
30865 || GET_MODE (target
) != tmode
30866 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30867 target
= gen_reg_rtx (tmode
);
30869 if ((optimize
&& !register_operand (op0
, mode0
))
30870 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30871 op0
= copy_to_mode_reg (mode0
, op0
);
30872 if ((optimize
&& !register_operand (op1
, mode1
))
30873 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30874 op1
= copy_to_mode_reg (mode1
, op1
);
30876 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30877 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30884 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30887 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30891 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30892 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30893 rtx op0
= expand_normal (arg0
);
30894 rtx op1
= expand_normal (arg1
);
30895 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30896 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30897 enum rtx_code comparison
= d
->comparison
;
30899 if (VECTOR_MODE_P (mode0
))
30900 op0
= safe_vector_operand (op0
, mode0
);
30901 if (VECTOR_MODE_P (mode1
))
30902 op1
= safe_vector_operand (op1
, mode1
);
30904 /* Swap operands if we have a comparison that isn't available in
30906 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30913 target
= gen_reg_rtx (SImode
);
30914 emit_move_insn (target
, const0_rtx
);
30915 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30917 if ((optimize
&& !register_operand (op0
, mode0
))
30918 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30919 op0
= copy_to_mode_reg (mode0
, op0
);
30920 if ((optimize
&& !register_operand (op1
, mode1
))
30921 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30922 op1
= copy_to_mode_reg (mode1
, op1
);
30924 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30928 emit_insn (gen_rtx_SET (VOIDmode
,
30929 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30930 gen_rtx_fmt_ee (comparison
, QImode
,
30934 return SUBREG_REG (target
);
30937 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30940 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30944 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30945 rtx op1
, op0
= expand_normal (arg0
);
30946 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30947 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30949 if (optimize
|| target
== 0
30950 || GET_MODE (target
) != tmode
30951 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30952 target
= gen_reg_rtx (tmode
);
30954 if (VECTOR_MODE_P (mode0
))
30955 op0
= safe_vector_operand (op0
, mode0
);
30957 if ((optimize
&& !register_operand (op0
, mode0
))
30958 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30959 op0
= copy_to_mode_reg (mode0
, op0
);
30961 op1
= GEN_INT (d
->comparison
);
30963 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30971 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30972 tree exp
, rtx target
)
30975 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30976 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30977 rtx op0
= expand_normal (arg0
);
30978 rtx op1
= expand_normal (arg1
);
30980 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30981 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30982 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30984 if (optimize
|| target
== 0
30985 || GET_MODE (target
) != tmode
30986 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30987 target
= gen_reg_rtx (tmode
);
30989 op0
= safe_vector_operand (op0
, mode0
);
30990 op1
= safe_vector_operand (op1
, mode1
);
30992 if ((optimize
&& !register_operand (op0
, mode0
))
30993 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30994 op0
= copy_to_mode_reg (mode0
, op0
);
30995 if ((optimize
&& !register_operand (op1
, mode1
))
30996 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30997 op1
= copy_to_mode_reg (mode1
, op1
);
30999 op2
= GEN_INT (d
->comparison
);
31001 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31008 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31011 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31015 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31016 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31017 rtx op0
= expand_normal (arg0
);
31018 rtx op1
= expand_normal (arg1
);
31019 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31020 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31021 enum rtx_code comparison
= d
->comparison
;
31023 if (VECTOR_MODE_P (mode0
))
31024 op0
= safe_vector_operand (op0
, mode0
);
31025 if (VECTOR_MODE_P (mode1
))
31026 op1
= safe_vector_operand (op1
, mode1
);
31028 target
= gen_reg_rtx (SImode
);
31029 emit_move_insn (target
, const0_rtx
);
31030 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31032 if ((optimize
&& !register_operand (op0
, mode0
))
31033 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31034 op0
= copy_to_mode_reg (mode0
, op0
);
31035 if ((optimize
&& !register_operand (op1
, mode1
))
31036 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31037 op1
= copy_to_mode_reg (mode1
, op1
);
31039 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31043 emit_insn (gen_rtx_SET (VOIDmode
,
31044 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31045 gen_rtx_fmt_ee (comparison
, QImode
,
31049 return SUBREG_REG (target
);
31052 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31055 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31056 tree exp
, rtx target
)
31059 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31060 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31061 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31062 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31063 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31064 rtx scratch0
, scratch1
;
31065 rtx op0
= expand_normal (arg0
);
31066 rtx op1
= expand_normal (arg1
);
31067 rtx op2
= expand_normal (arg2
);
31068 rtx op3
= expand_normal (arg3
);
31069 rtx op4
= expand_normal (arg4
);
31070 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31072 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31073 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31074 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31075 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31076 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31077 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31078 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31080 if (VECTOR_MODE_P (modev2
))
31081 op0
= safe_vector_operand (op0
, modev2
);
31082 if (VECTOR_MODE_P (modev4
))
31083 op2
= safe_vector_operand (op2
, modev4
);
31085 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31086 op0
= copy_to_mode_reg (modev2
, op0
);
31087 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31088 op1
= copy_to_mode_reg (modei3
, op1
);
31089 if ((optimize
&& !register_operand (op2
, modev4
))
31090 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31091 op2
= copy_to_mode_reg (modev4
, op2
);
31092 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31093 op3
= copy_to_mode_reg (modei5
, op3
);
31095 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31097 error ("the fifth argument must be an 8-bit immediate");
31101 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31103 if (optimize
|| !target
31104 || GET_MODE (target
) != tmode0
31105 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31106 target
= gen_reg_rtx (tmode0
);
31108 scratch1
= gen_reg_rtx (tmode1
);
31110 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31112 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31114 if (optimize
|| !target
31115 || GET_MODE (target
) != tmode1
31116 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31117 target
= gen_reg_rtx (tmode1
);
31119 scratch0
= gen_reg_rtx (tmode0
);
31121 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31125 gcc_assert (d
->flag
);
31127 scratch0
= gen_reg_rtx (tmode0
);
31128 scratch1
= gen_reg_rtx (tmode1
);
31130 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31140 target
= gen_reg_rtx (SImode
);
31141 emit_move_insn (target
, const0_rtx
);
31142 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31145 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31146 gen_rtx_fmt_ee (EQ
, QImode
,
31147 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31150 return SUBREG_REG (target
);
31157 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31160 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31161 tree exp
, rtx target
)
31164 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31165 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31166 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31167 rtx scratch0
, scratch1
;
31168 rtx op0
= expand_normal (arg0
);
31169 rtx op1
= expand_normal (arg1
);
31170 rtx op2
= expand_normal (arg2
);
31171 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31173 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31174 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31175 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31176 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31177 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31179 if (VECTOR_MODE_P (modev2
))
31180 op0
= safe_vector_operand (op0
, modev2
);
31181 if (VECTOR_MODE_P (modev3
))
31182 op1
= safe_vector_operand (op1
, modev3
);
31184 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31185 op0
= copy_to_mode_reg (modev2
, op0
);
31186 if ((optimize
&& !register_operand (op1
, modev3
))
31187 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31188 op1
= copy_to_mode_reg (modev3
, op1
);
31190 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
31192 error ("the third argument must be an 8-bit immediate");
31196 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
31198 if (optimize
|| !target
31199 || GET_MODE (target
) != tmode0
31200 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31201 target
= gen_reg_rtx (tmode0
);
31203 scratch1
= gen_reg_rtx (tmode1
);
31205 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
31207 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
31209 if (optimize
|| !target
31210 || GET_MODE (target
) != tmode1
31211 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31212 target
= gen_reg_rtx (tmode1
);
31214 scratch0
= gen_reg_rtx (tmode0
);
31216 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
31220 gcc_assert (d
->flag
);
31222 scratch0
= gen_reg_rtx (tmode0
);
31223 scratch1
= gen_reg_rtx (tmode1
);
31225 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
31235 target
= gen_reg_rtx (SImode
);
31236 emit_move_insn (target
, const0_rtx
);
31237 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31240 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31241 gen_rtx_fmt_ee (EQ
, QImode
,
31242 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31245 return SUBREG_REG (target
);
31251 /* Subroutine of ix86_expand_builtin to take care of insns with
31252 variable number of operands. */
31255 ix86_expand_args_builtin (const struct builtin_description
*d
,
31256 tree exp
, rtx target
)
31258 rtx pat
, real_target
;
31259 unsigned int i
, nargs
;
31260 unsigned int nargs_constant
= 0;
31261 int num_memory
= 0;
31265 enum machine_mode mode
;
31267 bool last_arg_count
= false;
31268 enum insn_code icode
= d
->icode
;
31269 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31270 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31271 enum machine_mode rmode
= VOIDmode
;
31273 enum rtx_code comparison
= d
->comparison
;
31275 switch ((enum ix86_builtin_func_type
) d
->flag
)
31277 case V2DF_FTYPE_V2DF_ROUND
:
31278 case V4DF_FTYPE_V4DF_ROUND
:
31279 case V4SF_FTYPE_V4SF_ROUND
:
31280 case V8SF_FTYPE_V8SF_ROUND
:
31281 case V4SI_FTYPE_V4SF_ROUND
:
31282 case V8SI_FTYPE_V8SF_ROUND
:
31283 return ix86_expand_sse_round (d
, exp
, target
);
31284 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31285 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31286 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31287 case INT_FTYPE_V8SF_V8SF_PTEST
:
31288 case INT_FTYPE_V4DI_V4DI_PTEST
:
31289 case INT_FTYPE_V4DF_V4DF_PTEST
:
31290 case INT_FTYPE_V4SF_V4SF_PTEST
:
31291 case INT_FTYPE_V2DI_V2DI_PTEST
:
31292 case INT_FTYPE_V2DF_V2DF_PTEST
:
31293 return ix86_expand_sse_ptest (d
, exp
, target
);
31294 case FLOAT128_FTYPE_FLOAT128
:
31295 case FLOAT_FTYPE_FLOAT
:
31296 case INT_FTYPE_INT
:
31297 case UINT64_FTYPE_INT
:
31298 case UINT16_FTYPE_UINT16
:
31299 case INT64_FTYPE_INT64
:
31300 case INT64_FTYPE_V4SF
:
31301 case INT64_FTYPE_V2DF
:
31302 case INT_FTYPE_V16QI
:
31303 case INT_FTYPE_V8QI
:
31304 case INT_FTYPE_V8SF
:
31305 case INT_FTYPE_V4DF
:
31306 case INT_FTYPE_V4SF
:
31307 case INT_FTYPE_V2DF
:
31308 case INT_FTYPE_V32QI
:
31309 case V16QI_FTYPE_V16QI
:
31310 case V8SI_FTYPE_V8SF
:
31311 case V8SI_FTYPE_V4SI
:
31312 case V8HI_FTYPE_V8HI
:
31313 case V8HI_FTYPE_V16QI
:
31314 case V8QI_FTYPE_V8QI
:
31315 case V8SF_FTYPE_V8SF
:
31316 case V8SF_FTYPE_V8SI
:
31317 case V8SF_FTYPE_V4SF
:
31318 case V8SF_FTYPE_V8HI
:
31319 case V4SI_FTYPE_V4SI
:
31320 case V4SI_FTYPE_V16QI
:
31321 case V4SI_FTYPE_V4SF
:
31322 case V4SI_FTYPE_V8SI
:
31323 case V4SI_FTYPE_V8HI
:
31324 case V4SI_FTYPE_V4DF
:
31325 case V4SI_FTYPE_V2DF
:
31326 case V4HI_FTYPE_V4HI
:
31327 case V4DF_FTYPE_V4DF
:
31328 case V4DF_FTYPE_V4SI
:
31329 case V4DF_FTYPE_V4SF
:
31330 case V4DF_FTYPE_V2DF
:
31331 case V4SF_FTYPE_V4SF
:
31332 case V4SF_FTYPE_V4SI
:
31333 case V4SF_FTYPE_V8SF
:
31334 case V4SF_FTYPE_V4DF
:
31335 case V4SF_FTYPE_V8HI
:
31336 case V4SF_FTYPE_V2DF
:
31337 case V2DI_FTYPE_V2DI
:
31338 case V2DI_FTYPE_V16QI
:
31339 case V2DI_FTYPE_V8HI
:
31340 case V2DI_FTYPE_V4SI
:
31341 case V2DF_FTYPE_V2DF
:
31342 case V2DF_FTYPE_V4SI
:
31343 case V2DF_FTYPE_V4DF
:
31344 case V2DF_FTYPE_V4SF
:
31345 case V2DF_FTYPE_V2SI
:
31346 case V2SI_FTYPE_V2SI
:
31347 case V2SI_FTYPE_V4SF
:
31348 case V2SI_FTYPE_V2SF
:
31349 case V2SI_FTYPE_V2DF
:
31350 case V2SF_FTYPE_V2SF
:
31351 case V2SF_FTYPE_V2SI
:
31352 case V32QI_FTYPE_V32QI
:
31353 case V32QI_FTYPE_V16QI
:
31354 case V16HI_FTYPE_V16HI
:
31355 case V16HI_FTYPE_V8HI
:
31356 case V8SI_FTYPE_V8SI
:
31357 case V16HI_FTYPE_V16QI
:
31358 case V8SI_FTYPE_V16QI
:
31359 case V4DI_FTYPE_V16QI
:
31360 case V8SI_FTYPE_V8HI
:
31361 case V4DI_FTYPE_V8HI
:
31362 case V4DI_FTYPE_V4SI
:
31363 case V4DI_FTYPE_V2DI
:
31366 case V4SF_FTYPE_V4SF_VEC_MERGE
:
31367 case V2DF_FTYPE_V2DF_VEC_MERGE
:
31368 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
31369 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
31370 case V16QI_FTYPE_V16QI_V16QI
:
31371 case V16QI_FTYPE_V8HI_V8HI
:
31372 case V8QI_FTYPE_V8QI_V8QI
:
31373 case V8QI_FTYPE_V4HI_V4HI
:
31374 case V8HI_FTYPE_V8HI_V8HI
:
31375 case V8HI_FTYPE_V16QI_V16QI
:
31376 case V8HI_FTYPE_V4SI_V4SI
:
31377 case V8SF_FTYPE_V8SF_V8SF
:
31378 case V8SF_FTYPE_V8SF_V8SI
:
31379 case V4SI_FTYPE_V4SI_V4SI
:
31380 case V4SI_FTYPE_V8HI_V8HI
:
31381 case V4SI_FTYPE_V4SF_V4SF
:
31382 case V4SI_FTYPE_V2DF_V2DF
:
31383 case V4HI_FTYPE_V4HI_V4HI
:
31384 case V4HI_FTYPE_V8QI_V8QI
:
31385 case V4HI_FTYPE_V2SI_V2SI
:
31386 case V4DF_FTYPE_V4DF_V4DF
:
31387 case V4DF_FTYPE_V4DF_V4DI
:
31388 case V4SF_FTYPE_V4SF_V4SF
:
31389 case V4SF_FTYPE_V4SF_V4SI
:
31390 case V4SF_FTYPE_V4SF_V2SI
:
31391 case V4SF_FTYPE_V4SF_V2DF
:
31392 case V4SF_FTYPE_V4SF_DI
:
31393 case V4SF_FTYPE_V4SF_SI
:
31394 case V2DI_FTYPE_V2DI_V2DI
:
31395 case V2DI_FTYPE_V16QI_V16QI
:
31396 case V2DI_FTYPE_V4SI_V4SI
:
31397 case V2UDI_FTYPE_V4USI_V4USI
:
31398 case V2DI_FTYPE_V2DI_V16QI
:
31399 case V2DI_FTYPE_V2DF_V2DF
:
31400 case V2SI_FTYPE_V2SI_V2SI
:
31401 case V2SI_FTYPE_V4HI_V4HI
:
31402 case V2SI_FTYPE_V2SF_V2SF
:
31403 case V2DF_FTYPE_V2DF_V2DF
:
31404 case V2DF_FTYPE_V2DF_V4SF
:
31405 case V2DF_FTYPE_V2DF_V2DI
:
31406 case V2DF_FTYPE_V2DF_DI
:
31407 case V2DF_FTYPE_V2DF_SI
:
31408 case V2SF_FTYPE_V2SF_V2SF
:
31409 case V1DI_FTYPE_V1DI_V1DI
:
31410 case V1DI_FTYPE_V8QI_V8QI
:
31411 case V1DI_FTYPE_V2SI_V2SI
:
31412 case V32QI_FTYPE_V16HI_V16HI
:
31413 case V16HI_FTYPE_V8SI_V8SI
:
31414 case V32QI_FTYPE_V32QI_V32QI
:
31415 case V16HI_FTYPE_V32QI_V32QI
:
31416 case V16HI_FTYPE_V16HI_V16HI
:
31417 case V8SI_FTYPE_V4DF_V4DF
:
31418 case V8SI_FTYPE_V8SI_V8SI
:
31419 case V8SI_FTYPE_V16HI_V16HI
:
31420 case V4DI_FTYPE_V4DI_V4DI
:
31421 case V4DI_FTYPE_V8SI_V8SI
:
31422 case V4UDI_FTYPE_V8USI_V8USI
:
31423 if (comparison
== UNKNOWN
)
31424 return ix86_expand_binop_builtin (icode
, exp
, target
);
31427 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
31428 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
31429 gcc_assert (comparison
!= UNKNOWN
);
31433 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
31434 case V16HI_FTYPE_V16HI_SI_COUNT
:
31435 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
31436 case V8SI_FTYPE_V8SI_SI_COUNT
:
31437 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
31438 case V4DI_FTYPE_V4DI_INT_COUNT
:
31439 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
31440 case V8HI_FTYPE_V8HI_SI_COUNT
:
31441 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
31442 case V4SI_FTYPE_V4SI_SI_COUNT
:
31443 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
31444 case V4HI_FTYPE_V4HI_SI_COUNT
:
31445 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
31446 case V2DI_FTYPE_V2DI_SI_COUNT
:
31447 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
31448 case V2SI_FTYPE_V2SI_SI_COUNT
:
31449 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
31450 case V1DI_FTYPE_V1DI_SI_COUNT
:
31452 last_arg_count
= true;
31454 case UINT64_FTYPE_UINT64_UINT64
:
31455 case UINT_FTYPE_UINT_UINT
:
31456 case UINT_FTYPE_UINT_USHORT
:
31457 case UINT_FTYPE_UINT_UCHAR
:
31458 case UINT16_FTYPE_UINT16_INT
:
31459 case UINT8_FTYPE_UINT8_INT
:
31462 case V2DI_FTYPE_V2DI_INT_CONVERT
:
31465 nargs_constant
= 1;
31467 case V4DI_FTYPE_V4DI_INT_CONVERT
:
31470 nargs_constant
= 1;
31472 case V8HI_FTYPE_V8HI_INT
:
31473 case V8HI_FTYPE_V8SF_INT
:
31474 case V8HI_FTYPE_V4SF_INT
:
31475 case V8SF_FTYPE_V8SF_INT
:
31476 case V4SI_FTYPE_V4SI_INT
:
31477 case V4SI_FTYPE_V8SI_INT
:
31478 case V4HI_FTYPE_V4HI_INT
:
31479 case V4DF_FTYPE_V4DF_INT
:
31480 case V4SF_FTYPE_V4SF_INT
:
31481 case V4SF_FTYPE_V8SF_INT
:
31482 case V2DI_FTYPE_V2DI_INT
:
31483 case V2DF_FTYPE_V2DF_INT
:
31484 case V2DF_FTYPE_V4DF_INT
:
31485 case V16HI_FTYPE_V16HI_INT
:
31486 case V8SI_FTYPE_V8SI_INT
:
31487 case V4DI_FTYPE_V4DI_INT
:
31488 case V2DI_FTYPE_V4DI_INT
:
31490 nargs_constant
= 1;
31492 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
31493 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
31494 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
31495 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
31496 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
31497 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
31500 case V32QI_FTYPE_V32QI_V32QI_INT
:
31501 case V16HI_FTYPE_V16HI_V16HI_INT
:
31502 case V16QI_FTYPE_V16QI_V16QI_INT
:
31503 case V4DI_FTYPE_V4DI_V4DI_INT
:
31504 case V8HI_FTYPE_V8HI_V8HI_INT
:
31505 case V8SI_FTYPE_V8SI_V8SI_INT
:
31506 case V8SI_FTYPE_V8SI_V4SI_INT
:
31507 case V8SF_FTYPE_V8SF_V8SF_INT
:
31508 case V8SF_FTYPE_V8SF_V4SF_INT
:
31509 case V4SI_FTYPE_V4SI_V4SI_INT
:
31510 case V4DF_FTYPE_V4DF_V4DF_INT
:
31511 case V4DF_FTYPE_V4DF_V2DF_INT
:
31512 case V4SF_FTYPE_V4SF_V4SF_INT
:
31513 case V2DI_FTYPE_V2DI_V2DI_INT
:
31514 case V4DI_FTYPE_V4DI_V2DI_INT
:
31515 case V2DF_FTYPE_V2DF_V2DF_INT
:
31517 nargs_constant
= 1;
31519 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31522 nargs_constant
= 1;
31524 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31527 nargs_constant
= 1;
31529 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31532 nargs_constant
= 1;
31534 case V2DI_FTYPE_V2DI_UINT_UINT
:
31536 nargs_constant
= 2;
31538 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31539 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31540 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31541 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31543 nargs_constant
= 1;
31545 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31547 nargs_constant
= 2;
31549 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31550 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31554 gcc_unreachable ();
31557 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31559 if (comparison
!= UNKNOWN
)
31561 gcc_assert (nargs
== 2);
31562 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31565 if (rmode
== VOIDmode
|| rmode
== tmode
)
31569 || GET_MODE (target
) != tmode
31570 || !insn_p
->operand
[0].predicate (target
, tmode
))
31571 target
= gen_reg_rtx (tmode
);
31572 real_target
= target
;
31576 target
= gen_reg_rtx (rmode
);
31577 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31580 for (i
= 0; i
< nargs
; i
++)
31582 tree arg
= CALL_EXPR_ARG (exp
, i
);
31583 rtx op
= expand_normal (arg
);
31584 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31585 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31587 if (last_arg_count
&& (i
+ 1) == nargs
)
31589 /* SIMD shift insns take either an 8-bit immediate or
31590 register as count. But builtin functions take int as
31591 count. If count doesn't match, we put it in register. */
31594 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31595 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31596 op
= copy_to_reg (op
);
31599 else if ((nargs
- i
) <= nargs_constant
)
31604 case CODE_FOR_avx2_inserti128
:
31605 case CODE_FOR_avx2_extracti128
:
31606 error ("the last argument must be an 1-bit immediate");
31609 case CODE_FOR_sse4_1_roundsd
:
31610 case CODE_FOR_sse4_1_roundss
:
31612 case CODE_FOR_sse4_1_roundpd
:
31613 case CODE_FOR_sse4_1_roundps
:
31614 case CODE_FOR_avx_roundpd256
:
31615 case CODE_FOR_avx_roundps256
:
31617 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31618 case CODE_FOR_sse4_1_roundps_sfix
:
31619 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31620 case CODE_FOR_avx_roundps_sfix256
:
31622 case CODE_FOR_sse4_1_blendps
:
31623 case CODE_FOR_avx_blendpd256
:
31624 case CODE_FOR_avx_vpermilv4df
:
31625 error ("the last argument must be a 4-bit immediate");
31628 case CODE_FOR_sse4_1_blendpd
:
31629 case CODE_FOR_avx_vpermilv2df
:
31630 case CODE_FOR_xop_vpermil2v2df3
:
31631 case CODE_FOR_xop_vpermil2v4sf3
:
31632 case CODE_FOR_xop_vpermil2v4df3
:
31633 case CODE_FOR_xop_vpermil2v8sf3
:
31634 error ("the last argument must be a 2-bit immediate");
31637 case CODE_FOR_avx_vextractf128v4df
:
31638 case CODE_FOR_avx_vextractf128v8sf
:
31639 case CODE_FOR_avx_vextractf128v8si
:
31640 case CODE_FOR_avx_vinsertf128v4df
:
31641 case CODE_FOR_avx_vinsertf128v8sf
:
31642 case CODE_FOR_avx_vinsertf128v8si
:
31643 error ("the last argument must be a 1-bit immediate");
31646 case CODE_FOR_avx_vmcmpv2df3
:
31647 case CODE_FOR_avx_vmcmpv4sf3
:
31648 case CODE_FOR_avx_cmpv2df3
:
31649 case CODE_FOR_avx_cmpv4sf3
:
31650 case CODE_FOR_avx_cmpv4df3
:
31651 case CODE_FOR_avx_cmpv8sf3
:
31652 error ("the last argument must be a 5-bit immediate");
31656 switch (nargs_constant
)
31659 if ((nargs
- i
) == nargs_constant
)
31661 error ("the next to last argument must be an 8-bit immediate");
31665 error ("the last argument must be an 8-bit immediate");
31668 gcc_unreachable ();
31675 if (VECTOR_MODE_P (mode
))
31676 op
= safe_vector_operand (op
, mode
);
31678 /* If we aren't optimizing, only allow one memory operand to
31680 if (memory_operand (op
, mode
))
31683 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31685 if (optimize
|| !match
|| num_memory
> 1)
31686 op
= copy_to_mode_reg (mode
, op
);
31690 op
= copy_to_reg (op
);
31691 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31696 args
[i
].mode
= mode
;
31702 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31705 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31708 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31712 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31713 args
[2].op
, args
[3].op
);
31716 gcc_unreachable ();
31726 /* Subroutine of ix86_expand_builtin to take care of special insns
31727 with variable number of operands. */
31730 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31731 tree exp
, rtx target
)
31735 unsigned int i
, nargs
, arg_adjust
, memory
;
31739 enum machine_mode mode
;
31741 enum insn_code icode
= d
->icode
;
31742 bool last_arg_constant
= false;
31743 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31744 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31745 enum { load
, store
} klass
;
31747 switch ((enum ix86_builtin_func_type
) d
->flag
)
31749 case VOID_FTYPE_VOID
:
31750 emit_insn (GEN_FCN (icode
) (target
));
31752 case VOID_FTYPE_UINT64
:
31753 case VOID_FTYPE_UNSIGNED
:
31759 case INT_FTYPE_VOID
:
31760 case UINT64_FTYPE_VOID
:
31761 case UNSIGNED_FTYPE_VOID
:
31766 case UINT64_FTYPE_PUNSIGNED
:
31767 case V2DI_FTYPE_PV2DI
:
31768 case V4DI_FTYPE_PV4DI
:
31769 case V32QI_FTYPE_PCCHAR
:
31770 case V16QI_FTYPE_PCCHAR
:
31771 case V8SF_FTYPE_PCV4SF
:
31772 case V8SF_FTYPE_PCFLOAT
:
31773 case V4SF_FTYPE_PCFLOAT
:
31774 case V4DF_FTYPE_PCV2DF
:
31775 case V4DF_FTYPE_PCDOUBLE
:
31776 case V2DF_FTYPE_PCDOUBLE
:
31777 case VOID_FTYPE_PVOID
:
31782 case VOID_FTYPE_PV2SF_V4SF
:
31783 case VOID_FTYPE_PV4DI_V4DI
:
31784 case VOID_FTYPE_PV2DI_V2DI
:
31785 case VOID_FTYPE_PCHAR_V32QI
:
31786 case VOID_FTYPE_PCHAR_V16QI
:
31787 case VOID_FTYPE_PFLOAT_V8SF
:
31788 case VOID_FTYPE_PFLOAT_V4SF
:
31789 case VOID_FTYPE_PDOUBLE_V4DF
:
31790 case VOID_FTYPE_PDOUBLE_V2DF
:
31791 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31792 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31793 case VOID_FTYPE_PINT_INT
:
31796 /* Reserve memory operand for target. */
31797 memory
= ARRAY_SIZE (args
);
31799 case V4SF_FTYPE_V4SF_PCV2SF
:
31800 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31805 case V8SF_FTYPE_PCV8SF_V8SI
:
31806 case V4DF_FTYPE_PCV4DF_V4DI
:
31807 case V4SF_FTYPE_PCV4SF_V4SI
:
31808 case V2DF_FTYPE_PCV2DF_V2DI
:
31809 case V8SI_FTYPE_PCV8SI_V8SI
:
31810 case V4DI_FTYPE_PCV4DI_V4DI
:
31811 case V4SI_FTYPE_PCV4SI_V4SI
:
31812 case V2DI_FTYPE_PCV2DI_V2DI
:
31817 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31818 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31819 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31820 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31821 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31822 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31823 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31824 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31827 /* Reserve memory operand for target. */
31828 memory
= ARRAY_SIZE (args
);
31830 case VOID_FTYPE_UINT_UINT_UINT
:
31831 case VOID_FTYPE_UINT64_UINT_UINT
:
31832 case UCHAR_FTYPE_UINT_UINT_UINT
:
31833 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31836 memory
= ARRAY_SIZE (args
);
31837 last_arg_constant
= true;
31840 gcc_unreachable ();
31843 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31845 if (klass
== store
)
31847 arg
= CALL_EXPR_ARG (exp
, 0);
31848 op
= expand_normal (arg
);
31849 gcc_assert (target
== 0);
31852 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31853 target
= gen_rtx_MEM (tmode
, op
);
31856 target
= force_reg (tmode
, op
);
31864 || !register_operand (target
, tmode
)
31865 || GET_MODE (target
) != tmode
)
31866 target
= gen_reg_rtx (tmode
);
31869 for (i
= 0; i
< nargs
; i
++)
31871 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31874 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31875 op
= expand_normal (arg
);
31876 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31878 if (last_arg_constant
&& (i
+ 1) == nargs
)
31882 if (icode
== CODE_FOR_lwp_lwpvalsi3
31883 || icode
== CODE_FOR_lwp_lwpinssi3
31884 || icode
== CODE_FOR_lwp_lwpvaldi3
31885 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31886 error ("the last argument must be a 32-bit immediate");
31888 error ("the last argument must be an 8-bit immediate");
31896 /* This must be the memory operand. */
31897 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31898 op
= gen_rtx_MEM (mode
, op
);
31899 gcc_assert (GET_MODE (op
) == mode
31900 || GET_MODE (op
) == VOIDmode
);
31904 /* This must be register. */
31905 if (VECTOR_MODE_P (mode
))
31906 op
= safe_vector_operand (op
, mode
);
31908 gcc_assert (GET_MODE (op
) == mode
31909 || GET_MODE (op
) == VOIDmode
);
31910 op
= copy_to_mode_reg (mode
, op
);
31915 args
[i
].mode
= mode
;
31921 pat
= GEN_FCN (icode
) (target
);
31924 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31927 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31930 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31933 gcc_unreachable ();
31939 return klass
== store
? 0 : target
;
31942 /* Return the integer constant in ARG. Constrain it to be in the range
31943 of the subparts of VEC_TYPE; issue an error if not. */
/* Validate ARG as a constant vector-lane selector for VEC_TYPE.
   NOTE(review): this chunk is a corrupted extraction — brace lines and the
   function's return statements were dropped by the extractor; only the
   surviving fragments are annotated below, byte-for-byte.  */
31946 get_element_number (tree vec_type
, tree arg
)
/* MAX is the highest valid element index: one less than the number of
   subparts (lanes) of VEC_TYPE.  */
31948 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
/* Accept ARG only if it is an unsigned host-representable integer
   constant (host_integerp/tree_low_cst, the pre-GCC-4.9 spellings) whose
   value does not exceed MAX.  The comma expression both extracts the
   value into ELT and range-checks it.  */
31950 if (!host_integerp (arg
, 1)
31951 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
/* Out-of-range or non-constant selector: user-facing diagnostic.
   %wi prints a HOST_WIDE_INT.  */
31953 error ("selector must be an integer constant in the range 0..%wi", max
);
31960 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31961 ix86_expand_vector_init. We DO have language-level syntax for this, in
31962 the form of (type){ init-list }. Except that since we can't place emms
31963 instructions from inside the compiler, we can't allow the use of MMX
31964 registers unless the user explicitly asks for it. So we do *not* define
31965 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31966 we have builtins invoked by mmintrin.h that gives us license to emit
31967 these sorts of instructions. */
/* Expand a __builtin_ia32_vec_init_* call: build a vector of mode
   TYPE_MODE (TYPE) from the call's scalar arguments and emit the init
   sequence into TARGET.
   NOTE(review): corrupted extraction — the braces and the trailing
   "return target;" were dropped; the visible fragments are kept
   byte-identical below.  */
31970 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
/* Vector mode of the result and the mode of one element.  */
31972 enum machine_mode tmode
= TYPE_MODE (type
);
31973 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
/* Number of lanes; the builtin takes exactly one argument per lane
   (asserted below).  */
31974 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
/* Vector of RTXs that will become the PARALLEL initializer.  */
31975 rtvec v
= rtvec_alloc (n_elt
);
31977 gcc_assert (VECTOR_MODE_P (tmode
));
31978 gcc_assert (call_expr_nargs (exp
) == n_elt
);
/* Expand each argument and narrow it to the element mode.  */
31980 for (i
= 0; i
< n_elt
; ++i
)
31982 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31983 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
/* Make sure the destination is a register of the vector mode; callers
   may pass a NULL or non-register TARGET.  */
31986 if (!target
|| !register_operand (target
, tmode
))
31987 target
= gen_reg_rtx (tmode
);
/* First operand "true" marks this as MMX-safe expansion (see the block
   comment preceding this function).  */
31989 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31993 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31994 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31995 had a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_ext_* call: extract element ARG1 (a
   compile-time constant, validated by get_element_number) from vector
   ARG0 into TARGET.
   NOTE(review): corrupted extraction — the local declarations for
   arg0/arg1/op0/elt, the braces and the trailing "return target;" were
   dropped; visible fragments kept byte-identical.  */
31998 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
/* TMODE: mode of the extracted element; MODE0: mode of the source
   vector (set below).  */
32000 enum machine_mode tmode
, mode0
;
/* Operand 0 is the vector, operand 1 the lane selector.  */
32005 arg0
= CALL_EXPR_ARG (exp
, 0);
32006 arg1
= CALL_EXPR_ARG (exp
, 1);
32008 op0
= expand_normal (arg0
);
/* Range-checks the selector and reports an error if invalid.  */
32009 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* Element mode = mode of the vector's element type; vector mode = mode
   of the vector type itself.  */
32011 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32012 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32013 gcc_assert (VECTOR_MODE_P (mode0
));
/* The extract pattern wants the source in a register.  */
32015 op0
= force_reg (mode0
, op0
);
/* Give the result its own register when optimizing or when TARGET is
   unsuitable.  */
32017 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32018 target
= gen_reg_rtx (tmode
);
/* "true" = MMX-safe expansion, as for the other vec_* builtins.  */
32020 ix86_expand_vector_extract (true, target
, op0
, elt
);
32025 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32026 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32027 a language-level syntax for referencing vector elements. */
/* Expand a __builtin_ia32_vec_set_* call: return a copy of vector ARG0
   with element ARG2 (constant selector) replaced by scalar ARG1.
   NOTE(review): corrupted extraction — braces, the declaration of ELT
   and the trailing "return target;" were dropped; visible fragments
   kept byte-identical.  */
32030 ix86_expand_vec_set_builtin (tree exp
)
/* TMODE: vector mode; MODE1: mode of the inserted element.  */
32032 enum machine_mode tmode
, mode1
;
32033 tree arg0
, arg1
, arg2
;
32035 rtx op0
, op1
, target
;
/* Operand 0: source vector; operand 1: new element value;
   operand 2: lane selector.  */
32037 arg0
= CALL_EXPR_ARG (exp
, 0);
32038 arg1
= CALL_EXPR_ARG (exp
, 1);
32039 arg2
= CALL_EXPR_ARG (exp
, 2);
32041 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32042 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32043 gcc_assert (VECTOR_MODE_P (tmode
));
32045 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32046 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
/* Range-checks the selector and reports an error if invalid.  */
32047 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Convert the scalar to the element mode if expansion produced a
   different (non-VOID) mode.  */
32049 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32050 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
/* The set pattern wants both operands in registers.  */
32052 op0
= force_reg (tmode
, op0
);
32053 op1
= force_reg (mode1
, op1
);
32055 /* OP0 is the source of these builtin functions and shouldn't be
32056 modified. Create a copy, use it and return it as target. */
32057 target
= gen_reg_rtx (tmode
);
32058 emit_move_insn (target
, op0
);
/* "true" = MMX-safe expansion, as for the other vec_* builtins.  */
32059 ix86_expand_vector_set (true, target
, op1
, elt
);
32064 /* Expand an expression EXP that calls a built-in function,
32065 with result going to TARGET if that's convenient
32066 (and in mode MODE if that's convenient).
32067 SUBTARGET may be used as the target for computing one of EXP's operands.
32068 IGNORE is nonzero if the value is to be ignored. */
32071 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget
,
32072 enum machine_mode mode
, int ignore
)
32074 const struct builtin_description
*d
;
32076 enum insn_code icode
;
32077 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32078 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32079 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32080 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32081 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32083 /* For CPU builtins that can be folded, fold first and expand the fold. */
32086 case IX86_BUILTIN_CPU_INIT
:
32088 /* Make it call __cpu_indicator_init in libgcc. */
32089 tree call_expr
, fndecl
, type
;
32090 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32091 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32092 call_expr
= build_call_expr (fndecl
, 0);
32093 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32095 case IX86_BUILTIN_CPU_IS
:
32096 case IX86_BUILTIN_CPU_SUPPORTS
:
32098 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32099 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32100 gcc_assert (fold_expr
!= NULL_TREE
);
32101 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32105 /* Determine whether the builtin function is available under the current ISA.
32106 Originally the builtin was not created if it wasn't applicable to the
32107 current ISA based on the command line switches. With function specific
32108 options, we need to check in the context of the function making the call
32109 whether it is supported. */
32110 if (ix86_builtins_isa
[fcode
].isa
32111 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32113 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32114 NULL
, (enum fpmath_unit
) 0, false);
32117 error ("%qE needs unknown isa option", fndecl
);
32120 gcc_assert (opts
!= NULL
);
32121 error ("%qE needs isa option %s", fndecl
, opts
);
32129 case IX86_BUILTIN_MASKMOVQ
:
32130 case IX86_BUILTIN_MASKMOVDQU
:
32131 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32132 ? CODE_FOR_mmx_maskmovq
32133 : CODE_FOR_sse2_maskmovdqu
);
32134 /* Note the arg order is different from the operand order. */
32135 arg1
= CALL_EXPR_ARG (exp
, 0);
32136 arg2
= CALL_EXPR_ARG (exp
, 1);
32137 arg0
= CALL_EXPR_ARG (exp
, 2);
32138 op0
= expand_normal (arg0
);
32139 op1
= expand_normal (arg1
);
32140 op2
= expand_normal (arg2
);
32141 mode0
= insn_data
[icode
].operand
[0].mode
;
32142 mode1
= insn_data
[icode
].operand
[1].mode
;
32143 mode2
= insn_data
[icode
].operand
[2].mode
;
32145 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32146 op0
= gen_rtx_MEM (mode1
, op0
);
32148 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32149 op0
= copy_to_mode_reg (mode0
, op0
);
32150 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32151 op1
= copy_to_mode_reg (mode1
, op1
);
32152 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32153 op2
= copy_to_mode_reg (mode2
, op2
);
32154 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32160 case IX86_BUILTIN_LDMXCSR
:
32161 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
32162 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32163 emit_move_insn (target
, op0
);
32164 emit_insn (gen_sse_ldmxcsr (target
));
32167 case IX86_BUILTIN_STMXCSR
:
32168 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32169 emit_insn (gen_sse_stmxcsr (target
));
32170 return copy_to_mode_reg (SImode
, target
);
32172 case IX86_BUILTIN_CLFLUSH
:
32173 arg0
= CALL_EXPR_ARG (exp
, 0);
32174 op0
= expand_normal (arg0
);
32175 icode
= CODE_FOR_sse2_clflush
;
32176 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32177 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32179 emit_insn (gen_sse2_clflush (op0
));
32182 case IX86_BUILTIN_MONITOR
:
32183 arg0
= CALL_EXPR_ARG (exp
, 0);
32184 arg1
= CALL_EXPR_ARG (exp
, 1);
32185 arg2
= CALL_EXPR_ARG (exp
, 2);
32186 op0
= expand_normal (arg0
);
32187 op1
= expand_normal (arg1
);
32188 op2
= expand_normal (arg2
);
32190 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32192 op1
= copy_to_mode_reg (SImode
, op1
);
32194 op2
= copy_to_mode_reg (SImode
, op2
);
32195 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
32198 case IX86_BUILTIN_MWAIT
:
32199 arg0
= CALL_EXPR_ARG (exp
, 0);
32200 arg1
= CALL_EXPR_ARG (exp
, 1);
32201 op0
= expand_normal (arg0
);
32202 op1
= expand_normal (arg1
);
32204 op0
= copy_to_mode_reg (SImode
, op0
);
32206 op1
= copy_to_mode_reg (SImode
, op1
);
32207 emit_insn (gen_sse3_mwait (op0
, op1
));
32210 case IX86_BUILTIN_VEC_INIT_V2SI
:
32211 case IX86_BUILTIN_VEC_INIT_V4HI
:
32212 case IX86_BUILTIN_VEC_INIT_V8QI
:
32213 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
32215 case IX86_BUILTIN_VEC_EXT_V2DF
:
32216 case IX86_BUILTIN_VEC_EXT_V2DI
:
32217 case IX86_BUILTIN_VEC_EXT_V4SF
:
32218 case IX86_BUILTIN_VEC_EXT_V4SI
:
32219 case IX86_BUILTIN_VEC_EXT_V8HI
:
32220 case IX86_BUILTIN_VEC_EXT_V2SI
:
32221 case IX86_BUILTIN_VEC_EXT_V4HI
:
32222 case IX86_BUILTIN_VEC_EXT_V16QI
:
32223 return ix86_expand_vec_ext_builtin (exp
, target
);
32225 case IX86_BUILTIN_VEC_SET_V2DI
:
32226 case IX86_BUILTIN_VEC_SET_V4SF
:
32227 case IX86_BUILTIN_VEC_SET_V4SI
:
32228 case IX86_BUILTIN_VEC_SET_V8HI
:
32229 case IX86_BUILTIN_VEC_SET_V4HI
:
32230 case IX86_BUILTIN_VEC_SET_V16QI
:
32231 return ix86_expand_vec_set_builtin (exp
);
32233 case IX86_BUILTIN_INFQ
:
32234 case IX86_BUILTIN_HUGE_VALQ
:
32236 REAL_VALUE_TYPE inf
;
32240 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
32242 tmp
= validize_mem (force_const_mem (mode
, tmp
));
32245 target
= gen_reg_rtx (mode
);
32247 emit_move_insn (target
, tmp
);
32251 case IX86_BUILTIN_RDPMC
:
32252 case IX86_BUILTIN_RDTSC
:
32253 case IX86_BUILTIN_RDTSCP
:
32255 op0
= gen_reg_rtx (DImode
);
32256 op1
= gen_reg_rtx (DImode
);
32258 if (fcode
== IX86_BUILTIN_RDPMC
)
32260 arg0
= CALL_EXPR_ARG (exp
, 0);
32261 op2
= expand_normal (arg0
);
32262 if (!register_operand (op2
, SImode
))
32263 op2
= copy_to_mode_reg (SImode
, op2
);
32265 insn
= (TARGET_64BIT
32266 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
32267 : gen_rdpmc (op0
, op2
));
32270 else if (fcode
== IX86_BUILTIN_RDTSC
)
32272 insn
= (TARGET_64BIT
32273 ? gen_rdtsc_rex64 (op0
, op1
)
32274 : gen_rdtsc (op0
));
32279 op2
= gen_reg_rtx (SImode
);
32281 insn
= (TARGET_64BIT
32282 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
32283 : gen_rdtscp (op0
, op2
));
32286 arg0
= CALL_EXPR_ARG (exp
, 0);
32287 op4
= expand_normal (arg0
);
32288 if (!address_operand (op4
, VOIDmode
))
32290 op4
= convert_memory_address (Pmode
, op4
);
32291 op4
= copy_addr_to_reg (op4
);
32293 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
32298 /* mode is VOIDmode if __builtin_rd* has been called
32300 if (mode
== VOIDmode
)
32302 target
= gen_reg_rtx (mode
);
32307 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
32308 op1
, 1, OPTAB_DIRECT
);
32309 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
32310 op0
, 1, OPTAB_DIRECT
);
32313 emit_move_insn (target
, op0
);
32316 case IX86_BUILTIN_FXSAVE
:
32317 case IX86_BUILTIN_FXRSTOR
:
32318 case IX86_BUILTIN_FXSAVE64
:
32319 case IX86_BUILTIN_FXRSTOR64
:
32322 case IX86_BUILTIN_FXSAVE
:
32323 icode
= CODE_FOR_fxsave
;
32325 case IX86_BUILTIN_FXRSTOR
:
32326 icode
= CODE_FOR_fxrstor
;
32328 case IX86_BUILTIN_FXSAVE64
:
32329 icode
= CODE_FOR_fxsave64
;
32331 case IX86_BUILTIN_FXRSTOR64
:
32332 icode
= CODE_FOR_fxrstor64
;
32335 gcc_unreachable ();
32338 arg0
= CALL_EXPR_ARG (exp
, 0);
32339 op0
= expand_normal (arg0
);
32341 if (!address_operand (op0
, VOIDmode
))
32343 op0
= convert_memory_address (Pmode
, op0
);
32344 op0
= copy_addr_to_reg (op0
);
32346 op0
= gen_rtx_MEM (BLKmode
, op0
);
32348 pat
= GEN_FCN (icode
) (op0
);
32353 case IX86_BUILTIN_XSAVE
:
32354 case IX86_BUILTIN_XRSTOR
:
32355 case IX86_BUILTIN_XSAVE64
:
32356 case IX86_BUILTIN_XRSTOR64
:
32357 case IX86_BUILTIN_XSAVEOPT
:
32358 case IX86_BUILTIN_XSAVEOPT64
:
32359 arg0
= CALL_EXPR_ARG (exp
, 0);
32360 arg1
= CALL_EXPR_ARG (exp
, 1);
32361 op0
= expand_normal (arg0
);
32362 op1
= expand_normal (arg1
);
32364 if (!address_operand (op0
, VOIDmode
))
32366 op0
= convert_memory_address (Pmode
, op0
);
32367 op0
= copy_addr_to_reg (op0
);
32369 op0
= gen_rtx_MEM (BLKmode
, op0
);
32371 op1
= force_reg (DImode
, op1
);
32375 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
32376 NULL
, 1, OPTAB_DIRECT
);
32379 case IX86_BUILTIN_XSAVE
:
32380 icode
= CODE_FOR_xsave_rex64
;
32382 case IX86_BUILTIN_XRSTOR
:
32383 icode
= CODE_FOR_xrstor_rex64
;
32385 case IX86_BUILTIN_XSAVE64
:
32386 icode
= CODE_FOR_xsave64
;
32388 case IX86_BUILTIN_XRSTOR64
:
32389 icode
= CODE_FOR_xrstor64
;
32391 case IX86_BUILTIN_XSAVEOPT
:
32392 icode
= CODE_FOR_xsaveopt_rex64
;
32394 case IX86_BUILTIN_XSAVEOPT64
:
32395 icode
= CODE_FOR_xsaveopt64
;
32398 gcc_unreachable ();
32401 op2
= gen_lowpart (SImode
, op2
);
32402 op1
= gen_lowpart (SImode
, op1
);
32403 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32409 case IX86_BUILTIN_XSAVE
:
32410 icode
= CODE_FOR_xsave
;
32412 case IX86_BUILTIN_XRSTOR
:
32413 icode
= CODE_FOR_xrstor
;
32415 case IX86_BUILTIN_XSAVEOPT
:
32416 icode
= CODE_FOR_xsaveopt
;
32419 gcc_unreachable ();
32421 pat
= GEN_FCN (icode
) (op0
, op1
);
32428 case IX86_BUILTIN_LLWPCB
:
32429 arg0
= CALL_EXPR_ARG (exp
, 0);
32430 op0
= expand_normal (arg0
);
32431 icode
= CODE_FOR_lwp_llwpcb
;
32432 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32433 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32434 emit_insn (gen_lwp_llwpcb (op0
));
32437 case IX86_BUILTIN_SLWPCB
:
32438 icode
= CODE_FOR_lwp_slwpcb
;
32440 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
32441 target
= gen_reg_rtx (Pmode
);
32442 emit_insn (gen_lwp_slwpcb (target
));
32445 case IX86_BUILTIN_BEXTRI32
:
32446 case IX86_BUILTIN_BEXTRI64
:
32447 arg0
= CALL_EXPR_ARG (exp
, 0);
32448 arg1
= CALL_EXPR_ARG (exp
, 1);
32449 op0
= expand_normal (arg0
);
32450 op1
= expand_normal (arg1
);
32451 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
32452 ? CODE_FOR_tbm_bextri_si
32453 : CODE_FOR_tbm_bextri_di
);
32454 if (!CONST_INT_P (op1
))
32456 error ("last argument must be an immediate");
32461 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
32462 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
32463 op1
= GEN_INT (length
);
32464 op2
= GEN_INT (lsb_index
);
32465 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
32471 case IX86_BUILTIN_RDRAND16_STEP
:
32472 icode
= CODE_FOR_rdrandhi_1
;
32476 case IX86_BUILTIN_RDRAND32_STEP
:
32477 icode
= CODE_FOR_rdrandsi_1
;
32481 case IX86_BUILTIN_RDRAND64_STEP
:
32482 icode
= CODE_FOR_rdranddi_1
;
32486 op0
= gen_reg_rtx (mode0
);
32487 emit_insn (GEN_FCN (icode
) (op0
));
32489 arg0
= CALL_EXPR_ARG (exp
, 0);
32490 op1
= expand_normal (arg0
);
32491 if (!address_operand (op1
, VOIDmode
))
32493 op1
= convert_memory_address (Pmode
, op1
);
32494 op1
= copy_addr_to_reg (op1
);
32496 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32498 op1
= gen_reg_rtx (SImode
);
32499 emit_move_insn (op1
, CONST1_RTX (SImode
));
32501 /* Emit SImode conditional move. */
32502 if (mode0
== HImode
)
32504 op2
= gen_reg_rtx (SImode
);
32505 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
32507 else if (mode0
== SImode
)
32510 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
32513 target
= gen_reg_rtx (SImode
);
32515 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32517 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32518 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
32521 case IX86_BUILTIN_RDSEED16_STEP
:
32522 icode
= CODE_FOR_rdseedhi_1
;
32526 case IX86_BUILTIN_RDSEED32_STEP
:
32527 icode
= CODE_FOR_rdseedsi_1
;
32531 case IX86_BUILTIN_RDSEED64_STEP
:
32532 icode
= CODE_FOR_rdseeddi_1
;
32536 op0
= gen_reg_rtx (mode0
);
32537 emit_insn (GEN_FCN (icode
) (op0
));
32539 arg0
= CALL_EXPR_ARG (exp
, 0);
32540 op1
= expand_normal (arg0
);
32541 if (!address_operand (op1
, VOIDmode
))
32543 op1
= convert_memory_address (Pmode
, op1
);
32544 op1
= copy_addr_to_reg (op1
);
32546 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32548 op2
= gen_reg_rtx (QImode
);
32550 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32552 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32555 target
= gen_reg_rtx (SImode
);
32557 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32560 case IX86_BUILTIN_ADDCARRYX32
:
32561 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32565 case IX86_BUILTIN_ADDCARRYX64
:
32566 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32570 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32571 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32572 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32573 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32575 op0
= gen_reg_rtx (QImode
);
32577 /* Generate CF from input operand. */
32578 op1
= expand_normal (arg0
);
32579 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32580 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32582 /* Gen ADCX instruction to compute X+Y+CF. */
32583 op2
= expand_normal (arg1
);
32584 op3
= expand_normal (arg2
);
32587 op2
= copy_to_mode_reg (mode0
, op2
);
32589 op3
= copy_to_mode_reg (mode0
, op3
);
32591 op0
= gen_reg_rtx (mode0
);
32593 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32594 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32595 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32597 /* Store the result. */
32598 op4
= expand_normal (arg3
);
32599 if (!address_operand (op4
, VOIDmode
))
32601 op4
= convert_memory_address (Pmode
, op4
);
32602 op4
= copy_addr_to_reg (op4
);
32604 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32606 /* Return current CF value. */
32608 target
= gen_reg_rtx (QImode
);
32610 PUT_MODE (pat
, QImode
);
32611 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32614 case IX86_BUILTIN_GATHERSIV2DF
:
32615 icode
= CODE_FOR_avx2_gathersiv2df
;
32617 case IX86_BUILTIN_GATHERSIV4DF
:
32618 icode
= CODE_FOR_avx2_gathersiv4df
;
32620 case IX86_BUILTIN_GATHERDIV2DF
:
32621 icode
= CODE_FOR_avx2_gatherdiv2df
;
32623 case IX86_BUILTIN_GATHERDIV4DF
:
32624 icode
= CODE_FOR_avx2_gatherdiv4df
;
32626 case IX86_BUILTIN_GATHERSIV4SF
:
32627 icode
= CODE_FOR_avx2_gathersiv4sf
;
32629 case IX86_BUILTIN_GATHERSIV8SF
:
32630 icode
= CODE_FOR_avx2_gathersiv8sf
;
32632 case IX86_BUILTIN_GATHERDIV4SF
:
32633 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32635 case IX86_BUILTIN_GATHERDIV8SF
:
32636 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32638 case IX86_BUILTIN_GATHERSIV2DI
:
32639 icode
= CODE_FOR_avx2_gathersiv2di
;
32641 case IX86_BUILTIN_GATHERSIV4DI
:
32642 icode
= CODE_FOR_avx2_gathersiv4di
;
32644 case IX86_BUILTIN_GATHERDIV2DI
:
32645 icode
= CODE_FOR_avx2_gatherdiv2di
;
32647 case IX86_BUILTIN_GATHERDIV4DI
:
32648 icode
= CODE_FOR_avx2_gatherdiv4di
;
32650 case IX86_BUILTIN_GATHERSIV4SI
:
32651 icode
= CODE_FOR_avx2_gathersiv4si
;
32653 case IX86_BUILTIN_GATHERSIV8SI
:
32654 icode
= CODE_FOR_avx2_gathersiv8si
;
32656 case IX86_BUILTIN_GATHERDIV4SI
:
32657 icode
= CODE_FOR_avx2_gatherdiv4si
;
32659 case IX86_BUILTIN_GATHERDIV8SI
:
32660 icode
= CODE_FOR_avx2_gatherdiv8si
;
32662 case IX86_BUILTIN_GATHERALTSIV4DF
:
32663 icode
= CODE_FOR_avx2_gathersiv4df
;
32665 case IX86_BUILTIN_GATHERALTDIV8SF
:
32666 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32668 case IX86_BUILTIN_GATHERALTSIV4DI
:
32669 icode
= CODE_FOR_avx2_gathersiv4di
;
32671 case IX86_BUILTIN_GATHERALTDIV8SI
:
32672 icode
= CODE_FOR_avx2_gatherdiv8si
;
32676 arg0
= CALL_EXPR_ARG (exp
, 0);
32677 arg1
= CALL_EXPR_ARG (exp
, 1);
32678 arg2
= CALL_EXPR_ARG (exp
, 2);
32679 arg3
= CALL_EXPR_ARG (exp
, 3);
32680 arg4
= CALL_EXPR_ARG (exp
, 4);
32681 op0
= expand_normal (arg0
);
32682 op1
= expand_normal (arg1
);
32683 op2
= expand_normal (arg2
);
32684 op3
= expand_normal (arg3
);
32685 op4
= expand_normal (arg4
);
32686 /* Note the arg order is different from the operand order. */
32687 mode0
= insn_data
[icode
].operand
[1].mode
;
32688 mode2
= insn_data
[icode
].operand
[3].mode
;
32689 mode3
= insn_data
[icode
].operand
[4].mode
;
32690 mode4
= insn_data
[icode
].operand
[5].mode
;
32692 if (target
== NULL_RTX
32693 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32694 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32696 subtarget
= target
;
32698 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32699 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32701 rtx half
= gen_reg_rtx (V4SImode
);
32702 if (!nonimmediate_operand (op2
, V8SImode
))
32703 op2
= copy_to_mode_reg (V8SImode
, op2
);
32704 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32707 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32708 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32710 rtx (*gen
) (rtx
, rtx
);
32711 rtx half
= gen_reg_rtx (mode0
);
32712 if (mode0
== V4SFmode
)
32713 gen
= gen_vec_extract_lo_v8sf
;
32715 gen
= gen_vec_extract_lo_v8si
;
32716 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32717 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32718 emit_insn (gen (half
, op0
));
32720 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32721 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32722 emit_insn (gen (half
, op3
));
32726 /* Force memory operand only with base register here. But we
32727 don't want to do it on memory operand for other builtin
32729 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32731 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32732 op0
= copy_to_mode_reg (mode0
, op0
);
32733 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32734 op1
= copy_to_mode_reg (Pmode
, op1
);
32735 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32736 op2
= copy_to_mode_reg (mode2
, op2
);
32737 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32738 op3
= copy_to_mode_reg (mode3
, op3
);
32739 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32741 error ("last argument must be scale 1, 2, 4, 8");
32745 /* Optimize. If mask is known to have all high bits set,
32746 replace op0 with pc_rtx to signal that the instruction
32747 overwrites the whole destination and doesn't use its
32748 previous contents. */
32751 if (TREE_CODE (arg3
) == VECTOR_CST
)
32753 unsigned int negative
= 0;
32754 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32756 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32757 if (TREE_CODE (cst
) == INTEGER_CST
32758 && tree_int_cst_sign_bit (cst
))
32760 else if (TREE_CODE (cst
) == REAL_CST
32761 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32764 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32767 else if (TREE_CODE (arg3
) == SSA_NAME
)
32769 /* Recognize also when mask is like:
32770 __v2df src = _mm_setzero_pd ();
32771 __v2df mask = _mm_cmpeq_pd (src, src);
32773 __v8sf src = _mm256_setzero_ps ();
32774 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32775 as that is a cheaper way to load all ones into
32776 a register than having to load a constant from
32778 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32779 if (is_gimple_call (def_stmt
))
32781 tree fndecl
= gimple_call_fndecl (def_stmt
);
32783 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32784 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32786 case IX86_BUILTIN_CMPPD
:
32787 case IX86_BUILTIN_CMPPS
:
32788 case IX86_BUILTIN_CMPPD256
:
32789 case IX86_BUILTIN_CMPPS256
:
32790 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32793 case IX86_BUILTIN_CMPEQPD
:
32794 case IX86_BUILTIN_CMPEQPS
:
32795 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32796 && initializer_zerop (gimple_call_arg (def_stmt
,
32807 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32812 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32813 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32815 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32816 ? V4SFmode
: V4SImode
;
32817 if (target
== NULL_RTX
)
32818 target
= gen_reg_rtx (tmode
);
32819 if (tmode
== V4SFmode
)
32820 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32822 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32825 target
= subtarget
;
32829 case IX86_BUILTIN_XABORT
:
32830 icode
= CODE_FOR_xabort
;
32831 arg0
= CALL_EXPR_ARG (exp
, 0);
32832 op0
= expand_normal (arg0
);
32833 mode0
= insn_data
[icode
].operand
[0].mode
;
32834 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32836 error ("the xabort's argument must be an 8-bit immediate");
32839 emit_insn (gen_xabort (op0
));
32846 for (i
= 0, d
= bdesc_special_args
;
32847 i
< ARRAY_SIZE (bdesc_special_args
);
32849 if (d
->code
== fcode
)
32850 return ix86_expand_special_args_builtin (d
, exp
, target
);
32852 for (i
= 0, d
= bdesc_args
;
32853 i
< ARRAY_SIZE (bdesc_args
);
32855 if (d
->code
== fcode
)
32858 case IX86_BUILTIN_FABSQ
:
32859 case IX86_BUILTIN_COPYSIGNQ
:
32861 /* Emit a normal call if SSE isn't available. */
32862 return expand_call (exp
, target
, ignore
);
32864 return ix86_expand_args_builtin (d
, exp
, target
);
32867 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32868 if (d
->code
== fcode
)
32869 return ix86_expand_sse_comi (d
, exp
, target
);
32871 for (i
= 0, d
= bdesc_pcmpestr
;
32872 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32874 if (d
->code
== fcode
)
32875 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32877 for (i
= 0, d
= bdesc_pcmpistr
;
32878 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32880 if (d
->code
== fcode
)
32881 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32883 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32884 if (d
->code
== fcode
)
32885 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32886 (enum ix86_builtin_func_type
)
32887 d
->flag
, d
->comparison
);
32889 gcc_unreachable ();
32892 /* Returns a function decl for a vectorized version of the builtin function
32893 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32894 if it is not available. */
32897 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32900 enum machine_mode in_mode
, out_mode
;
32902 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32904 if (TREE_CODE (type_out
) != VECTOR_TYPE
32905 || TREE_CODE (type_in
) != VECTOR_TYPE
32906 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32909 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32910 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32911 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32912 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32916 case BUILT_IN_SQRT
:
32917 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32919 if (out_n
== 2 && in_n
== 2)
32920 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32921 else if (out_n
== 4 && in_n
== 4)
32922 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32926 case BUILT_IN_SQRTF
:
32927 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32929 if (out_n
== 4 && in_n
== 4)
32930 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32931 else if (out_n
== 8 && in_n
== 8)
32932 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32936 case BUILT_IN_IFLOOR
:
32937 case BUILT_IN_LFLOOR
:
32938 case BUILT_IN_LLFLOOR
:
32939 /* The round insn does not trap on denormals. */
32940 if (flag_trapping_math
|| !TARGET_ROUND
)
32943 if (out_mode
== SImode
&& in_mode
== DFmode
)
32945 if (out_n
== 4 && in_n
== 2)
32946 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32947 else if (out_n
== 8 && in_n
== 4)
32948 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32952 case BUILT_IN_IFLOORF
:
32953 case BUILT_IN_LFLOORF
:
32954 case BUILT_IN_LLFLOORF
:
32955 /* The round insn does not trap on denormals. */
32956 if (flag_trapping_math
|| !TARGET_ROUND
)
32959 if (out_mode
== SImode
&& in_mode
== SFmode
)
32961 if (out_n
== 4 && in_n
== 4)
32962 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32963 else if (out_n
== 8 && in_n
== 8)
32964 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32968 case BUILT_IN_ICEIL
:
32969 case BUILT_IN_LCEIL
:
32970 case BUILT_IN_LLCEIL
:
32971 /* The round insn does not trap on denormals. */
32972 if (flag_trapping_math
|| !TARGET_ROUND
)
32975 if (out_mode
== SImode
&& in_mode
== DFmode
)
32977 if (out_n
== 4 && in_n
== 2)
32978 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32979 else if (out_n
== 8 && in_n
== 4)
32980 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32984 case BUILT_IN_ICEILF
:
32985 case BUILT_IN_LCEILF
:
32986 case BUILT_IN_LLCEILF
:
32987 /* The round insn does not trap on denormals. */
32988 if (flag_trapping_math
|| !TARGET_ROUND
)
32991 if (out_mode
== SImode
&& in_mode
== SFmode
)
32993 if (out_n
== 4 && in_n
== 4)
32994 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32995 else if (out_n
== 8 && in_n
== 8)
32996 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
33000 case BUILT_IN_IRINT
:
33001 case BUILT_IN_LRINT
:
33002 case BUILT_IN_LLRINT
:
33003 if (out_mode
== SImode
&& in_mode
== DFmode
)
33005 if (out_n
== 4 && in_n
== 2)
33006 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
33007 else if (out_n
== 8 && in_n
== 4)
33008 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
33012 case BUILT_IN_IRINTF
:
33013 case BUILT_IN_LRINTF
:
33014 case BUILT_IN_LLRINTF
:
33015 if (out_mode
== SImode
&& in_mode
== SFmode
)
33017 if (out_n
== 4 && in_n
== 4)
33018 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
33019 else if (out_n
== 8 && in_n
== 8)
33020 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
33024 case BUILT_IN_IROUND
:
33025 case BUILT_IN_LROUND
:
33026 case BUILT_IN_LLROUND
:
33027 /* The round insn does not trap on denormals. */
33028 if (flag_trapping_math
|| !TARGET_ROUND
)
33031 if (out_mode
== SImode
&& in_mode
== DFmode
)
33033 if (out_n
== 4 && in_n
== 2)
33034 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
33035 else if (out_n
== 8 && in_n
== 4)
33036 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
33040 case BUILT_IN_IROUNDF
:
33041 case BUILT_IN_LROUNDF
:
33042 case BUILT_IN_LLROUNDF
:
33043 /* The round insn does not trap on denormals. */
33044 if (flag_trapping_math
|| !TARGET_ROUND
)
33047 if (out_mode
== SImode
&& in_mode
== SFmode
)
33049 if (out_n
== 4 && in_n
== 4)
33050 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
33051 else if (out_n
== 8 && in_n
== 8)
33052 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
33056 case BUILT_IN_COPYSIGN
:
33057 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33059 if (out_n
== 2 && in_n
== 2)
33060 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
33061 else if (out_n
== 4 && in_n
== 4)
33062 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
33066 case BUILT_IN_COPYSIGNF
:
33067 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33069 if (out_n
== 4 && in_n
== 4)
33070 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
33071 else if (out_n
== 8 && in_n
== 8)
33072 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
33076 case BUILT_IN_FLOOR
:
33077 /* The round insn does not trap on denormals. */
33078 if (flag_trapping_math
|| !TARGET_ROUND
)
33081 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33083 if (out_n
== 2 && in_n
== 2)
33084 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
33085 else if (out_n
== 4 && in_n
== 4)
33086 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
33090 case BUILT_IN_FLOORF
:
33091 /* The round insn does not trap on denormals. */
33092 if (flag_trapping_math
|| !TARGET_ROUND
)
33095 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33097 if (out_n
== 4 && in_n
== 4)
33098 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
33099 else if (out_n
== 8 && in_n
== 8)
33100 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
33104 case BUILT_IN_CEIL
:
33105 /* The round insn does not trap on denormals. */
33106 if (flag_trapping_math
|| !TARGET_ROUND
)
33109 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33111 if (out_n
== 2 && in_n
== 2)
33112 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
33113 else if (out_n
== 4 && in_n
== 4)
33114 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
33118 case BUILT_IN_CEILF
:
33119 /* The round insn does not trap on denormals. */
33120 if (flag_trapping_math
|| !TARGET_ROUND
)
33123 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33125 if (out_n
== 4 && in_n
== 4)
33126 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
33127 else if (out_n
== 8 && in_n
== 8)
33128 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
33132 case BUILT_IN_TRUNC
:
33133 /* The round insn does not trap on denormals. */
33134 if (flag_trapping_math
|| !TARGET_ROUND
)
33137 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33139 if (out_n
== 2 && in_n
== 2)
33140 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
33141 else if (out_n
== 4 && in_n
== 4)
33142 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
33146 case BUILT_IN_TRUNCF
:
33147 /* The round insn does not trap on denormals. */
33148 if (flag_trapping_math
|| !TARGET_ROUND
)
33151 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33153 if (out_n
== 4 && in_n
== 4)
33154 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
33155 else if (out_n
== 8 && in_n
== 8)
33156 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
33160 case BUILT_IN_RINT
:
33161 /* The round insn does not trap on denormals. */
33162 if (flag_trapping_math
|| !TARGET_ROUND
)
33165 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33167 if (out_n
== 2 && in_n
== 2)
33168 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
33169 else if (out_n
== 4 && in_n
== 4)
33170 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
33174 case BUILT_IN_RINTF
:
33175 /* The round insn does not trap on denormals. */
33176 if (flag_trapping_math
|| !TARGET_ROUND
)
33179 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33181 if (out_n
== 4 && in_n
== 4)
33182 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
33183 else if (out_n
== 8 && in_n
== 8)
33184 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
33188 case BUILT_IN_ROUND
:
33189 /* The round insn does not trap on denormals. */
33190 if (flag_trapping_math
|| !TARGET_ROUND
)
33193 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33195 if (out_n
== 2 && in_n
== 2)
33196 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
33197 else if (out_n
== 4 && in_n
== 4)
33198 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
33202 case BUILT_IN_ROUNDF
:
33203 /* The round insn does not trap on denormals. */
33204 if (flag_trapping_math
|| !TARGET_ROUND
)
33207 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33209 if (out_n
== 4 && in_n
== 4)
33210 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
33211 else if (out_n
== 8 && in_n
== 8)
33212 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
33217 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33219 if (out_n
== 2 && in_n
== 2)
33220 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
33221 if (out_n
== 4 && in_n
== 4)
33222 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
33226 case BUILT_IN_FMAF
:
33227 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33229 if (out_n
== 4 && in_n
== 4)
33230 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
33231 if (out_n
== 8 && in_n
== 8)
33232 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
33240 /* Dispatch to a handler for a vectorization library. */
33241 if (ix86_veclib_handler
)
33242 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
33248 /* Handler for an SVML-style interface to
33249 a library with vectorized intrinsics. */
33252 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
33255 tree fntype
, new_fndecl
, args
;
33258 enum machine_mode el_mode
, in_mode
;
33261 /* The SVML is suitable for unsafe math only. */
33262 if (!flag_unsafe_math_optimizations
)
33265 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33266 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33267 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33268 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33269 if (el_mode
!= in_mode
33277 case BUILT_IN_LOG10
:
33279 case BUILT_IN_TANH
:
33281 case BUILT_IN_ATAN
:
33282 case BUILT_IN_ATAN2
:
33283 case BUILT_IN_ATANH
:
33284 case BUILT_IN_CBRT
:
33285 case BUILT_IN_SINH
:
33287 case BUILT_IN_ASINH
:
33288 case BUILT_IN_ASIN
:
33289 case BUILT_IN_COSH
:
33291 case BUILT_IN_ACOSH
:
33292 case BUILT_IN_ACOS
:
33293 if (el_mode
!= DFmode
|| n
!= 2)
33297 case BUILT_IN_EXPF
:
33298 case BUILT_IN_LOGF
:
33299 case BUILT_IN_LOG10F
:
33300 case BUILT_IN_POWF
:
33301 case BUILT_IN_TANHF
:
33302 case BUILT_IN_TANF
:
33303 case BUILT_IN_ATANF
:
33304 case BUILT_IN_ATAN2F
:
33305 case BUILT_IN_ATANHF
:
33306 case BUILT_IN_CBRTF
:
33307 case BUILT_IN_SINHF
:
33308 case BUILT_IN_SINF
:
33309 case BUILT_IN_ASINHF
:
33310 case BUILT_IN_ASINF
:
33311 case BUILT_IN_COSHF
:
33312 case BUILT_IN_COSF
:
33313 case BUILT_IN_ACOSHF
:
33314 case BUILT_IN_ACOSF
:
33315 if (el_mode
!= SFmode
|| n
!= 4)
33323 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33325 if (fn
== BUILT_IN_LOGF
)
33326 strcpy (name
, "vmlsLn4");
33327 else if (fn
== BUILT_IN_LOG
)
33328 strcpy (name
, "vmldLn2");
33331 sprintf (name
, "vmls%s", bname
+10);
33332 name
[strlen (name
)-1] = '4';
33335 sprintf (name
, "vmld%s2", bname
+10);
33337 /* Convert to uppercase. */
33341 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33343 args
= TREE_CHAIN (args
))
33347 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33349 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33351 /* Build a function declaration for the vectorized function. */
33352 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33353 FUNCTION_DECL
, get_identifier (name
), fntype
);
33354 TREE_PUBLIC (new_fndecl
) = 1;
33355 DECL_EXTERNAL (new_fndecl
) = 1;
33356 DECL_IS_NOVOPS (new_fndecl
) = 1;
33357 TREE_READONLY (new_fndecl
) = 1;
33362 /* Handler for an ACML-style interface to
33363 a library with vectorized intrinsics. */
33366 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
33368 char name
[20] = "__vr.._";
33369 tree fntype
, new_fndecl
, args
;
33372 enum machine_mode el_mode
, in_mode
;
33375 /* The ACML is 64bits only and suitable for unsafe math only as
33376 it does not correctly support parts of IEEE with the required
33377 precision such as denormals. */
33379 || !flag_unsafe_math_optimizations
)
33382 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33383 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33384 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33385 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33386 if (el_mode
!= in_mode
33396 case BUILT_IN_LOG2
:
33397 case BUILT_IN_LOG10
:
33400 if (el_mode
!= DFmode
33405 case BUILT_IN_SINF
:
33406 case BUILT_IN_COSF
:
33407 case BUILT_IN_EXPF
:
33408 case BUILT_IN_POWF
:
33409 case BUILT_IN_LOGF
:
33410 case BUILT_IN_LOG2F
:
33411 case BUILT_IN_LOG10F
:
33414 if (el_mode
!= SFmode
33423 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33424 sprintf (name
+ 7, "%s", bname
+10);
33427 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33429 args
= TREE_CHAIN (args
))
33433 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33435 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33437 /* Build a function declaration for the vectorized function. */
33438 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33439 FUNCTION_DECL
, get_identifier (name
), fntype
);
33440 TREE_PUBLIC (new_fndecl
) = 1;
33441 DECL_EXTERNAL (new_fndecl
) = 1;
33442 DECL_IS_NOVOPS (new_fndecl
) = 1;
33443 TREE_READONLY (new_fndecl
) = 1;
33448 /* Returns a decl of a function that implements gather load with
33449 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
33450 Return NULL_TREE if it is not available. */
33453 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
33454 const_tree index_type
, int scale
)
33457 enum ix86_builtins code
;
33462 if ((TREE_CODE (index_type
) != INTEGER_TYPE
33463 && !POINTER_TYPE_P (index_type
))
33464 || (TYPE_MODE (index_type
) != SImode
33465 && TYPE_MODE (index_type
) != DImode
))
33468 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
33471 /* v*gather* insn sign extends index to pointer mode. */
33472 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
33473 && TYPE_UNSIGNED (index_type
))
33478 || (scale
& (scale
- 1)) != 0)
33481 si
= TYPE_MODE (index_type
) == SImode
;
33482 switch (TYPE_MODE (mem_vectype
))
33485 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
33488 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
33491 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
33494 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
33497 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
33500 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
33503 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
33506 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
33512 return ix86_builtins
[code
];
33515 /* Returns a code for a target-specific builtin that implements
33516 reciprocal of the function, or NULL_TREE if not available. */
33519 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
33520 bool sqrt ATTRIBUTE_UNUSED
)
33522 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
33523 && flag_finite_math_only
&& !flag_trapping_math
33524 && flag_unsafe_math_optimizations
))
33528 /* Machine dependent builtins. */
33531 /* Vectorized version of sqrt to rsqrt conversion. */
33532 case IX86_BUILTIN_SQRTPS_NR
:
33533 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33535 case IX86_BUILTIN_SQRTPS_NR256
:
33536 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33542 /* Normal builtins. */
33545 /* Sqrt to rsqrt conversion. */
33546 case BUILT_IN_SQRTF
:
33547 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33554 /* Helper for avx_vpermilps256_operand et al. This is also used by
33555 the expansion functions to turn the parallel back into a mask.
33556 The return value is 0 for no match and the imm8+1 for a match. */
33559 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33561 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33563 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33565 if (XVECLEN (par
, 0) != (int) nelt
)
33568 /* Validate that all of the elements are constants, and not totally
33569 out of range. Copy the data into an integral array to make the
33570 subsequent checks easier. */
33571 for (i
= 0; i
< nelt
; ++i
)
33573 rtx er
= XVECEXP (par
, 0, i
);
33574 unsigned HOST_WIDE_INT ei
;
33576 if (!CONST_INT_P (er
))
33587 /* In the 256-bit DFmode case, we can only move elements within
33589 for (i
= 0; i
< 2; ++i
)
33593 mask
|= ipar
[i
] << i
;
33595 for (i
= 2; i
< 4; ++i
)
33599 mask
|= (ipar
[i
] - 2) << i
;
33604 /* In the 256-bit SFmode case, we have full freedom of movement
33605 within the low 128-bit lane, but the high 128-bit lane must
33606 mirror the exact same pattern. */
33607 for (i
= 0; i
< 4; ++i
)
33608 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33615 /* In the 128-bit case, we've full freedom in the placement of
33616 the elements from the source operand. */
33617 for (i
= 0; i
< nelt
; ++i
)
33618 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33622 gcc_unreachable ();
33625 /* Make sure success has a non-zero value by adding one. */
33629 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33630 the expansion functions to turn the parallel back into a mask.
33631 The return value is 0 for no match and the imm8+1 for a match. */
33634 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33636 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33638 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33640 if (XVECLEN (par
, 0) != (int) nelt
)
33643 /* Validate that all of the elements are constants, and not totally
33644 out of range. Copy the data into an integral array to make the
33645 subsequent checks easier. */
33646 for (i
= 0; i
< nelt
; ++i
)
33648 rtx er
= XVECEXP (par
, 0, i
);
33649 unsigned HOST_WIDE_INT ei
;
33651 if (!CONST_INT_P (er
))
33654 if (ei
>= 2 * nelt
)
33659 /* Validate that the halves of the permute are halves. */
33660 for (i
= 0; i
< nelt2
- 1; ++i
)
33661 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33663 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33664 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33667 /* Reconstruct the mask. */
33668 for (i
= 0; i
< 2; ++i
)
33670 unsigned e
= ipar
[i
* nelt2
];
33674 mask
|= e
<< (i
* 4);
33677 /* Make sure success has a non-zero value by adding one. */
33681 /* Store OPERAND to the memory after reload is completed. This means
33682 that we can't easily use assign_stack_local. */
33684 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33688 gcc_assert (reload_completed
);
33689 if (ix86_using_red_zone ())
33691 result
= gen_rtx_MEM (mode
,
33692 gen_rtx_PLUS (Pmode
,
33694 GEN_INT (-RED_ZONE_SIZE
)));
33695 emit_move_insn (result
, operand
);
33697 else if (TARGET_64BIT
)
33703 operand
= gen_lowpart (DImode
, operand
);
33707 gen_rtx_SET (VOIDmode
,
33708 gen_rtx_MEM (DImode
,
33709 gen_rtx_PRE_DEC (DImode
,
33710 stack_pointer_rtx
)),
33714 gcc_unreachable ();
33716 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33725 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33727 gen_rtx_SET (VOIDmode
,
33728 gen_rtx_MEM (SImode
,
33729 gen_rtx_PRE_DEC (Pmode
,
33730 stack_pointer_rtx
)),
33733 gen_rtx_SET (VOIDmode
,
33734 gen_rtx_MEM (SImode
,
33735 gen_rtx_PRE_DEC (Pmode
,
33736 stack_pointer_rtx
)),
33741 /* Store HImodes as SImodes. */
33742 operand
= gen_lowpart (SImode
, operand
);
33746 gen_rtx_SET (VOIDmode
,
33747 gen_rtx_MEM (GET_MODE (operand
),
33748 gen_rtx_PRE_DEC (SImode
,
33749 stack_pointer_rtx
)),
33753 gcc_unreachable ();
33755 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33760 /* Free operand from the memory. */
33762 ix86_free_from_memory (enum machine_mode mode
)
33764 if (!ix86_using_red_zone ())
33768 if (mode
== DImode
|| TARGET_64BIT
)
33772 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33773 to pop or add instruction if registers are available. */
33774 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33775 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33780 /* Return a register priority for hard reg REGNO. */
33782 ix86_register_priority (int hard_regno
)
33784 /* ebp and r13 as the base always wants a displacement, r12 as the
33785 base always wants an index. So discourage their usage in an
33787 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33789 if (hard_regno
== BP_REG
)
33791 /* New x86-64 int registers result in bigger code size. Discourage
33793 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33795 /* New x86-64 SSE registers result in bigger code size. Discourage
33797 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33799 /* Usage of AX register results in smaller code. Prefer it. */
33800 if (hard_regno
== 0)
33805 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33807 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33808 QImode must go into class Q_REGS.
33809 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33810 movdf to do mem-to-mem moves through integer regs. */
33813 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33815 enum machine_mode mode
= GET_MODE (x
);
33817 /* We're only allowed to return a subclass of CLASS. Many of the
33818 following checks fail for NO_REGS, so eliminate that early. */
33819 if (regclass
== NO_REGS
)
33822 /* All classes can load zeros. */
33823 if (x
== CONST0_RTX (mode
))
33826 /* Force constants into memory if we are loading a (nonzero) constant into
33827 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
33828 instructions to load from a constant. */
33830 && (MAYBE_MMX_CLASS_P (regclass
)
33831 || MAYBE_SSE_CLASS_P (regclass
)
33832 || MAYBE_MASK_CLASS_P (regclass
)))
33835 /* Prefer SSE regs only, if we can use them for math. */
33836 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33837 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33839 /* Floating-point constants need more complex checks. */
33840 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33842 /* General regs can load everything. */
33843 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33846 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33847 zero above. We only want to wind up preferring 80387 registers if
33848 we plan on doing computation with them. */
33850 && standard_80387_constant_p (x
) > 0)
33852 /* Limit class to non-sse. */
33853 if (regclass
== FLOAT_SSE_REGS
)
33855 if (regclass
== FP_TOP_SSE_REGS
)
33857 if (regclass
== FP_SECOND_SSE_REGS
)
33858 return FP_SECOND_REG
;
33859 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33866 /* Generally when we see PLUS here, it's the function invariant
33867 (plus soft-fp const_int). Which can only be computed into general
33869 if (GET_CODE (x
) == PLUS
)
33870 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33872 /* QImode constants are easy to load, but non-constant QImode data
33873 must go into Q_REGS. */
33874 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33876 if (reg_class_subset_p (regclass
, Q_REGS
))
33878 if (reg_class_subset_p (Q_REGS
, regclass
))
33886 /* Discourage putting floating-point values in SSE registers unless
33887 SSE math is being used, and likewise for the 387 registers. */
33889 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33891 enum machine_mode mode
= GET_MODE (x
);
33893 /* Restrict the output reload class to the register bank that we are doing
33894 math on. If we would like not to return a subset of CLASS, reject this
33895 alternative: if reload cannot do this, it will still use its choice. */
33896 mode
= GET_MODE (x
);
33897 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33898 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
33900 if (X87_FLOAT_MODE_P (mode
))
33902 if (regclass
== FP_TOP_SSE_REGS
)
33904 else if (regclass
== FP_SECOND_SSE_REGS
)
33905 return FP_SECOND_REG
;
33907 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33914 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33915 enum machine_mode mode
, secondary_reload_info
*sri
)
33917 /* Double-word spills from general registers to non-offsettable memory
33918 references (zero-extended addresses) require special handling. */
33921 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33922 && INTEGER_CLASS_P (rclass
)
33923 && !offsettable_memref_p (x
))
33926 ? CODE_FOR_reload_noff_load
33927 : CODE_FOR_reload_noff_store
);
33928 /* Add the cost of moving address to a temporary. */
33929 sri
->extra_cost
= 1;
33934 /* QImode spills from non-QI registers require
33935 intermediate register on 32bit targets. */
33937 && (MAYBE_MASK_CLASS_P (rclass
)
33938 || (!TARGET_64BIT
&& !in_p
33939 && INTEGER_CLASS_P (rclass
)
33940 && MAYBE_NON_Q_CLASS_P (rclass
))))
33949 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33950 regno
= true_regnum (x
);
33952 /* Return Q_REGS if the operand is in memory. */
33957 /* This condition handles corner case where an expression involving
33958 pointers gets vectorized. We're trying to use the address of a
33959 stack slot as a vector initializer.
33961 (set (reg:V2DI 74 [ vect_cst_.2 ])
33962 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33964 Eventually frame gets turned into sp+offset like this:
33966 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33967 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33968 (const_int 392 [0x188]))))
33970 That later gets turned into:
33972 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33973 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33974 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33976 We'll have the following reload recorded:
33978 Reload 0: reload_in (DI) =
33979 (plus:DI (reg/f:DI 7 sp)
33980 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33981 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33982 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33983 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33984 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33985 reload_reg_rtx: (reg:V2DI 22 xmm1)
33987 Which isn't going to work since SSE instructions can't handle scalar
33988 additions. Returning GENERAL_REGS forces the addition into integer
33989 register and reload can handle subsequent reloads without problems. */
33991 if (in_p
&& GET_CODE (x
) == PLUS
33992 && SSE_CLASS_P (rclass
)
33993 && SCALAR_INT_MODE_P (mode
))
33994 return GENERAL_REGS
;
33999 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
34002 ix86_class_likely_spilled_p (reg_class_t rclass
)
34013 case SSE_FIRST_REG
:
34015 case FP_SECOND_REG
:
34025 /* If we are copying between general and FP registers, we need a memory
34026 location. The same is true for SSE and MMX registers.
34028 To optimize register_move_cost performance, allow inline variant.
34030 The macro can't work reliably when one of the CLASSES is class containing
34031 registers from multiple units (SSE, MMX, integer). We avoid this by never
34032 combining those units in single alternative in the machine description.
34033 Ensure that this constraint holds to avoid unexpected surprises.
34035 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34036 enforce these sanity checks. */
34039 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34040 enum machine_mode mode
, int strict
)
34042 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
34044 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34045 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34046 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34047 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34048 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34049 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34051 gcc_assert (!strict
|| lra_in_progress
);
34055 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34058 /* ??? This is a lie. We do have moves between mmx/general, and for
34059 mmx/sse2. But by saying we need secondary memory we discourage the
34060 register allocator from using the mmx registers unless needed. */
34061 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34064 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34066 /* SSE1 doesn't have any direct moves from other classes. */
34070 /* If the target says that inter-unit moves are more expensive
34071 than moving through memory, then don't generate them. */
34072 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34073 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34076 /* Between SSE and general, we have moves no larger than word size. */
34077 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34085 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34086 enum machine_mode mode
, int strict
)
34088 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
34091 /* Implement the TARGET_CLASS_MAX_NREGS hook.
34093 On the 80386, this is the size of MODE in words,
34094 except in the FP regs, where a single reg is always enough. */
34096 static unsigned char
34097 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
34099 if (MAYBE_INTEGER_CLASS_P (rclass
))
34101 if (mode
== XFmode
)
34102 return (TARGET_64BIT
? 2 : 3);
34103 else if (mode
== XCmode
)
34104 return (TARGET_64BIT
? 4 : 6);
34106 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
34110 if (COMPLEX_MODE_P (mode
))
34117 /* Return true if the registers in CLASS cannot represent the change from
34118 modes FROM to TO. */
34121 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
34122 enum reg_class regclass
)
34127 /* x87 registers can't do subreg at all, as all values are reformatted
34128 to extended precision. */
34129 if (MAYBE_FLOAT_CLASS_P (regclass
))
34132 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
34134 /* Vector registers do not support QI or HImode loads. If we don't
34135 disallow a change to these modes, reload will assume it's ok to
34136 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
34137 the vec_dupv4hi pattern. */
34138 if (GET_MODE_SIZE (from
) < 4)
34141 /* Vector registers do not support subreg with nonzero offsets, which
34142 are otherwise valid for integer registers. Since we can't see
34143 whether we have a nonzero offset from here, prohibit all
34144 nonparadoxical subregs changing size. */
34145 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
34152 /* Return the cost of moving data of mode M between a
34153 register and memory. A value of 2 is the default; this cost is
34154 relative to those in `REGISTER_MOVE_COST'.
34156 This function is used extensively by register_move_cost that is used to
34157 build tables at startup. Make it inline in this case.
34158 When IN is 2, return maximum of in and out move cost.
34160 If moving between registers and memory is more expensive than
34161 between two registers, you should define this macro to express the
34164 Model also increased moving costs of QImode registers in non
34168 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
34172 if (FLOAT_CLASS_P (regclass
))
34190 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
34191 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
34193 if (SSE_CLASS_P (regclass
))
34196 switch (GET_MODE_SIZE (mode
))
34211 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
34212 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
34214 if (MMX_CLASS_P (regclass
))
34217 switch (GET_MODE_SIZE (mode
))
34229 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
34230 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
34232 switch (GET_MODE_SIZE (mode
))
34235 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
34238 return ix86_cost
->int_store
[0];
34239 if (TARGET_PARTIAL_REG_DEPENDENCY
34240 && optimize_function_for_speed_p (cfun
))
34241 cost
= ix86_cost
->movzbl_load
;
34243 cost
= ix86_cost
->int_load
[0];
34245 return MAX (cost
, ix86_cost
->int_store
[0]);
34251 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
34253 return ix86_cost
->movzbl_load
;
34255 return ix86_cost
->int_store
[0] + 4;
34260 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
34261 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
34263 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
34264 if (mode
== TFmode
)
34267 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
34269 cost
= ix86_cost
->int_load
[2];
34271 cost
= ix86_cost
->int_store
[2];
34272 return (cost
* (((int) GET_MODE_SIZE (mode
)
34273 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
34278 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
34281 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
34285 /* Return the cost of moving data from a register in class CLASS1 to
34286 one in class CLASS2.
34288 It is not required that the cost always equal 2 when FROM is the same as TO;
34289 on some machines it is expensive to move between registers if they are not
34290 general registers. */
34293 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
34294 reg_class_t class2_i
)
34296 enum reg_class class1
= (enum reg_class
) class1_i
;
34297 enum reg_class class2
= (enum reg_class
) class2_i
;
34299 /* In case we require secondary memory, compute cost of the store followed
34300 by load. In order to avoid bad register allocation choices, we need
34301 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
34303 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
34307 cost
+= inline_memory_move_cost (mode
, class1
, 2);
34308 cost
+= inline_memory_move_cost (mode
, class2
, 2);
34310 /* In case of copying from general_purpose_register we may emit multiple
34311 stores followed by single load causing memory size mismatch stall.
34312 Count this as arbitrarily high cost of 20. */
34313 if (targetm
.class_max_nregs (class1
, mode
)
34314 > targetm
.class_max_nregs (class2
, mode
))
34317 /* In the case of FP/MMX moves, the registers actually overlap, and we
34318 have to switch modes in order to treat them differently. */
34319 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
34320 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
34326 /* Moves between SSE/MMX and integer unit are expensive. */
34327 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
34328 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34330 /* ??? By keeping returned value relatively high, we limit the number
34331 of moves between integer and MMX/SSE registers for all targets.
34332 Additionally, high value prevents problem with x86_modes_tieable_p(),
34333 where integer modes in MMX/SSE registers are not tieable
34334 because of missing QImode and HImode moves to, from or between
34335 MMX/SSE registers. */
34336 return MAX (8, ix86_cost
->mmxsse_to_integer
);
34338 if (MAYBE_FLOAT_CLASS_P (class1
))
34339 return ix86_cost
->fp_move
;
34340 if (MAYBE_SSE_CLASS_P (class1
))
34341 return ix86_cost
->sse_move
;
34342 if (MAYBE_MMX_CLASS_P (class1
))
34343 return ix86_cost
->mmx_move
;
34347 /* Return TRUE if hard register REGNO can hold a value of machine-mode
34351 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
34353 /* Flags and only flags can only hold CCmode values. */
34354 if (CC_REGNO_P (regno
))
34355 return GET_MODE_CLASS (mode
) == MODE_CC
;
34356 if (GET_MODE_CLASS (mode
) == MODE_CC
34357 || GET_MODE_CLASS (mode
) == MODE_RANDOM
34358 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
34360 if (STACK_REGNO_P (regno
))
34361 return VALID_FP_MODE_P (mode
);
34362 if (MASK_REGNO_P (regno
))
34363 return VALID_MASK_REG_MODE (mode
);
34364 if (SSE_REGNO_P (regno
))
34366 /* We implement the move patterns for all vector modes into and
34367 out of SSE registers, even when no operation instructions
34370 /* For AVX-512 we allow, regardless of regno:
34372 - any of 512-bit wide vector mode
34373 - any scalar mode. */
34376 || VALID_AVX512F_REG_MODE (mode
)
34377 || VALID_AVX512F_SCALAR_MODE (mode
)))
34380 /* xmm16-xmm31 are only available for AVX-512. */
34381 if (EXT_REX_SSE_REGNO_P (regno
))
34384 /* OImode move is available only when AVX is enabled. */
34385 return ((TARGET_AVX
&& mode
== OImode
)
34386 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34387 || VALID_SSE_REG_MODE (mode
)
34388 || VALID_SSE2_REG_MODE (mode
)
34389 || VALID_MMX_REG_MODE (mode
)
34390 || VALID_MMX_REG_MODE_3DNOW (mode
));
34392 if (MMX_REGNO_P (regno
))
34394 /* We implement the move patterns for 3DNOW modes even in MMX mode,
34395 so if the register is available at all, then we can move data of
34396 the given mode into or out of it. */
34397 return (VALID_MMX_REG_MODE (mode
)
34398 || VALID_MMX_REG_MODE_3DNOW (mode
));
34401 if (mode
== QImode
)
34403 /* Take care for QImode values - they can be in non-QI regs,
34404 but then they do cause partial register stalls. */
34405 if (ANY_QI_REGNO_P (regno
))
34407 if (!TARGET_PARTIAL_REG_STALL
)
34409 /* LRA checks if the hard register is OK for the given mode.
34410 QImode values can live in non-QI regs, so we allow all
34412 if (lra_in_progress
)
34414 return !can_create_pseudo_p ();
34416 /* We handle both integer and floats in the general purpose registers. */
34417 else if (VALID_INT_MODE_P (mode
))
34419 else if (VALID_FP_MODE_P (mode
))
34421 else if (VALID_DFP_MODE_P (mode
))
34423 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
34424 on to use that value in smaller contexts, this can easily force a
34425 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
34426 supporting DImode, allow it. */
34427 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
34433 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
34434 tieable integer mode. */
34437 ix86_tieable_integer_mode_p (enum machine_mode mode
)
34446 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
34449 return TARGET_64BIT
;
34456 /* Return true if MODE1 is accessible in a register that can hold MODE2
34457 without copying. That is, all register classes that can hold MODE2
34458 can also hold MODE1. */
34461 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
34463 if (mode1
== mode2
)
34466 if (ix86_tieable_integer_mode_p (mode1
)
34467 && ix86_tieable_integer_mode_p (mode2
))
34470 /* MODE2 being XFmode implies fp stack or general regs, which means we
34471 can tie any smaller floating point modes to it. Note that we do not
34472 tie this with TFmode. */
34473 if (mode2
== XFmode
)
34474 return mode1
== SFmode
|| mode1
== DFmode
;
34476 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
34477 that we can tie it with SFmode. */
34478 if (mode2
== DFmode
)
34479 return mode1
== SFmode
;
34481 /* If MODE2 is only appropriate for an SSE register, then tie with
34482 any other mode acceptable to SSE registers. */
34483 if (GET_MODE_SIZE (mode2
) == 32
34484 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34485 return (GET_MODE_SIZE (mode1
) == 32
34486 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34487 if (GET_MODE_SIZE (mode2
) == 16
34488 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34489 return (GET_MODE_SIZE (mode1
) == 16
34490 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34492 /* If MODE2 is appropriate for an MMX register, then tie
34493 with any other mode acceptable to MMX registers. */
34494 if (GET_MODE_SIZE (mode2
) == 8
34495 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
34496 return (GET_MODE_SIZE (mode1
) == 8
34497 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
34502 /* Return the cost of moving between two registers of mode MODE. */
34505 ix86_set_reg_reg_cost (enum machine_mode mode
)
34507 unsigned int units
= UNITS_PER_WORD
;
34509 switch (GET_MODE_CLASS (mode
))
34515 units
= GET_MODE_SIZE (CCmode
);
34519 if ((TARGET_SSE
&& mode
== TFmode
)
34520 || (TARGET_80387
&& mode
== XFmode
)
34521 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
34522 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
34523 units
= GET_MODE_SIZE (mode
);
34526 case MODE_COMPLEX_FLOAT
:
34527 if ((TARGET_SSE
&& mode
== TCmode
)
34528 || (TARGET_80387
&& mode
== XCmode
)
34529 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
34530 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
34531 units
= GET_MODE_SIZE (mode
);
34534 case MODE_VECTOR_INT
:
34535 case MODE_VECTOR_FLOAT
:
34536 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
34537 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34538 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34539 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34540 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
34541 units
= GET_MODE_SIZE (mode
);
34544 /* Return the cost of moving between two registers of mode MODE,
34545 assuming that the move will be in pieces of at most UNITS bytes. */
34546 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
34549 /* Compute a (partial) cost for rtx X. Return true if the complete
34550 cost has been computed, and false if subexpressions should be
34551 scanned. In either case, *TOTAL contains the cost result. */
34554 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
34557 enum rtx_code code
= (enum rtx_code
) code_i
;
34558 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34559 enum machine_mode mode
= GET_MODE (x
);
34560 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
34565 if (register_operand (SET_DEST (x
), VOIDmode
)
34566 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34568 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
34577 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34579 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34581 else if (flag_pic
&& SYMBOLIC_CONST (x
)
34583 || (!GET_CODE (x
) != LABEL_REF
34584 && (GET_CODE (x
) != SYMBOL_REF
34585 || !SYMBOL_REF_LOCAL_P (x
)))))
34592 if (mode
== VOIDmode
)
34597 switch (standard_80387_constant_p (x
))
34602 default: /* Other constants */
34609 if (SSE_FLOAT_MODE_P (mode
))
34612 switch (standard_sse_constant_p (x
))
34616 case 1: /* 0: xor eliminates false dependency */
34619 default: /* -1: cmp contains false dependency */
34624 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34625 it'll probably end up. Add a penalty for size. */
34626 *total
= (COSTS_N_INSNS (1)
34627 + (flag_pic
!= 0 && !TARGET_64BIT
)
34628 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34632 /* The zero extensions is often completely free on x86_64, so make
34633 it as cheap as possible. */
34634 if (TARGET_64BIT
&& mode
== DImode
34635 && GET_MODE (XEXP (x
, 0)) == SImode
)
34637 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34638 *total
= cost
->add
;
34640 *total
= cost
->movzx
;
34644 *total
= cost
->movsx
;
34648 if (SCALAR_INT_MODE_P (mode
)
34649 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34650 && CONST_INT_P (XEXP (x
, 1)))
34652 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34655 *total
= cost
->add
;
34658 if ((value
== 2 || value
== 3)
34659 && cost
->lea
<= cost
->shift_const
)
34661 *total
= cost
->lea
;
34671 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34673 /* ??? Should be SSE vector operation cost. */
34674 /* At least for published AMD latencies, this really is the same
34675 as the latency for a simple fpu operation like fabs. */
34676 /* V*QImode is emulated with 1-11 insns. */
34677 if (mode
== V16QImode
|| mode
== V32QImode
)
34680 if (TARGET_XOP
&& mode
== V16QImode
)
34682 /* For XOP we use vpshab, which requires a broadcast of the
34683 value to the variable shift insn. For constants this
34684 means a V16Q const in mem; even when we can perform the
34685 shift with one insn set the cost to prefer paddb. */
34686 if (CONSTANT_P (XEXP (x
, 1)))
34688 *total
= (cost
->fabs
34689 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34690 + (speed
? 2 : COSTS_N_BYTES (16)));
34695 else if (TARGET_SSSE3
)
34697 *total
= cost
->fabs
* count
;
34700 *total
= cost
->fabs
;
34702 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34704 if (CONST_INT_P (XEXP (x
, 1)))
34706 if (INTVAL (XEXP (x
, 1)) > 32)
34707 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34709 *total
= cost
->shift_const
* 2;
34713 if (GET_CODE (XEXP (x
, 1)) == AND
)
34714 *total
= cost
->shift_var
* 2;
34716 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34721 if (CONST_INT_P (XEXP (x
, 1)))
34722 *total
= cost
->shift_const
;
34723 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
34724 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
34726 /* Return the cost after shift-and truncation. */
34727 *total
= cost
->shift_var
;
34731 *total
= cost
->shift_var
;
34739 gcc_assert (FLOAT_MODE_P (mode
));
34740 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34742 /* ??? SSE scalar/vector cost should be used here. */
34743 /* ??? Bald assumption that fma has the same cost as fmul. */
34744 *total
= cost
->fmul
;
34745 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34747 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34749 if (GET_CODE (sub
) == NEG
)
34750 sub
= XEXP (sub
, 0);
34751 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34754 if (GET_CODE (sub
) == NEG
)
34755 sub
= XEXP (sub
, 0);
34756 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34761 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34763 /* ??? SSE scalar cost should be used here. */
34764 *total
= cost
->fmul
;
34767 else if (X87_FLOAT_MODE_P (mode
))
34769 *total
= cost
->fmul
;
34772 else if (FLOAT_MODE_P (mode
))
34774 /* ??? SSE vector cost should be used here. */
34775 *total
= cost
->fmul
;
34778 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34780 /* V*QImode is emulated with 7-13 insns. */
34781 if (mode
== V16QImode
|| mode
== V32QImode
)
34784 if (TARGET_XOP
&& mode
== V16QImode
)
34786 else if (TARGET_SSSE3
)
34788 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34790 /* V*DImode is emulated with 5-8 insns. */
34791 else if (mode
== V2DImode
|| mode
== V4DImode
)
34793 if (TARGET_XOP
&& mode
== V2DImode
)
34794 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34796 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34798 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34799 insns, including two PMULUDQ. */
34800 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34801 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34803 *total
= cost
->fmul
;
34808 rtx op0
= XEXP (x
, 0);
34809 rtx op1
= XEXP (x
, 1);
34811 if (CONST_INT_P (XEXP (x
, 1)))
34813 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34814 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34818 /* This is arbitrary. */
34821 /* Compute costs correctly for widening multiplication. */
34822 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34823 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34824 == GET_MODE_SIZE (mode
))
34826 int is_mulwiden
= 0;
34827 enum machine_mode inner_mode
= GET_MODE (op0
);
34829 if (GET_CODE (op0
) == GET_CODE (op1
))
34830 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34831 else if (CONST_INT_P (op1
))
34833 if (GET_CODE (op0
) == SIGN_EXTEND
)
34834 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34837 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34841 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34844 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34845 + nbits
* cost
->mult_bit
34846 + rtx_cost (op0
, outer_code
, opno
, speed
)
34847 + rtx_cost (op1
, outer_code
, opno
, speed
));
34856 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34857 /* ??? SSE cost should be used here. */
34858 *total
= cost
->fdiv
;
34859 else if (X87_FLOAT_MODE_P (mode
))
34860 *total
= cost
->fdiv
;
34861 else if (FLOAT_MODE_P (mode
))
34862 /* ??? SSE vector cost should be used here. */
34863 *total
= cost
->fdiv
;
34865 *total
= cost
->divide
[MODE_INDEX (mode
)];
34869 if (GET_MODE_CLASS (mode
) == MODE_INT
34870 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34872 if (GET_CODE (XEXP (x
, 0)) == PLUS
34873 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34874 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34875 && CONSTANT_P (XEXP (x
, 1)))
34877 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34878 if (val
== 2 || val
== 4 || val
== 8)
34880 *total
= cost
->lea
;
34881 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34882 outer_code
, opno
, speed
);
34883 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34884 outer_code
, opno
, speed
);
34885 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34889 else if (GET_CODE (XEXP (x
, 0)) == MULT
34890 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34892 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34893 if (val
== 2 || val
== 4 || val
== 8)
34895 *total
= cost
->lea
;
34896 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34897 outer_code
, opno
, speed
);
34898 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34902 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34904 *total
= cost
->lea
;
34905 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34906 outer_code
, opno
, speed
);
34907 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34908 outer_code
, opno
, speed
);
34909 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34916 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34918 /* ??? SSE cost should be used here. */
34919 *total
= cost
->fadd
;
34922 else if (X87_FLOAT_MODE_P (mode
))
34924 *total
= cost
->fadd
;
34927 else if (FLOAT_MODE_P (mode
))
34929 /* ??? SSE vector cost should be used here. */
34930 *total
= cost
->fadd
;
34938 if (GET_MODE_CLASS (mode
) == MODE_INT
34939 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34941 *total
= (cost
->add
* 2
34942 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34943 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34944 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34945 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
34951 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34953 /* ??? SSE cost should be used here. */
34954 *total
= cost
->fchs
;
34957 else if (X87_FLOAT_MODE_P (mode
))
34959 *total
= cost
->fchs
;
34962 else if (FLOAT_MODE_P (mode
))
34964 /* ??? SSE vector cost should be used here. */
34965 *total
= cost
->fchs
;
34971 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34973 /* ??? Should be SSE vector operation cost. */
34974 /* At least for published AMD latencies, this really is the same
34975 as the latency for a simple fpu operation like fabs. */
34976 *total
= cost
->fabs
;
34978 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34979 *total
= cost
->add
* 2;
34981 *total
= cost
->add
;
34985 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34986 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34987 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34988 && XEXP (x
, 1) == const0_rtx
)
34990 /* This kind of construct is implemented using test[bwl].
34991 Treat it as if we had an AND. */
34992 *total
= (cost
->add
34993 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34994 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
35000 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35005 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35006 /* ??? SSE cost should be used here. */
35007 *total
= cost
->fabs
;
35008 else if (X87_FLOAT_MODE_P (mode
))
35009 *total
= cost
->fabs
;
35010 else if (FLOAT_MODE_P (mode
))
35011 /* ??? SSE vector cost should be used here. */
35012 *total
= cost
->fabs
;
35016 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35017 /* ??? SSE cost should be used here. */
35018 *total
= cost
->fsqrt
;
35019 else if (X87_FLOAT_MODE_P (mode
))
35020 *total
= cost
->fsqrt
;
35021 else if (FLOAT_MODE_P (mode
))
35022 /* ??? SSE vector cost should be used here. */
35023 *total
= cost
->fsqrt
;
35027 if (XINT (x
, 1) == UNSPEC_TP
)
35034 case VEC_DUPLICATE
:
35035 /* ??? Assume all of these vector manipulation patterns are
35036 recognizable. In which case they all pretty much have the
35038 *total
= cost
->fabs
;
35048 static int current_machopic_label_num
;
35050 /* Given a symbol name and its associated stub, write out the
35051 definition of the stub. */
35054 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35056 unsigned int length
;
35057 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35058 int label
= ++current_machopic_label_num
;
35060 /* For 64-bit we shouldn't get here. */
35061 gcc_assert (!TARGET_64BIT
);
35063 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35064 symb
= targetm
.strip_name_encoding (symb
);
35066 length
= strlen (stub
);
35067 binder_name
= XALLOCAVEC (char, length
+ 32);
35068 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35070 length
= strlen (symb
);
35071 symbol_name
= XALLOCAVEC (char, length
+ 32);
35072 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35074 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35076 if (MACHOPIC_ATT_STUB
)
35077 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35078 else if (MACHOPIC_PURE
)
35079 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
35081 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
35083 fprintf (file
, "%s:\n", stub
);
35084 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35086 if (MACHOPIC_ATT_STUB
)
35088 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
35090 else if (MACHOPIC_PURE
)
35093 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35094 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
35095 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
35096 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
35097 label
, lazy_ptr_name
, label
);
35098 fprintf (file
, "\tjmp\t*%%ecx\n");
35101 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
35103 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
35104 it needs no stub-binding-helper. */
35105 if (MACHOPIC_ATT_STUB
)
35108 fprintf (file
, "%s:\n", binder_name
);
35112 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
35113 fprintf (file
, "\tpushl\t%%ecx\n");
35116 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
35118 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
35120 /* N.B. Keep the correspondence of these
35121 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
35122 old-pic/new-pic/non-pic stubs; altering this will break
35123 compatibility with existing dylibs. */
35126 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35127 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
35130 /* 16-byte -mdynamic-no-pic stub. */
35131 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
35133 fprintf (file
, "%s:\n", lazy_ptr_name
);
35134 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35135 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
35137 #endif /* TARGET_MACHO */
35139 /* Order the registers for register allocator. */
35142 x86_order_regs_for_local_alloc (void)
35147 /* First allocate the local general purpose registers. */
35148 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35149 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
35150 reg_alloc_order
[pos
++] = i
;
35152 /* Global general purpose registers. */
35153 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35154 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
35155 reg_alloc_order
[pos
++] = i
;
35157 /* x87 registers come first in case we are doing FP math
35159 if (!TARGET_SSE_MATH
)
35160 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35161 reg_alloc_order
[pos
++] = i
;
35163 /* SSE registers. */
35164 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
35165 reg_alloc_order
[pos
++] = i
;
35166 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
35167 reg_alloc_order
[pos
++] = i
;
35169 /* Extended REX SSE registers. */
35170 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
35171 reg_alloc_order
[pos
++] = i
;
35173 /* Mask register. */
35174 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
35175 reg_alloc_order
[pos
++] = i
;
35177 /* x87 registers. */
35178 if (TARGET_SSE_MATH
)
35179 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35180 reg_alloc_order
[pos
++] = i
;
35182 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
35183 reg_alloc_order
[pos
++] = i
;
35185 /* Initialize the rest of array as we do not allocate some registers
35187 while (pos
< FIRST_PSEUDO_REGISTER
)
35188 reg_alloc_order
[pos
++] = 0;
35191 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
35192 in struct attribute_spec handler. */
35194 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
35196 int flags ATTRIBUTE_UNUSED
,
35197 bool *no_add_attrs
)
35199 if (TREE_CODE (*node
) != FUNCTION_TYPE
35200 && TREE_CODE (*node
) != METHOD_TYPE
35201 && TREE_CODE (*node
) != FIELD_DECL
35202 && TREE_CODE (*node
) != TYPE_DECL
)
35204 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35206 *no_add_attrs
= true;
35211 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
35213 *no_add_attrs
= true;
35216 if (is_attribute_p ("callee_pop_aggregate_return", name
))
35220 cst
= TREE_VALUE (args
);
35221 if (TREE_CODE (cst
) != INTEGER_CST
)
35223 warning (OPT_Wattributes
,
35224 "%qE attribute requires an integer constant argument",
35226 *no_add_attrs
= true;
35228 else if (compare_tree_int (cst
, 0) != 0
35229 && compare_tree_int (cst
, 1) != 0)
35231 warning (OPT_Wattributes
,
35232 "argument to %qE attribute is neither zero, nor one",
35234 *no_add_attrs
= true;
35243 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
35244 struct attribute_spec.handler. */
/* NOTE(review): interior lines are elided in this excerpt (the embedded
   original line numbers jump); comments below describe only the visible
   fragments -- confirm against the full source.  */
35246 ix86_handle_abi_attribute (tree
*node
, tree name
,
35247 tree args ATTRIBUTE_UNUSED
,
35248 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* The attribute is only meaningful on function/method types (or the
   DECL nodes that wrap them); otherwise warn and drop it.  */
35250 if (TREE_CODE (*node
) != FUNCTION_TYPE
35251 && TREE_CODE (*node
) != METHOD_TYPE
35252 && TREE_CODE (*node
) != FIELD_DECL
35253 && TREE_CODE (*node
) != TYPE_DECL
)
35255 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35257 *no_add_attrs
= true;
35261 /* Can combine regparm with all attributes but fastcall. */
/* ms_abi and sysv_abi are mutually exclusive: diagnose if the opposite
   ABI attribute is already present on the type.  */
35262 if (is_attribute_p ("ms_abi", name
))
35264 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
35266 error ("ms_abi and sysv_abi attributes are not compatible");
35271 else if (is_attribute_p ("sysv_abi", name
))
35273 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
35275 error ("ms_abi and sysv_abi attributes are not compatible");
35284 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35285 struct attribute_spec.handler. */
/* NOTE(review): interior lines are elided in this excerpt; comments
   describe only the visible fragments.  */
35287 ix86_handle_struct_attribute (tree
*node
, tree name
,
35288 tree args ATTRIBUTE_UNUSED
,
35289 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
/* For a TYPE_DECL, the attribute applies to the declared type, so point
   TYPE at the underlying tree node.  */
35292 if (DECL_P (*node
))
35294 if (TREE_CODE (*node
) == TYPE_DECL
)
35295 type
= &TREE_TYPE (*node
);
/* Only record/union types can take ms_struct/gcc_struct; otherwise warn
   and drop the attribute.  */
35300 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
35302 warning (OPT_Wattributes
, "%qE attribute ignored",
35304 *no_add_attrs
= true;
/* ms_struct and gcc_struct are mutually exclusive on the same type.  */
35307 else if ((is_attribute_p ("ms_struct", name
)
35308 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
35309 || ((is_attribute_p ("gcc_struct", name
)
35310 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
35312 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
35314 *no_add_attrs
= true;
/* Attribute handler for attributes that may only be placed on function
   declarations: warn and refuse the attribute on anything that is not a
   FUNCTION_DECL.  NOTE(review): interior lines (return type, braces,
   return statements) are elided in this excerpt.  */
35321 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
35322 tree args ATTRIBUTE_UNUSED
,
35323 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35325 if (TREE_CODE (*node
) != FUNCTION_DECL
)
35327 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35329 *no_add_attrs
= true;
/* Return true if bitfields of RECORD_TYPE should use the MS layout:
   either the target defaults to MS bitfield layout and the type is not
   marked "gcc_struct", or the type is explicitly marked "ms_struct".
   NOTE(review): return type and braces are elided in this excerpt.  */
35335 ix86_ms_bitfield_layout_p (const_tree record_type
)
35337 return ((TARGET_MS_BITFIELD_LAYOUT
35338 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
35339 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
35342 /* Returns an expression indicating where the this parameter is
35343 located on entry to the FUNCTION. */
35346 x86_this_parameter (tree function
)
35348 tree type
= TREE_TYPE (function
);
35349 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
35354 const int *parm_regs
;
35356 if (ix86_function_type_abi (type
) == MS_ABI
)
35357 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
35359 parm_regs
= x86_64_int_parameter_registers
;
35360 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
35363 nregs
= ix86_function_regparm (type
, function
);
35365 if (nregs
> 0 && !stdarg_p (type
))
35368 unsigned int ccvt
= ix86_get_callcvt (type
);
35370 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35371 regno
= aggr
? DX_REG
: CX_REG
;
35372 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35376 return gen_rtx_MEM (SImode
,
35377 plus_constant (Pmode
, stack_pointer_rtx
, 4));
35386 return gen_rtx_MEM (SImode
,
35387 plus_constant (Pmode
,
35388 stack_pointer_rtx
, 4));
35391 return gen_rtx_REG (SImode
, regno
);
35394 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
35398 /* Determine whether x86_output_mi_thunk can succeed. */
/* NOTE(review): the return statements and braces are elided in this
   excerpt; the visible conditions gate the 32-bit register-pressure
   checks (regparm < 3, vcall_offset, PIC non-local calls).  */
35401 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
35402 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
35403 HOST_WIDE_INT vcall_offset
, const_tree function
)
35405 /* 64-bit can handle anything. */
35409 /* For 32-bit, everything's fine if we have one free register. */
35410 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
35413 /* Need a free register for vcall_offset. */
35417 /* Need a free register for GOT references. */
35418 if (flag_pic
&& !targetm
.binds_local_p (function
))
35421 /* Otherwise ok. */
35425 /* Output the assembler code for a thunk function. THUNK_DECL is the
35426 declaration for the thunk function itself, FUNCTION is the decl for
35427 the target function. DELTA is an immediate constant offset to be
35428 added to THIS. If VCALL_OFFSET is nonzero, the word at
35429 *(*this + vcall_offset) should be added to THIS. */
35432 x86_output_mi_thunk (FILE *file
,
35433 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
35434 HOST_WIDE_INT vcall_offset
, tree function
)
35436 rtx this_param
= x86_this_parameter (function
);
35437 rtx this_reg
, tmp
, fnaddr
;
35438 unsigned int tmp_regno
;
35441 tmp_regno
= R10_REG
;
35444 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
35445 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35446 tmp_regno
= AX_REG
;
35447 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35448 tmp_regno
= DX_REG
;
35450 tmp_regno
= CX_REG
;
35453 emit_note (NOTE_INSN_PROLOGUE_END
);
35455 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
35456 pull it in now and let DELTA benefit. */
35457 if (REG_P (this_param
))
35458 this_reg
= this_param
;
35459 else if (vcall_offset
)
35461 /* Put the this parameter into %eax. */
35462 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
35463 emit_move_insn (this_reg
, this_param
);
35466 this_reg
= NULL_RTX
;
35468 /* Adjust the this parameter by a fixed constant. */
35471 rtx delta_rtx
= GEN_INT (delta
);
35472 rtx delta_dst
= this_reg
? this_reg
: this_param
;
35476 if (!x86_64_general_operand (delta_rtx
, Pmode
))
35478 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35479 emit_move_insn (tmp
, delta_rtx
);
35484 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
35487 /* Adjust the this parameter by a value stored in the vtable. */
35490 rtx vcall_addr
, vcall_mem
, this_mem
;
35492 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35494 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
35495 if (Pmode
!= ptr_mode
)
35496 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
35497 emit_move_insn (tmp
, this_mem
);
35499 /* Adjust the this parameter. */
35500 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
35502 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
35504 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
35505 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
35506 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
35509 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
35510 if (Pmode
!= ptr_mode
)
35511 emit_insn (gen_addsi_1_zext (this_reg
,
35512 gen_rtx_REG (ptr_mode
,
35516 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
35519 /* If necessary, drop THIS back to its stack slot. */
35520 if (this_reg
&& this_reg
!= this_param
)
35521 emit_move_insn (this_param
, this_reg
);
35523 fnaddr
= XEXP (DECL_RTL (function
), 0);
35526 if (!flag_pic
|| targetm
.binds_local_p (function
)
35531 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
35532 tmp
= gen_rtx_CONST (Pmode
, tmp
);
35533 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
35538 if (!flag_pic
|| targetm
.binds_local_p (function
))
35541 else if (TARGET_MACHO
)
35543 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
35544 fnaddr
= XEXP (fnaddr
, 0);
35546 #endif /* TARGET_MACHO */
35549 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
35550 output_set_got (tmp
, NULL_RTX
);
35552 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
35553 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
35554 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
35558 /* Our sibling call patterns do not allow memories, because we have no
35559 predicate that can distinguish between frame and non-frame memory.
35560 For our purposes here, we can get away with (ab)using a jump pattern,
35561 because we're going to do no optimization. */
35562 if (MEM_P (fnaddr
))
35563 emit_jump_insn (gen_indirect_jump (fnaddr
));
35566 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35567 fnaddr
= legitimize_pic_address (fnaddr
,
35568 gen_rtx_REG (Pmode
, tmp_regno
));
35570 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35572 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35573 if (GET_MODE (fnaddr
) != word_mode
)
35574 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35575 emit_move_insn (tmp
, fnaddr
);
35579 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35580 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35581 tmp
= emit_call_insn (tmp
);
35582 SIBLING_CALL_P (tmp
) = 1;
35586 /* Emit just enough of rest_of_compilation to get the insns emitted.
35587 Note that use_thunk calls assemble_start_function et al. */
35588 tmp
= get_insns ();
35589 shorten_branches (tmp
);
35590 final_start_function (tmp
, file
, 1);
35591 final (tmp
, file
, 1);
35592 final_end_function ();
/* Emit target-specific boilerplate at the start of the assembly file:
   the default prologue, the Darwin file start (when targeting Mach-O --
   the guarding #ifdef is elided in this excerpt), an optional .version
   directive, the __fltused global, and Intel-syntax mode selection.
   NOTE(review): return type, braces, and preprocessor guards are elided
   here; confirm against the full source.  */
35596 x86_file_start (void)
35598 default_file_start ();
35600 darwin_file_start ();
35602 if (X86_FILE_START_VERSION_DIRECTIVE
)
35603 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35604 if (X86_FILE_START_FLTUSED
)
35605 fputs ("\t.global\t__fltused\n", asm_out_file
);
35606 if (ix86_asm_dialect
== ASM_INTEL
)
35607 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
/* Compute the alignment for FIELD given the alignment COMPUTED so far.
   On 32-bit without -malign-double, cap the alignment of double, double
   complex, and integer-class fields at 32 bits (i386 psABI layout).
   NOTE(review): return type, braces, and the final return of COMPUTED
   are elided in this excerpt.  */
35611 x86_field_alignment (tree field
, int computed
)
35613 enum machine_mode mode
;
35614 tree type
= TREE_TYPE (field
);
/* 64-bit and -malign-double keep the natural alignment (the early
   return on the elided line).  */
35616 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
/* Look through arrays so an array of doubles is capped the same way a
   scalar double is.  */
35618 mode
= TYPE_MODE (strip_array_types (type
));
35619 if (mode
== DFmode
|| mode
== DCmode
35620 || GET_MODE_CLASS (mode
) == MODE_INT
35621 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35622 return MIN (32, computed
);
35626 /* Output assembler code to FILE to increment profiler label # LABELNO
35627 for profiling a function entry. */
/* NOTE(review): the TARGET_64BIT/else skeleton, the #endif lines for the
   NO_PROFILE_COUNTERS guards, and several fprintf arguments are elided
   in this excerpt.  The visible branches emit the 64-bit PIC/non-PIC,
   32-bit PIC, and 32-bit non-PIC mcount call sequences.  */
35629 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35631 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35636 #ifndef NO_PROFILE_COUNTERS
35637 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35640 if (!TARGET_PECOFF
&& flag_pic
)
35641 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35643 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35647 #ifndef NO_PROFILE_COUNTERS
35648 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35651 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35655 #ifndef NO_PROFILE_COUNTERS
35656 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35659 fprintf (file
, "\tcall\t%s\n", mcount_name
);
/* NOTE(review): several lines of this function (returns, constants, the
   switch on insn type) are elided in this excerpt; the original header
   comment below is also truncated mid-sentence.  */
35663 /* We don't have exact information about the insn sizes, but we may assume
35664 quite safely that we are informed about all 1 byte insns and memory
35665 address sizes. This is enough to eliminate unnecessary padding in
35669 min_insn_size (rtx insn
)
35673 if (!INSN_P (insn
) || !active_insn_p (insn
))
35676 /* Discard alignments we've emitted and jump instructions. */
35677 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35678 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35681 /* Important case - calls are always 5 bytes.
35682 It is common to have many calls in the row. */
35684 && symbolic_reference_mentioned_p (PATTERN (insn
))
35685 && !SIBLING_CALL_P (insn
))
35687 len
= get_attr_length (insn
);
/* Length attribute is exact for non-jump insns, modulo the exceptions
   handled below (asm statements, symbolic addresses).  */
35691 /* For normal instructions we rely on get_attr_length being exact,
35692 with a few exceptions. */
35693 if (!JUMP_P (insn
))
35695 enum attr_type type
= get_attr_type (insn
);
35700 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35701 || asm_noperands (PATTERN (insn
)) >= 0)
35708 /* Otherwise trust get_attr_length. */
35712 l
= get_attr_length_address (insn
);
35713 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
35722 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35724 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
35728 ix86_avoid_jump_mispredicts (void)
35730 rtx insn
, start
= get_insns ();
35731 int nbytes
= 0, njumps
= 0;
35734 /* Look for all minimal intervals of instructions containing 4 jumps.
35735 The intervals are bounded by START and INSN. NBYTES is the total
35736 size of instructions in the interval including INSN and not including
35737 START. When the NBYTES is smaller than 16 bytes, it is possible
35738 that the end of START and INSN ends up in the same 16byte page.
35740 The smallest offset in the page INSN can start is the case where START
35741 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35742 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35744 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35748 if (LABEL_P (insn
))
35750 int align
= label_to_alignment (insn
);
35751 int max_skip
= label_to_max_skip (insn
);
35755 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35756 already in the current 16 byte page, because otherwise
35757 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35758 bytes to reach 16 byte boundary. */
35760 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35763 fprintf (dump_file
, "Label %i with max_skip %i\n",
35764 INSN_UID (insn
), max_skip
);
35767 while (nbytes
+ max_skip
>= 16)
35769 start
= NEXT_INSN (start
);
35770 if (JUMP_P (start
) || CALL_P (start
))
35771 njumps
--, isjump
= 1;
35774 nbytes
-= min_insn_size (start
);
35780 min_size
= min_insn_size (insn
);
35781 nbytes
+= min_size
;
35783 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35784 INSN_UID (insn
), min_size
);
35785 if (JUMP_P (insn
) || CALL_P (insn
))
35792 start
= NEXT_INSN (start
);
35793 if (JUMP_P (start
) || CALL_P (start
))
35794 njumps
--, isjump
= 1;
35797 nbytes
-= min_insn_size (start
);
35799 gcc_assert (njumps
>= 0);
35801 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
35802 INSN_UID (start
), INSN_UID (insn
), nbytes
);
35804 if (njumps
== 3 && isjump
&& nbytes
< 16)
35806 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
35809 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
35810 INSN_UID (insn
), padsize
);
35811 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
35817 /* AMD Athlon works faster
35818 when RET is not destination of conditional jump or directly preceded
35819 by other jump instruction. We avoid the penalty by inserting NOP just
35820 before the RET instructions in such cases. */
35822 ix86_pad_returns (void)
35827 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35829 basic_block bb
= e
->src
;
35830 rtx ret
= BB_END (bb
);
35832 bool replace
= false;
35834 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35835 || optimize_bb_for_size_p (bb
))
35837 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35838 if (active_insn_p (prev
) || LABEL_P (prev
))
35840 if (prev
&& LABEL_P (prev
))
35845 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35846 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35847 && !(e
->flags
& EDGE_FALLTHRU
))
35855 prev
= prev_active_insn (ret
);
35857 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35860 /* Empty functions get branch mispredict even when
35861 the jump destination is not visible to us. */
35862 if (!prev
&& !optimize_function_for_size_p (cfun
))
35867 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35873 /* Count the minimum number of instructions in BB. Return 4 if the
35874 number of instructions >= 4. */
/* NOTE(review): the increment of insn_count, the early returns, and the
   final return are elided in this excerpt.  */
35877 ix86_count_insn_bb (basic_block bb
)
35880 int insn_count
= 0;
35882 /* Count number of instructions in this block. Return 4 if the number
35883 of instructions >= 4. */
35884 FOR_BB_INSNS (bb
, insn
)
35886 /* Only happen in exit blocks. */
35888 && ANY_RETURN_P (PATTERN (insn
)))
/* USE and CLOBBER patterns do not become real machine instructions, so
   they are excluded from the count.  */
35891 if (NONDEBUG_INSN_P (insn
)
35892 && GET_CODE (PATTERN (insn
)) != USE
35893 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35896 if (insn_count
>= 4)
35905 /* Count the minimum number of instructions in code path in BB.
35906 Return 4 if the number of instructions >= 4. */
/* NOTE(review): braces and some declarations (e.g. edge e, edge_iterator
   ei) are elided in this excerpt.  */
35909 ix86_count_insn (basic_block bb
)
35913 int min_prev_count
;
35915 /* Only bother counting instructions along paths with no
35916 more than 2 basic blocks between entry and exit. Given
35917 that BB has an edge to exit, determine if a predecessor
35918 of BB has an edge from entry. If so, compute the number
35919 of instructions in the predecessor block. If there
35920 happen to be multiple such blocks, compute the minimum. */
35921 min_prev_count
= 4;
35922 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35925 edge_iterator prev_ei
;
/* Direct edge from entry: no predecessor instructions on this path.  */
35927 if (e
->src
== ENTRY_BLOCK_PTR
)
35929 min_prev_count
= 0;
/* Otherwise look one level back: count a predecessor block only when it
   itself is reached directly from entry.  */
35932 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35934 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35936 int count
= ix86_count_insn_bb (e
->src
);
35937 if (count
< min_prev_count
)
35938 min_prev_count
= count
;
/* Add BB's own instructions, still saturating at 4.  */
35944 if (min_prev_count
< 4)
35945 min_prev_count
+= ix86_count_insn_bb (bb
);
35947 return min_prev_count
;
35950 /* Pad short function to 4 instructions. */
/* Walk every edge into the exit block; for each return, count the
   instructions on the path and emit NOPs before the epilogue until the
   path is at least 4 instructions long (TARGET_PAD_SHORT_FUNCTION).
   NOTE(review): return type, braces, and some loop conditions are
   elided in this excerpt.  */
35953 ix86_pad_short_function (void)
35958 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35960 rtx ret
= BB_END (e
->src
);
35961 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35963 int insn_count
= ix86_count_insn (e
->src
);
35965 /* Pad short function. */
35966 if (insn_count
< 4)
35970 /* Find epilogue. */
35973 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35974 insn
= PREV_INSN (insn
);
35979 /* Two NOPs count as one instruction. */
35980 insn_count
= 2 * (4 - insn_count
);
35981 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
35987 /* Fix up a Windows system unwinder issue. If an EH region falls through into
35988 the epilogue, the Windows system unwinder will apply epilogue logic and
35989 produce incorrect offsets. This can be avoided by adding a nop between
35990 the last insn that can throw and the first insn of the epilogue. */
/* NOTE(review): braces and a few loop bodies are elided in this
   excerpt; comments annotate only the visible fragments.  */
35993 ix86_seh_fixup_eh_fallthru (void)
35998 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36002 /* Find the beginning of the epilogue. */
36003 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36004 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36009 /* We only care about preceding insns that can throw. */
36010 insn
= prev_active_insn (insn
);
36011 if (insn
== NULL
|| !can_throw_internal (insn
))
/* Step past VAR_LOCATION / CALL_ARG_LOCATION notes so the nop does not
   land between a call and its debug notes.  */
36014 /* Do not separate calls from their debug information. */
36015 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36017 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36018 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
36023 emit_insn_after (gen_nops (const1_rtx
), insn
);
36027 /* Implement machine specific optimizations. We implement padding of returns
36028 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
36032 /* We are freeing block_for_insn in the toplev to keep compatibility
36033 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36034 compute_bb_for_insn ();
36036 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36037 ix86_seh_fixup_eh_fallthru ();
36039 if (optimize
&& optimize_function_for_speed_p (cfun
))
36041 if (TARGET_PAD_SHORT_FUNCTION
)
36042 ix86_pad_short_function ();
36043 else if (TARGET_PAD_RETURNS
)
36044 ix86_pad_returns ();
36045 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36046 if (TARGET_FOUR_JUMP_LIMIT
)
36047 ix86_avoid_jump_mispredicts ();
/* Scan INSN's extracted operands and report whether any general register
   operand is a QImode register that needs a REX prefix (i.e. not one of
   the legacy QI-addressable registers).  NOTE(review): the original
   header comment below is truncated (its closing delimiter is on an
   elided line), as are the return statements and braces.  */
36052 /* Return nonzero when QImode register that must be represented via REX prefix
36055 x86_extended_QIreg_mentioned_p (rtx insn
)
36058 extract_insn_cached (insn
);
36059 for (i
= 0; i
< recog_data
.n_operands
; i
++)
36060 if (GENERAL_REG_P (recog_data
.operand
[i
])
36061 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
36066 /* Return nonzero when P points to register encoded via REX prefix.
36067 Called via for_each_rtx. */
/* NOTE(review): the !REG_P early-out and braces are elided in this
   excerpt; only the REGNO classification is visible.  */
36069 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36071 unsigned int regno
;
36074 regno
= REGNO (*p
);
36075 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
/* Walk INSN's pattern (or INSN itself when it is not a real insn) with
   for_each_rtx, using extended_reg_mentioned_1 to detect any register
   that requires a REX prefix.  NOTE(review): the original header comment
   below is truncated -- its closing delimiter is on an elided line.  */
36078 /* Return true when INSN mentions register that must be encoded using REX
36081 x86_extended_reg_mentioned_p (rtx insn
)
36083 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
36084 extended_reg_mentioned_1
, NULL
);
36087 /* If profitable, negate (without causing overflow) integer constant
36088 of mode MODE at location LOC. Return true in this case. */
/* NOTE(review): the switch over MODE, the return statements, and the
   declaration of VAL are elided in this excerpt.  */
36090 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
36094 if (!CONST_INT_P (*loc
))
36100 /* DImode x86_64 constants must fit in 32 bits. */
36101 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
36112 gcc_unreachable ();
/* Negating the sign-bit value would overflow; bail out.  */
36115 /* Avoid overflows. */
36116 if (mode_signbit_p (mode
, *loc
))
36119 val
= INTVAL (*loc
);
36121 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
36122 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
36123 if ((val
< 0 && val
!= -128)
36126 *loc
= GEN_INT (-val
);
36133 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
36134 optabs would emit if we didn't have TFmode patterns. */
36137 x86_emit_floatuns (rtx operands
[2])
36139 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
36140 enum machine_mode mode
, inmode
;
36142 inmode
= GET_MODE (operands
[1]);
36143 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
36146 in
= force_reg (inmode
, operands
[1]);
36147 mode
= GET_MODE (out
);
36148 neglab
= gen_label_rtx ();
36149 donelab
= gen_label_rtx ();
36150 f0
= gen_reg_rtx (mode
);
36152 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
36154 expand_float (out
, in
, 0);
36156 emit_jump_insn (gen_jump (donelab
));
36159 emit_label (neglab
);
36161 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
36163 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
36165 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
36167 expand_float (f0
, i0
, 0);
36169 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
36171 emit_label (donelab
);
36174 /* AVX512F does support 64-byte integer vector operations,
36175 thus the longest vector we are faced with is V64QImode. */
36176 #define MAX_VECT_LEN 64
36178 struct expand_vec_perm_d
36180 rtx target
, op0
, op1
;
36181 unsigned char perm
[MAX_VECT_LEN
];
36182 enum machine_mode vmode
;
36183 unsigned char nelt
;
36184 bool one_operand_p
;
36188 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
36189 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
36190 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
36192 /* Get a vector mode of the same size as the original but with elements
36193 twice as wide. This is only guaranteed to apply to integral vectors. */
/* NOTE(review): braces and the final return of N are elided in this
   excerpt.  The asserts check the invariant: half the element count at
   the same total size.  */
36195 static inline enum machine_mode
36196 get_mode_wider_vector (enum machine_mode o
)
36198 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
36199 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
36200 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
36201 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
36205 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36206 with all elements equal to VAR. Return true if successful. */
36209 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
36210 rtx target
, rtx val
)
36233 /* First attempt to recognize VAL as-is. */
36234 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36235 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
36236 if (recog_memoized (insn
) < 0)
36239 /* If that fails, force VAL into a register. */
36242 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
36243 seq
= get_insns ();
36246 emit_insn_before (seq
, insn
);
36248 ok
= recog_memoized (insn
) >= 0;
36257 if (TARGET_SSE
|| TARGET_3DNOW_A
)
36261 val
= gen_lowpart (SImode
, val
);
36262 x
= gen_rtx_TRUNCATE (HImode
, val
);
36263 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
36264 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36277 struct expand_vec_perm_d dperm
;
36281 memset (&dperm
, 0, sizeof (dperm
));
36282 dperm
.target
= target
;
36283 dperm
.vmode
= mode
;
36284 dperm
.nelt
= GET_MODE_NUNITS (mode
);
36285 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
36286 dperm
.one_operand_p
= true;
36288 /* Extend to SImode using a paradoxical SUBREG. */
36289 tmp1
= gen_reg_rtx (SImode
);
36290 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
36292 /* Insert the SImode value as low element of a V4SImode vector. */
36293 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
36294 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
36296 ok
= (expand_vec_perm_1 (&dperm
)
36297 || expand_vec_perm_broadcast_1 (&dperm
));
36309 /* Replicate the value once into the next wider mode and recurse. */
36311 enum machine_mode smode
, wsmode
, wvmode
;
36314 smode
= GET_MODE_INNER (mode
);
36315 wvmode
= get_mode_wider_vector (mode
);
36316 wsmode
= GET_MODE_INNER (wvmode
);
36318 val
= convert_modes (wsmode
, smode
, val
, true);
36319 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
36320 GEN_INT (GET_MODE_BITSIZE (smode
)),
36321 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36322 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
36324 x
= gen_lowpart (wvmode
, target
);
36325 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
36333 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
36334 rtx x
= gen_reg_rtx (hvmode
);
36336 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
36339 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
36340 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36349 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36350 whose ONE_VAR element is VAR, and other elements are zero. Return true
36354 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
36355 rtx target
, rtx var
, int one_var
)
36357 enum machine_mode vsimode
;
36360 bool use_vector_set
= false;
36365 /* For SSE4.1, we normally use vector set. But if the second
36366 element is zero and inter-unit moves are OK, we use movq
36368 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
36369 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
36375 use_vector_set
= TARGET_SSE4_1
;
36378 use_vector_set
= TARGET_SSE2
;
36381 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
36388 use_vector_set
= TARGET_AVX
;
36391 /* Use ix86_expand_vector_set in 64bit mode only. */
36392 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
36398 if (use_vector_set
)
36400 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
36401 var
= force_reg (GET_MODE_INNER (mode
), var
);
36402 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36418 var
= force_reg (GET_MODE_INNER (mode
), var
);
36419 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
36420 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36425 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
36426 new_target
= gen_reg_rtx (mode
);
36428 new_target
= target
;
36429 var
= force_reg (GET_MODE_INNER (mode
), var
);
36430 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
36431 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
36432 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
36435 /* We need to shuffle the value to the correct position, so
36436 create a new pseudo to store the intermediate result. */
36438 /* With SSE2, we can use the integer shuffle insns. */
36439 if (mode
!= V4SFmode
&& TARGET_SSE2
)
36441 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
36443 GEN_INT (one_var
== 1 ? 0 : 1),
36444 GEN_INT (one_var
== 2 ? 0 : 1),
36445 GEN_INT (one_var
== 3 ? 0 : 1)));
36446 if (target
!= new_target
)
36447 emit_move_insn (target
, new_target
);
36451 /* Otherwise convert the intermediate result to V4SFmode and
36452 use the SSE1 shuffle instructions. */
36453 if (mode
!= V4SFmode
)
36455 tmp
= gen_reg_rtx (V4SFmode
);
36456 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
36461 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
36463 GEN_INT (one_var
== 1 ? 0 : 1),
36464 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
36465 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
36467 if (mode
!= V4SFmode
)
36468 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
36469 else if (tmp
!= target
)
36470 emit_move_insn (target
, tmp
);
36472 else if (target
!= new_target
)
36473 emit_move_insn (target
, new_target
);
36478 vsimode
= V4SImode
;
36484 vsimode
= V2SImode
;
36490 /* Zero extend the variable element to SImode and recurse. */
36491 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
36493 x
= gen_reg_rtx (vsimode
);
36494 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
36496 gcc_unreachable ();
36498 emit_move_insn (target
, gen_lowpart (mode
, x
));
36506 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36507 consisting of the values in VALS. It is known that all elements
36508 except ONE_VAR are constants. Return true if successful. */
36511 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
36512 rtx target
, rtx vals
, int one_var
)
36514 rtx var
= XVECEXP (vals
, 0, one_var
);
36515 enum machine_mode wmode
;
36518 const_vec
= copy_rtx (vals
);
36519 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
36520 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
36528 /* For the two element vectors, it's just as easy to use
36529 the general case. */
36533 /* Use ix86_expand_vector_set in 64bit mode only. */
36556 /* There's no way to set one QImode entry easily. Combine
36557 the variable value with its adjacent constant value, and
36558 promote to an HImode set. */
36559 x
= XVECEXP (vals
, 0, one_var
^ 1);
36562 var
= convert_modes (HImode
, QImode
, var
, true);
36563 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
36564 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36565 x
= GEN_INT (INTVAL (x
) & 0xff);
36569 var
= convert_modes (HImode
, QImode
, var
, true);
36570 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
36572 if (x
!= const0_rtx
)
36573 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
36574 1, OPTAB_LIB_WIDEN
);
36576 x
= gen_reg_rtx (wmode
);
36577 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
36578 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
36580 emit_move_insn (target
, gen_lowpart (mode
, x
));
36587 emit_move_insn (target
, const_vec
);
36588 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36592 /* A subroutine of ix86_expand_vector_init_general. Use vector
36593 concatenate to handle the most general case: all values variable,
36594 and none identical. */
36597 ix86_expand_vector_init_concat (enum machine_mode mode
,
36598 rtx target
, rtx
*ops
, int n
)
36600 enum machine_mode cmode
, hmode
= VOIDmode
;
36601 rtx first
[8], second
[4];
36641 gcc_unreachable ();
36644 if (!register_operand (ops
[1], cmode
))
36645 ops
[1] = force_reg (cmode
, ops
[1]);
36646 if (!register_operand (ops
[0], cmode
))
36647 ops
[0] = force_reg (cmode
, ops
[0]);
36648 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36649 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36669 gcc_unreachable ();
36685 gcc_unreachable ();
36690 /* FIXME: We process inputs backward to help RA. PR 36222. */
36693 for (; i
> 0; i
-= 2, j
--)
36695 first
[j
] = gen_reg_rtx (cmode
);
36696 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36697 ix86_expand_vector_init (false, first
[j
],
36698 gen_rtx_PARALLEL (cmode
, v
));
36704 gcc_assert (hmode
!= VOIDmode
);
36705 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36707 second
[j
] = gen_reg_rtx (hmode
);
36708 ix86_expand_vector_init_concat (hmode
, second
[j
],
36712 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36715 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36719 gcc_unreachable ();
36723 /* A subroutine of ix86_expand_vector_init_general. Use vector
36724 interleave to handle the most general case: all values variable,
36725 and none identical. */
36728 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36729 rtx target
, rtx
*ops
, int n
)
36731 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36734 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36735 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36736 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36741 gen_load_even
= gen_vec_setv8hi
;
36742 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36743 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36744 inner_mode
= HImode
;
36745 first_imode
= V4SImode
;
36746 second_imode
= V2DImode
;
36747 third_imode
= VOIDmode
;
36750 gen_load_even
= gen_vec_setv16qi
;
36751 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36752 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36753 inner_mode
= QImode
;
36754 first_imode
= V8HImode
;
36755 second_imode
= V4SImode
;
36756 third_imode
= V2DImode
;
36759 gcc_unreachable ();
36762 for (i
= 0; i
< n
; i
++)
36764 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36765 op0
= gen_reg_rtx (SImode
);
36766 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36768 /* Insert the SImode value as low element of V4SImode vector. */
36769 op1
= gen_reg_rtx (V4SImode
);
36770 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36771 gen_rtx_VEC_DUPLICATE (V4SImode
,
36773 CONST0_RTX (V4SImode
),
36775 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36777 /* Cast the V4SImode vector back to a vector in orignal mode. */
36778 op0
= gen_reg_rtx (mode
);
36779 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36781 /* Load even elements into the second position. */
36782 emit_insn (gen_load_even (op0
,
36783 force_reg (inner_mode
,
36787 /* Cast vector to FIRST_IMODE vector. */
36788 ops
[i
] = gen_reg_rtx (first_imode
);
36789 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36792 /* Interleave low FIRST_IMODE vectors. */
36793 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36795 op0
= gen_reg_rtx (first_imode
);
36796 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36798 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36799 ops
[j
] = gen_reg_rtx (second_imode
);
36800 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36803 /* Interleave low SECOND_IMODE vectors. */
36804 switch (second_imode
)
36807 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36809 op0
= gen_reg_rtx (second_imode
);
36810 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36813 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36815 ops
[j
] = gen_reg_rtx (third_imode
);
36816 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36818 second_imode
= V2DImode
;
36819 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36823 op0
= gen_reg_rtx (second_imode
);
36824 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36827 /* Cast the SECOND_IMODE vector back to a vector on original
36829 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36830 gen_lowpart (mode
, op0
)));
36834 gcc_unreachable ();
36838 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36839 all values variable, and none identical. */
36842 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36843 rtx target
, rtx vals
)
36845 rtx ops
[32], op0
, op1
;
36846 enum machine_mode half_mode
= VOIDmode
;
36853 if (!mmx_ok
&& !TARGET_SSE
)
36865 n
= GET_MODE_NUNITS (mode
);
36866 for (i
= 0; i
< n
; i
++)
36867 ops
[i
] = XVECEXP (vals
, 0, i
);
36868 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36872 half_mode
= V16QImode
;
36876 half_mode
= V8HImode
;
36880 n
= GET_MODE_NUNITS (mode
);
36881 for (i
= 0; i
< n
; i
++)
36882 ops
[i
] = XVECEXP (vals
, 0, i
);
36883 op0
= gen_reg_rtx (half_mode
);
36884 op1
= gen_reg_rtx (half_mode
);
36885 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36887 ix86_expand_vector_init_interleave (half_mode
, op1
,
36888 &ops
[n
>> 1], n
>> 2);
36889 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36890 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36894 if (!TARGET_SSE4_1
)
36902 /* Don't use ix86_expand_vector_init_interleave if we can't
36903 move from GPR to SSE register directly. */
36904 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
36907 n
= GET_MODE_NUNITS (mode
);
36908 for (i
= 0; i
< n
; i
++)
36909 ops
[i
] = XVECEXP (vals
, 0, i
);
36910 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36918 gcc_unreachable ();
36922 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36923 enum machine_mode inner_mode
;
36924 rtx words
[4], shift
;
36926 inner_mode
= GET_MODE_INNER (mode
);
36927 n_elts
= GET_MODE_NUNITS (mode
);
36928 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36929 n_elt_per_word
= n_elts
/ n_words
;
36930 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36932 for (i
= 0; i
< n_words
; ++i
)
36934 rtx word
= NULL_RTX
;
36936 for (j
= 0; j
< n_elt_per_word
; ++j
)
36938 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36939 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36945 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36946 word
, 1, OPTAB_LIB_WIDEN
);
36947 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36948 word
, 1, OPTAB_LIB_WIDEN
);
36956 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36957 else if (n_words
== 2)
36959 rtx tmp
= gen_reg_rtx (mode
);
36960 emit_clobber (tmp
);
36961 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36962 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36963 emit_move_insn (target
, tmp
);
36965 else if (n_words
== 4)
36967 rtx tmp
= gen_reg_rtx (V4SImode
);
36968 gcc_assert (word_mode
== SImode
);
36969 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36970 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36971 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36974 gcc_unreachable ();
36978 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36979 instructions unless MMX_OK is true. */
36982 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36984 enum machine_mode mode
= GET_MODE (target
);
36985 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36986 int n_elts
= GET_MODE_NUNITS (mode
);
36987 int n_var
= 0, one_var
= -1;
36988 bool all_same
= true, all_const_zero
= true;
36992 for (i
= 0; i
< n_elts
; ++i
)
36994 x
= XVECEXP (vals
, 0, i
);
36995 if (!(CONST_INT_P (x
)
36996 || GET_CODE (x
) == CONST_DOUBLE
36997 || GET_CODE (x
) == CONST_FIXED
))
36998 n_var
++, one_var
= i
;
36999 else if (x
!= CONST0_RTX (inner_mode
))
37000 all_const_zero
= false;
37001 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37005 /* Constants are best loaded from the constant pool. */
37008 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37012 /* If all values are identical, broadcast the value. */
37014 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37015 XVECEXP (vals
, 0, 0)))
37018 /* Values where only one field is non-constant are best loaded from
37019 the pool and overwritten via move later. */
37023 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37024 XVECEXP (vals
, 0, one_var
),
37028 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37032 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37036 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37038 enum machine_mode mode
= GET_MODE (target
);
37039 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37040 enum machine_mode half_mode
;
37041 bool use_vec_merge
= false;
37043 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37045 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37046 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37047 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37048 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37049 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37050 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37052 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37054 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37055 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37056 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37057 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37058 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37059 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37069 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37070 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37072 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37074 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37075 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37081 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
37085 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37086 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
37088 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37090 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37091 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37098 /* For the two element vectors, we implement a VEC_CONCAT with
37099 the extraction of the other element. */
37101 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
37102 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
37105 op0
= val
, op1
= tmp
;
37107 op0
= tmp
, op1
= val
;
37109 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
37110 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37115 use_vec_merge
= TARGET_SSE4_1
;
37122 use_vec_merge
= true;
37126 /* tmp = target = A B C D */
37127 tmp
= copy_to_reg (target
);
37128 /* target = A A B B */
37129 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
37130 /* target = X A B B */
37131 ix86_expand_vector_set (false, target
, val
, 0);
37132 /* target = A X C D */
37133 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37134 const1_rtx
, const0_rtx
,
37135 GEN_INT (2+4), GEN_INT (3+4)));
37139 /* tmp = target = A B C D */
37140 tmp
= copy_to_reg (target
);
37141 /* tmp = X B C D */
37142 ix86_expand_vector_set (false, tmp
, val
, 0);
37143 /* target = A B X D */
37144 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37145 const0_rtx
, const1_rtx
,
37146 GEN_INT (0+4), GEN_INT (3+4)));
37150 /* tmp = target = A B C D */
37151 tmp
= copy_to_reg (target
);
37152 /* tmp = X B C D */
37153 ix86_expand_vector_set (false, tmp
, val
, 0);
37154 /* target = A B X D */
37155 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37156 const0_rtx
, const1_rtx
,
37157 GEN_INT (2+4), GEN_INT (0+4)));
37161 gcc_unreachable ();
37166 use_vec_merge
= TARGET_SSE4_1
;
37170 /* Element 0 handled by vec_merge below. */
37173 use_vec_merge
= true;
37179 /* With SSE2, use integer shuffles to swap element 0 and ELT,
37180 store into element 0, then shuffle them back. */
37184 order
[0] = GEN_INT (elt
);
37185 order
[1] = const1_rtx
;
37186 order
[2] = const2_rtx
;
37187 order
[3] = GEN_INT (3);
37188 order
[elt
] = const0_rtx
;
37190 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37191 order
[1], order
[2], order
[3]));
37193 ix86_expand_vector_set (false, target
, val
, 0);
37195 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37196 order
[1], order
[2], order
[3]));
37200 /* For SSE1, we have to reuse the V4SF code. */
37201 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
37202 gen_lowpart (SFmode
, val
), elt
);
37207 use_vec_merge
= TARGET_SSE2
;
37210 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37214 use_vec_merge
= TARGET_SSE4_1
;
37221 half_mode
= V16QImode
;
37227 half_mode
= V8HImode
;
37233 half_mode
= V4SImode
;
37239 half_mode
= V2DImode
;
37245 half_mode
= V4SFmode
;
37251 half_mode
= V2DFmode
;
37257 /* Compute offset. */
37261 gcc_assert (i
<= 1);
37263 /* Extract the half. */
37264 tmp
= gen_reg_rtx (half_mode
);
37265 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
37267 /* Put val in tmp at elt. */
37268 ix86_expand_vector_set (false, tmp
, val
, elt
);
37271 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
37280 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
37281 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
37282 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37286 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37288 emit_move_insn (mem
, target
);
37290 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37291 emit_move_insn (tmp
, val
);
37293 emit_move_insn (target
, mem
);
37298 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
37300 enum machine_mode mode
= GET_MODE (vec
);
37301 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37302 bool use_vec_extr
= false;
37315 use_vec_extr
= true;
37319 use_vec_extr
= TARGET_SSE4_1
;
37331 tmp
= gen_reg_rtx (mode
);
37332 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
37333 GEN_INT (elt
), GEN_INT (elt
),
37334 GEN_INT (elt
+4), GEN_INT (elt
+4)));
37338 tmp
= gen_reg_rtx (mode
);
37339 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
37343 gcc_unreachable ();
37346 use_vec_extr
= true;
37351 use_vec_extr
= TARGET_SSE4_1
;
37365 tmp
= gen_reg_rtx (mode
);
37366 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
37367 GEN_INT (elt
), GEN_INT (elt
),
37368 GEN_INT (elt
), GEN_INT (elt
)));
37372 tmp
= gen_reg_rtx (mode
);
37373 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
37377 gcc_unreachable ();
37380 use_vec_extr
= true;
37385 /* For SSE1, we have to reuse the V4SF code. */
37386 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
37387 gen_lowpart (V4SFmode
, vec
), elt
);
37393 use_vec_extr
= TARGET_SSE2
;
37396 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37400 use_vec_extr
= TARGET_SSE4_1
;
37406 tmp
= gen_reg_rtx (V4SFmode
);
37408 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
37410 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
37411 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37419 tmp
= gen_reg_rtx (V2DFmode
);
37421 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
37423 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
37424 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37432 tmp
= gen_reg_rtx (V16QImode
);
37434 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
37436 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
37437 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
37445 tmp
= gen_reg_rtx (V8HImode
);
37447 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
37449 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
37450 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
37458 tmp
= gen_reg_rtx (V4SImode
);
37460 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
37462 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
37463 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37471 tmp
= gen_reg_rtx (V2DImode
);
37473 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
37475 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
37476 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37482 /* ??? Could extract the appropriate HImode element and shift. */
37489 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
37490 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
37492 /* Let the rtl optimizers know about the zero extension performed. */
37493 if (inner_mode
== QImode
|| inner_mode
== HImode
)
37495 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
37496 target
= gen_lowpart (SImode
, target
);
37499 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37503 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37505 emit_move_insn (mem
, vec
);
37507 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37508 emit_move_insn (target
, tmp
);
37512 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
37513 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
37514 The upper bits of DEST are undefined, though they shouldn't cause
37515 exceptions (some bits from src or all zeros are ok). */
37518 emit_reduc_half (rtx dest
, rtx src
, int i
)
37521 switch (GET_MODE (src
))
37525 tem
= gen_sse_movhlps (dest
, src
, src
);
37527 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
37528 GEN_INT (1 + 4), GEN_INT (1 + 4));
37531 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
37537 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
37538 gen_lowpart (V1TImode
, src
),
37543 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
37545 tem
= gen_avx_shufps256 (dest
, src
, src
,
37546 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
37550 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
37552 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
37559 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
37560 gen_lowpart (V4DImode
, src
),
37561 gen_lowpart (V4DImode
, src
),
37564 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
37565 gen_lowpart (V2TImode
, src
),
37569 gcc_unreachable ();
37574 /* Expand a vector reduction. FN is the binary pattern to reduce;
37575 DEST is the destination; IN is the input vector. */
37578 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
37580 rtx half
, dst
, vec
= in
;
37581 enum machine_mode mode
= GET_MODE (in
);
37584 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37586 && mode
== V8HImode
37587 && fn
== gen_uminv8hi3
)
37589 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
37593 for (i
= GET_MODE_BITSIZE (mode
);
37594 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
37597 half
= gen_reg_rtx (mode
);
37598 emit_reduc_half (half
, vec
, i
);
37599 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37602 dst
= gen_reg_rtx (mode
);
37603 emit_insn (fn (dst
, half
, vec
));
37608 /* Target hook for scalar_mode_supported_p. */
37610 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37612 if (DECIMAL_FLOAT_MODE_P (mode
))
37613 return default_decimal_float_supported_p ();
37614 else if (mode
== TFmode
)
37617 return default_scalar_mode_supported_p (mode
);
37620 /* Implements target hook vector_mode_supported_p. */
37622 ix86_vector_mode_supported_p (enum machine_mode mode
)
37624 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37626 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37628 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37630 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37632 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37637 /* Target hook for c_mode_for_suffix. */
37638 static enum machine_mode
37639 ix86_c_mode_for_suffix (char suffix
)
37649 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37651 We do this in the new i386 backend to maintain source compatibility
37652 with the old cc0-based compiler. */
37655 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37656 tree inputs ATTRIBUTE_UNUSED
,
37659 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37661 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37666 /* Implements target vector targetm.asm.encode_section_info. */
37668 static void ATTRIBUTE_UNUSED
37669 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37671 default_encode_section_info (decl
, rtl
, first
);
37673 if (TREE_CODE (decl
) == VAR_DECL
37674 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37675 && ix86_in_large_data_p (decl
))
37676 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37679 /* Worker function for REVERSE_CONDITION. */
37682 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37684 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37685 ? reverse_condition (code
)
37686 : reverse_condition_maybe_unordered (code
));
37689 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37693 output_387_reg_move (rtx insn
, rtx
*operands
)
37695 if (REG_P (operands
[0]))
37697 if (REG_P (operands
[1])
37698 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37700 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37701 return output_387_ffreep (operands
, 0);
37702 return "fstp\t%y0";
37704 if (STACK_TOP_P (operands
[0]))
37705 return "fld%Z1\t%y1";
37708 else if (MEM_P (operands
[0]))
37710 gcc_assert (REG_P (operands
[1]));
37711 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37712 return "fstp%Z0\t%y0";
37715 /* There is no non-popping store to memory for XFmode.
37716 So if we need one, follow the store with a load. */
37717 if (GET_MODE (operands
[0]) == XFmode
)
37718 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37720 return "fst%Z0\t%y0";
37727 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37728 FP status register is set. */
37731 ix86_emit_fp_unordered_jump (rtx label
)
37733 rtx reg
= gen_reg_rtx (HImode
);
37736 emit_insn (gen_x86_fnstsw_1 (reg
));
37738 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37740 emit_insn (gen_x86_sahf_1 (reg
));
37742 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37743 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37747 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37749 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37750 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37753 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37754 gen_rtx_LABEL_REF (VOIDmode
, label
),
37756 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37758 emit_jump_insn (temp
);
37759 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37762 /* Output code to perform a log1p XFmode calculation. */
37764 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37766 rtx label1
= gen_label_rtx ();
37767 rtx label2
= gen_label_rtx ();
37769 rtx tmp
= gen_reg_rtx (XFmode
);
37770 rtx tmp2
= gen_reg_rtx (XFmode
);
37773 emit_insn (gen_absxf2 (tmp
, op1
));
37774 test
= gen_rtx_GE (VOIDmode
, tmp
,
37775 CONST_DOUBLE_FROM_REAL_VALUE (
37776 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37778 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37780 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37781 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37782 emit_jump (label2
);
37784 emit_label (label1
);
37785 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37786 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37787 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37788 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37790 emit_label (label2
);
37793 /* Emit code for round calculation. */
37794 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37796 enum machine_mode inmode
= GET_MODE (op1
);
37797 enum machine_mode outmode
= GET_MODE (op0
);
37798 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37799 rtx scratch
= gen_reg_rtx (HImode
);
37800 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37801 rtx jump_label
= gen_label_rtx ();
37803 rtx (*gen_abs
) (rtx
, rtx
);
37804 rtx (*gen_neg
) (rtx
, rtx
);
37809 gen_abs
= gen_abssf2
;
37812 gen_abs
= gen_absdf2
;
37815 gen_abs
= gen_absxf2
;
37818 gcc_unreachable ();
37824 gen_neg
= gen_negsf2
;
37827 gen_neg
= gen_negdf2
;
37830 gen_neg
= gen_negxf2
;
37833 gen_neg
= gen_neghi2
;
37836 gen_neg
= gen_negsi2
;
37839 gen_neg
= gen_negdi2
;
37842 gcc_unreachable ();
37845 e1
= gen_reg_rtx (inmode
);
37846 e2
= gen_reg_rtx (inmode
);
37847 res
= gen_reg_rtx (outmode
);
37849 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37851 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37853 /* scratch = fxam(op1) */
37854 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37855 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37857 /* e1 = fabs(op1) */
37858 emit_insn (gen_abs (e1
, op1
));
37860 /* e2 = e1 + 0.5 */
37861 half
= force_reg (inmode
, half
);
37862 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37863 gen_rtx_PLUS (inmode
, e1
, half
)));
37865 /* res = floor(e2) */
37866 if (inmode
!= XFmode
)
37868 tmp1
= gen_reg_rtx (XFmode
);
37870 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37871 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
37881 rtx tmp0
= gen_reg_rtx (XFmode
);
37883 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37885 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37886 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37887 UNSPEC_TRUNC_NOOP
)));
37891 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
37894 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37897 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37900 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37903 gcc_unreachable ();
37906 /* flags = signbit(a) */
37907 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37909 /* if (flags) then res = -res */
37910 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37911 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37912 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37914 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37915 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37916 JUMP_LABEL (insn
) = jump_label
;
37918 emit_insn (gen_neg (res
, res
));
37920 emit_label (jump_label
);
37921 LABEL_NUSES (jump_label
) = 1;
37923 emit_move_insn (op0
, res
);
37926 /* Output code to perform a Newton-Rhapson approximation of a single precision
37927 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37929 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37931 rtx x0
, x1
, e0
, e1
;
37933 x0
= gen_reg_rtx (mode
);
37934 e0
= gen_reg_rtx (mode
);
37935 e1
= gen_reg_rtx (mode
);
37936 x1
= gen_reg_rtx (mode
);
37938 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37940 b
= force_reg (mode
, b
);
37942 /* x0 = rcp(b) estimate */
37943 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37944 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
37947 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37948 gen_rtx_MULT (mode
, x0
, b
)));
37951 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37952 gen_rtx_MULT (mode
, x0
, e0
)));
37955 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37956 gen_rtx_PLUS (mode
, x0
, x0
)));
37959 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37960 gen_rtx_MINUS (mode
, e1
, e0
)));
37963 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37964 gen_rtx_MULT (mode
, a
, x1
)));
37967 /* Output code to perform a Newton-Rhapson approximation of a
37968 single precision floating point [reciprocal] square root. */
37970 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37973 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37976 x0
= gen_reg_rtx (mode
);
37977 e0
= gen_reg_rtx (mode
);
37978 e1
= gen_reg_rtx (mode
);
37979 e2
= gen_reg_rtx (mode
);
37980 e3
= gen_reg_rtx (mode
);
37982 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37983 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37985 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37986 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37988 if (VECTOR_MODE_P (mode
))
37990 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37991 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37994 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37995 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37997 a
= force_reg (mode
, a
);
37999 /* x0 = rsqrt(a) estimate */
38000 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38001 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38004 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38009 zero
= gen_reg_rtx (mode
);
38010 mask
= gen_reg_rtx (mode
);
38012 zero
= force_reg (mode
, CONST0_RTX(mode
));
38013 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38014 gen_rtx_NE (mode
, zero
, a
)));
38016 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38017 gen_rtx_AND (mode
, x0
, mask
)));
38021 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38022 gen_rtx_MULT (mode
, x0
, a
)));
38024 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38025 gen_rtx_MULT (mode
, e0
, x0
)));
38028 mthree
= force_reg (mode
, mthree
);
38029 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38030 gen_rtx_PLUS (mode
, e1
, mthree
)));
38032 mhalf
= force_reg (mode
, mhalf
);
38034 /* e3 = -.5 * x0 */
38035 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38036 gen_rtx_MULT (mode
, x0
, mhalf
)));
38038 /* e3 = -.5 * e0 */
38039 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38040 gen_rtx_MULT (mode
, e0
, mhalf
)));
38041 /* ret = e2 * e3 */
38042 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38043 gen_rtx_MULT (mode
, e2
, e3
)));
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
38076 /* Return the mangling of TYPE if it is an extended fundamental type. */
38078 static const char *
38079 ix86_mangle_type (const_tree type
)
38081 type
= TYPE_MAIN_VARIANT (type
);
38083 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
38084 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
38087 switch (TYPE_MODE (type
))
38090 /* __float128 is "g". */
38093 /* "long double" or __float80 is "e". */
38100 /* For 32-bit code we can save PIC register setup by using
38101 __stack_chk_fail_local hidden function instead of calling
38102 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
38103 register, so it is better to call __stack_chk_fail directly. */
38105 static tree ATTRIBUTE_UNUSED
38106 ix86_stack_protect_fail (void)
38108 return TARGET_64BIT
38109 ? default_external_stack_protect_fail ()
38110 : default_hidden_stack_protect_fail ();
38113 /* Select a format to encode pointers in exception handling data. CODE
38114 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
38115 true if the symbol may be affected by dynamic relocations.
38117 ??? All x86 object file formats are capable of representing this.
38118 After all, the relocation needed is the same as for the call insn.
38119 Whether or not a particular assembler allows us to enter such, I
38120 guess we'll have to see. */
38122 asm_preferred_eh_data_format (int code
, int global
)
38126 int type
= DW_EH_PE_sdata8
;
38128 || ix86_cmodel
== CM_SMALL_PIC
38129 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
38130 type
= DW_EH_PE_sdata4
;
38131 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
38133 if (ix86_cmodel
== CM_SMALL
38134 || (ix86_cmodel
== CM_MEDIUM
&& code
))
38135 return DW_EH_PE_udata4
;
38136 return DW_EH_PE_absptr
;
38139 /* Expand copysign from SIGN to the positive value ABS_VALUE
38140 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
38143 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
38145 enum machine_mode mode
= GET_MODE (sign
);
38146 rtx sgn
= gen_reg_rtx (mode
);
38147 if (mask
== NULL_RTX
)
38149 enum machine_mode vmode
;
38151 if (mode
== SFmode
)
38153 else if (mode
== DFmode
)
38158 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
38159 if (!VECTOR_MODE_P (mode
))
38161 /* We need to generate a scalar mode mask in this case. */
38162 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38163 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38164 mask
= gen_reg_rtx (mode
);
38165 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38169 mask
= gen_rtx_NOT (mode
, mask
);
38170 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
38171 gen_rtx_AND (mode
, mask
, sign
)));
38172 emit_insn (gen_rtx_SET (VOIDmode
, result
,
38173 gen_rtx_IOR (mode
, abs_value
, sgn
)));
38176 /* Expand fabs (OP0) and return a new rtx that holds the result. The
38177 mask for masking out the sign-bit is stored in *SMASK, if that is
38180 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
38182 enum machine_mode vmode
, mode
= GET_MODE (op0
);
38185 xa
= gen_reg_rtx (mode
);
38186 if (mode
== SFmode
)
38188 else if (mode
== DFmode
)
38192 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
38193 if (!VECTOR_MODE_P (mode
))
38195 /* We need to generate a scalar mode mask in this case. */
38196 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38197 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38198 mask
= gen_reg_rtx (mode
);
38199 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38201 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
38202 gen_rtx_AND (mode
, op0
, mask
)));
38210 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
38211 swapping the operands if SWAP_OPERANDS is true. The expanded
38212 code is a forward jump to a newly created label in case the
38213 comparison is true. The generated label rtx is returned. */
38215 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
38216 bool swap_operands
)
38227 label
= gen_label_rtx ();
38228 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
38229 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38230 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
38231 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
38232 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
38233 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
38234 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38235 JUMP_LABEL (tmp
) = label
;
38240 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
38241 using comparison code CODE. Operands are swapped for the comparison if
38242 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
38244 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
38245 bool swap_operands
)
38247 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
38248 enum machine_mode mode
= GET_MODE (op0
);
38249 rtx mask
= gen_reg_rtx (mode
);
38258 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
38260 emit_insn (insn (mask
, op0
, op1
,
38261 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
38265 /* Generate and return a rtx of mode MODE for 2**n where n is the number
38266 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
38268 ix86_gen_TWO52 (enum machine_mode mode
)
38270 REAL_VALUE_TYPE TWO52r
;
38273 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
38274 TWO52
= const_double_from_real_value (TWO52r
, mode
);
38275 TWO52
= force_reg (mode
, TWO52
);
38280 /* Expand SSE sequence for computing lround from OP1 storing
38283 ix86_expand_lround (rtx op0
, rtx op1
)
38285 /* C code for the stuff we're doing below:
38286 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
38289 enum machine_mode mode
= GET_MODE (op1
);
38290 const struct real_format
*fmt
;
38291 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38294 /* load nextafter (0.5, 0.0) */
38295 fmt
= REAL_MODE_FORMAT (mode
);
38296 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38297 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38299 /* adj = copysign (0.5, op1) */
38300 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38301 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
38303 /* adj = op1 + adj */
38304 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38306 /* op0 = (imode)adj */
38307 expand_fix (op0
, adj
, 0);
38310 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
38313 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
38315 /* C code for the stuff we're doing below (for do_floor):
38317 xi -= (double)xi > op1 ? 1 : 0;
38320 enum machine_mode fmode
= GET_MODE (op1
);
38321 enum machine_mode imode
= GET_MODE (op0
);
38322 rtx ireg
, freg
, label
, tmp
;
38324 /* reg = (long)op1 */
38325 ireg
= gen_reg_rtx (imode
);
38326 expand_fix (ireg
, op1
, 0);
38328 /* freg = (double)reg */
38329 freg
= gen_reg_rtx (fmode
);
38330 expand_float (freg
, ireg
, 0);
38332 /* ireg = (freg > op1) ? ireg - 1 : ireg */
38333 label
= ix86_expand_sse_compare_and_jump (UNLE
,
38334 freg
, op1
, !do_floor
);
38335 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
38336 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
38337 emit_move_insn (ireg
, tmp
);
38339 emit_label (label
);
38340 LABEL_NUSES (label
) = 1;
38342 emit_move_insn (op0
, ireg
);
38345 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
38346 result in OPERAND0. */
38348 ix86_expand_rint (rtx operand0
, rtx operand1
)
38350 /* C code for the stuff we're doing below:
38351 xa = fabs (operand1);
38352 if (!isless (xa, 2**52))
38354 xa = xa + 2**52 - 2**52;
38355 return copysign (xa, operand1);
38357 enum machine_mode mode
= GET_MODE (operand0
);
38358 rtx res
, xa
, label
, TWO52
, mask
;
38360 res
= gen_reg_rtx (mode
);
38361 emit_move_insn (res
, operand1
);
38363 /* xa = abs (operand1) */
38364 xa
= ix86_expand_sse_fabs (res
, &mask
);
38366 /* if (!isless (xa, TWO52)) goto label; */
38367 TWO52
= ix86_gen_TWO52 (mode
);
38368 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38370 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38371 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38373 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
38375 emit_label (label
);
38376 LABEL_NUSES (label
) = 1;
38378 emit_move_insn (operand0
, res
);
38381 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38384 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
38386 /* C code for the stuff we expand below.
38387 double xa = fabs (x), x2;
38388 if (!isless (xa, TWO52))
38390 xa = xa + TWO52 - TWO52;
38391 x2 = copysign (xa, x);
38400 enum machine_mode mode
= GET_MODE (operand0
);
38401 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
38403 TWO52
= ix86_gen_TWO52 (mode
);
38405 /* Temporary for holding the result, initialized to the input
38406 operand to ease control flow. */
38407 res
= gen_reg_rtx (mode
);
38408 emit_move_insn (res
, operand1
);
38410 /* xa = abs (operand1) */
38411 xa
= ix86_expand_sse_fabs (res
, &mask
);
38413 /* if (!isless (xa, TWO52)) goto label; */
38414 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38416 /* xa = xa + TWO52 - TWO52; */
38417 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38418 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38420 /* xa = copysign (xa, operand1) */
38421 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
38423 /* generate 1.0 or -1.0 */
38424 one
= force_reg (mode
,
38425 const_double_from_real_value (do_floor
38426 ? dconst1
: dconstm1
, mode
));
38428 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38429 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38430 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38431 gen_rtx_AND (mode
, one
, tmp
)));
38432 /* We always need to subtract here to preserve signed zero. */
38433 tmp
= expand_simple_binop (mode
, MINUS
,
38434 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38435 emit_move_insn (res
, tmp
);
38437 emit_label (label
);
38438 LABEL_NUSES (label
) = 1;
38440 emit_move_insn (operand0
, res
);
38443 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38446 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
38448 /* C code for the stuff we expand below.
38449 double xa = fabs (x), x2;
38450 if (!isless (xa, TWO52))
38452 x2 = (double)(long)x;
38459 if (HONOR_SIGNED_ZEROS (mode))
38460 return copysign (x2, x);
38463 enum machine_mode mode
= GET_MODE (operand0
);
38464 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
38466 TWO52
= ix86_gen_TWO52 (mode
);
38468 /* Temporary for holding the result, initialized to the input
38469 operand to ease control flow. */
38470 res
= gen_reg_rtx (mode
);
38471 emit_move_insn (res
, operand1
);
38473 /* xa = abs (operand1) */
38474 xa
= ix86_expand_sse_fabs (res
, &mask
);
38476 /* if (!isless (xa, TWO52)) goto label; */
38477 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38479 /* xa = (double)(long)x */
38480 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38481 expand_fix (xi
, res
, 0);
38482 expand_float (xa
, xi
, 0);
38485 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38487 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38488 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38489 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38490 gen_rtx_AND (mode
, one
, tmp
)));
38491 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
38492 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38493 emit_move_insn (res
, tmp
);
38495 if (HONOR_SIGNED_ZEROS (mode
))
38496 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38498 emit_label (label
);
38499 LABEL_NUSES (label
) = 1;
38501 emit_move_insn (operand0
, res
);
38504 /* Expand SSE sequence for computing round from OPERAND1 storing
38505 into OPERAND0. Sequence that works without relying on DImode truncation
38506 via cvttsd2siq that is only available on 64bit targets. */
38508 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
38510 /* C code for the stuff we expand below.
38511 double xa = fabs (x), xa2, x2;
38512 if (!isless (xa, TWO52))
38514 Using the absolute value and copying back sign makes
38515 -0.0 -> -0.0 correct.
38516 xa2 = xa + TWO52 - TWO52;
38521 else if (dxa > 0.5)
38523 x2 = copysign (xa2, x);
38526 enum machine_mode mode
= GET_MODE (operand0
);
38527 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
38529 TWO52
= ix86_gen_TWO52 (mode
);
38531 /* Temporary for holding the result, initialized to the input
38532 operand to ease control flow. */
38533 res
= gen_reg_rtx (mode
);
38534 emit_move_insn (res
, operand1
);
38536 /* xa = abs (operand1) */
38537 xa
= ix86_expand_sse_fabs (res
, &mask
);
38539 /* if (!isless (xa, TWO52)) goto label; */
38540 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38542 /* xa2 = xa + TWO52 - TWO52; */
38543 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38544 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
38546 /* dxa = xa2 - xa; */
38547 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
38549 /* generate 0.5, 1.0 and -0.5 */
38550 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
38551 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38552 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
38556 tmp
= gen_reg_rtx (mode
);
38557 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38558 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
38559 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38560 gen_rtx_AND (mode
, one
, tmp
)));
38561 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38562 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38563 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
38564 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38565 gen_rtx_AND (mode
, one
, tmp
)));
38566 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38568 /* res = copysign (xa2, operand1) */
38569 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
38571 emit_label (label
);
38572 LABEL_NUSES (label
) = 1;
38574 emit_move_insn (operand0
, res
);
38577 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38580 ix86_expand_trunc (rtx operand0
, rtx operand1
)
38582 /* C code for SSE variant we expand below.
38583 double xa = fabs (x), x2;
38584 if (!isless (xa, TWO52))
38586 x2 = (double)(long)x;
38587 if (HONOR_SIGNED_ZEROS (mode))
38588 return copysign (x2, x);
38591 enum machine_mode mode
= GET_MODE (operand0
);
38592 rtx xa
, xi
, TWO52
, label
, res
, mask
;
38594 TWO52
= ix86_gen_TWO52 (mode
);
38596 /* Temporary for holding the result, initialized to the input
38597 operand to ease control flow. */
38598 res
= gen_reg_rtx (mode
);
38599 emit_move_insn (res
, operand1
);
38601 /* xa = abs (operand1) */
38602 xa
= ix86_expand_sse_fabs (res
, &mask
);
38604 /* if (!isless (xa, TWO52)) goto label; */
38605 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38607 /* x = (double)(long)x */
38608 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38609 expand_fix (xi
, res
, 0);
38610 expand_float (res
, xi
, 0);
38612 if (HONOR_SIGNED_ZEROS (mode
))
38613 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38615 emit_label (label
);
38616 LABEL_NUSES (label
) = 1;
38618 emit_move_insn (operand0
, res
);
38621 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38624 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38626 enum machine_mode mode
= GET_MODE (operand0
);
38627 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38629 /* C code for SSE variant we expand below.
38630 double xa = fabs (x), x2;
38631 if (!isless (xa, TWO52))
38633 xa2 = xa + TWO52 - TWO52;
38637 x2 = copysign (xa2, x);
38641 TWO52
= ix86_gen_TWO52 (mode
);
38643 /* Temporary for holding the result, initialized to the input
38644 operand to ease control flow. */
38645 res
= gen_reg_rtx (mode
);
38646 emit_move_insn (res
, operand1
);
38648 /* xa = abs (operand1) */
38649 xa
= ix86_expand_sse_fabs (res
, &smask
);
38651 /* if (!isless (xa, TWO52)) goto label; */
38652 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38654 /* res = xa + TWO52 - TWO52; */
38655 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38656 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38657 emit_move_insn (res
, tmp
);
38660 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38662 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38663 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38664 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38665 gen_rtx_AND (mode
, mask
, one
)));
38666 tmp
= expand_simple_binop (mode
, MINUS
,
38667 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38668 emit_move_insn (res
, tmp
);
38670 /* res = copysign (res, operand1) */
38671 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38673 emit_label (label
);
38674 LABEL_NUSES (label
) = 1;
38676 emit_move_insn (operand0
, res
);
38679 /* Expand SSE sequence for computing round from OPERAND1 storing
38682 ix86_expand_round (rtx operand0
, rtx operand1
)
38684 /* C code for the stuff we're doing below:
38685 double xa = fabs (x);
38686 if (!isless (xa, TWO52))
38688 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38689 return copysign (xa, x);
38691 enum machine_mode mode
= GET_MODE (operand0
);
38692 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38693 const struct real_format
*fmt
;
38694 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38696 /* Temporary for holding the result, initialized to the input
38697 operand to ease control flow. */
38698 res
= gen_reg_rtx (mode
);
38699 emit_move_insn (res
, operand1
);
38701 TWO52
= ix86_gen_TWO52 (mode
);
38702 xa
= ix86_expand_sse_fabs (res
, &mask
);
38703 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38705 /* load nextafter (0.5, 0.0) */
38706 fmt
= REAL_MODE_FORMAT (mode
);
38707 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38708 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38710 /* xa = xa + 0.5 */
38711 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38712 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38714 /* xa = (double)(int64_t)xa */
38715 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38716 expand_fix (xi
, xa
, 0);
38717 expand_float (xa
, xi
, 0);
38719 /* res = copysign (xa, operand1) */
38720 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38722 emit_label (label
);
38723 LABEL_NUSES (label
) = 1;
38725 emit_move_insn (operand0
, res
);
38728 /* Expand SSE sequence for computing round
38729 from OP1 storing into OP0 using sse4 round insn. */
38731 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38733 enum machine_mode mode
= GET_MODE (op0
);
38734 rtx e1
, e2
, res
, half
;
38735 const struct real_format
*fmt
;
38736 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38737 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38738 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38743 gen_copysign
= gen_copysignsf3
;
38744 gen_round
= gen_sse4_1_roundsf2
;
38747 gen_copysign
= gen_copysigndf3
;
38748 gen_round
= gen_sse4_1_rounddf2
;
38751 gcc_unreachable ();
38754 /* round (a) = trunc (a + copysign (0.5, a)) */
38756 /* load nextafter (0.5, 0.0) */
38757 fmt
= REAL_MODE_FORMAT (mode
);
38758 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38759 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38760 half
= const_double_from_real_value (pred_half
, mode
);
38762 /* e1 = copysign (0.5, op1) */
38763 e1
= gen_reg_rtx (mode
);
38764 emit_insn (gen_copysign (e1
, half
, op1
));
38766 /* e2 = op1 + e1 */
38767 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38769 /* res = trunc (e2) */
38770 res
= gen_reg_rtx (mode
);
38771 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38773 emit_move_insn (op0
, res
);
38777 /* Table of valid machine attributes. */
38778 static const struct attribute_spec ix86_attribute_table
[] =
38780 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38781 affects_type_identity } */
38782 /* Stdcall attribute says callee is responsible for popping arguments
38783 if they are not variable. */
38784 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38786 /* Fastcall attribute says callee is responsible for popping arguments
38787 if they are not variable. */
38788 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38790 /* Thiscall attribute says callee is responsible for popping arguments
38791 if they are not variable. */
38792 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38794 /* Cdecl attribute says the callee is a normal C declaration */
38795 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38797 /* Regparm attribute specifies how many integer arguments are to be
38798 passed in registers. */
38799 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38801 /* Sseregparm attribute says we are using x86_64 calling conventions
38802 for FP arguments. */
38803 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38805 /* The transactional memory builtins are implicitly regparm or fastcall
38806 depending on the ABI. Override the generic do-nothing attribute that
38807 these builtins were declared with. */
38808 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38810 /* force_align_arg_pointer says this function realigns the stack at entry. */
38811 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38812 false, true, true, ix86_handle_cconv_attribute
, false },
38813 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38814 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38815 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38816 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38819 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38821 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38823 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38824 SUBTARGET_ATTRIBUTE_TABLE
,
38826 /* ms_abi and sysv_abi calling convention function attributes. */
38827 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38828 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38829 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38831 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38832 ix86_handle_callee_pop_aggregate_return
, true },
38834 { NULL
, 0, 0, false, false, false, NULL
, false }
38837 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38839 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38841 int misalign ATTRIBUTE_UNUSED
)
38845 switch (type_of_cost
)
38848 return ix86_cost
->scalar_stmt_cost
;
38851 return ix86_cost
->scalar_load_cost
;
38854 return ix86_cost
->scalar_store_cost
;
38857 return ix86_cost
->vec_stmt_cost
;
38860 return ix86_cost
->vec_align_load_cost
;
38863 return ix86_cost
->vec_store_cost
;
38865 case vec_to_scalar
:
38866 return ix86_cost
->vec_to_scalar_cost
;
38868 case scalar_to_vec
:
38869 return ix86_cost
->scalar_to_vec_cost
;
38871 case unaligned_load
:
38872 case unaligned_store
:
38873 return ix86_cost
->vec_unalign_load_cost
;
38875 case cond_branch_taken
:
38876 return ix86_cost
->cond_taken_branch_cost
;
38878 case cond_branch_not_taken
:
38879 return ix86_cost
->cond_not_taken_branch_cost
;
38882 case vec_promote_demote
:
38883 return ix86_cost
->vec_stmt_cost
;
38885 case vec_construct
:
38886 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38887 return elements
/ 2 + 1;
38890 gcc_unreachable ();
38894 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38895 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38896 insn every time. */
38898 static GTY(()) rtx vselect_insn
;
38900 /* Initialize vselect_insn. */
38903 init_vselect_insn (void)
38908 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38909 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38910 XVECEXP (x
, 0, i
) = const0_rtx
;
38911 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38913 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38915 vselect_insn
= emit_insn (x
);
38919 /* Construct (set target (vec_select op0 (parallel perm))) and
38920 return true if that's a valid instruction in the active ISA. */
38923 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38924 unsigned nelt
, bool testing_p
)
38927 rtx x
, save_vconcat
;
38930 if (vselect_insn
== NULL_RTX
)
38931 init_vselect_insn ();
38933 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38934 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38935 for (i
= 0; i
< nelt
; ++i
)
38936 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38937 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38938 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38939 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38940 SET_DEST (PATTERN (vselect_insn
)) = target
;
38941 icode
= recog_memoized (vselect_insn
);
38943 if (icode
>= 0 && !testing_p
)
38944 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38946 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38947 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38948 INSN_CODE (vselect_insn
) = -1;
38953 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38956 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38957 const unsigned char *perm
, unsigned nelt
,
38960 enum machine_mode v2mode
;
38964 if (vselect_insn
== NULL_RTX
)
38965 init_vselect_insn ();
38967 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38968 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38969 PUT_MODE (x
, v2mode
);
38972 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38973 XEXP (x
, 0) = const0_rtx
;
38974 XEXP (x
, 1) = const0_rtx
;
38978 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38979 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38982 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38984 enum machine_mode vmode
= d
->vmode
;
38985 unsigned i
, mask
, nelt
= d
->nelt
;
38986 rtx target
, op0
, op1
, x
;
38987 rtx rperm
[32], vperm
;
38989 if (d
->one_operand_p
)
38991 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38993 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38995 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39000 /* This is a blend, not a permute. Elements must stay in their
39001 respective lanes. */
39002 for (i
= 0; i
< nelt
; ++i
)
39004 unsigned e
= d
->perm
[i
];
39005 if (!(e
== i
|| e
== i
+ nelt
))
39012 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39013 decision should be extracted elsewhere, so that we only try that
39014 sequence once all budget==3 options have been tried. */
39015 target
= d
->target
;
39028 for (i
= 0; i
< nelt
; ++i
)
39029 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39033 for (i
= 0; i
< 2; ++i
)
39034 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39039 for (i
= 0; i
< 4; ++i
)
39040 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39045 /* See if bytes move in pairs so we can use pblendw with
39046 an immediate argument, rather than pblendvb with a vector
39048 for (i
= 0; i
< 16; i
+= 2)
39049 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39052 for (i
= 0; i
< nelt
; ++i
)
39053 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39056 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39057 vperm
= force_reg (vmode
, vperm
);
39059 if (GET_MODE_SIZE (vmode
) == 16)
39060 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39062 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39066 for (i
= 0; i
< 8; ++i
)
39067 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39072 target
= gen_lowpart (vmode
, target
);
39073 op0
= gen_lowpart (vmode
, op0
);
39074 op1
= gen_lowpart (vmode
, op1
);
39078 /* See if bytes move in pairs. If not, vpblendvb must be used. */
39079 for (i
= 0; i
< 32; i
+= 2)
39080 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39082 /* See if bytes move in quadruplets. If yes, vpblendd
39083 with immediate can be used. */
39084 for (i
= 0; i
< 32; i
+= 4)
39085 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
39089 /* See if bytes move the same in both lanes. If yes,
39090 vpblendw with immediate can be used. */
39091 for (i
= 0; i
< 16; i
+= 2)
39092 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
39095 /* Use vpblendw. */
39096 for (i
= 0; i
< 16; ++i
)
39097 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
39102 /* Use vpblendd. */
39103 for (i
= 0; i
< 8; ++i
)
39104 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
39109 /* See if words move in pairs. If yes, vpblendd can be used. */
39110 for (i
= 0; i
< 16; i
+= 2)
39111 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39115 /* See if words move the same in both lanes. If not,
39116 vpblendvb must be used. */
39117 for (i
= 0; i
< 8; i
++)
39118 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
39120 /* Use vpblendvb. */
39121 for (i
= 0; i
< 32; ++i
)
39122 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
39126 target
= gen_lowpart (vmode
, target
);
39127 op0
= gen_lowpart (vmode
, op0
);
39128 op1
= gen_lowpart (vmode
, op1
);
39129 goto finish_pblendvb
;
39132 /* Use vpblendw. */
39133 for (i
= 0; i
< 16; ++i
)
39134 mask
|= (d
->perm
[i
] >= 16) << i
;
39138 /* Use vpblendd. */
39139 for (i
= 0; i
< 8; ++i
)
39140 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39145 /* Use vpblendd. */
39146 for (i
= 0; i
< 4; ++i
)
39147 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39152 gcc_unreachable ();
39155 /* This matches five different patterns with the different modes. */
39156 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
39157 x
= gen_rtx_SET (VOIDmode
, target
, x
);
39163 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39164 in terms of the variable form of vpermilps.
39166 Note that we will have already failed the immediate input vpermilps,
39167 which requires that the high and low part shuffle be identical; the
39168 variable form doesn't require that. */
39171 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
39173 rtx rperm
[8], vperm
;
39176 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
39179 /* We can only permute within the 128-bit lane. */
39180 for (i
= 0; i
< 8; ++i
)
39182 unsigned e
= d
->perm
[i
];
39183 if (i
< 4 ? e
>= 4 : e
< 4)
39190 for (i
= 0; i
< 8; ++i
)
39192 unsigned e
= d
->perm
[i
];
39194 /* Within each 128-bit lane, the elements of op0 are numbered
39195 from 0 and the elements of op1 are numbered from 4. */
39201 rperm
[i
] = GEN_INT (e
);
39204 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
39205 vperm
= force_reg (V8SImode
, vperm
);
39206 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
39211 /* Return true if permutation D can be performed as VMODE permutation
39215 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
39217 unsigned int i
, j
, chunk
;
39219 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
39220 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
39221 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
39224 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
39227 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
39228 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
39229 if (d
->perm
[i
] & (chunk
- 1))
39232 for (j
= 1; j
< chunk
; ++j
)
39233 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
39239 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39240 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
39243 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
39245 unsigned i
, nelt
, eltsz
, mask
;
39246 unsigned char perm
[32];
39247 enum machine_mode vmode
= V16QImode
;
39248 rtx rperm
[32], vperm
, target
, op0
, op1
;
39252 if (!d
->one_operand_p
)
39254 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
39257 && valid_perm_using_mode_p (V2TImode
, d
))
39262 /* Use vperm2i128 insn. The pattern uses
39263 V4DImode instead of V2TImode. */
39264 target
= gen_lowpart (V4DImode
, d
->target
);
39265 op0
= gen_lowpart (V4DImode
, d
->op0
);
39266 op1
= gen_lowpart (V4DImode
, d
->op1
);
39268 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
39269 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
39270 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
39278 if (GET_MODE_SIZE (d
->vmode
) == 16)
39283 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39288 /* V4DImode should be already handled through
39289 expand_vselect by vpermq instruction. */
39290 gcc_assert (d
->vmode
!= V4DImode
);
39293 if (d
->vmode
== V8SImode
39294 || d
->vmode
== V16HImode
39295 || d
->vmode
== V32QImode
)
39297 /* First see if vpermq can be used for
39298 V8SImode/V16HImode/V32QImode. */
39299 if (valid_perm_using_mode_p (V4DImode
, d
))
39301 for (i
= 0; i
< 4; i
++)
39302 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
39305 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
39306 gen_lowpart (V4DImode
, d
->op0
),
39310 /* Next see if vpermd can be used. */
39311 if (valid_perm_using_mode_p (V8SImode
, d
))
39314 /* Or if vpermps can be used. */
39315 else if (d
->vmode
== V8SFmode
)
39318 if (vmode
== V32QImode
)
39320 /* vpshufb only works intra lanes, it is not
39321 possible to shuffle bytes in between the lanes. */
39322 for (i
= 0; i
< nelt
; ++i
)
39323 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
39334 if (vmode
== V8SImode
)
39335 for (i
= 0; i
< 8; ++i
)
39336 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
39339 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39340 if (!d
->one_operand_p
)
39341 mask
= 2 * nelt
- 1;
39342 else if (vmode
== V16QImode
)
39345 mask
= nelt
/ 2 - 1;
39347 for (i
= 0; i
< nelt
; ++i
)
39349 unsigned j
, e
= d
->perm
[i
] & mask
;
39350 for (j
= 0; j
< eltsz
; ++j
)
39351 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
39355 vperm
= gen_rtx_CONST_VECTOR (vmode
,
39356 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
39357 vperm
= force_reg (vmode
, vperm
);
39359 target
= gen_lowpart (vmode
, d
->target
);
39360 op0
= gen_lowpart (vmode
, d
->op0
);
39361 if (d
->one_operand_p
)
39363 if (vmode
== V16QImode
)
39364 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
39365 else if (vmode
== V32QImode
)
39366 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
39367 else if (vmode
== V8SFmode
)
39368 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
39370 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
39374 op1
= gen_lowpart (vmode
, d
->op1
);
39375 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
39381 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
39382 in a single instruction. */
39385 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
39387 unsigned i
, nelt
= d
->nelt
;
39388 unsigned char perm2
[MAX_VECT_LEN
];
39390 /* Check plain VEC_SELECT first, because AVX has instructions that could
39391 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
39392 input where SEL+CONCAT may not. */
39393 if (d
->one_operand_p
)
39395 int mask
= nelt
- 1;
39396 bool identity_perm
= true;
39397 bool broadcast_perm
= true;
39399 for (i
= 0; i
< nelt
; i
++)
39401 perm2
[i
] = d
->perm
[i
] & mask
;
39403 identity_perm
= false;
39405 broadcast_perm
= false;
39411 emit_move_insn (d
->target
, d
->op0
);
39414 else if (broadcast_perm
&& TARGET_AVX2
)
39416 /* Use vpbroadcast{b,w,d}. */
39417 rtx (*gen
) (rtx
, rtx
) = NULL
;
39421 gen
= gen_avx2_pbroadcastv32qi_1
;
39424 gen
= gen_avx2_pbroadcastv16hi_1
;
39427 gen
= gen_avx2_pbroadcastv8si_1
;
39430 gen
= gen_avx2_pbroadcastv16qi
;
39433 gen
= gen_avx2_pbroadcastv8hi
;
39436 gen
= gen_avx2_vec_dupv8sf_1
;
39438 /* For other modes prefer other shuffles this function creates. */
39444 emit_insn (gen (d
->target
, d
->op0
));
39449 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
39452 /* There are plenty of patterns in sse.md that are written for
39453 SEL+CONCAT and are not replicated for a single op. Perhaps
39454 that should be changed, to avoid the nastiness here. */
39456 /* Recognize interleave style patterns, which means incrementing
39457 every other permutation operand. */
39458 for (i
= 0; i
< nelt
; i
+= 2)
39460 perm2
[i
] = d
->perm
[i
] & mask
;
39461 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
39463 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39467 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
39470 for (i
= 0; i
< nelt
; i
+= 4)
39472 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
39473 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
39474 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
39475 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
39478 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39484 /* Finally, try the fully general two operand permute. */
39485 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
39489 /* Recognize interleave style patterns with reversed operands. */
39490 if (!d
->one_operand_p
)
39492 for (i
= 0; i
< nelt
; ++i
)
39494 unsigned e
= d
->perm
[i
];
39502 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
39507 /* Try the SSE4.1 blend variable merge instructions. */
39508 if (expand_vec_perm_blend (d
))
39511 /* Try one of the AVX vpermil variable permutations. */
39512 if (expand_vec_perm_vpermil (d
))
39515 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
39516 vpshufb, vpermd, vpermps or vpermq variable permutation. */
39517 if (expand_vec_perm_pshufb (d
))
39523 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39524 in terms of a pair of pshuflw + pshufhw instructions. */
39527 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
39529 unsigned char perm2
[MAX_VECT_LEN
];
39533 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
39536 /* The two permutations only operate in 64-bit lanes. */
39537 for (i
= 0; i
< 4; ++i
)
39538 if (d
->perm
[i
] >= 4)
39540 for (i
= 4; i
< 8; ++i
)
39541 if (d
->perm
[i
] < 4)
39547 /* Emit the pshuflw. */
39548 memcpy (perm2
, d
->perm
, 4);
39549 for (i
= 4; i
< 8; ++i
)
39551 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
39554 /* Emit the pshufhw. */
39555 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
39556 for (i
= 0; i
< 4; ++i
)
39558 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
39564 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39565 the permutation using the SSSE3 palignr instruction. This succeeds
39566 when all of the elements in PERM fit within one vector and we merely
39567 need to shift them down so that a single vector permutation has a
39568 chance to succeed. */
39571 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
39573 unsigned i
, nelt
= d
->nelt
;
39578 /* Even with AVX, palignr only operates on 128-bit vectors. */
39579 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39582 min
= nelt
, max
= 0;
39583 for (i
= 0; i
< nelt
; ++i
)
39585 unsigned e
= d
->perm
[i
];
39591 if (min
== 0 || max
- min
>= nelt
)
39594 /* Given that we have SSSE3, we know we'll be able to implement the
39595 single operand permutation after the palignr with pshufb. */
39599 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39600 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39601 gen_lowpart (TImode
, d
->op1
),
39602 gen_lowpart (TImode
, d
->op0
), shift
));
39604 d
->op0
= d
->op1
= d
->target
;
39605 d
->one_operand_p
= true;
39608 for (i
= 0; i
< nelt
; ++i
)
39610 unsigned e
= d
->perm
[i
] - min
;
39616 /* Test for the degenerate case where the alignment by itself
39617 produces the desired permutation. */
39621 ok
= expand_vec_perm_1 (d
);
/* Forward declaration: expand_vec_perm_interleave2 below consults this
   3-insn interleave strategy (defined later in the file) before
   committing to its own sequence.  */
39627 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39629 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39630 a two vector permutation into a single vector permutation by using
39631 an interleave operation to merge the vectors. */
39634 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39636 struct expand_vec_perm_d dremap
, dfinal
;
39637 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39638 unsigned HOST_WIDE_INT contents
;
39639 unsigned char remap
[2 * MAX_VECT_LEN
];
39641 bool ok
, same_halves
= false;
39643 if (GET_MODE_SIZE (d
->vmode
) == 16)
39645 if (d
->one_operand_p
)
39648 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39652 /* For 32-byte modes allow even d->one_operand_p.
39653 The lack of cross-lane shuffling in some instructions
39654 might prevent a single insn shuffle. */
39656 dfinal
.testing_p
= true;
39657 /* If expand_vec_perm_interleave3 can expand this into
39658 a 3 insn sequence, give up and let it be expanded as
39659 3 insn sequence. While that is one insn longer,
39660 it doesn't need a memory operand and in the common
39661 case that both interleave low and high permutations
39662 with the same operands are adjacent needs 4 insns
39663 for both after CSE. */
39664 if (expand_vec_perm_interleave3 (&dfinal
))
39670 /* Examine from whence the elements come. */
39672 for (i
= 0; i
< nelt
; ++i
)
39673 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39675 memset (remap
, 0xff, sizeof (remap
));
39678 if (GET_MODE_SIZE (d
->vmode
) == 16)
39680 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39682 /* Split the two input vectors into 4 halves. */
39683 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39688 /* If the elements from the low halves use interleave low, and similarly
39689 for interleave high. If the elements are from mis-matched halves, we
39690 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39691 if ((contents
& (h1
| h3
)) == contents
)
39694 for (i
= 0; i
< nelt2
; ++i
)
39697 remap
[i
+ nelt
] = i
* 2 + 1;
39698 dremap
.perm
[i
* 2] = i
;
39699 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39701 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39702 dremap
.vmode
= V4SFmode
;
39704 else if ((contents
& (h2
| h4
)) == contents
)
39707 for (i
= 0; i
< nelt2
; ++i
)
39709 remap
[i
+ nelt2
] = i
* 2;
39710 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39711 dremap
.perm
[i
* 2] = i
+ nelt2
;
39712 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39714 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39715 dremap
.vmode
= V4SFmode
;
39717 else if ((contents
& (h1
| h4
)) == contents
)
39720 for (i
= 0; i
< nelt2
; ++i
)
39723 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39724 dremap
.perm
[i
] = i
;
39725 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39730 dremap
.vmode
= V2DImode
;
39732 dremap
.perm
[0] = 0;
39733 dremap
.perm
[1] = 3;
39736 else if ((contents
& (h2
| h3
)) == contents
)
39739 for (i
= 0; i
< nelt2
; ++i
)
39741 remap
[i
+ nelt2
] = i
;
39742 remap
[i
+ nelt
] = i
+ nelt2
;
39743 dremap
.perm
[i
] = i
+ nelt2
;
39744 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39749 dremap
.vmode
= V2DImode
;
39751 dremap
.perm
[0] = 1;
39752 dremap
.perm
[1] = 2;
39760 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39761 unsigned HOST_WIDE_INT q
[8];
39762 unsigned int nonzero_halves
[4];
39764 /* Split the two input vectors into 8 quarters. */
39765 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39766 for (i
= 1; i
< 8; ++i
)
39767 q
[i
] = q
[0] << (nelt4
* i
);
39768 for (i
= 0; i
< 4; ++i
)
39769 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39771 nonzero_halves
[nzcnt
] = i
;
39777 gcc_assert (d
->one_operand_p
);
39778 nonzero_halves
[1] = nonzero_halves
[0];
39779 same_halves
= true;
39781 else if (d
->one_operand_p
)
39783 gcc_assert (nonzero_halves
[0] == 0);
39784 gcc_assert (nonzero_halves
[1] == 1);
39789 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39791 /* Attempt to increase the likelihood that dfinal
39792 shuffle will be intra-lane. */
39793 char tmph
= nonzero_halves
[0];
39794 nonzero_halves
[0] = nonzero_halves
[1];
39795 nonzero_halves
[1] = tmph
;
39798 /* vperm2f128 or vperm2i128. */
39799 for (i
= 0; i
< nelt2
; ++i
)
39801 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39802 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39803 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39804 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39807 if (d
->vmode
!= V8SFmode
39808 && d
->vmode
!= V4DFmode
39809 && d
->vmode
!= V8SImode
)
39811 dremap
.vmode
= V8SImode
;
39813 for (i
= 0; i
< 4; ++i
)
39815 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39816 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39820 else if (d
->one_operand_p
)
39822 else if (TARGET_AVX2
39823 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39826 for (i
= 0; i
< nelt4
; ++i
)
39829 remap
[i
+ nelt
] = i
* 2 + 1;
39830 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39831 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39832 dremap
.perm
[i
* 2] = i
;
39833 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39834 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39835 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39838 else if (TARGET_AVX2
39839 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39842 for (i
= 0; i
< nelt4
; ++i
)
39844 remap
[i
+ nelt4
] = i
* 2;
39845 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39846 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39847 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39848 dremap
.perm
[i
* 2] = i
+ nelt4
;
39849 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39850 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39851 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39858 /* Use the remapping array set up above to move the elements from their
39859 swizzled locations into their final destinations. */
39861 for (i
= 0; i
< nelt
; ++i
)
39863 unsigned e
= remap
[d
->perm
[i
]];
39864 gcc_assert (e
< nelt
);
39865 /* If same_halves is true, both halves of the remapped vector are the
39866 same. Avoid cross-lane accesses if possible. */
39867 if (same_halves
&& i
>= nelt2
)
39869 gcc_assert (e
< nelt2
);
39870 dfinal
.perm
[i
] = e
+ nelt2
;
39873 dfinal
.perm
[i
] = e
;
39875 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39876 dfinal
.op1
= dfinal
.op0
;
39877 dfinal
.one_operand_p
= true;
39878 dremap
.target
= dfinal
.op0
;
39880 /* Test if the final remap can be done with a single insn. For V4SFmode or
39881 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39883 ok
= expand_vec_perm_1 (&dfinal
);
39884 seq
= get_insns ();
39893 if (dremap
.vmode
!= dfinal
.vmode
)
39895 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39896 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39897 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39900 ok
= expand_vec_perm_1 (&dremap
);
39907 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39908 a single vector cross-lane permutation into vpermq followed
39909 by any of the single insn permutations. */
39912 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39914 struct expand_vec_perm_d dremap
, dfinal
;
39915 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39916 unsigned contents
[2];
39920 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39921 && d
->one_operand_p
))
39926 for (i
= 0; i
< nelt2
; ++i
)
39928 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39929 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39932 for (i
= 0; i
< 2; ++i
)
39934 unsigned int cnt
= 0;
39935 for (j
= 0; j
< 4; ++j
)
39936 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39944 dremap
.vmode
= V4DImode
;
39946 dremap
.target
= gen_reg_rtx (V4DImode
);
39947 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39948 dremap
.op1
= dremap
.op0
;
39949 dremap
.one_operand_p
= true;
39950 for (i
= 0; i
< 2; ++i
)
39952 unsigned int cnt
= 0;
39953 for (j
= 0; j
< 4; ++j
)
39954 if ((contents
[i
] & (1u << j
)) != 0)
39955 dremap
.perm
[2 * i
+ cnt
++] = j
;
39956 for (; cnt
< 2; ++cnt
)
39957 dremap
.perm
[2 * i
+ cnt
] = 0;
39961 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39962 dfinal
.op1
= dfinal
.op0
;
39963 dfinal
.one_operand_p
= true;
39964 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39968 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39969 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39971 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39972 dfinal
.perm
[i
] |= nelt4
;
39974 gcc_unreachable ();
39977 ok
= expand_vec_perm_1 (&dremap
);
39980 ok
= expand_vec_perm_1 (&dfinal
);
39986 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39987 a vector permutation using two instructions, vperm2f128 resp.
39988 vperm2i128 followed by any single in-lane permutation. */
39991 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39993 struct expand_vec_perm_d dfirst
, dsecond
;
39994 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39998 || GET_MODE_SIZE (d
->vmode
) != 32
39999 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40003 dsecond
.one_operand_p
= false;
40004 dsecond
.testing_p
= true;
40006 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40007 immediate. For perm < 16 the second permutation uses
40008 d->op0 as first operand, for perm >= 16 it uses d->op1
40009 as first operand. The second operand is the result of
40011 for (perm
= 0; perm
< 32; perm
++)
40013 /* Ignore permutations which do not move anything cross-lane. */
40016 /* The second shuffle for e.g. V4DFmode has
40017 0123 and ABCD operands.
40018 Ignore AB23, as 23 is already in the second lane
40019 of the first operand. */
40020 if ((perm
& 0xc) == (1 << 2)) continue;
40021 /* And 01CD, as 01 is in the first lane of the first
40023 if ((perm
& 3) == 0) continue;
40024 /* And 4567, as then the vperm2[fi]128 doesn't change
40025 anything on the original 4567 second operand. */
40026 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40030 /* The second shuffle for e.g. V4DFmode has
40031 4567 and ABCD operands.
40032 Ignore AB67, as 67 is already in the second lane
40033 of the first operand. */
40034 if ((perm
& 0xc) == (3 << 2)) continue;
40035 /* And 45CD, as 45 is in the first lane of the first
40037 if ((perm
& 3) == 2) continue;
40038 /* And 0123, as then the vperm2[fi]128 doesn't change
40039 anything on the original 0123 first operand. */
40040 if ((perm
& 0xf) == (1 << 2)) continue;
40043 for (i
= 0; i
< nelt
; i
++)
40045 j
= d
->perm
[i
] / nelt2
;
40046 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
40047 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
40048 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
40049 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
40057 ok
= expand_vec_perm_1 (&dsecond
);
40068 /* Found a usable second shuffle. dfirst will be
40069 vperm2f128 on d->op0 and d->op1. */
40070 dsecond
.testing_p
= false;
40072 dfirst
.target
= gen_reg_rtx (d
->vmode
);
40073 for (i
= 0; i
< nelt
; i
++)
40074 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
40075 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
40077 ok
= expand_vec_perm_1 (&dfirst
);
40080 /* And dsecond is some single insn shuffle, taking
40081 d->op0 and result of vperm2f128 (if perm < 16) or
40082 d->op1 and result of vperm2f128 (otherwise). */
40083 dsecond
.op1
= dfirst
.target
;
40085 dsecond
.op0
= dfirst
.op1
;
40087 ok
= expand_vec_perm_1 (&dsecond
);
40093 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
40094 if (d
->one_operand_p
)
40101 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40102 a two vector permutation using 2 intra-lane interleave insns
40103 and cross-lane shuffle for 32-byte vectors. */
40106 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
40109 rtx (*gen
) (rtx
, rtx
, rtx
);
40111 if (d
->one_operand_p
)
40113 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
40115 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
40121 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
40123 for (i
= 0; i
< nelt
; i
+= 2)
40124 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
40125 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
40135 gen
= gen_vec_interleave_highv32qi
;
40137 gen
= gen_vec_interleave_lowv32qi
;
40141 gen
= gen_vec_interleave_highv16hi
;
40143 gen
= gen_vec_interleave_lowv16hi
;
40147 gen
= gen_vec_interleave_highv8si
;
40149 gen
= gen_vec_interleave_lowv8si
;
40153 gen
= gen_vec_interleave_highv4di
;
40155 gen
= gen_vec_interleave_lowv4di
;
40159 gen
= gen_vec_interleave_highv8sf
;
40161 gen
= gen_vec_interleave_lowv8sf
;
40165 gen
= gen_vec_interleave_highv4df
;
40167 gen
= gen_vec_interleave_lowv4df
;
40170 gcc_unreachable ();
40173 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
40177 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
40178 a single vector permutation using a single intra-lane vector
40179 permutation, vperm2f128 swapping the lanes and vblend* insn blending
40180 the non-swapped and swapped vectors together. */
40183 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
40185 struct expand_vec_perm_d dfirst
, dsecond
;
40186 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40189 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
40193 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
40194 || !d
->one_operand_p
)
40198 for (i
= 0; i
< nelt
; i
++)
40199 dfirst
.perm
[i
] = 0xff;
40200 for (i
= 0, msk
= 0; i
< nelt
; i
++)
40202 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
40203 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
40205 dfirst
.perm
[j
] = d
->perm
[i
];
40209 for (i
= 0; i
< nelt
; i
++)
40210 if (dfirst
.perm
[i
] == 0xff)
40211 dfirst
.perm
[i
] = i
;
40214 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40217 ok
= expand_vec_perm_1 (&dfirst
);
40218 seq
= get_insns ();
40230 dsecond
.op0
= dfirst
.target
;
40231 dsecond
.op1
= dfirst
.target
;
40232 dsecond
.one_operand_p
= true;
40233 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40234 for (i
= 0; i
< nelt
; i
++)
40235 dsecond
.perm
[i
] = i
^ nelt2
;
40237 ok
= expand_vec_perm_1 (&dsecond
);
40240 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
40241 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
40245 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
40246 permutation using two vperm2f128, followed by a vshufpd insn blending
40247 the two vectors together. */
40250 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
40252 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
40255 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
40265 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
40266 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
40267 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
40268 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
40269 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
40270 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
40271 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
40272 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
40273 dthird
.perm
[0] = (d
->perm
[0] % 2);
40274 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
40275 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
40276 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
40278 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40279 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40280 dthird
.op0
= dfirst
.target
;
40281 dthird
.op1
= dsecond
.target
;
40282 dthird
.one_operand_p
= false;
40284 canonicalize_perm (&dfirst
);
40285 canonicalize_perm (&dsecond
);
40287 ok
= expand_vec_perm_1 (&dfirst
)
40288 && expand_vec_perm_1 (&dsecond
)
40289 && expand_vec_perm_1 (&dthird
);
40296 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
40297 permutation with two pshufb insns and an ior. We should have already
40298 failed all two instruction sequences. */
40301 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
40303 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
40304 unsigned int i
, nelt
, eltsz
;
40306 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40308 gcc_assert (!d
->one_operand_p
);
40311 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40313 /* Generate two permutation masks. If the required element is within
40314 the given vector it is shuffled into the proper lane. If the required
40315 element is in the other vector, force a zero into the lane by setting
40316 bit 7 in the permutation mask. */
40317 m128
= GEN_INT (-128);
40318 for (i
= 0; i
< nelt
; ++i
)
40320 unsigned j
, e
= d
->perm
[i
];
40321 unsigned which
= (e
>= nelt
);
40325 for (j
= 0; j
< eltsz
; ++j
)
40327 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
40328 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
40332 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
40333 vperm
= force_reg (V16QImode
, vperm
);
40335 l
= gen_reg_rtx (V16QImode
);
40336 op
= gen_lowpart (V16QImode
, d
->op0
);
40337 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
40339 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
40340 vperm
= force_reg (V16QImode
, vperm
);
40342 h
= gen_reg_rtx (V16QImode
);
40343 op
= gen_lowpart (V16QImode
, d
->op1
);
40344 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
40346 op
= gen_lowpart (V16QImode
, d
->target
);
40347 emit_insn (gen_iorv16qi3 (op
, l
, h
));
40352 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
40353 with two vpshufb insns, vpermq and vpor. We should have already failed
40354 all two or three instruction sequences. */
40357 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
40359 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
40360 unsigned int i
, nelt
, eltsz
;
40363 || !d
->one_operand_p
40364 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40371 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40373 /* Generate two permutation masks. If the required element is within
40374 the same lane, it is shuffled in. If the required element from the
40375 other lane, force a zero by setting bit 7 in the permutation mask.
40376 In the other mask the mask has non-negative elements if element
40377 is requested from the other lane, but also moved to the other lane,
40378 so that the result of vpshufb can have the two V2TImode halves
40380 m128
= GEN_INT (-128);
40381 for (i
= 0; i
< nelt
; ++i
)
40383 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40384 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40386 for (j
= 0; j
< eltsz
; ++j
)
40388 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
40389 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
40393 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40394 vperm
= force_reg (V32QImode
, vperm
);
40396 h
= gen_reg_rtx (V32QImode
);
40397 op
= gen_lowpart (V32QImode
, d
->op0
);
40398 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40400 /* Swap the 128-byte lanes of h into hp. */
40401 hp
= gen_reg_rtx (V4DImode
);
40402 op
= gen_lowpart (V4DImode
, h
);
40403 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
40406 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40407 vperm
= force_reg (V32QImode
, vperm
);
40409 l
= gen_reg_rtx (V32QImode
);
40410 op
= gen_lowpart (V32QImode
, d
->op0
);
40411 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40413 op
= gen_lowpart (V32QImode
, d
->target
);
40414 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
40419 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
40420 and extract-odd permutations of two V32QImode and V16QImode operand
40421 with two vpshufb insns, vpor and vpermq. We should have already
40422 failed all two or three instruction sequences. */
40425 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
40427 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
40428 unsigned int i
, nelt
, eltsz
;
40431 || d
->one_operand_p
40432 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40435 for (i
= 0; i
< d
->nelt
; ++i
)
40436 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
40443 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40445 /* Generate two permutation masks. In the first permutation mask
40446 the first quarter will contain indexes for the first half
40447 of the op0, the second quarter will contain bit 7 set, third quarter
40448 will contain indexes for the second half of the op0 and the
40449 last quarter bit 7 set. In the second permutation mask
40450 the first quarter will contain bit 7 set, the second quarter
40451 indexes for the first half of the op1, the third quarter bit 7 set
40452 and last quarter indexes for the second half of the op1.
40453 I.e. the first mask e.g. for V32QImode extract even will be:
40454 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
40455 (all values masked with 0xf except for -128) and second mask
40456 for extract even will be
40457 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
40458 m128
= GEN_INT (-128);
40459 for (i
= 0; i
< nelt
; ++i
)
40461 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40462 unsigned which
= d
->perm
[i
] >= nelt
;
40463 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
40465 for (j
= 0; j
< eltsz
; ++j
)
40467 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
40468 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
40472 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40473 vperm
= force_reg (V32QImode
, vperm
);
40475 l
= gen_reg_rtx (V32QImode
);
40476 op
= gen_lowpart (V32QImode
, d
->op0
);
40477 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40479 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40480 vperm
= force_reg (V32QImode
, vperm
);
40482 h
= gen_reg_rtx (V32QImode
);
40483 op
= gen_lowpart (V32QImode
, d
->op1
);
40484 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40486 ior
= gen_reg_rtx (V32QImode
);
40487 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
40489 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
40490 op
= gen_lowpart (V4DImode
, d
->target
);
40491 ior
= gen_lowpart (V4DImode
, ior
);
40492 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
40493 const1_rtx
, GEN_INT (3)));
40498 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
40499 and extract-odd permutations. */
40502 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
40509 t1
= gen_reg_rtx (V4DFmode
);
40510 t2
= gen_reg_rtx (V4DFmode
);
40512 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40513 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40514 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40516 /* Now an unpck[lh]pd will produce the result required. */
40518 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
40520 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
40526 int mask
= odd
? 0xdd : 0x88;
40528 t1
= gen_reg_rtx (V8SFmode
);
40529 t2
= gen_reg_rtx (V8SFmode
);
40530 t3
= gen_reg_rtx (V8SFmode
);
40532 /* Shuffle within the 128-bit lanes to produce:
40533 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
40534 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
40537 /* Shuffle the lanes around to produce:
40538 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
40539 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
40542 /* Shuffle within the 128-bit lanes to produce:
40543 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
40544 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
40546 /* Shuffle within the 128-bit lanes to produce:
40547 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
40548 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
40550 /* Shuffle the lanes around to produce:
40551 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
40552 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
40561 /* These are always directly implementable by expand_vec_perm_1. */
40562 gcc_unreachable ();
40566 return expand_vec_perm_pshufb2 (d
);
40569 /* We need 2*log2(N)-1 operations to achieve odd/even
40570 with interleave. */
40571 t1
= gen_reg_rtx (V8HImode
);
40572 t2
= gen_reg_rtx (V8HImode
);
40573 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
40574 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
40575 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
40576 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
40578 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
40580 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
40587 return expand_vec_perm_pshufb2 (d
);
40590 t1
= gen_reg_rtx (V16QImode
);
40591 t2
= gen_reg_rtx (V16QImode
);
40592 t3
= gen_reg_rtx (V16QImode
);
40593 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
40594 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
40595 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
40596 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
40597 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
40598 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40600 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40602 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40609 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40614 struct expand_vec_perm_d d_copy
= *d
;
40615 d_copy
.vmode
= V4DFmode
;
40616 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40617 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40618 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40619 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40622 t1
= gen_reg_rtx (V4DImode
);
40623 t2
= gen_reg_rtx (V4DImode
);
40625 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40626 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40627 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40629 /* Now an vpunpck[lh]qdq will produce the result required. */
40631 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40633 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40640 struct expand_vec_perm_d d_copy
= *d
;
40641 d_copy
.vmode
= V8SFmode
;
40642 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40643 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40644 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40645 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40648 t1
= gen_reg_rtx (V8SImode
);
40649 t2
= gen_reg_rtx (V8SImode
);
40651 /* Shuffle the lanes around into
40652 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40653 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40654 gen_lowpart (V4DImode
, d
->op0
),
40655 gen_lowpart (V4DImode
, d
->op1
),
40657 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40658 gen_lowpart (V4DImode
, d
->op0
),
40659 gen_lowpart (V4DImode
, d
->op1
),
40662 /* Swap the 2nd and 3rd position in each lane into
40663 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40664 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40665 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40666 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40667 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40669 /* Now an vpunpck[lh]qdq will produce
40670 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40672 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40673 gen_lowpart (V4DImode
, t1
),
40674 gen_lowpart (V4DImode
, t2
));
40676 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40677 gen_lowpart (V4DImode
, t1
),
40678 gen_lowpart (V4DImode
, t2
));
40683 gcc_unreachable ();
40689 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40690 extract-even and extract-odd permutations. */
40693 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40695 unsigned i
, odd
, nelt
= d
->nelt
;
40698 if (odd
!= 0 && odd
!= 1)
40701 for (i
= 1; i
< nelt
; ++i
)
40702 if (d
->perm
[i
] != 2 * i
+ odd
)
40705 return expand_vec_perm_even_odd_1 (d
, odd
);
40708 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40709 permutations. We assume that expand_vec_perm_1 has already failed. */
40712 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40714 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40715 enum machine_mode vmode
= d
->vmode
;
40716 unsigned char perm2
[4];
40724 /* These are special-cased in sse.md so that we can optionally
40725 use the vbroadcast instruction. They expand to two insns
40726 if the input happens to be in a register. */
40727 gcc_unreachable ();
40733 /* These are always implementable using standard shuffle patterns. */
40734 gcc_unreachable ();
40738 /* These can be implemented via interleave. We save one insn by
40739 stopping once we have promoted to V4SImode and then use pshufd. */
40743 rtx (*gen
) (rtx
, rtx
, rtx
)
40744 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40745 : gen_vec_interleave_lowv8hi
;
40749 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40750 : gen_vec_interleave_highv8hi
;
40755 dest
= gen_reg_rtx (vmode
);
40756 emit_insn (gen (dest
, op0
, op0
));
40757 vmode
= get_mode_wider_vector (vmode
);
40758 op0
= gen_lowpart (vmode
, dest
);
40760 while (vmode
!= V4SImode
);
40762 memset (perm2
, elt
, 4);
40763 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40772 /* For AVX2 broadcasts of the first element vpbroadcast* or
40773 vpermq should be used by expand_vec_perm_1. */
40774 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40778 gcc_unreachable ();
40782 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40783 broadcast permutations. */
40786 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40788 unsigned i
, elt
, nelt
= d
->nelt
;
40790 if (!d
->one_operand_p
)
40794 for (i
= 1; i
< nelt
; ++i
)
40795 if (d
->perm
[i
] != elt
)
40798 return expand_vec_perm_broadcast_1 (d
);
40801 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40802 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40803 all the shorter instruction sequences. */
40806 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40808 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40809 unsigned int i
, nelt
, eltsz
;
40813 || d
->one_operand_p
40814 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40821 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40823 /* Generate 4 permutation masks. If the required element is within
40824 the same lane, it is shuffled in. If the required element from the
40825 other lane, force a zero by setting bit 7 in the permutation mask.
40826 In the other mask the mask has non-negative elements if element
40827 is requested from the other lane, but also moved to the other lane,
40828 so that the result of vpshufb can have the two V2TImode halves
40830 m128
= GEN_INT (-128);
40831 for (i
= 0; i
< 32; ++i
)
40833 rperm
[0][i
] = m128
;
40834 rperm
[1][i
] = m128
;
40835 rperm
[2][i
] = m128
;
40836 rperm
[3][i
] = m128
;
40842 for (i
= 0; i
< nelt
; ++i
)
40844 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40845 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40846 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40848 for (j
= 0; j
< eltsz
; ++j
)
40849 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40850 used
[which
] = true;
40853 for (i
= 0; i
< 2; ++i
)
40855 if (!used
[2 * i
+ 1])
40860 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40861 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40862 vperm
= force_reg (V32QImode
, vperm
);
40863 h
[i
] = gen_reg_rtx (V32QImode
);
40864 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40865 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40868 /* Swap the 128-byte lanes of h[X]. */
40869 for (i
= 0; i
< 2; ++i
)
40871 if (h
[i
] == NULL_RTX
)
40873 op
= gen_reg_rtx (V4DImode
);
40874 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40875 const2_rtx
, GEN_INT (3), const0_rtx
,
40877 h
[i
] = gen_lowpart (V32QImode
, op
);
40880 for (i
= 0; i
< 2; ++i
)
40887 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40888 vperm
= force_reg (V32QImode
, vperm
);
40889 l
[i
] = gen_reg_rtx (V32QImode
);
40890 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40891 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40894 for (i
= 0; i
< 2; ++i
)
40898 op
= gen_reg_rtx (V32QImode
);
40899 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40906 gcc_assert (l
[0] && l
[1]);
40907 op
= gen_lowpart (V32QImode
, d
->target
);
40908 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40912 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40913 With all of the interface bits taken care of, perform the expansion
40914 in D and return true on success. */
40917 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40919 /* Try a single instruction expansion. */
40920 if (expand_vec_perm_1 (d
))
40923 /* Try sequences of two instructions. */
40925 if (expand_vec_perm_pshuflw_pshufhw (d
))
40928 if (expand_vec_perm_palignr (d
))
40931 if (expand_vec_perm_interleave2 (d
))
40934 if (expand_vec_perm_broadcast (d
))
40937 if (expand_vec_perm_vpermq_perm_1 (d
))
40940 if (expand_vec_perm_vperm2f128 (d
))
40943 /* Try sequences of three instructions. */
40945 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40948 if (expand_vec_perm_pshufb2 (d
))
40951 if (expand_vec_perm_interleave3 (d
))
40954 if (expand_vec_perm_vperm2f128_vblend (d
))
40957 /* Try sequences of four instructions. */
40959 if (expand_vec_perm_vpshufb2_vpermq (d
))
40962 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40965 /* ??? Look for narrow permutations whose element orderings would
40966 allow the promotion to a wider mode. */
40968 /* ??? Look for sequences of interleave or a wider permute that place
40969 the data into the correct lanes for a half-vector shuffle like
40970 pshuf[lh]w or vpermilps. */
40972 /* ??? Look for sequences of interleave that produce the desired results.
40973 The combinatorics of punpck[lh] get pretty ugly... */
40975 if (expand_vec_perm_even_odd (d
))
40978 /* Even longer sequences. */
40979 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40985 /* If a permutation only uses one operand, make it clear. Returns true
40986 if the permutation references both operands. */
40989 canonicalize_perm (struct expand_vec_perm_d
*d
)
40991 int i
, which
, nelt
= d
->nelt
;
40993 for (i
= which
= 0; i
< nelt
; ++i
)
40994 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40996 d
->one_operand_p
= true;
41003 if (!rtx_equal_p (d
->op0
, d
->op1
))
41005 d
->one_operand_p
= false;
41008 /* The elements of PERM do not suggest that only the first operand
41009 is used, but both operands are identical. Allow easier matching
41010 of the permutation by folding the permutation into the single
41015 for (i
= 0; i
< nelt
; ++i
)
41016 d
->perm
[i
] &= nelt
- 1;
41025 return (which
== 3);
41029 ix86_expand_vec_perm_const (rtx operands
[4])
41031 struct expand_vec_perm_d d
;
41032 unsigned char perm
[MAX_VECT_LEN
];
41037 d
.target
= operands
[0];
41038 d
.op0
= operands
[1];
41039 d
.op1
= operands
[2];
41042 d
.vmode
= GET_MODE (d
.target
);
41043 gcc_assert (VECTOR_MODE_P (d
.vmode
));
41044 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41045 d
.testing_p
= false;
41047 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
41048 gcc_assert (XVECLEN (sel
, 0) == nelt
);
41049 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
41051 for (i
= 0; i
< nelt
; ++i
)
41053 rtx e
= XVECEXP (sel
, 0, i
);
41054 int ei
= INTVAL (e
) & (2 * nelt
- 1);
41059 two_args
= canonicalize_perm (&d
);
41061 if (ix86_expand_vec_perm_const_1 (&d
))
41064 /* If the selector says both arguments are needed, but the operands are the
41065 same, the above tried to expand with one_operand_p and flattened selector.
41066 If that didn't work, retry without one_operand_p; we succeeded with that
41068 if (two_args
&& d
.one_operand_p
)
41070 d
.one_operand_p
= false;
41071 memcpy (d
.perm
, perm
, sizeof (perm
));
41072 return ix86_expand_vec_perm_const_1 (&d
);
41078 /* Implement targetm.vectorize.vec_perm_const_ok. */
41081 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
41082 const unsigned char *sel
)
41084 struct expand_vec_perm_d d
;
41085 unsigned int i
, nelt
, which
;
41089 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41090 d
.testing_p
= true;
41092 /* Given sufficient ISA support we can just return true here
41093 for selected vector modes. */
41094 if (GET_MODE_SIZE (d
.vmode
) == 16)
41096 /* All implementable with a single vpperm insn. */
41099 /* All implementable with 2 pshufb + 1 ior. */
41102 /* All implementable with shufpd or unpck[lh]pd. */
41107 /* Extract the values from the vector CST into the permutation
41109 memcpy (d
.perm
, sel
, nelt
);
41110 for (i
= which
= 0; i
< nelt
; ++i
)
41112 unsigned char e
= d
.perm
[i
];
41113 gcc_assert (e
< 2 * nelt
);
41114 which
|= (e
< nelt
? 1 : 2);
41117 /* For all elements from second vector, fold the elements to first. */
41119 for (i
= 0; i
< nelt
; ++i
)
41122 /* Check whether the mask can be applied to the vector type. */
41123 d
.one_operand_p
= (which
!= 3);
41125 /* Implementable with shufps or pshufd. */
41126 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
41129 /* Otherwise we have to go through the motions and see if we can
41130 figure out how to generate the requested permutation. */
41131 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
41132 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
41133 if (!d
.one_operand_p
)
41134 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
41137 ret
= ix86_expand_vec_perm_const_1 (&d
);
41144 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
41146 struct expand_vec_perm_d d
;
41152 d
.vmode
= GET_MODE (targ
);
41153 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41154 d
.one_operand_p
= false;
41155 d
.testing_p
= false;
41157 for (i
= 0; i
< nelt
; ++i
)
41158 d
.perm
[i
] = i
* 2 + odd
;
41160 /* We'll either be able to implement the permutation directly... */
41161 if (expand_vec_perm_1 (&d
))
41164 /* ... or we use the special-case patterns. */
41165 expand_vec_perm_even_odd_1 (&d
, odd
);
41169 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
41171 struct expand_vec_perm_d d
;
41172 unsigned i
, nelt
, base
;
41178 d
.vmode
= GET_MODE (targ
);
41179 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41180 d
.one_operand_p
= false;
41181 d
.testing_p
= false;
41183 base
= high_p
? nelt
/ 2 : 0;
41184 for (i
= 0; i
< nelt
/ 2; ++i
)
41186 d
.perm
[i
* 2] = i
+ base
;
41187 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
41190 /* Note that for AVX this isn't one instruction. */
41191 ok
= ix86_expand_vec_perm_const_1 (&d
);
41196 /* Expand a vector operation CODE for a V*QImode in terms of the
41197 same operation on V*HImode. */
41200 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
41202 enum machine_mode qimode
= GET_MODE (dest
);
41203 enum machine_mode himode
;
41204 rtx (*gen_il
) (rtx
, rtx
, rtx
);
41205 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
41206 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
41207 struct expand_vec_perm_d d
;
41208 bool ok
, full_interleave
;
41209 bool uns_p
= false;
41216 gen_il
= gen_vec_interleave_lowv16qi
;
41217 gen_ih
= gen_vec_interleave_highv16qi
;
41220 himode
= V16HImode
;
41221 gen_il
= gen_avx2_interleave_lowv32qi
;
41222 gen_ih
= gen_avx2_interleave_highv32qi
;
41225 gcc_unreachable ();
41228 op2_l
= op2_h
= op2
;
41232 /* Unpack data such that we've got a source byte in each low byte of
41233 each word. We don't care what goes into the high byte of each word.
41234 Rather than trying to get zero in there, most convenient is to let
41235 it be a copy of the low byte. */
41236 op2_l
= gen_reg_rtx (qimode
);
41237 op2_h
= gen_reg_rtx (qimode
);
41238 emit_insn (gen_il (op2_l
, op2
, op2
));
41239 emit_insn (gen_ih (op2_h
, op2
, op2
));
41242 op1_l
= gen_reg_rtx (qimode
);
41243 op1_h
= gen_reg_rtx (qimode
);
41244 emit_insn (gen_il (op1_l
, op1
, op1
));
41245 emit_insn (gen_ih (op1_h
, op1
, op1
));
41246 full_interleave
= qimode
== V16QImode
;
41254 op1_l
= gen_reg_rtx (himode
);
41255 op1_h
= gen_reg_rtx (himode
);
41256 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
41257 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
41258 full_interleave
= true;
41261 gcc_unreachable ();
41264 /* Perform the operation. */
41265 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
41267 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
41269 gcc_assert (res_l
&& res_h
);
41271 /* Merge the data back into the right place. */
41273 d
.op0
= gen_lowpart (qimode
, res_l
);
41274 d
.op1
= gen_lowpart (qimode
, res_h
);
41276 d
.nelt
= GET_MODE_NUNITS (qimode
);
41277 d
.one_operand_p
= false;
41278 d
.testing_p
= false;
41280 if (full_interleave
)
41282 /* For SSE2, we used an full interleave, so the desired
41283 results are in the even elements. */
41284 for (i
= 0; i
< 32; ++i
)
41289 /* For AVX, the interleave used above was not cross-lane. So the
41290 extraction is evens but with the second and third quarter swapped.
41291 Happily, that is even one insn shorter than even extraction. */
41292 for (i
= 0; i
< 32; ++i
)
41293 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
41296 ok
= ix86_expand_vec_perm_const_1 (&d
);
41299 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41300 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
41303 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
41304 if op is CONST_VECTOR with all odd elements equal to their
41305 preceding element. */
41308 const_vector_equal_evenodd_p (rtx op
)
41310 enum machine_mode mode
= GET_MODE (op
);
41311 int i
, nunits
= GET_MODE_NUNITS (mode
);
41312 if (GET_CODE (op
) != CONST_VECTOR
41313 || nunits
!= CONST_VECTOR_NUNITS (op
))
41315 for (i
= 0; i
< nunits
; i
+= 2)
41316 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
41322 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
41323 bool uns_p
, bool odd_p
)
41325 enum machine_mode mode
= GET_MODE (op1
);
41326 enum machine_mode wmode
= GET_MODE (dest
);
41328 rtx orig_op1
= op1
, orig_op2
= op2
;
41330 if (!nonimmediate_operand (op1
, mode
))
41331 op1
= force_reg (mode
, op1
);
41332 if (!nonimmediate_operand (op2
, mode
))
41333 op2
= force_reg (mode
, op2
);
41335 /* We only play even/odd games with vectors of SImode. */
41336 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
41338 /* If we're looking for the odd results, shift those members down to
41339 the even slots. For some cpus this is faster than a PSHUFD. */
41342 /* For XOP use vpmacsdqh, but only for smult, as it is only
41344 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
41346 x
= force_reg (wmode
, CONST0_RTX (wmode
));
41347 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
41351 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
41352 if (!const_vector_equal_evenodd_p (orig_op1
))
41353 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
41354 x
, NULL
, 1, OPTAB_DIRECT
);
41355 if (!const_vector_equal_evenodd_p (orig_op2
))
41356 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
41357 x
, NULL
, 1, OPTAB_DIRECT
);
41358 op1
= gen_lowpart (mode
, op1
);
41359 op2
= gen_lowpart (mode
, op2
);
41362 if (mode
== V8SImode
)
41365 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
41367 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
41370 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
41371 else if (TARGET_SSE4_1
)
41372 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
41375 rtx s1
, s2
, t0
, t1
, t2
;
41377 /* The easiest way to implement this without PMULDQ is to go through
41378 the motions as if we are performing a full 64-bit multiply. With
41379 the exception that we need to do less shuffling of the elements. */
41381 /* Compute the sign-extension, aka highparts, of the two operands. */
41382 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41383 op1
, pc_rtx
, pc_rtx
);
41384 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41385 op2
, pc_rtx
, pc_rtx
);
41387 /* Multiply LO(A) * HI(B), and vice-versa. */
41388 t1
= gen_reg_rtx (wmode
);
41389 t2
= gen_reg_rtx (wmode
);
41390 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
41391 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
41393 /* Multiply LO(A) * LO(B). */
41394 t0
= gen_reg_rtx (wmode
);
41395 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
41397 /* Combine and shift the highparts into place. */
41398 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
41399 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
41402 /* Combine high and low parts. */
41403 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
41410 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
41411 bool uns_p
, bool high_p
)
41413 enum machine_mode wmode
= GET_MODE (dest
);
41414 enum machine_mode mode
= GET_MODE (op1
);
41415 rtx t1
, t2
, t3
, t4
, mask
;
41420 t1
= gen_reg_rtx (mode
);
41421 t2
= gen_reg_rtx (mode
);
41422 if (TARGET_XOP
&& !uns_p
)
41424 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
41425 shuffle the elements once so that all elements are in the right
41426 place for immediate use: { A C B D }. */
41427 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
41428 const1_rtx
, GEN_INT (3)));
41429 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
41430 const1_rtx
, GEN_INT (3)));
41434 /* Put the elements into place for the multiply. */
41435 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
41436 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
41439 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
41443 /* Shuffle the elements between the lanes. After this we
41444 have { A B E F | C D G H } for each operand. */
41445 t1
= gen_reg_rtx (V4DImode
);
41446 t2
= gen_reg_rtx (V4DImode
);
41447 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
41448 const0_rtx
, const2_rtx
,
41449 const1_rtx
, GEN_INT (3)));
41450 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
41451 const0_rtx
, const2_rtx
,
41452 const1_rtx
, GEN_INT (3)));
41454 /* Shuffle the elements within the lanes. After this we
41455 have { A A B B | C C D D } or { E E F F | G G H H }. */
41456 t3
= gen_reg_rtx (V8SImode
);
41457 t4
= gen_reg_rtx (V8SImode
);
41458 mask
= GEN_INT (high_p
41459 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
41460 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
41461 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
41462 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
41464 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
41469 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
41470 uns_p
, OPTAB_DIRECT
);
41471 t2
= expand_binop (mode
,
41472 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
41473 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
41474 gcc_assert (t1
&& t2
);
41476 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
41481 t1
= gen_reg_rtx (wmode
);
41482 t2
= gen_reg_rtx (wmode
);
41483 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
41484 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
41486 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
41490 gcc_unreachable ();
41495 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
41499 res_1
= gen_reg_rtx (V4SImode
);
41500 res_2
= gen_reg_rtx (V4SImode
);
41501 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
41502 op1
, op2
, true, false);
41503 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
41504 op1
, op2
, true, true);
41506 /* Move the results in element 2 down to element 1; we don't care
41507 what goes in elements 2 and 3. Then we can merge the parts
41508 back together with an interleave.
41510 Note that two other sequences were tried:
41511 (1) Use interleaves at the start instead of psrldq, which allows
41512 us to use a single shufps to merge things back at the end.
41513 (2) Use shufps here to combine the two vectors, then pshufd to
41514 put the elements in the correct order.
41515 In both cases the cost of the reformatting stall was too high
41516 and the overall sequence slower. */
41518 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
41519 const0_rtx
, const0_rtx
));
41520 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
41521 const0_rtx
, const0_rtx
));
41522 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
41524 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
41528 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
41530 enum machine_mode mode
= GET_MODE (op0
);
41531 rtx t1
, t2
, t3
, t4
, t5
, t6
;
41533 if (TARGET_XOP
&& mode
== V2DImode
)
41535 /* op1: A,B,C,D, op2: E,F,G,H */
41536 op1
= gen_lowpart (V4SImode
, op1
);
41537 op2
= gen_lowpart (V4SImode
, op2
);
41539 t1
= gen_reg_rtx (V4SImode
);
41540 t2
= gen_reg_rtx (V4SImode
);
41541 t3
= gen_reg_rtx (V2DImode
);
41542 t4
= gen_reg_rtx (V2DImode
);
41545 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
41551 /* t2: (B*E),(A*F),(D*G),(C*H) */
41552 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
41554 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
41555 emit_insn (gen_xop_phadddq (t3
, t2
));
41557 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
41558 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
41560 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
41561 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
41565 enum machine_mode nmode
;
41566 rtx (*umul
) (rtx
, rtx
, rtx
);
41568 if (mode
== V2DImode
)
41570 umul
= gen_vec_widen_umult_even_v4si
;
41573 else if (mode
== V4DImode
)
41575 umul
= gen_vec_widen_umult_even_v8si
;
41579 gcc_unreachable ();
41582 /* Multiply low parts. */
41583 t1
= gen_reg_rtx (mode
);
41584 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
41586 /* Shift input vectors right 32 bits so we can multiply high parts. */
41588 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
41589 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
41591 /* Multiply high parts by low parts. */
41592 t4
= gen_reg_rtx (mode
);
41593 t5
= gen_reg_rtx (mode
);
41594 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
41595 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
41597 /* Combine and shift the highparts back. */
41598 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
41599 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
41601 /* Combine high and low parts. */
41602 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
41605 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41606 gen_rtx_MULT (mode
, op1
, op2
));
41609 /* Expand an insert into a vector register through pinsr insn.
41610 Return true if successful. */
41613 ix86_expand_pinsr (rtx
*operands
)
41615 rtx dst
= operands
[0];
41616 rtx src
= operands
[3];
41618 unsigned int size
= INTVAL (operands
[1]);
41619 unsigned int pos
= INTVAL (operands
[2]);
41621 if (GET_CODE (dst
) == SUBREG
)
41623 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
41624 dst
= SUBREG_REG (dst
);
41627 if (GET_CODE (src
) == SUBREG
)
41628 src
= SUBREG_REG (src
);
41630 switch (GET_MODE (dst
))
41637 enum machine_mode srcmode
, dstmode
;
41638 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41640 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41645 if (!TARGET_SSE4_1
)
41647 dstmode
= V16QImode
;
41648 pinsr
= gen_sse4_1_pinsrb
;
41654 dstmode
= V8HImode
;
41655 pinsr
= gen_sse2_pinsrw
;
41659 if (!TARGET_SSE4_1
)
41661 dstmode
= V4SImode
;
41662 pinsr
= gen_sse4_1_pinsrd
;
41666 gcc_assert (TARGET_64BIT
);
41667 if (!TARGET_SSE4_1
)
41669 dstmode
= V2DImode
;
41670 pinsr
= gen_sse4_1_pinsrq
;
41677 dst
= gen_lowpart (dstmode
, dst
);
41678 src
= gen_lowpart (srcmode
, src
);
41682 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41691 /* This function returns the calling abi specific va_list type node.
41692 It returns the FNDECL specific va_list type. */
41695 ix86_fn_abi_va_list (tree fndecl
)
41698 return va_list_type_node
;
41699 gcc_assert (fndecl
!= NULL_TREE
);
41701 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41702 return ms_va_list_type_node
;
41704 return sysv_va_list_type_node
;
41707 /* Returns the canonical va_list type specified by TYPE. If there
41708 is no valid TYPE provided, it return NULL_TREE. */
41711 ix86_canonical_va_list_type (tree type
)
41715 /* Resolve references and pointers to va_list type. */
41716 if (TREE_CODE (type
) == MEM_REF
)
41717 type
= TREE_TYPE (type
);
41718 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41719 type
= TREE_TYPE (type
);
41720 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41721 type
= TREE_TYPE (type
);
41723 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41725 wtype
= va_list_type_node
;
41726 gcc_assert (wtype
!= NULL_TREE
);
41728 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41730 /* If va_list is an array type, the argument may have decayed
41731 to a pointer type, e.g. by being passed to another function.
41732 In that case, unwrap both types so that we can compare the
41733 underlying records. */
41734 if (TREE_CODE (htype
) == ARRAY_TYPE
41735 || POINTER_TYPE_P (htype
))
41737 wtype
= TREE_TYPE (wtype
);
41738 htype
= TREE_TYPE (htype
);
41741 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41742 return va_list_type_node
;
41743 wtype
= sysv_va_list_type_node
;
41744 gcc_assert (wtype
!= NULL_TREE
);
41746 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41748 /* If va_list is an array type, the argument may have decayed
41749 to a pointer type, e.g. by being passed to another function.
41750 In that case, unwrap both types so that we can compare the
41751 underlying records. */
41752 if (TREE_CODE (htype
) == ARRAY_TYPE
41753 || POINTER_TYPE_P (htype
))
41755 wtype
= TREE_TYPE (wtype
);
41756 htype
= TREE_TYPE (htype
);
41759 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41760 return sysv_va_list_type_node
;
41761 wtype
= ms_va_list_type_node
;
41762 gcc_assert (wtype
!= NULL_TREE
);
41764 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41766 /* If va_list is an array type, the argument may have decayed
41767 to a pointer type, e.g. by being passed to another function.
41768 In that case, unwrap both types so that we can compare the
41769 underlying records. */
41770 if (TREE_CODE (htype
) == ARRAY_TYPE
41771 || POINTER_TYPE_P (htype
))
41773 wtype
= TREE_TYPE (wtype
);
41774 htype
= TREE_TYPE (htype
);
41777 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41778 return ms_va_list_type_node
;
41781 return std_canonical_va_list_type (type
);
41784 /* Iterate through the target-specific builtin types for va_list.
41785 IDX denotes the iterator, *PTREE is set to the result type of
41786 the va_list builtin, and *PNAME to its internal type.
41787 Returns zero if there is no element for this index, otherwise
41788 IDX should be increased upon the next call.
41789 Note, do not iterate a base builtin's name like __builtin_va_list.
41790 Used from c_common_nodes_and_builtins. */
41793 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41803 *ptree
= ms_va_list_type_node
;
41804 *pname
= "__builtin_ms_va_list";
41808 *ptree
= sysv_va_list_type_node
;
41809 *pname
= "__builtin_sysv_va_list";
41817 #undef TARGET_SCHED_DISPATCH
41818 #define TARGET_SCHED_DISPATCH has_dispatch
41819 #undef TARGET_SCHED_DISPATCH_DO
41820 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41821 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41822 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41823 #undef TARGET_SCHED_REORDER
41824 #define TARGET_SCHED_REORDER ix86_sched_reorder
41825 #undef TARGET_SCHED_ADJUST_PRIORITY
41826 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41827 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41828 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
41829 ix86_dependencies_evaluation_hook
41831 /* The size of the dispatch window is the total number of bytes of
41832 object code allowed in a window. */
41833 #define DISPATCH_WINDOW_SIZE 16
41835 /* Number of dispatch windows considered for scheduling. */
41836 #define MAX_DISPATCH_WINDOWS 3
41838 /* Maximum number of instructions in a window. */
41841 /* Maximum number of immediate operands in a window. */
41844 /* Maximum number of immediate bits allowed in a window. */
41845 #define MAX_IMM_SIZE 128
41847 /* Maximum number of 32 bit immediates allowed in a window. */
41848 #define MAX_IMM_32 4
41850 /* Maximum number of 64 bit immediates allowed in a window. */
41851 #define MAX_IMM_64 2
41853 /* Maximum total of loads or prefetches allowed in a window. */
41856 /* Maximum total of stores allowed in a window. */
41857 #define MAX_STORE 1
41863 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
41864 enum dispatch_group
{
41879 /* Number of allowable groups in a dispatch window. It is an array
41880 indexed by dispatch_group enum. 100 is used as a big number,
41881 because the number of these kind of operations does not have any
41882 effect in dispatch window, but we need them for other reasons in
41884 static unsigned int num_allowable_groups
[disp_last
] = {
41885 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41888 char group_name
[disp_last
+ 1][16] = {
41889 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41890 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41891 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41894 /* Instruction path. */
41897 path_single
, /* Single micro op. */
41898 path_double
, /* Double micro op. */
41899 path_multi
, /* Instructions with more than 2 micro op.. */
41903 /* sched_insn_info defines a window to the instructions scheduled in
41904 the basic block. It contains a pointer to the insn_info table and
41905 the instruction scheduled.
41907 Windows are allocated for each basic block and are linked
41909 typedef struct sched_insn_info_s
{
41911 enum dispatch_group group
;
41912 enum insn_path path
;
41917 /* Linked list of dispatch windows. This is a two way list of
41918 dispatch windows of a basic block. It contains information about
41919 the number of uops in the window and the total number of
41920 instructions and of bytes in the object code for this dispatch
41922 typedef struct dispatch_windows_s
{
41923 int num_insn
; /* Number of insn in the window. */
41924 int num_uops
; /* Number of uops in the window. */
41925 int window_size
; /* Number of bytes in the window. */
41926 int window_num
; /* Window number between 0 or 1. */
41927 int num_imm
; /* Number of immediates in an insn. */
41928 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41929 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41930 int imm_size
; /* Total immediates in the window. */
41931 int num_loads
; /* Total memory loads in the window. */
41932 int num_stores
; /* Total memory stores in the window. */
41933 int violation
; /* Violation exists in window. */
41934 sched_insn_info
*window
; /* Pointer to the window. */
41935 struct dispatch_windows_s
*next
;
41936 struct dispatch_windows_s
*prev
;
41937 } dispatch_windows
;
41939 /* Immediate valuse used in an insn. */
41940 typedef struct imm_info_s
41947 static dispatch_windows
*dispatch_window_list
;
41948 static dispatch_windows
*dispatch_window_list1
;
41950 /* Get dispatch group of insn. */
41952 static enum dispatch_group
41953 get_mem_group (rtx insn
)
41955 enum attr_memory memory
;
41957 if (INSN_CODE (insn
) < 0)
41958 return disp_no_group
;
41959 memory
= get_attr_memory (insn
);
41960 if (memory
== MEMORY_STORE
)
41963 if (memory
== MEMORY_LOAD
)
41966 if (memory
== MEMORY_BOTH
)
41967 return disp_load_store
;
41969 return disp_no_group
;
41972 /* Return true if insn is a compare instruction. */
41977 enum attr_type type
;
41979 type
= get_attr_type (insn
);
41980 return (type
== TYPE_TEST
41981 || type
== TYPE_ICMP
41982 || type
== TYPE_FCMP
41983 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41986 /* Return true if a dispatch violation encountered. */
41989 dispatch_violation (void)
41991 if (dispatch_window_list
->next
)
41992 return dispatch_window_list
->next
->violation
;
41993 return dispatch_window_list
->violation
;
41996 /* Return true if insn is a branch instruction. */
41999 is_branch (rtx insn
)
42001 return (CALL_P (insn
) || JUMP_P (insn
));
42004 /* Return true if insn is a prefetch instruction. */
42007 is_prefetch (rtx insn
)
42009 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
42012 /* This function initializes a dispatch window and the list container holding a
42013 pointer to the window. */
42016 init_window (int window_num
)
42019 dispatch_windows
*new_list
;
42021 if (window_num
== 0)
42022 new_list
= dispatch_window_list
;
42024 new_list
= dispatch_window_list1
;
42026 new_list
->num_insn
= 0;
42027 new_list
->num_uops
= 0;
42028 new_list
->window_size
= 0;
42029 new_list
->next
= NULL
;
42030 new_list
->prev
= NULL
;
42031 new_list
->window_num
= window_num
;
42032 new_list
->num_imm
= 0;
42033 new_list
->num_imm_32
= 0;
42034 new_list
->num_imm_64
= 0;
42035 new_list
->imm_size
= 0;
42036 new_list
->num_loads
= 0;
42037 new_list
->num_stores
= 0;
42038 new_list
->violation
= false;
42040 for (i
= 0; i
< MAX_INSN
; i
++)
42042 new_list
->window
[i
].insn
= NULL
;
42043 new_list
->window
[i
].group
= disp_no_group
;
42044 new_list
->window
[i
].path
= no_path
;
42045 new_list
->window
[i
].byte_len
= 0;
42046 new_list
->window
[i
].imm_bytes
= 0;
42051 /* This function allocates and initializes a dispatch window and the
42052 list container holding a pointer to the window. */
42054 static dispatch_windows
*
42055 allocate_window (void)
42057 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
42058 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
42063 /* This routine initializes the dispatch scheduling information. It
42064 initiates building dispatch scheduler tables and constructs the
42065 first dispatch window. */
42068 init_dispatch_sched (void)
42070 /* Allocate a dispatch list and a window. */
42071 dispatch_window_list
= allocate_window ();
42072 dispatch_window_list1
= allocate_window ();
42077 /* This function returns true if a branch is detected. End of a basic block
42078 does not have to be a branch, but here we assume only branches end a
42082 is_end_basic_block (enum dispatch_group group
)
42084 return group
== disp_branch
;
42087 /* This function is called when the end of a window processing is reached. */
42090 process_end_window (void)
42092 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
42093 if (dispatch_window_list
->next
)
42095 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
42096 gcc_assert (dispatch_window_list
->window_size
42097 + dispatch_window_list1
->window_size
<= 48);
42103 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
42104 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
42105 for 48 bytes of instructions. Note that these windows are not dispatch
42106 windows that their sizes are DISPATCH_WINDOW_SIZE. */
42108 static dispatch_windows
*
42109 allocate_next_window (int window_num
)
42111 if (window_num
== 0)
42113 if (dispatch_window_list
->next
)
42116 return dispatch_window_list
;
42119 dispatch_window_list
->next
= dispatch_window_list1
;
42120 dispatch_window_list1
->prev
= dispatch_window_list
;
42122 return dispatch_window_list1
;
42125 /* Increment the number of immediate operands of an instruction. */
42128 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
42133 switch ( GET_CODE (*in_rtx
))
42138 (imm_values
->imm
)++;
42139 if (x86_64_immediate_operand (*in_rtx
, SImode
))
42140 (imm_values
->imm32
)++;
42142 (imm_values
->imm64
)++;
42146 (imm_values
->imm
)++;
42147 (imm_values
->imm64
)++;
42151 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
42153 (imm_values
->imm
)++;
42154 (imm_values
->imm32
)++;
42165 /* Compute number of immediate operands of an instruction. */
42168 find_constant (rtx in_rtx
, imm_info
*imm_values
)
42170 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
42171 (rtx_function
) find_constant_1
, (void *) imm_values
);
42174 /* Return total size of immediate operands of an instruction along with number
42175 of corresponding immediate-operands. It initializes its parameters to zero
42176 befor calling FIND_CONSTANT.
42177 INSN is the input instruction. IMM is the total of immediates.
42178 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
42182 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
42184 imm_info imm_values
= {0, 0, 0};
42186 find_constant (insn
, &imm_values
);
42187 *imm
= imm_values
.imm
;
42188 *imm32
= imm_values
.imm32
;
42189 *imm64
= imm_values
.imm64
;
42190 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
42193 /* This function indicates if an operand of an instruction is an
42197 has_immediate (rtx insn
)
42199 int num_imm_operand
;
42200 int num_imm32_operand
;
42201 int num_imm64_operand
;
42204 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42205 &num_imm64_operand
);
42209 /* Return single or double path for instructions. */
42211 static enum insn_path
42212 get_insn_path (rtx insn
)
42214 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
42216 if ((int)path
== 0)
42217 return path_single
;
42219 if ((int)path
== 1)
42220 return path_double
;
42225 /* Return insn dispatch group. */
42227 static enum dispatch_group
42228 get_insn_group (rtx insn
)
42230 enum dispatch_group group
= get_mem_group (insn
);
42234 if (is_branch (insn
))
42235 return disp_branch
;
42240 if (has_immediate (insn
))
42243 if (is_prefetch (insn
))
42244 return disp_prefetch
;
42246 return disp_no_group
;
42249 /* Count number of GROUP restricted instructions in a dispatch
42250 window WINDOW_LIST. */
42253 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
42255 enum dispatch_group group
= get_insn_group (insn
);
42257 int num_imm_operand
;
42258 int num_imm32_operand
;
42259 int num_imm64_operand
;
42261 if (group
== disp_no_group
)
42264 if (group
== disp_imm
)
42266 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42267 &num_imm64_operand
);
42268 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
42269 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
42270 || (num_imm32_operand
> 0
42271 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
42272 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
42273 || (num_imm64_operand
> 0
42274 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
42275 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
42276 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
42277 && num_imm64_operand
> 0
42278 && ((window_list
->num_imm_64
> 0
42279 && window_list
->num_insn
>= 2)
42280 || window_list
->num_insn
>= 3)))
42286 if ((group
== disp_load_store
42287 && (window_list
->num_loads
>= MAX_LOAD
42288 || window_list
->num_stores
>= MAX_STORE
))
42289 || ((group
== disp_load
42290 || group
== disp_prefetch
)
42291 && window_list
->num_loads
>= MAX_LOAD
)
42292 || (group
== disp_store
42293 && window_list
->num_stores
>= MAX_STORE
))
42299 /* This function returns true if insn satisfies dispatch rules on the
42300 last window scheduled. */
42303 fits_dispatch_window (rtx insn
)
42305 dispatch_windows
*window_list
= dispatch_window_list
;
42306 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
42307 unsigned int num_restrict
;
42308 enum dispatch_group group
= get_insn_group (insn
);
42309 enum insn_path path
= get_insn_path (insn
);
42312 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
42313 instructions should be given the lowest priority in the
42314 scheduling process in Haifa scheduler to make sure they will be
42315 scheduled in the same dispatch window as the reference to them. */
42316 if (group
== disp_jcc
|| group
== disp_cmp
)
42319 /* Check nonrestricted. */
42320 if (group
== disp_no_group
|| group
== disp_branch
)
42323 /* Get last dispatch window. */
42324 if (window_list_next
)
42325 window_list
= window_list_next
;
42327 if (window_list
->window_num
== 1)
42329 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
42332 || (min_insn_size (insn
) + sum
) >= 48)
42333 /* Window 1 is full. Go for next window. */
42337 num_restrict
= count_num_restricted (insn
, window_list
);
42339 if (num_restrict
> num_allowable_groups
[group
])
42342 /* See if it fits in the first window. */
42343 if (window_list
->window_num
== 0)
42345 /* The first widow should have only single and double path
42347 if (path
== path_double
42348 && (window_list
->num_uops
+ 2) > MAX_INSN
)
42350 else if (path
!= path_single
)
42356 /* Add an instruction INSN with NUM_UOPS micro-operations to the
42357 dispatch window WINDOW_LIST. */
42360 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
42362 int byte_len
= min_insn_size (insn
);
42363 int num_insn
= window_list
->num_insn
;
42365 sched_insn_info
*window
= window_list
->window
;
42366 enum dispatch_group group
= get_insn_group (insn
);
42367 enum insn_path path
= get_insn_path (insn
);
42368 int num_imm_operand
;
42369 int num_imm32_operand
;
42370 int num_imm64_operand
;
42372 if (!window_list
->violation
&& group
!= disp_cmp
42373 && !fits_dispatch_window (insn
))
42374 window_list
->violation
= true;
42376 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42377 &num_imm64_operand
);
42379 /* Initialize window with new instruction. */
42380 window
[num_insn
].insn
= insn
;
42381 window
[num_insn
].byte_len
= byte_len
;
42382 window
[num_insn
].group
= group
;
42383 window
[num_insn
].path
= path
;
42384 window
[num_insn
].imm_bytes
= imm_size
;
42386 window_list
->window_size
+= byte_len
;
42387 window_list
->num_insn
= num_insn
+ 1;
42388 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
42389 window_list
->imm_size
+= imm_size
;
42390 window_list
->num_imm
+= num_imm_operand
;
42391 window_list
->num_imm_32
+= num_imm32_operand
;
42392 window_list
->num_imm_64
+= num_imm64_operand
;
42394 if (group
== disp_store
)
42395 window_list
->num_stores
+= 1;
42396 else if (group
== disp_load
42397 || group
== disp_prefetch
)
42398 window_list
->num_loads
+= 1;
42399 else if (group
== disp_load_store
)
42401 window_list
->num_stores
+= 1;
42402 window_list
->num_loads
+= 1;
42406 /* Adds a scheduled instruction, INSN, to the current dispatch window.
42407 If the total bytes of instructions or the number of instructions in
42408 the window exceed allowable, it allocates a new window. */
42411 add_to_dispatch_window (rtx insn
)
42414 dispatch_windows
*window_list
;
42415 dispatch_windows
*next_list
;
42416 dispatch_windows
*window0_list
;
42417 enum insn_path path
;
42418 enum dispatch_group insn_group
;
42426 if (INSN_CODE (insn
) < 0)
42429 byte_len
= min_insn_size (insn
);
42430 window_list
= dispatch_window_list
;
42431 next_list
= window_list
->next
;
42432 path
= get_insn_path (insn
);
42433 insn_group
= get_insn_group (insn
);
42435 /* Get the last dispatch window. */
42437 window_list
= dispatch_window_list
->next
;
42439 if (path
== path_single
)
42441 else if (path
== path_double
)
42444 insn_num_uops
= (int) path
;
42446 /* If current window is full, get a new window.
42447 Window number zero is full, if MAX_INSN uops are scheduled in it.
42448 Window number one is full, if window zero's bytes plus window
42449 one's bytes is 32, or if the bytes of the new instruction added
42450 to the total makes it greater than 48, or it has already MAX_INSN
42451 instructions in it. */
42452 num_insn
= window_list
->num_insn
;
42453 num_uops
= window_list
->num_uops
;
42454 window_num
= window_list
->window_num
;
42455 insn_fits
= fits_dispatch_window (insn
);
42457 if (num_insn
>= MAX_INSN
42458 || num_uops
+ insn_num_uops
> MAX_INSN
42461 window_num
= ~window_num
& 1;
42462 window_list
= allocate_next_window (window_num
);
42465 if (window_num
== 0)
42467 add_insn_window (insn
, window_list
, insn_num_uops
);
42468 if (window_list
->num_insn
>= MAX_INSN
42469 && insn_group
== disp_branch
)
42471 process_end_window ();
42475 else if (window_num
== 1)
42477 window0_list
= window_list
->prev
;
42478 sum
= window0_list
->window_size
+ window_list
->window_size
;
42480 || (byte_len
+ sum
) >= 48)
42482 process_end_window ();
42483 window_list
= dispatch_window_list
;
42486 add_insn_window (insn
, window_list
, insn_num_uops
);
42489 gcc_unreachable ();
42491 if (is_end_basic_block (insn_group
))
42493 /* End of basic block is reached do end-basic-block process. */
42494 process_end_window ();
42499 /* Print the dispatch window, WINDOW_NUM, to FILE. */
42501 DEBUG_FUNCTION
static void
42502 debug_dispatch_window_file (FILE *file
, int window_num
)
42504 dispatch_windows
*list
;
42507 if (window_num
== 0)
42508 list
= dispatch_window_list
;
42510 list
= dispatch_window_list1
;
42512 fprintf (file
, "Window #%d:\n", list
->window_num
);
42513 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
42514 list
->num_insn
, list
->num_uops
, list
->window_size
);
42515 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42516 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
42518 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
42520 fprintf (file
, " insn info:\n");
42522 for (i
= 0; i
< MAX_INSN
; i
++)
42524 if (!list
->window
[i
].insn
)
42526 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
42527 i
, group_name
[list
->window
[i
].group
],
42528 i
, (void *)list
->window
[i
].insn
,
42529 i
, list
->window
[i
].path
,
42530 i
, list
->window
[i
].byte_len
,
42531 i
, list
->window
[i
].imm_bytes
);
42535 /* Print to stdout a dispatch window. */
42537 DEBUG_FUNCTION
void
42538 debug_dispatch_window (int window_num
)
42540 debug_dispatch_window_file (stdout
, window_num
);
42543 /* Print INSN dispatch information to FILE. */
42545 DEBUG_FUNCTION
static void
42546 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
42549 enum insn_path path
;
42550 enum dispatch_group group
;
42552 int num_imm_operand
;
42553 int num_imm32_operand
;
42554 int num_imm64_operand
;
42556 if (INSN_CODE (insn
) < 0)
42559 byte_len
= min_insn_size (insn
);
42560 path
= get_insn_path (insn
);
42561 group
= get_insn_group (insn
);
42562 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42563 &num_imm64_operand
);
42565 fprintf (file
, " insn info:\n");
42566 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
42567 group_name
[group
], path
, byte_len
);
42568 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42569 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
42572 /* Print to STDERR the status of the ready list with respect to
42573 dispatch windows. */
42575 DEBUG_FUNCTION
void
42576 debug_ready_dispatch (void)
42579 int no_ready
= number_in_ready ();
42581 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
42583 for (i
= 0; i
< no_ready
; i
++)
42584 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
42587 /* This routine is the driver of the dispatch scheduler. */
42590 do_dispatch (rtx insn
, int mode
)
42592 if (mode
== DISPATCH_INIT
)
42593 init_dispatch_sched ();
42594 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
42595 add_to_dispatch_window (insn
);
42598 /* Return TRUE if Dispatch Scheduling is supported. */
42601 has_dispatch (rtx insn
, int action
)
42603 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
42604 && flag_dispatch_scheduler
)
42610 case IS_DISPATCH_ON
:
42615 return is_cmp (insn
);
42617 case DISPATCH_VIOLATION
:
42618 return dispatch_violation ();
42620 case FITS_DISPATCH_WINDOW
:
42621 return fits_dispatch_window (insn
);
42627 /* Implementation of reassociation_width target hook used by
42628 reassoc phase to identify parallelism level in reassociated
42629 tree. Statements tree_code is passed in OPC. Arguments type
42632 Currently parallel reassociation is enabled for Atom
42633 processors only and we set reassociation width to be 2
42634 because Atom may issue up to 2 instructions per cycle.
42636 Return value should be fixed if parallel reassociation is
42637 enabled for other processors. */
42640 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42641 enum machine_mode mode
)
42645 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42647 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42653 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42654 place emms and femms instructions. */
42656 static enum machine_mode
42657 ix86_preferred_simd_mode (enum machine_mode mode
)
42665 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42667 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42669 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42671 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
42674 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42680 if (!TARGET_VECTORIZE_DOUBLE
)
42682 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42684 else if (TARGET_SSE2
)
42693 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
42696 static unsigned int
42697 ix86_autovectorize_vector_sizes (void)
42699 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42704 /* Return class of registers which could be used for pseudo of MODE
42705 and of class RCLASS for spilling instead of memory. Return NO_REGS
42706 if it is not possible or non-profitable. */
42708 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42710 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42711 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
42712 && INTEGER_CLASS_P (rclass
))
42713 return ALL_SSE_REGS
;
42717 /* Implement targetm.vectorize.init_cost. */
42720 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42722 unsigned *cost
= XNEWVEC (unsigned, 3);
42723 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42727 /* Implement targetm.vectorize.add_stmt_cost. */
42730 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42731 struct _stmt_vec_info
*stmt_info
, int misalign
,
42732 enum vect_cost_model_location where
)
42734 unsigned *cost
= (unsigned *) data
;
42735 unsigned retval
= 0;
42737 if (flag_vect_cost_model
)
42739 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42740 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42742 /* Statements in an inner loop relative to the loop being
42743 vectorized are weighted more heavily. The value here is
42744 arbitrary and could potentially be improved with analysis. */
42745 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42746 count
*= 50; /* FIXME. */
42748 retval
= (unsigned) (count
* stmt_cost
);
42749 cost
[where
] += retval
;
42755 /* Implement targetm.vectorize.finish_cost. */
42758 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42759 unsigned *body_cost
, unsigned *epilogue_cost
)
42761 unsigned *cost
= (unsigned *) data
;
42762 *prologue_cost
= cost
[vect_prologue
];
42763 *body_cost
= cost
[vect_body
];
42764 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Releases the
   accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42775 /* Validate target specific memory model bits in VAL. */
42777 static unsigned HOST_WIDE_INT
42778 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42780 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42783 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42785 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42787 warning (OPT_Winvalid_memory_model
,
42788 "Unknown architecture specific memory model");
42789 return MEMMODEL_SEQ_CST
;
42791 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42792 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42794 warning (OPT_Winvalid_memory_model
,
42795 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42796 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42798 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42800 warning (OPT_Winvalid_memory_model
,
42801 "HLE_RELEASE not used with RELEASE or stronger memory model");
42802 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42807 /* Initialize the GCC target structure. */
42808 #undef TARGET_RETURN_IN_MEMORY
42809 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42811 #undef TARGET_LEGITIMIZE_ADDRESS
42812 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42814 #undef TARGET_ATTRIBUTE_TABLE
42815 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42816 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
42817 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
42818 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42819 # undef TARGET_MERGE_DECL_ATTRIBUTES
42820 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42823 #undef TARGET_COMP_TYPE_ATTRIBUTES
42824 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42826 #undef TARGET_INIT_BUILTINS
42827 #define TARGET_INIT_BUILTINS ix86_init_builtins
42828 #undef TARGET_BUILTIN_DECL
42829 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42830 #undef TARGET_EXPAND_BUILTIN
42831 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42833 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42834 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42835 ix86_builtin_vectorized_function
42837 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42838 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42840 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42841 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42843 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42844 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42846 #undef TARGET_BUILTIN_RECIPROCAL
42847 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42849 #undef TARGET_ASM_FUNCTION_EPILOGUE
42850 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42852 #undef TARGET_ENCODE_SECTION_INFO
42853 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42854 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42856 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42859 #undef TARGET_ASM_OPEN_PAREN
42860 #define TARGET_ASM_OPEN_PAREN ""
42861 #undef TARGET_ASM_CLOSE_PAREN
42862 #define TARGET_ASM_CLOSE_PAREN ""
42864 #undef TARGET_ASM_BYTE_OP
42865 #define TARGET_ASM_BYTE_OP ASM_BYTE
42867 #undef TARGET_ASM_ALIGNED_HI_OP
42868 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42869 #undef TARGET_ASM_ALIGNED_SI_OP
42870 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42872 #undef TARGET_ASM_ALIGNED_DI_OP
42873 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42876 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42877 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42879 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42880 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42882 #undef TARGET_ASM_UNALIGNED_HI_OP
42883 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42884 #undef TARGET_ASM_UNALIGNED_SI_OP
42885 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42886 #undef TARGET_ASM_UNALIGNED_DI_OP
42887 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42889 #undef TARGET_PRINT_OPERAND
42890 #define TARGET_PRINT_OPERAND ix86_print_operand
42891 #undef TARGET_PRINT_OPERAND_ADDRESS
42892 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42893 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42894 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42895 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42896 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
/* Instruction scheduler hooks: per-function scheduler setup, dependence
   cost adjustment, issue rate, and the multipass DFA lookahead depth.  */
42898 #undef TARGET_SCHED_INIT_GLOBAL
42899 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42900 #undef TARGET_SCHED_ADJUST_COST
42901 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42902 #undef TARGET_SCHED_ISSUE_RATE
42903 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42904 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42905 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42906 ia32_multipass_dfa_lookahead

/* Whether a call may be turned into a sibling (tail) call.  */
42908 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42909 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

/* Validation of __atomic memory-model arguments.  */
42911 #undef TARGET_MEMMODEL_CHECK
42912 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check

/* Thread-local storage is supported on this target.  */
42915 #undef TARGET_HAVE_TLS
42916 #define TARGET_HAVE_TLS true

/* Constant-pool handling: which constants must not be forced to memory,
   and always use object blocks for constant pool entries.  */
42918 #undef TARGET_CANNOT_FORCE_CONST_MEM
42919 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42920 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42921 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

/* Undo target-specific address legitimization (e.g. for debug output).  */
42923 #undef TARGET_DELEGITIMIZE_ADDRESS
42924 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

/* MS-compatible bitfield layout selection.  */
42926 #undef TARGET_MS_BITFIELD_LAYOUT_P
42927 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

/* Symbol locality.  NOTE(review): the darwin_binds_local_p definition is
   presumably inside an #if TARGET_MACHO guard in the original file (the
   gap in the embedded numbering at 42928-42929 suggests conditional lines
   were dropped by extraction) — confirm against upstream.  Likewise the
   #if below has no matching #endif visible in this chunk.  */
42930 #undef TARGET_BINDS_LOCAL_P
42931 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42933 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42934 #undef TARGET_BINDS_LOCAL_P
42935 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
/* C++ multiple-inheritance thunk emission, and the predicate saying which
   thunks this port can emit directly as assembly.  */
42938 #undef TARGET_ASM_OUTPUT_MI_THUNK
42939 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42940 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42941 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

/* Boilerplate emitted at the top of every assembly file.  */
42943 #undef TARGET_ASM_FILE_START
42944 #define TARGET_ASM_FILE_START x86_file_start

/* Process command-line options after parsing (-march/-mtune etc.).  */
42946 #undef TARGET_OPTION_OVERRIDE
42947 #define TARGET_OPTION_OVERRIDE ix86_option_override

/* Cost model consulted by register allocation and RTL optimizers.  */
42949 #undef TARGET_REGISTER_MOVE_COST
42950 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42951 #undef TARGET_MEMORY_MOVE_COST
42952 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42953 #undef TARGET_RTX_COSTS
42954 #define TARGET_RTX_COSTS ix86_rtx_costs
42955 #undef TARGET_ADDRESS_COST
42956 #define TARGET_ADDRESS_COST ix86_address_cost

/* Condition-code register handling and CCmode compatibility rules.  */
42958 #undef TARGET_FIXED_CONDITION_CODE_REGS
42959 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42960 #undef TARGET_CC_MODES_COMPATIBLE
42961 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

/* Machine-dependent reorg pass run late in compilation.  */
42963 #undef TARGET_MACHINE_DEPENDENT_REORG
42964 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

/* Frame value saved by __builtin_setjmp.  */
42966 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42967 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

/* Construction of the target's __builtin_va_list type.  */
42969 #undef TARGET_BUILD_BUILTIN_VA_LIST
42970 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

/* Fold target builtins into trees where possible.  */
42972 #undef TARGET_FOLD_BUILTIN
42973 #define TARGET_FOLD_BUILTIN ix86_fold_builtin

/* Function multiversioning (target("...") dispatch) support.  */
42975 #undef TARGET_COMPARE_VERSION_PRIORITY
42976 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42978 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42979 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42980 ix86_generate_version_dispatcher_body
42982 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42983 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42984 ix86_get_function_versions_dispatcher

/* va_list classification/canonicalization (the 64-bit ABI has distinct
   ms_abi and sysv_abi va_list flavors) and va_start expansion.  */
42986 #undef TARGET_ENUM_VA_LIST_P
42987 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42989 #undef TARGET_FN_ABI_VA_LIST
42990 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42992 #undef TARGET_CANONICAL_VA_LIST_TYPE
42993 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42995 #undef TARGET_EXPAND_BUILTIN_VA_START
42996 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

/* Registers implicitly clobbered by asm statements.  */
42998 #undef TARGET_MD_ASM_CLOBBERS
42999 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Calling-convention hooks: argument promotion, aggregate-return pointer,
   varargs setup, and per-argument passing decisions.  */
43001 #undef TARGET_PROMOTE_PROTOTYPES
43002 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
43003 #undef TARGET_STRUCT_VALUE_RTX
43004 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
43005 #undef TARGET_SETUP_INCOMING_VARARGS
43006 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
43007 #undef TARGET_MUST_PASS_IN_STACK
43008 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack

/* Where each argument goes, how the cumulative-args state advances, and
   each argument's alignment and by-reference status.  */
43009 #undef TARGET_FUNCTION_ARG_ADVANCE
43010 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
43011 #undef TARGET_FUNCTION_ARG
43012 #define TARGET_FUNCTION_ARG ix86_function_arg
43013 #undef TARGET_FUNCTION_ARG_BOUNDARY
43014 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
43015 #undef TARGET_PASS_BY_REFERENCE
43016 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference

/* Stack-realignment support: the incoming argument pointer, dynamic stack
   boundary updates, and the DRAP (dynamic realign argument pointer).  */
43017 #undef TARGET_INTERNAL_ARG_POINTER
43018 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
43019 #undef TARGET_UPDATE_STACK_BOUNDARY
43020 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
43021 #undef TARGET_GET_DRAP_RTX
43022 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx

/* All arguments before a "..." are named.  */
43023 #undef TARGET_STRICT_ARGUMENT_NAMING
43024 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

/* Static chain location for nested functions and trampoline setup.  */
43025 #undef TARGET_STATIC_CHAIN
43026 #define TARGET_STATIC_CHAIN ix86_static_chain
43027 #undef TARGET_TRAMPOLINE_INIT
43028 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init

/* Bytes of arguments popped by the callee on return (e.g. stdcall).  */
43029 #undef TARGET_RETURN_POPS_ARGS
43030 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
/* Veto insns produced by the combine pass that the port cannot handle.  */
43032 #undef TARGET_LEGITIMATE_COMBINED_INSN
43033 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

/* AddressSanitizer shadow-memory offset for this target.  */
43035 #undef TARGET_ASAN_SHADOW_OFFSET
43036 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

/* Gimplification of va_arg expressions.  */
43038 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
43039 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

/* Which scalar and vector machine modes the target supports, and the mode
   selected for literal-constant suffixes (e.g. __float128's 'q').  */
43041 #undef TARGET_SCALAR_MODE_SUPPORTED_P
43042 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
43044 #undef TARGET_VECTOR_MODE_SUPPORTED_P
43045 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
43047 #undef TARGET_C_MODE_FOR_SUFFIX
43048 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* DWARF DTPREL (TLS) relocation output.  NOTE(review): in the original
   file this pair is presumably wrapped in #ifdef ASM_OUTPUT_DWARF_DTPREL
   (gap at embedded lines 43049-43050/43053-43054) — confirm upstream.  */
43051 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
43052 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel

/* Let the subtarget add attributes to decls.  NOTE(review): the matching
   #endif for this #ifdef is not visible in this chunk; it appears to have
   been lost by extraction.  */
43055 #ifdef SUBTARGET_INSERT_ATTRIBUTES
43056 #undef TARGET_INSERT_ATTRIBUTES
43057 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES

/* Target-specific C++ name mangling (e.g. for vector types).  */
43060 #undef TARGET_MANGLE_TYPE
43061 #define TARGET_MANGLE_TYPE ix86_mangle_type

/* Code emitted when stack-protector canary check fails.  */
43064 #undef TARGET_STACK_PROTECT_FAIL
43065 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
/* Function return values: location, which regnos may hold them, and
   promotion of modes across the call boundary.  */
43068 #undef TARGET_FUNCTION_VALUE
43069 #define TARGET_FUNCTION_VALUE ix86_function_value
43071 #undef TARGET_FUNCTION_VALUE_REGNO_P
43072 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
43074 #undef TARGET_PROMOTE_FUNCTION_MODE
43075 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

/* Whether a member type forces the whole structure into BLKmode.  */
43077 #undef TARGET_MEMBER_TYPE_FORCES_BLK
43078 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

/* Hook run when virtual regs are instantiated.  */
43080 #undef TARGET_INSTANTIATE_DECLS
43081 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

/* Reload/register-class hooks: secondary reloads, regs needed per class,
   preferred classes for input/output reloads, and spill likelihood.  */
43083 #undef TARGET_SECONDARY_RELOAD
43084 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
43086 #undef TARGET_CLASS_MAX_NREGS
43087 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
43089 #undef TARGET_PREFERRED_RELOAD_CLASS
43090 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
43091 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
43092 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
43093 #undef TARGET_CLASS_LIKELY_SPILLED_P
43094 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

/* Auto-vectorizer hooks: cost model, constant permute feasibility,
   preferred SIMD mode, and the candidate vector sizes to try.  */
43096 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
43097 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
43098 ix86_builtin_vectorization_cost
43099 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
43100 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
43101 ix86_vectorize_vec_perm_const_ok
43102 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
43103 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
43104 ix86_preferred_simd_mode
43105 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
43106 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
43107 ix86_autovectorize_vector_sizes

/* Per-loop vectorization cost bookkeeping (init/add/finish/destroy).  */
43108 #undef TARGET_VECTORIZE_INIT_COST
43109 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
43110 #undef TARGET_VECTORIZE_ADD_STMT_COST
43111 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
43112 #undef TARGET_VECTORIZE_FINISH_COST
43113 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
43114 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
43115 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
/* Per-function target-option switching (attribute target) support:
   activate a function's options, validate/save/restore/print option
   states, compare versions, and decide cross-option inlining.  */
43117 #undef TARGET_SET_CURRENT_FUNCTION
43118 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
43120 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
43121 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
43123 #undef TARGET_OPTION_SAVE
43124 #define TARGET_OPTION_SAVE ix86_function_specific_save
43126 #undef TARGET_OPTION_RESTORE
43127 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
43129 #undef TARGET_OPTION_PRINT
43130 #define TARGET_OPTION_PRINT ix86_function_specific_print
43132 #undef TARGET_OPTION_FUNCTION_VERSIONS
43133 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
43135 #undef TARGET_CAN_INLINE_P
43136 #define TARGET_CAN_INLINE_P ix86_can_inline_p

/* Hook run at expand-to-RTL time (used here to switch ABIs if needed).  */
43138 #undef TARGET_EXPAND_TO_RTL_HOOK
43139 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

/* Address validity for this target's addressing modes.  */
43141 #undef TARGET_LEGITIMATE_ADDRESS_P
43142 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

/* Use the LRA register allocator (always true for i386), with the port's
   register priority and usage-leveling heuristics.  */
43144 #undef TARGET_LRA_P
43145 #define TARGET_LRA_P hook_bool_void_true
43147 #undef TARGET_REGISTER_PRIORITY
43148 #define TARGET_REGISTER_PRIORITY ix86_register_priority
43150 #undef TARGET_REGISTER_USAGE_LEVELING_P
43151 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

/* Which constants are legitimate immediate operands.  */
43153 #undef TARGET_LEGITIMATE_CONSTANT_P
43154 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

/* Frame-pointer necessity and register-elimination feasibility.  */
43156 #undef TARGET_FRAME_POINTER_REQUIRED
43157 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
43159 #undef TARGET_CAN_ELIMINATE
43160 #define TARGET_CAN_ELIMINATE ix86_can_eliminate

/* Extra registers live on function entry (dataflow).  */
43162 #undef TARGET_EXTRA_LIVE_ON_ENTRY
43163 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

/* Code emitted at the end of assembly output, and conditional fixed/call-
   used register adjustments driven by the selected options.  */
43165 #undef TARGET_ASM_CODE_END
43166 #define TARGET_ASM_CODE_END ix86_code_end
43168 #undef TARGET_CONDITIONAL_REGISTER_USAGE
43169 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

/* NOTE(review): this Darwin libfunc renaming hook is presumably guarded
   by #if TARGET_MACHO in the original file (embedded numbering gap at
   43170-43171/43174-43175 suggests dropped conditional lines).  */
43172 #undef TARGET_INIT_LIBFUNCS
43173 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins

/* Register class usable for spilling pseudos of a given mode/class.  */
43176 #undef TARGET_SPILL_CLASS
43177 #define TARGET_SPILL_CLASS ix86_spill_class
43179 struct gcc_target targetm
= TARGET_INITIALIZER
;
43181 #include "gt-i386.h"