1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
64 #include "tree-flow.h"
66 #include "pass_manager.h"
/* Forward declarations of the PE/COFF (MinGW/Cygwin) symbol
   legitimization helpers; being static, they are defined later in
   this translation unit.  */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
/* Default stack-limit checking value when the target headers do not
   provide one.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif /* NOTE(review): this #endif was missing from this copy.  */
/* Return index of given mode in mult and division cost tables.
   Any mode other than QI/HI/SI/DImode maps to the last ("other")
   slot, index 4.  NOTE(review): the final ": 4)" arm was missing
   from this copy, leaving the macro unterminated.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a size cost of N bytes on a scale
   comparable with COSTS_N_INSNS; it is used by the size-tuning table.  */
#define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop_algs initializer that always falls back to a
   library call; used as the second entry of several per-CPU string
   operation tables below.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
/* memcpy strategy when tuning for size: the one-byte rep prefix for
   blocks of any length (-1 == no upper bound), in both table entries.  */
static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
/* memset strategy when tuning for size: the one-byte rep prefix for
   blocks of any length, in both table entries.  */
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
98 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
99 COSTS_N_BYTES (2), /* cost of an add instruction */
100 COSTS_N_BYTES (3), /* cost of a lea instruction */
101 COSTS_N_BYTES (2), /* variable shift costs */
102 COSTS_N_BYTES (3), /* constant shift costs */
103 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
104 COSTS_N_BYTES (3), /* HI */
105 COSTS_N_BYTES (3), /* SI */
106 COSTS_N_BYTES (3), /* DI */
107 COSTS_N_BYTES (5)}, /* other */
108 0, /* cost of multiply per each bit set */
109 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
110 COSTS_N_BYTES (3), /* HI */
111 COSTS_N_BYTES (3), /* SI */
112 COSTS_N_BYTES (3), /* DI */
113 COSTS_N_BYTES (5)}, /* other */
114 COSTS_N_BYTES (3), /* cost of movsx */
115 COSTS_N_BYTES (3), /* cost of movzx */
116 0, /* "large" insn */
118 2, /* cost for loading QImode using movzbl */
119 {2, 2, 2}, /* cost of loading integer registers
120 in QImode, HImode and SImode.
121 Relative to reg-reg move (2). */
122 {2, 2, 2}, /* cost of storing integer registers */
123 2, /* cost of reg,reg fld/fst */
124 {2, 2, 2}, /* cost of loading fp registers
125 in SFmode, DFmode and XFmode */
126 {2, 2, 2}, /* cost of storing fp registers
127 in SFmode, DFmode and XFmode */
128 3, /* cost of moving MMX register */
129 {3, 3}, /* cost of loading MMX registers
130 in SImode and DImode */
131 {3, 3}, /* cost of storing MMX registers
132 in SImode and DImode */
133 3, /* cost of moving SSE register */
134 {3, 3, 3}, /* cost of loading SSE registers
135 in SImode, DImode and TImode */
136 {3, 3, 3}, /* cost of storing SSE registers
137 in SImode, DImode and TImode */
138 3, /* MMX or SSE register to integer */
139 0, /* size of l1 cache */
140 0, /* size of l2 cache */
141 0, /* size of prefetch block */
142 0, /* number of parallel prefetches */
144 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
145 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
146 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
147 COSTS_N_BYTES (2), /* cost of FABS instruction. */
148 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
149 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
152 1, /* scalar_stmt_cost. */
153 1, /* scalar load_cost. */
154 1, /* scalar_store_cost. */
155 1, /* vec_stmt_cost. */
156 1, /* vec_to_scalar_cost. */
157 1, /* scalar_to_vec_cost. */
158 1, /* vec_align_load_cost. */
159 1, /* vec_unalign_load_cost. */
160 1, /* vec_store_cost. */
161 1, /* cond_taken_branch_cost. */
162 1, /* cond_not_taken_branch_cost. */
165 /* Processor costs (relative to an add) */
/* 386 memcpy strategy: one-byte rep prefix for all sizes; the second
   entry is the libcall placeholder.  */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
/* 386 memset strategy: one-byte rep prefix for all sizes; the second
   entry is the libcall placeholder.  */
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
174 struct processor_costs i386_cost
= { /* 386 specific costs */
175 COSTS_N_INSNS (1), /* cost of an add instruction */
176 COSTS_N_INSNS (1), /* cost of a lea instruction */
177 COSTS_N_INSNS (3), /* variable shift costs */
178 COSTS_N_INSNS (2), /* constant shift costs */
179 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
180 COSTS_N_INSNS (6), /* HI */
181 COSTS_N_INSNS (6), /* SI */
182 COSTS_N_INSNS (6), /* DI */
183 COSTS_N_INSNS (6)}, /* other */
184 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
185 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
186 COSTS_N_INSNS (23), /* HI */
187 COSTS_N_INSNS (23), /* SI */
188 COSTS_N_INSNS (23), /* DI */
189 COSTS_N_INSNS (23)}, /* other */
190 COSTS_N_INSNS (3), /* cost of movsx */
191 COSTS_N_INSNS (2), /* cost of movzx */
192 15, /* "large" insn */
194 4, /* cost for loading QImode using movzbl */
195 {2, 4, 2}, /* cost of loading integer registers
196 in QImode, HImode and SImode.
197 Relative to reg-reg move (2). */
198 {2, 4, 2}, /* cost of storing integer registers */
199 2, /* cost of reg,reg fld/fst */
200 {8, 8, 8}, /* cost of loading fp registers
201 in SFmode, DFmode and XFmode */
202 {8, 8, 8}, /* cost of storing fp registers
203 in SFmode, DFmode and XFmode */
204 2, /* cost of moving MMX register */
205 {4, 8}, /* cost of loading MMX registers
206 in SImode and DImode */
207 {4, 8}, /* cost of storing MMX registers
208 in SImode and DImode */
209 2, /* cost of moving SSE register */
210 {4, 8, 16}, /* cost of loading SSE registers
211 in SImode, DImode and TImode */
212 {4, 8, 16}, /* cost of storing SSE registers
213 in SImode, DImode and TImode */
214 3, /* MMX or SSE register to integer */
215 0, /* size of l1 cache */
216 0, /* size of l2 cache */
217 0, /* size of prefetch block */
218 0, /* number of parallel prefetches */
220 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
221 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
222 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
223 COSTS_N_INSNS (22), /* cost of FABS instruction. */
224 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
225 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
228 1, /* scalar_stmt_cost. */
229 1, /* scalar load_cost. */
230 1, /* scalar_store_cost. */
231 1, /* vec_stmt_cost. */
232 1, /* vec_to_scalar_cost. */
233 1, /* scalar_to_vec_cost. */
234 1, /* vec_align_load_cost. */
235 2, /* vec_unalign_load_cost. */
236 1, /* vec_store_cost. */
237 3, /* cond_taken_branch_cost. */
238 1, /* cond_not_taken_branch_cost. */
/* 486 memcpy strategy: four-byte rep prefix for all sizes; the second
   entry is the libcall placeholder.  */
static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
/* 486 memset strategy: four-byte rep prefix for all sizes; the second
   entry is the libcall placeholder.  */
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
249 struct processor_costs i486_cost
= { /* 486 specific costs */
250 COSTS_N_INSNS (1), /* cost of an add instruction */
251 COSTS_N_INSNS (1), /* cost of a lea instruction */
252 COSTS_N_INSNS (3), /* variable shift costs */
253 COSTS_N_INSNS (2), /* constant shift costs */
254 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
255 COSTS_N_INSNS (12), /* HI */
256 COSTS_N_INSNS (12), /* SI */
257 COSTS_N_INSNS (12), /* DI */
258 COSTS_N_INSNS (12)}, /* other */
259 1, /* cost of multiply per each bit set */
260 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
261 COSTS_N_INSNS (40), /* HI */
262 COSTS_N_INSNS (40), /* SI */
263 COSTS_N_INSNS (40), /* DI */
264 COSTS_N_INSNS (40)}, /* other */
265 COSTS_N_INSNS (3), /* cost of movsx */
266 COSTS_N_INSNS (2), /* cost of movzx */
267 15, /* "large" insn */
269 4, /* cost for loading QImode using movzbl */
270 {2, 4, 2}, /* cost of loading integer registers
271 in QImode, HImode and SImode.
272 Relative to reg-reg move (2). */
273 {2, 4, 2}, /* cost of storing integer registers */
274 2, /* cost of reg,reg fld/fst */
275 {8, 8, 8}, /* cost of loading fp registers
276 in SFmode, DFmode and XFmode */
277 {8, 8, 8}, /* cost of storing fp registers
278 in SFmode, DFmode and XFmode */
279 2, /* cost of moving MMX register */
280 {4, 8}, /* cost of loading MMX registers
281 in SImode and DImode */
282 {4, 8}, /* cost of storing MMX registers
283 in SImode and DImode */
284 2, /* cost of moving SSE register */
285 {4, 8, 16}, /* cost of loading SSE registers
286 in SImode, DImode and TImode */
287 {4, 8, 16}, /* cost of storing SSE registers
288 in SImode, DImode and TImode */
289 3, /* MMX or SSE register to integer */
290 4, /* size of l1 cache. 486 has 8kB cache
291 shared for code and data, so 4kB is
292 not really precise. */
293 4, /* size of l2 cache */
294 0, /* size of prefetch block */
295 0, /* number of parallel prefetches */
297 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
298 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
299 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
300 COSTS_N_INSNS (3), /* cost of FABS instruction. */
301 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
302 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
305 1, /* scalar_stmt_cost. */
306 1, /* scalar load_cost. */
307 1, /* scalar_store_cost. */
308 1, /* vec_stmt_cost. */
309 1, /* vec_to_scalar_cost. */
310 1, /* scalar_to_vec_cost. */
311 1, /* vec_align_load_cost. */
312 2, /* vec_unalign_load_cost. */
313 1, /* vec_store_cost. */
314 3, /* cond_taken_branch_cost. */
315 1, /* cond_not_taken_branch_cost. */
/* Pentium memcpy strategy: four-byte rep prefix up to 256 bytes, then
   a library call; the second entry is the libcall placeholder.  */
static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
/* Pentium memset strategy: four-byte rep prefix for all sizes; the
   second entry is the libcall placeholder.  */
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
326 struct processor_costs pentium_cost
= {
327 COSTS_N_INSNS (1), /* cost of an add instruction */
328 COSTS_N_INSNS (1), /* cost of a lea instruction */
329 COSTS_N_INSNS (4), /* variable shift costs */
330 COSTS_N_INSNS (1), /* constant shift costs */
331 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
332 COSTS_N_INSNS (11), /* HI */
333 COSTS_N_INSNS (11), /* SI */
334 COSTS_N_INSNS (11), /* DI */
335 COSTS_N_INSNS (11)}, /* other */
336 0, /* cost of multiply per each bit set */
337 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
338 COSTS_N_INSNS (25), /* HI */
339 COSTS_N_INSNS (25), /* SI */
340 COSTS_N_INSNS (25), /* DI */
341 COSTS_N_INSNS (25)}, /* other */
342 COSTS_N_INSNS (3), /* cost of movsx */
343 COSTS_N_INSNS (2), /* cost of movzx */
344 8, /* "large" insn */
346 6, /* cost for loading QImode using movzbl */
347 {2, 4, 2}, /* cost of loading integer registers
348 in QImode, HImode and SImode.
349 Relative to reg-reg move (2). */
350 {2, 4, 2}, /* cost of storing integer registers */
351 2, /* cost of reg,reg fld/fst */
352 {2, 2, 6}, /* cost of loading fp registers
353 in SFmode, DFmode and XFmode */
354 {4, 4, 6}, /* cost of storing fp registers
355 in SFmode, DFmode and XFmode */
356 8, /* cost of moving MMX register */
357 {8, 8}, /* cost of loading MMX registers
358 in SImode and DImode */
359 {8, 8}, /* cost of storing MMX registers
360 in SImode and DImode */
361 2, /* cost of moving SSE register */
362 {4, 8, 16}, /* cost of loading SSE registers
363 in SImode, DImode and TImode */
364 {4, 8, 16}, /* cost of storing SSE registers
365 in SImode, DImode and TImode */
366 3, /* MMX or SSE register to integer */
367 8, /* size of l1 cache. */
368 8, /* size of l2 cache */
369 0, /* size of prefetch block */
370 0, /* number of parallel prefetches */
372 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
373 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
374 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
375 COSTS_N_INSNS (1), /* cost of FABS instruction. */
376 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
377 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
380 1, /* scalar_stmt_cost. */
381 1, /* scalar load_cost. */
382 1, /* scalar_store_cost. */
383 1, /* vec_stmt_cost. */
384 1, /* vec_to_scalar_cost. */
385 1, /* scalar_to_vec_cost. */
386 1, /* vec_align_load_cost. */
387 2, /* vec_unalign_load_cost. */
388 1, /* vec_store_cost. */
389 3, /* cond_taken_branch_cost. */
390 1, /* cond_not_taken_branch_cost. */
393 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
394 (we ensure the alignment). For small blocks inline loop is still a
395 noticeable win, for bigger blocks either rep movsl or rep movsb is
396 way to go. Rep movsb has apparently more expensive startup time in CPU,
397 but after 4K the difference is down in the noise. */
/* PentiumPro memcpy strategy: inline loop up to 128 bytes, unrolled
   loop up to 1k, rep movsl up to 8k, one-byte rep prefix beyond;
   second entry is the libcall placeholder.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
/* PentiumPro memset strategy: unrolled loop up to 1k, four-byte rep
   prefix up to 8k, library call beyond; second entry is the libcall
   placeholder.  */
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
409 struct processor_costs pentiumpro_cost
= {
410 COSTS_N_INSNS (1), /* cost of an add instruction */
411 COSTS_N_INSNS (1), /* cost of a lea instruction */
412 COSTS_N_INSNS (1), /* variable shift costs */
413 COSTS_N_INSNS (1), /* constant shift costs */
414 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
415 COSTS_N_INSNS (4), /* HI */
416 COSTS_N_INSNS (4), /* SI */
417 COSTS_N_INSNS (4), /* DI */
418 COSTS_N_INSNS (4)}, /* other */
419 0, /* cost of multiply per each bit set */
420 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
421 COSTS_N_INSNS (17), /* HI */
422 COSTS_N_INSNS (17), /* SI */
423 COSTS_N_INSNS (17), /* DI */
424 COSTS_N_INSNS (17)}, /* other */
425 COSTS_N_INSNS (1), /* cost of movsx */
426 COSTS_N_INSNS (1), /* cost of movzx */
427 8, /* "large" insn */
429 2, /* cost for loading QImode using movzbl */
430 {4, 4, 4}, /* cost of loading integer registers
431 in QImode, HImode and SImode.
432 Relative to reg-reg move (2). */
433 {2, 2, 2}, /* cost of storing integer registers */
434 2, /* cost of reg,reg fld/fst */
435 {2, 2, 6}, /* cost of loading fp registers
436 in SFmode, DFmode and XFmode */
437 {4, 4, 6}, /* cost of storing fp registers
438 in SFmode, DFmode and XFmode */
439 2, /* cost of moving MMX register */
440 {2, 2}, /* cost of loading MMX registers
441 in SImode and DImode */
442 {2, 2}, /* cost of storing MMX registers
443 in SImode and DImode */
444 2, /* cost of moving SSE register */
445 {2, 2, 8}, /* cost of loading SSE registers
446 in SImode, DImode and TImode */
447 {2, 2, 8}, /* cost of storing SSE registers
448 in SImode, DImode and TImode */
449 3, /* MMX or SSE register to integer */
450 8, /* size of l1 cache. */
451 256, /* size of l2 cache */
452 32, /* size of prefetch block */
453 6, /* number of parallel prefetches */
455 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
456 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
457 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
458 COSTS_N_INSNS (2), /* cost of FABS instruction. */
459 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
460 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
463 1, /* scalar_stmt_cost. */
464 1, /* scalar load_cost. */
465 1, /* scalar_store_cost. */
466 1, /* vec_stmt_cost. */
467 1, /* vec_to_scalar_cost. */
468 1, /* scalar_to_vec_cost. */
469 1, /* vec_align_load_cost. */
470 2, /* vec_unalign_load_cost. */
471 1, /* vec_store_cost. */
472 3, /* cond_taken_branch_cost. */
473 1, /* cond_not_taken_branch_cost. */
/* Geode memcpy strategy: four-byte rep prefix up to 256 bytes, library
   call beyond; second entry is the libcall placeholder.  */
static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
/* Geode memset strategy: four-byte rep prefix up to 256 bytes, library
   call beyond; second entry is the libcall placeholder.  */
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
483 struct processor_costs geode_cost
= {
484 COSTS_N_INSNS (1), /* cost of an add instruction */
485 COSTS_N_INSNS (1), /* cost of a lea instruction */
486 COSTS_N_INSNS (2), /* variable shift costs */
487 COSTS_N_INSNS (1), /* constant shift costs */
488 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
489 COSTS_N_INSNS (4), /* HI */
490 COSTS_N_INSNS (7), /* SI */
491 COSTS_N_INSNS (7), /* DI */
492 COSTS_N_INSNS (7)}, /* other */
493 0, /* cost of multiply per each bit set */
494 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
495 COSTS_N_INSNS (23), /* HI */
496 COSTS_N_INSNS (39), /* SI */
497 COSTS_N_INSNS (39), /* DI */
498 COSTS_N_INSNS (39)}, /* other */
499 COSTS_N_INSNS (1), /* cost of movsx */
500 COSTS_N_INSNS (1), /* cost of movzx */
501 8, /* "large" insn */
503 1, /* cost for loading QImode using movzbl */
504 {1, 1, 1}, /* cost of loading integer registers
505 in QImode, HImode and SImode.
506 Relative to reg-reg move (2). */
507 {1, 1, 1}, /* cost of storing integer registers */
508 1, /* cost of reg,reg fld/fst */
509 {1, 1, 1}, /* cost of loading fp registers
510 in SFmode, DFmode and XFmode */
511 {4, 6, 6}, /* cost of storing fp registers
512 in SFmode, DFmode and XFmode */
514 1, /* cost of moving MMX register */
515 {1, 1}, /* cost of loading MMX registers
516 in SImode and DImode */
517 {1, 1}, /* cost of storing MMX registers
518 in SImode and DImode */
519 1, /* cost of moving SSE register */
520 {1, 1, 1}, /* cost of loading SSE registers
521 in SImode, DImode and TImode */
522 {1, 1, 1}, /* cost of storing SSE registers
523 in SImode, DImode and TImode */
524 1, /* MMX or SSE register to integer */
525 64, /* size of l1 cache. */
526 128, /* size of l2 cache. */
527 32, /* size of prefetch block */
528 1, /* number of parallel prefetches */
530 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
531 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
532 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
533 COSTS_N_INSNS (1), /* cost of FABS instruction. */
534 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
535 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
538 1, /* scalar_stmt_cost. */
539 1, /* scalar load_cost. */
540 1, /* scalar_store_cost. */
541 1, /* vec_stmt_cost. */
542 1, /* vec_to_scalar_cost. */
543 1, /* scalar_to_vec_cost. */
544 1, /* vec_align_load_cost. */
545 2, /* vec_unalign_load_cost. */
546 1, /* vec_store_cost. */
547 3, /* cond_taken_branch_cost. */
548 1, /* cond_not_taken_branch_cost. */
/* K6 memcpy strategy: four-byte rep prefix up to 256 bytes, library
   call beyond; second entry is the libcall placeholder.  */
static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
/* K6 memset strategy: four-byte rep prefix up to 256 bytes, library
   call beyond; second entry is the libcall placeholder.  */
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
558 struct processor_costs k6_cost
= {
559 COSTS_N_INSNS (1), /* cost of an add instruction */
560 COSTS_N_INSNS (2), /* cost of a lea instruction */
561 COSTS_N_INSNS (1), /* variable shift costs */
562 COSTS_N_INSNS (1), /* constant shift costs */
563 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
564 COSTS_N_INSNS (3), /* HI */
565 COSTS_N_INSNS (3), /* SI */
566 COSTS_N_INSNS (3), /* DI */
567 COSTS_N_INSNS (3)}, /* other */
568 0, /* cost of multiply per each bit set */
569 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
570 COSTS_N_INSNS (18), /* HI */
571 COSTS_N_INSNS (18), /* SI */
572 COSTS_N_INSNS (18), /* DI */
573 COSTS_N_INSNS (18)}, /* other */
574 COSTS_N_INSNS (2), /* cost of movsx */
575 COSTS_N_INSNS (2), /* cost of movzx */
576 8, /* "large" insn */
578 3, /* cost for loading QImode using movzbl */
579 {4, 5, 4}, /* cost of loading integer registers
580 in QImode, HImode and SImode.
581 Relative to reg-reg move (2). */
582 {2, 3, 2}, /* cost of storing integer registers */
583 4, /* cost of reg,reg fld/fst */
584 {6, 6, 6}, /* cost of loading fp registers
585 in SFmode, DFmode and XFmode */
586 {4, 4, 4}, /* cost of storing fp registers
587 in SFmode, DFmode and XFmode */
588 2, /* cost of moving MMX register */
589 {2, 2}, /* cost of loading MMX registers
590 in SImode and DImode */
591 {2, 2}, /* cost of storing MMX registers
592 in SImode and DImode */
593 2, /* cost of moving SSE register */
594 {2, 2, 8}, /* cost of loading SSE registers
595 in SImode, DImode and TImode */
596 {2, 2, 8}, /* cost of storing SSE registers
597 in SImode, DImode and TImode */
598 6, /* MMX or SSE register to integer */
599 32, /* size of l1 cache. */
600 32, /* size of l2 cache. Some models
601 have integrated l2 cache, but
602 optimizing for k6 is not important
603 enough to worry about that. */
604 32, /* size of prefetch block */
605 1, /* number of parallel prefetches */
607 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
608 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
609 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
610 COSTS_N_INSNS (2), /* cost of FABS instruction. */
611 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
612 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
615 1, /* scalar_stmt_cost. */
616 1, /* scalar load_cost. */
617 1, /* scalar_store_cost. */
618 1, /* vec_stmt_cost. */
619 1, /* vec_to_scalar_cost. */
620 1, /* scalar_to_vec_cost. */
621 1, /* vec_align_load_cost. */
622 2, /* vec_unalign_load_cost. */
623 1, /* vec_store_cost. */
624 3, /* cond_taken_branch_cost. */
625 1, /* cond_not_taken_branch_cost. */
628 /* For some reason, Athlon deals better with REP prefix (relative to loops)
629 compared to K8. Alignment becomes important after 8 bytes for memcpy and
630 128 bytes for memset. */
/* Athlon memcpy strategy: four-byte rep prefix up to 2k, library call
   beyond; second entry is the libcall placeholder.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
/* Athlon memset strategy: four-byte rep prefix up to 2k, library call
   beyond; second entry is the libcall placeholder.  */
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
638 struct processor_costs athlon_cost
= {
639 COSTS_N_INSNS (1), /* cost of an add instruction */
640 COSTS_N_INSNS (2), /* cost of a lea instruction */
641 COSTS_N_INSNS (1), /* variable shift costs */
642 COSTS_N_INSNS (1), /* constant shift costs */
643 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
644 COSTS_N_INSNS (5), /* HI */
645 COSTS_N_INSNS (5), /* SI */
646 COSTS_N_INSNS (5), /* DI */
647 COSTS_N_INSNS (5)}, /* other */
648 0, /* cost of multiply per each bit set */
649 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
650 COSTS_N_INSNS (26), /* HI */
651 COSTS_N_INSNS (42), /* SI */
652 COSTS_N_INSNS (74), /* DI */
653 COSTS_N_INSNS (74)}, /* other */
654 COSTS_N_INSNS (1), /* cost of movsx */
655 COSTS_N_INSNS (1), /* cost of movzx */
656 8, /* "large" insn */
658 4, /* cost for loading QImode using movzbl */
659 {3, 4, 3}, /* cost of loading integer registers
660 in QImode, HImode and SImode.
661 Relative to reg-reg move (2). */
662 {3, 4, 3}, /* cost of storing integer registers */
663 4, /* cost of reg,reg fld/fst */
664 {4, 4, 12}, /* cost of loading fp registers
665 in SFmode, DFmode and XFmode */
666 {6, 6, 8}, /* cost of storing fp registers
667 in SFmode, DFmode and XFmode */
668 2, /* cost of moving MMX register */
669 {4, 4}, /* cost of loading MMX registers
670 in SImode and DImode */
671 {4, 4}, /* cost of storing MMX registers
672 in SImode and DImode */
673 2, /* cost of moving SSE register */
674 {4, 4, 6}, /* cost of loading SSE registers
675 in SImode, DImode and TImode */
676 {4, 4, 5}, /* cost of storing SSE registers
677 in SImode, DImode and TImode */
678 5, /* MMX or SSE register to integer */
679 64, /* size of l1 cache. */
680 256, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 6, /* number of parallel prefetches */
684 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
685 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
686 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
687 COSTS_N_INSNS (2), /* cost of FABS instruction. */
688 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
689 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
692 1, /* scalar_stmt_cost. */
693 1, /* scalar load_cost. */
694 1, /* scalar_store_cost. */
695 1, /* vec_stmt_cost. */
696 1, /* vec_to_scalar_cost. */
697 1, /* scalar_to_vec_cost. */
698 1, /* vec_align_load_cost. */
699 2, /* vec_unalign_load_cost. */
700 1, /* vec_store_cost. */
701 3, /* cond_taken_branch_cost. */
702 1, /* cond_not_taken_branch_cost. */
705 /* K8 has optimized REP instruction for medium sized blocks, but for very
706 small blocks it is better to use loop. For large blocks, libcall can
707 do nontemporary accesses and beat inline considerably. */
/* K8 memcpy strategy; first entry is for 32-bit sized chunks, second
   for larger (rex-prefixed 8-byte) operation per the two-entry table
   convention used throughout this file.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* K8 memset strategy: small blocks via (unrolled) loop, medium blocks
   via rep prefix, large blocks via library call.  */
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
719 struct processor_costs k8_cost
= {
720 COSTS_N_INSNS (1), /* cost of an add instruction */
721 COSTS_N_INSNS (2), /* cost of a lea instruction */
722 COSTS_N_INSNS (1), /* variable shift costs */
723 COSTS_N_INSNS (1), /* constant shift costs */
724 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
725 COSTS_N_INSNS (4), /* HI */
726 COSTS_N_INSNS (3), /* SI */
727 COSTS_N_INSNS (4), /* DI */
728 COSTS_N_INSNS (5)}, /* other */
729 0, /* cost of multiply per each bit set */
730 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
731 COSTS_N_INSNS (26), /* HI */
732 COSTS_N_INSNS (42), /* SI */
733 COSTS_N_INSNS (74), /* DI */
734 COSTS_N_INSNS (74)}, /* other */
735 COSTS_N_INSNS (1), /* cost of movsx */
736 COSTS_N_INSNS (1), /* cost of movzx */
737 8, /* "large" insn */
739 4, /* cost for loading QImode using movzbl */
740 {3, 4, 3}, /* cost of loading integer registers
741 in QImode, HImode and SImode.
742 Relative to reg-reg move (2). */
743 {3, 4, 3}, /* cost of storing integer registers */
744 4, /* cost of reg,reg fld/fst */
745 {4, 4, 12}, /* cost of loading fp registers
746 in SFmode, DFmode and XFmode */
747 {6, 6, 8}, /* cost of storing fp registers
748 in SFmode, DFmode and XFmode */
749 2, /* cost of moving MMX register */
750 {3, 3}, /* cost of loading MMX registers
751 in SImode and DImode */
752 {4, 4}, /* cost of storing MMX registers
753 in SImode and DImode */
754 2, /* cost of moving SSE register */
755 {4, 3, 6}, /* cost of loading SSE registers
756 in SImode, DImode and TImode */
757 {4, 4, 5}, /* cost of storing SSE registers
758 in SImode, DImode and TImode */
759 5, /* MMX or SSE register to integer */
760 64, /* size of l1 cache. */
761 512, /* size of l2 cache. */
762 64, /* size of prefetch block */
763 /* New AMD processors never drop prefetches; if they cannot be performed
764 immediately, they are queued. We set number of simultaneous prefetches
765 to a large constant to reflect this (it probably is not a good idea not
766 to limit number of prefetches at all, as their execution also takes some
768 100, /* number of parallel prefetches */
770 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
771 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
772 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
773 COSTS_N_INSNS (2), /* cost of FABS instruction. */
774 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
775 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
779 4, /* scalar_stmt_cost. */
780 2, /* scalar load_cost. */
781 2, /* scalar_store_cost. */
782 5, /* vec_stmt_cost. */
783 0, /* vec_to_scalar_cost. */
784 2, /* scalar_to_vec_cost. */
785 2, /* vec_align_load_cost. */
786 3, /* vec_unalign_load_cost. */
787 3, /* vec_store_cost. */
788 3, /* cond_taken_branch_cost. */
789 2, /* cond_not_taken_branch_cost. */
792 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
793 very small blocks it is better to use loop. For large blocks, libcall can
794 do nontemporary accesses and beat inline considerably. */
/* AMDFAM10 memcpy strategy: loop/unrolled loop for small blocks, rep
   prefix for medium blocks, library call for large ones.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
/* AMDFAM10 memset strategy: loop/unrolled loop for small blocks, rep
   prefix for medium blocks, library call for large ones.  */
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
805 struct processor_costs amdfam10_cost
= {
806 COSTS_N_INSNS (1), /* cost of an add instruction */
807 COSTS_N_INSNS (2), /* cost of a lea instruction */
808 COSTS_N_INSNS (1), /* variable shift costs */
809 COSTS_N_INSNS (1), /* constant shift costs */
810 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
811 COSTS_N_INSNS (4), /* HI */
812 COSTS_N_INSNS (3), /* SI */
813 COSTS_N_INSNS (4), /* DI */
814 COSTS_N_INSNS (5)}, /* other */
815 0, /* cost of multiply per each bit set */
816 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
817 COSTS_N_INSNS (35), /* HI */
818 COSTS_N_INSNS (51), /* SI */
819 COSTS_N_INSNS (83), /* DI */
820 COSTS_N_INSNS (83)}, /* other */
821 COSTS_N_INSNS (1), /* cost of movsx */
822 COSTS_N_INSNS (1), /* cost of movzx */
823 8, /* "large" insn */
825 4, /* cost for loading QImode using movzbl */
826 {3, 4, 3}, /* cost of loading integer registers
827 in QImode, HImode and SImode.
828 Relative to reg-reg move (2). */
829 {3, 4, 3}, /* cost of storing integer registers */
830 4, /* cost of reg,reg fld/fst */
831 {4, 4, 12}, /* cost of loading fp registers
832 in SFmode, DFmode and XFmode */
833 {6, 6, 8}, /* cost of storing fp registers
834 in SFmode, DFmode and XFmode */
835 2, /* cost of moving MMX register */
836 {3, 3}, /* cost of loading MMX registers
837 in SImode and DImode */
838 {4, 4}, /* cost of storing MMX registers
839 in SImode and DImode */
840 2, /* cost of moving SSE register */
841 {4, 4, 3}, /* cost of loading SSE registers
842 in SImode, DImode and TImode */
843 {4, 4, 5}, /* cost of storing SSE registers
844 in SImode, DImode and TImode */
845 3, /* MMX or SSE register to integer */
847 MOVD reg64, xmmreg Double FSTORE 4
848 MOVD reg32, xmmreg Double FSTORE 4
850 MOVD reg64, xmmreg Double FADD 3
852 MOVD reg32, xmmreg Double FADD 3
854 64, /* size of l1 cache. */
855 512, /* size of l2 cache. */
856 64, /* size of prefetch block */
857 /* New AMD processors never drop prefetches; if they cannot be performed
858 immediately, they are queued. We set number of simultaneous prefetches
859 to a large constant to reflect this (it probably is not a good idea not
860 to limit number of prefetches at all, as their execution also takes some
862 100, /* number of parallel prefetches */
864 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
865 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
866 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
867 COSTS_N_INSNS (2), /* cost of FABS instruction. */
868 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
869 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
873 4, /* scalar_stmt_cost. */
874 2, /* scalar load_cost. */
875 2, /* scalar_store_cost. */
876 6, /* vec_stmt_cost. */
877 0, /* vec_to_scalar_cost. */
878 2, /* scalar_to_vec_cost. */
879 2, /* vec_align_load_cost. */
880 2, /* vec_unalign_load_cost. */
881 2, /* vec_store_cost. */
882 2, /* cond_taken_branch_cost. */
883 1, /* cond_not_taken_branch_cost. */
886 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
887 very small blocks it is better to use loop. For large blocks, libcall
888 can do nontemporary accesses and beat inline considerably. */
889 static stringop_algs bdver1_memcpy
[2] = {
890 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
891 {-1, rep_prefix_4_byte
, false}}},
892 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
893 {-1, libcall
, false}}}};
894 static stringop_algs bdver1_memset
[2] = {
895 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
896 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
897 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
898 {-1, libcall
, false}}}};
900 const struct processor_costs bdver1_cost
= {
901 COSTS_N_INSNS (1), /* cost of an add instruction */
902 COSTS_N_INSNS (1), /* cost of a lea instruction */
903 COSTS_N_INSNS (1), /* variable shift costs */
904 COSTS_N_INSNS (1), /* constant shift costs */
905 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
906 COSTS_N_INSNS (4), /* HI */
907 COSTS_N_INSNS (4), /* SI */
908 COSTS_N_INSNS (6), /* DI */
909 COSTS_N_INSNS (6)}, /* other */
910 0, /* cost of multiply per each bit set */
911 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
912 COSTS_N_INSNS (35), /* HI */
913 COSTS_N_INSNS (51), /* SI */
914 COSTS_N_INSNS (83), /* DI */
915 COSTS_N_INSNS (83)}, /* other */
916 COSTS_N_INSNS (1), /* cost of movsx */
917 COSTS_N_INSNS (1), /* cost of movzx */
918 8, /* "large" insn */
920 4, /* cost for loading QImode using movzbl */
921 {5, 5, 4}, /* cost of loading integer registers
922 in QImode, HImode and SImode.
923 Relative to reg-reg move (2). */
924 {4, 4, 4}, /* cost of storing integer registers */
925 2, /* cost of reg,reg fld/fst */
926 {5, 5, 12}, /* cost of loading fp registers
927 in SFmode, DFmode and XFmode */
928 {4, 4, 8}, /* cost of storing fp registers
929 in SFmode, DFmode and XFmode */
930 2, /* cost of moving MMX register */
931 {4, 4}, /* cost of loading MMX registers
932 in SImode and DImode */
933 {4, 4}, /* cost of storing MMX registers
934 in SImode and DImode */
935 2, /* cost of moving SSE register */
936 {4, 4, 4}, /* cost of loading SSE registers
937 in SImode, DImode and TImode */
938 {4, 4, 4}, /* cost of storing SSE registers
939 in SImode, DImode and TImode */
940 2, /* MMX or SSE register to integer */
942 MOVD reg64, xmmreg Double FSTORE 4
943 MOVD reg32, xmmreg Double FSTORE 4
945 MOVD reg64, xmmreg Double FADD 3
947 MOVD reg32, xmmreg Double FADD 3
949 16, /* size of l1 cache. */
950 2048, /* size of l2 cache. */
951 64, /* size of prefetch block */
952 /* New AMD processors never drop prefetches; if they cannot be performed
953 immediately, they are queued. We set number of simultaneous prefetches
954 to a large constant to reflect this (it probably is not a good idea not
955 to limit number of prefetches at all, as their execution also takes some
957 100, /* number of parallel prefetches */
959 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
960 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
961 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
962 COSTS_N_INSNS (2), /* cost of FABS instruction. */
963 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
964 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
968 6, /* scalar_stmt_cost. */
969 4, /* scalar load_cost. */
970 4, /* scalar_store_cost. */
971 6, /* vec_stmt_cost. */
972 0, /* vec_to_scalar_cost. */
973 2, /* scalar_to_vec_cost. */
974 4, /* vec_align_load_cost. */
975 4, /* vec_unalign_load_cost. */
976 4, /* vec_store_cost. */
977 2, /* cond_taken_branch_cost. */
978 1, /* cond_not_taken_branch_cost. */
981 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
982 very small blocks it is better to use loop. For large blocks, libcall
983 can do nontemporary accesses and beat inline considerably. */
985 static stringop_algs bdver2_memcpy
[2] = {
986 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
987 {-1, rep_prefix_4_byte
, false}}},
988 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
989 {-1, libcall
, false}}}};
990 static stringop_algs bdver2_memset
[2] = {
991 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
992 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
993 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
994 {-1, libcall
, false}}}};
996 const struct processor_costs bdver2_cost
= {
997 COSTS_N_INSNS (1), /* cost of an add instruction */
998 COSTS_N_INSNS (1), /* cost of a lea instruction */
999 COSTS_N_INSNS (1), /* variable shift costs */
1000 COSTS_N_INSNS (1), /* constant shift costs */
1001 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1002 COSTS_N_INSNS (4), /* HI */
1003 COSTS_N_INSNS (4), /* SI */
1004 COSTS_N_INSNS (6), /* DI */
1005 COSTS_N_INSNS (6)}, /* other */
1006 0, /* cost of multiply per each bit set */
1007 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1008 COSTS_N_INSNS (35), /* HI */
1009 COSTS_N_INSNS (51), /* SI */
1010 COSTS_N_INSNS (83), /* DI */
1011 COSTS_N_INSNS (83)}, /* other */
1012 COSTS_N_INSNS (1), /* cost of movsx */
1013 COSTS_N_INSNS (1), /* cost of movzx */
1014 8, /* "large" insn */
1016 4, /* cost for loading QImode using movzbl */
1017 {5, 5, 4}, /* cost of loading integer registers
1018 in QImode, HImode and SImode.
1019 Relative to reg-reg move (2). */
1020 {4, 4, 4}, /* cost of storing integer registers */
1021 2, /* cost of reg,reg fld/fst */
1022 {5, 5, 12}, /* cost of loading fp registers
1023 in SFmode, DFmode and XFmode */
1024 {4, 4, 8}, /* cost of storing fp registers
1025 in SFmode, DFmode and XFmode */
1026 2, /* cost of moving MMX register */
1027 {4, 4}, /* cost of loading MMX registers
1028 in SImode and DImode */
1029 {4, 4}, /* cost of storing MMX registers
1030 in SImode and DImode */
1031 2, /* cost of moving SSE register */
1032 {4, 4, 4}, /* cost of loading SSE registers
1033 in SImode, DImode and TImode */
1034 {4, 4, 4}, /* cost of storing SSE registers
1035 in SImode, DImode and TImode */
1036 2, /* MMX or SSE register to integer */
1038 MOVD reg64, xmmreg Double FSTORE 4
1039 MOVD reg32, xmmreg Double FSTORE 4
1041 MOVD reg64, xmmreg Double FADD 3
1043 MOVD reg32, xmmreg Double FADD 3
1045 16, /* size of l1 cache. */
1046 2048, /* size of l2 cache. */
1047 64, /* size of prefetch block */
1048 /* New AMD processors never drop prefetches; if they cannot be performed
1049 immediately, they are queued. We set number of simultaneous prefetches
1050 to a large constant to reflect this (it probably is not a good idea not
1051 to limit number of prefetches at all, as their execution also takes some
1053 100, /* number of parallel prefetches */
1054 2, /* Branch cost */
1055 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1056 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1057 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1058 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1059 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1060 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1064 6, /* scalar_stmt_cost. */
1065 4, /* scalar load_cost. */
1066 4, /* scalar_store_cost. */
1067 6, /* vec_stmt_cost. */
1068 0, /* vec_to_scalar_cost. */
1069 2, /* scalar_to_vec_cost. */
1070 4, /* vec_align_load_cost. */
1071 4, /* vec_unalign_load_cost. */
1072 4, /* vec_store_cost. */
1073 2, /* cond_taken_branch_cost. */
1074 1, /* cond_not_taken_branch_cost. */
1078 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1079 very small blocks it is better to use loop. For large blocks, libcall
1080 can do nontemporary accesses and beat inline considerably. */
1081 static stringop_algs bdver3_memcpy
[2] = {
1082 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1083 {-1, rep_prefix_4_byte
, false}}},
1084 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}};
1086 static stringop_algs bdver3_memset
[2] = {
1087 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1088 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1089 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1090 {-1, libcall
, false}}}};
1091 struct processor_costs bdver3_cost
= {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (1), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (4), /* SI */
1099 COSTS_N_INSNS (6), /* DI */
1100 COSTS_N_INSNS (6)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (35), /* HI */
1104 COSTS_N_INSNS (51), /* SI */
1105 COSTS_N_INSNS (83), /* DI */
1106 COSTS_N_INSNS (83)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {5, 5, 4}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {4, 4, 4}, /* cost of storing integer registers */
1116 2, /* cost of reg,reg fld/fst */
1117 {5, 5, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {4, 4, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {4, 4}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 4, 4}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 4}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 2, /* MMX or SSE register to integer */
1132 16, /* size of l1 cache. */
1133 2048, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 2, /* Branch cost */
1142 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1151 6, /* scalar_stmt_cost. */
1152 4, /* scalar load_cost. */
1153 4, /* scalar_store_cost. */
1154 6, /* vec_stmt_cost. */
1155 0, /* vec_to_scalar_cost. */
1156 2, /* scalar_to_vec_cost. */
1157 4, /* vec_align_load_cost. */
1158 4, /* vec_unalign_load_cost. */
1159 4, /* vec_store_cost. */
1160 2, /* cond_taken_branch_cost. */
1161 1, /* cond_not_taken_branch_cost. */
1164 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1165 very small blocks it is better to use loop. For large blocks, libcall can
1166 do nontemporary accesses and beat inline considerably. */
1167 static stringop_algs btver1_memcpy
[2] = {
1168 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1169 {-1, rep_prefix_4_byte
, false}}},
1170 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1171 {-1, libcall
, false}}}};
1172 static stringop_algs btver1_memset
[2] = {
1173 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1174 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1175 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1176 {-1, libcall
, false}}}};
1177 const struct processor_costs btver1_cost
= {
1178 COSTS_N_INSNS (1), /* cost of an add instruction */
1179 COSTS_N_INSNS (2), /* cost of a lea instruction */
1180 COSTS_N_INSNS (1), /* variable shift costs */
1181 COSTS_N_INSNS (1), /* constant shift costs */
1182 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1183 COSTS_N_INSNS (4), /* HI */
1184 COSTS_N_INSNS (3), /* SI */
1185 COSTS_N_INSNS (4), /* DI */
1186 COSTS_N_INSNS (5)}, /* other */
1187 0, /* cost of multiply per each bit set */
1188 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1189 COSTS_N_INSNS (35), /* HI */
1190 COSTS_N_INSNS (51), /* SI */
1191 COSTS_N_INSNS (83), /* DI */
1192 COSTS_N_INSNS (83)}, /* other */
1193 COSTS_N_INSNS (1), /* cost of movsx */
1194 COSTS_N_INSNS (1), /* cost of movzx */
1195 8, /* "large" insn */
1197 4, /* cost for loading QImode using movzbl */
1198 {3, 4, 3}, /* cost of loading integer registers
1199 in QImode, HImode and SImode.
1200 Relative to reg-reg move (2). */
1201 {3, 4, 3}, /* cost of storing integer registers */
1202 4, /* cost of reg,reg fld/fst */
1203 {4, 4, 12}, /* cost of loading fp registers
1204 in SFmode, DFmode and XFmode */
1205 {6, 6, 8}, /* cost of storing fp registers
1206 in SFmode, DFmode and XFmode */
1207 2, /* cost of moving MMX register */
1208 {3, 3}, /* cost of loading MMX registers
1209 in SImode and DImode */
1210 {4, 4}, /* cost of storing MMX registers
1211 in SImode and DImode */
1212 2, /* cost of moving SSE register */
1213 {4, 4, 3}, /* cost of loading SSE registers
1214 in SImode, DImode and TImode */
1215 {4, 4, 5}, /* cost of storing SSE registers
1216 in SImode, DImode and TImode */
1217 3, /* MMX or SSE register to integer */
1219 MOVD reg64, xmmreg Double FSTORE 4
1220 MOVD reg32, xmmreg Double FSTORE 4
1222 MOVD reg64, xmmreg Double FADD 3
1224 MOVD reg32, xmmreg Double FADD 3
1226 32, /* size of l1 cache. */
1227 512, /* size of l2 cache. */
1228 64, /* size of prefetch block */
1229 100, /* number of parallel prefetches */
1230 2, /* Branch cost */
1231 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1232 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1233 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1234 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1235 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1236 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1240 4, /* scalar_stmt_cost. */
1241 2, /* scalar load_cost. */
1242 2, /* scalar_store_cost. */
1243 6, /* vec_stmt_cost. */
1244 0, /* vec_to_scalar_cost. */
1245 2, /* scalar_to_vec_cost. */
1246 2, /* vec_align_load_cost. */
1247 2, /* vec_unalign_load_cost. */
1248 2, /* vec_store_cost. */
1249 2, /* cond_taken_branch_cost. */
1250 1, /* cond_not_taken_branch_cost. */
1253 static stringop_algs btver2_memcpy
[2] = {
1254 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1255 {-1, rep_prefix_4_byte
, false}}},
1256 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1257 {-1, libcall
, false}}}};
1258 static stringop_algs btver2_memset
[2] = {
1259 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1260 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1261 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1262 {-1, libcall
, false}}}};
1263 const struct processor_costs btver2_cost
= {
1264 COSTS_N_INSNS (1), /* cost of an add instruction */
1265 COSTS_N_INSNS (2), /* cost of a lea instruction */
1266 COSTS_N_INSNS (1), /* variable shift costs */
1267 COSTS_N_INSNS (1), /* constant shift costs */
1268 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1269 COSTS_N_INSNS (4), /* HI */
1270 COSTS_N_INSNS (3), /* SI */
1271 COSTS_N_INSNS (4), /* DI */
1272 COSTS_N_INSNS (5)}, /* other */
1273 0, /* cost of multiply per each bit set */
1274 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1275 COSTS_N_INSNS (35), /* HI */
1276 COSTS_N_INSNS (51), /* SI */
1277 COSTS_N_INSNS (83), /* DI */
1278 COSTS_N_INSNS (83)}, /* other */
1279 COSTS_N_INSNS (1), /* cost of movsx */
1280 COSTS_N_INSNS (1), /* cost of movzx */
1281 8, /* "large" insn */
1283 4, /* cost for loading QImode using movzbl */
1284 {3, 4, 3}, /* cost of loading integer registers
1285 in QImode, HImode and SImode.
1286 Relative to reg-reg move (2). */
1287 {3, 4, 3}, /* cost of storing integer registers */
1288 4, /* cost of reg,reg fld/fst */
1289 {4, 4, 12}, /* cost of loading fp registers
1290 in SFmode, DFmode and XFmode */
1291 {6, 6, 8}, /* cost of storing fp registers
1292 in SFmode, DFmode and XFmode */
1293 2, /* cost of moving MMX register */
1294 {3, 3}, /* cost of loading MMX registers
1295 in SImode and DImode */
1296 {4, 4}, /* cost of storing MMX registers
1297 in SImode and DImode */
1298 2, /* cost of moving SSE register */
1299 {4, 4, 3}, /* cost of loading SSE registers
1300 in SImode, DImode and TImode */
1301 {4, 4, 5}, /* cost of storing SSE registers
1302 in SImode, DImode and TImode */
1303 3, /* MMX or SSE register to integer */
1305 MOVD reg64, xmmreg Double FSTORE 4
1306 MOVD reg32, xmmreg Double FSTORE 4
1308 MOVD reg64, xmmreg Double FADD 3
1310 MOVD reg32, xmmreg Double FADD 3
1312 32, /* size of l1 cache. */
1313 2048, /* size of l2 cache. */
1314 64, /* size of prefetch block */
1315 100, /* number of parallel prefetches */
1316 2, /* Branch cost */
1317 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1318 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1319 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1320 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1321 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1322 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1325 4, /* scalar_stmt_cost. */
1326 2, /* scalar load_cost. */
1327 2, /* scalar_store_cost. */
1328 6, /* vec_stmt_cost. */
1329 0, /* vec_to_scalar_cost. */
1330 2, /* scalar_to_vec_cost. */
1331 2, /* vec_align_load_cost. */
1332 2, /* vec_unalign_load_cost. */
1333 2, /* vec_store_cost. */
1334 2, /* cond_taken_branch_cost. */
1335 1, /* cond_not_taken_branch_cost. */
1338 static stringop_algs pentium4_memcpy
[2] = {
1339 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1340 DUMMY_STRINGOP_ALGS
};
1341 static stringop_algs pentium4_memset
[2] = {
1342 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1343 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1344 DUMMY_STRINGOP_ALGS
};
1347 struct processor_costs pentium4_cost
= {
1348 COSTS_N_INSNS (1), /* cost of an add instruction */
1349 COSTS_N_INSNS (3), /* cost of a lea instruction */
1350 COSTS_N_INSNS (4), /* variable shift costs */
1351 COSTS_N_INSNS (4), /* constant shift costs */
1352 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1353 COSTS_N_INSNS (15), /* HI */
1354 COSTS_N_INSNS (15), /* SI */
1355 COSTS_N_INSNS (15), /* DI */
1356 COSTS_N_INSNS (15)}, /* other */
1357 0, /* cost of multiply per each bit set */
1358 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1359 COSTS_N_INSNS (56), /* HI */
1360 COSTS_N_INSNS (56), /* SI */
1361 COSTS_N_INSNS (56), /* DI */
1362 COSTS_N_INSNS (56)}, /* other */
1363 COSTS_N_INSNS (1), /* cost of movsx */
1364 COSTS_N_INSNS (1), /* cost of movzx */
1365 16, /* "large" insn */
1367 2, /* cost for loading QImode using movzbl */
1368 {4, 5, 4}, /* cost of loading integer registers
1369 in QImode, HImode and SImode.
1370 Relative to reg-reg move (2). */
1371 {2, 3, 2}, /* cost of storing integer registers */
1372 2, /* cost of reg,reg fld/fst */
1373 {2, 2, 6}, /* cost of loading fp registers
1374 in SFmode, DFmode and XFmode */
1375 {4, 4, 6}, /* cost of storing fp registers
1376 in SFmode, DFmode and XFmode */
1377 2, /* cost of moving MMX register */
1378 {2, 2}, /* cost of loading MMX registers
1379 in SImode and DImode */
1380 {2, 2}, /* cost of storing MMX registers
1381 in SImode and DImode */
1382 12, /* cost of moving SSE register */
1383 {12, 12, 12}, /* cost of loading SSE registers
1384 in SImode, DImode and TImode */
1385 {2, 2, 8}, /* cost of storing SSE registers
1386 in SImode, DImode and TImode */
1387 10, /* MMX or SSE register to integer */
1388 8, /* size of l1 cache. */
1389 256, /* size of l2 cache. */
1390 64, /* size of prefetch block */
1391 6, /* number of parallel prefetches */
1392 2, /* Branch cost */
1393 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1394 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1395 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1396 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1397 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1398 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1401 1, /* scalar_stmt_cost. */
1402 1, /* scalar load_cost. */
1403 1, /* scalar_store_cost. */
1404 1, /* vec_stmt_cost. */
1405 1, /* vec_to_scalar_cost. */
1406 1, /* scalar_to_vec_cost. */
1407 1, /* vec_align_load_cost. */
1408 2, /* vec_unalign_load_cost. */
1409 1, /* vec_store_cost. */
1410 3, /* cond_taken_branch_cost. */
1411 1, /* cond_not_taken_branch_cost. */
1414 static stringop_algs nocona_memcpy
[2] = {
1415 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1416 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1417 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1419 static stringop_algs nocona_memset
[2] = {
1420 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1421 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1422 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1423 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1426 struct processor_costs nocona_cost
= {
1427 COSTS_N_INSNS (1), /* cost of an add instruction */
1428 COSTS_N_INSNS (1), /* cost of a lea instruction */
1429 COSTS_N_INSNS (1), /* variable shift costs */
1430 COSTS_N_INSNS (1), /* constant shift costs */
1431 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1432 COSTS_N_INSNS (10), /* HI */
1433 COSTS_N_INSNS (10), /* SI */
1434 COSTS_N_INSNS (10), /* DI */
1435 COSTS_N_INSNS (10)}, /* other */
1436 0, /* cost of multiply per each bit set */
1437 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1438 COSTS_N_INSNS (66), /* HI */
1439 COSTS_N_INSNS (66), /* SI */
1440 COSTS_N_INSNS (66), /* DI */
1441 COSTS_N_INSNS (66)}, /* other */
1442 COSTS_N_INSNS (1), /* cost of movsx */
1443 COSTS_N_INSNS (1), /* cost of movzx */
1444 16, /* "large" insn */
1445 17, /* MOVE_RATIO */
1446 4, /* cost for loading QImode using movzbl */
1447 {4, 4, 4}, /* cost of loading integer registers
1448 in QImode, HImode and SImode.
1449 Relative to reg-reg move (2). */
1450 {4, 4, 4}, /* cost of storing integer registers */
1451 3, /* cost of reg,reg fld/fst */
1452 {12, 12, 12}, /* cost of loading fp registers
1453 in SFmode, DFmode and XFmode */
1454 {4, 4, 4}, /* cost of storing fp registers
1455 in SFmode, DFmode and XFmode */
1456 6, /* cost of moving MMX register */
1457 {12, 12}, /* cost of loading MMX registers
1458 in SImode and DImode */
1459 {12, 12}, /* cost of storing MMX registers
1460 in SImode and DImode */
1461 6, /* cost of moving SSE register */
1462 {12, 12, 12}, /* cost of loading SSE registers
1463 in SImode, DImode and TImode */
1464 {12, 12, 12}, /* cost of storing SSE registers
1465 in SImode, DImode and TImode */
1466 8, /* MMX or SSE register to integer */
1467 8, /* size of l1 cache. */
1468 1024, /* size of l2 cache. */
1469 128, /* size of prefetch block */
1470 8, /* number of parallel prefetches */
1471 1, /* Branch cost */
1472 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1473 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1474 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1475 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1476 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1477 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1480 1, /* scalar_stmt_cost. */
1481 1, /* scalar load_cost. */
1482 1, /* scalar_store_cost. */
1483 1, /* vec_stmt_cost. */
1484 1, /* vec_to_scalar_cost. */
1485 1, /* scalar_to_vec_cost. */
1486 1, /* vec_align_load_cost. */
1487 2, /* vec_unalign_load_cost. */
1488 1, /* vec_store_cost. */
1489 3, /* cond_taken_branch_cost. */
1490 1, /* cond_not_taken_branch_cost. */
1493 static stringop_algs atom_memcpy
[2] = {
1494 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1495 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1496 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1497 static stringop_algs atom_memset
[2] = {
1498 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1499 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1500 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1501 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1503 struct processor_costs atom_cost
= {
1504 COSTS_N_INSNS (1), /* cost of an add instruction */
1505 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1506 COSTS_N_INSNS (1), /* variable shift costs */
1507 COSTS_N_INSNS (1), /* constant shift costs */
1508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1509 COSTS_N_INSNS (4), /* HI */
1510 COSTS_N_INSNS (3), /* SI */
1511 COSTS_N_INSNS (4), /* DI */
1512 COSTS_N_INSNS (2)}, /* other */
1513 0, /* cost of multiply per each bit set */
1514 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1515 COSTS_N_INSNS (26), /* HI */
1516 COSTS_N_INSNS (42), /* SI */
1517 COSTS_N_INSNS (74), /* DI */
1518 COSTS_N_INSNS (74)}, /* other */
1519 COSTS_N_INSNS (1), /* cost of movsx */
1520 COSTS_N_INSNS (1), /* cost of movzx */
1521 8, /* "large" insn */
1522 17, /* MOVE_RATIO */
1523 4, /* cost for loading QImode using movzbl */
1524 {4, 4, 4}, /* cost of loading integer registers
1525 in QImode, HImode and SImode.
1526 Relative to reg-reg move (2). */
1527 {4, 4, 4}, /* cost of storing integer registers */
1528 4, /* cost of reg,reg fld/fst */
1529 {12, 12, 12}, /* cost of loading fp registers
1530 in SFmode, DFmode and XFmode */
1531 {6, 6, 8}, /* cost of storing fp registers
1532 in SFmode, DFmode and XFmode */
1533 2, /* cost of moving MMX register */
1534 {8, 8}, /* cost of loading MMX registers
1535 in SImode and DImode */
1536 {8, 8}, /* cost of storing MMX registers
1537 in SImode and DImode */
1538 2, /* cost of moving SSE register */
1539 {8, 8, 8}, /* cost of loading SSE registers
1540 in SImode, DImode and TImode */
1541 {8, 8, 8}, /* cost of storing SSE registers
1542 in SImode, DImode and TImode */
1543 5, /* MMX or SSE register to integer */
1544 32, /* size of l1 cache. */
1545 256, /* size of l2 cache. */
1546 64, /* size of prefetch block */
1547 6, /* number of parallel prefetches */
1548 3, /* Branch cost */
1549 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1550 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1551 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1552 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1553 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1554 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1557 1, /* scalar_stmt_cost. */
1558 1, /* scalar load_cost. */
1559 1, /* scalar_store_cost. */
1560 1, /* vec_stmt_cost. */
1561 1, /* vec_to_scalar_cost. */
1562 1, /* scalar_to_vec_cost. */
1563 1, /* vec_align_load_cost. */
1564 2, /* vec_unalign_load_cost. */
1565 1, /* vec_store_cost. */
1566 3, /* cond_taken_branch_cost. */
1567 1, /* cond_not_taken_branch_cost. */
1570 static stringop_algs slm_memcpy
[2] = {
1571 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1572 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1573 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1574 static stringop_algs slm_memset
[2] = {
1575 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1576 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1577 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1578 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1580 struct processor_costs slm_cost
= {
1581 COSTS_N_INSNS (1), /* cost of an add instruction */
1582 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1583 COSTS_N_INSNS (1), /* variable shift costs */
1584 COSTS_N_INSNS (1), /* constant shift costs */
1585 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1586 COSTS_N_INSNS (4), /* HI */
1587 COSTS_N_INSNS (3), /* SI */
1588 COSTS_N_INSNS (4), /* DI */
1589 COSTS_N_INSNS (2)}, /* other */
1590 0, /* cost of multiply per each bit set */
1591 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1592 COSTS_N_INSNS (26), /* HI */
1593 COSTS_N_INSNS (42), /* SI */
1594 COSTS_N_INSNS (74), /* DI */
1595 COSTS_N_INSNS (74)}, /* other */
1596 COSTS_N_INSNS (1), /* cost of movsx */
1597 COSTS_N_INSNS (1), /* cost of movzx */
1598 8, /* "large" insn */
1599 17, /* MOVE_RATIO */
1600 4, /* cost for loading QImode using movzbl */
1601 {4, 4, 4}, /* cost of loading integer registers
1602 in QImode, HImode and SImode.
1603 Relative to reg-reg move (2). */
1604 {4, 4, 4}, /* cost of storing integer registers */
1605 4, /* cost of reg,reg fld/fst */
1606 {12, 12, 12}, /* cost of loading fp registers
1607 in SFmode, DFmode and XFmode */
1608 {6, 6, 8}, /* cost of storing fp registers
1609 in SFmode, DFmode and XFmode */
1610 2, /* cost of moving MMX register */
1611 {8, 8}, /* cost of loading MMX registers
1612 in SImode and DImode */
1613 {8, 8}, /* cost of storing MMX registers
1614 in SImode and DImode */
1615 2, /* cost of moving SSE register */
1616 {8, 8, 8}, /* cost of loading SSE registers
1617 in SImode, DImode and TImode */
1618 {8, 8, 8}, /* cost of storing SSE registers
1619 in SImode, DImode and TImode */
1620 5, /* MMX or SSE register to integer */
1621 32, /* size of l1 cache. */
1622 256, /* size of l2 cache. */
1623 64, /* size of prefetch block */
1624 6, /* number of parallel prefetches */
1625 3, /* Branch cost */
1626 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1627 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1628 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1629 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1630 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1631 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1634 1, /* scalar_stmt_cost. */
1635 1, /* scalar load_cost. */
1636 1, /* scalar_store_cost. */
1637 1, /* vec_stmt_cost. */
1638 1, /* vec_to_scalar_cost. */
1639 1, /* scalar_to_vec_cost. */
1640 1, /* vec_align_load_cost. */
1641 2, /* vec_unalign_load_cost. */
1642 1, /* vec_store_cost. */
1643 3, /* cond_taken_branch_cost. */
1644 1, /* cond_not_taken_branch_cost. */
1647 /* Generic64 should produce code tuned for Nocona and K8. */
1649 static stringop_algs generic64_memcpy
[2] = {
1650 DUMMY_STRINGOP_ALGS
,
1651 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1652 {-1, libcall
, false}}}};
1653 static stringop_algs generic64_memset
[2] = {
1654 DUMMY_STRINGOP_ALGS
,
1655 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1656 {-1, libcall
, false}}}};
1658 struct processor_costs generic64_cost
= {
1659 COSTS_N_INSNS (1), /* cost of an add instruction */
1660 /* On all chips taken into consideration lea is 2 cycles and more. With
1661 this cost however our current implementation of synth_mult results in
1662 use of unnecessary temporary registers causing regression on several
1663 SPECfp benchmarks. */
1664 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1665 COSTS_N_INSNS (1), /* variable shift costs */
1666 COSTS_N_INSNS (1), /* constant shift costs */
1667 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1668 COSTS_N_INSNS (4), /* HI */
1669 COSTS_N_INSNS (3), /* SI */
1670 COSTS_N_INSNS (4), /* DI */
1671 COSTS_N_INSNS (2)}, /* other */
1672 0, /* cost of multiply per each bit set */
1673 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1674 COSTS_N_INSNS (26), /* HI */
1675 COSTS_N_INSNS (42), /* SI */
1676 COSTS_N_INSNS (74), /* DI */
1677 COSTS_N_INSNS (74)}, /* other */
1678 COSTS_N_INSNS (1), /* cost of movsx */
1679 COSTS_N_INSNS (1), /* cost of movzx */
1680 8, /* "large" insn */
1681 17, /* MOVE_RATIO */
1682 4, /* cost for loading QImode using movzbl */
1683 {4, 4, 4}, /* cost of loading integer registers
1684 in QImode, HImode and SImode.
1685 Relative to reg-reg move (2). */
1686 {4, 4, 4}, /* cost of storing integer registers */
1687 4, /* cost of reg,reg fld/fst */
1688 {12, 12, 12}, /* cost of loading fp registers
1689 in SFmode, DFmode and XFmode */
1690 {6, 6, 8}, /* cost of storing fp registers
1691 in SFmode, DFmode and XFmode */
1692 2, /* cost of moving MMX register */
1693 {8, 8}, /* cost of loading MMX registers
1694 in SImode and DImode */
1695 {8, 8}, /* cost of storing MMX registers
1696 in SImode and DImode */
1697 2, /* cost of moving SSE register */
1698 {8, 8, 8}, /* cost of loading SSE registers
1699 in SImode, DImode and TImode */
1700 {8, 8, 8}, /* cost of storing SSE registers
1701 in SImode, DImode and TImode */
1702 5, /* MMX or SSE register to integer */
1703 32, /* size of l1 cache. */
1704 512, /* size of l2 cache. */
1705 64, /* size of prefetch block */
1706 6, /* number of parallel prefetches */
1707 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1708 value is increased to perhaps more appropriate value of 5. */
1709 3, /* Branch cost */
1710 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1711 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1712 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1713 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1714 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1715 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1718 1, /* scalar_stmt_cost. */
1719 1, /* scalar load_cost. */
1720 1, /* scalar_store_cost. */
1721 1, /* vec_stmt_cost. */
1722 1, /* vec_to_scalar_cost. */
1723 1, /* scalar_to_vec_cost. */
1724 1, /* vec_align_load_cost. */
1725 2, /* vec_unalign_load_cost. */
1726 1, /* vec_store_cost. */
1727 3, /* cond_taken_branch_cost. */
1728 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for Core family of CPUs.  */
/* memcpy expansion strategy for the Core tuning: both entries fall back
   to a library call past their largest threshold (-1 terminates).
   NOTE(review): entry [0] vs [1] presumably select 32-bit vs 64-bit
   code generation — confirm against the consumers of stringop_algs.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
1736 static stringop_algs core_memset
[2] = {
1737 {libcall
, {{6, loop_1_byte
, true},
1739 {8192, rep_prefix_4_byte
, true},
1740 {-1, libcall
, false}}},
1741 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1742 {-1, libcall
, false}}}};
1745 struct processor_costs core_cost
= {
1746 COSTS_N_INSNS (1), /* cost of an add instruction */
1747 /* On all chips taken into consideration lea is 2 cycles and more. With
1748 this cost however our current implementation of synth_mult results in
1749 use of unnecessary temporary registers causing regression on several
1750 SPECfp benchmarks. */
1751 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1752 COSTS_N_INSNS (1), /* variable shift costs */
1753 COSTS_N_INSNS (1), /* constant shift costs */
1754 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1755 COSTS_N_INSNS (4), /* HI */
1756 COSTS_N_INSNS (3), /* SI */
1757 COSTS_N_INSNS (4), /* DI */
1758 COSTS_N_INSNS (2)}, /* other */
1759 0, /* cost of multiply per each bit set */
1760 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1761 COSTS_N_INSNS (26), /* HI */
1762 COSTS_N_INSNS (42), /* SI */
1763 COSTS_N_INSNS (74), /* DI */
1764 COSTS_N_INSNS (74)}, /* other */
1765 COSTS_N_INSNS (1), /* cost of movsx */
1766 COSTS_N_INSNS (1), /* cost of movzx */
1767 8, /* "large" insn */
1768 17, /* MOVE_RATIO */
1769 4, /* cost for loading QImode using movzbl */
1770 {4, 4, 4}, /* cost of loading integer registers
1771 in QImode, HImode and SImode.
1772 Relative to reg-reg move (2). */
1773 {4, 4, 4}, /* cost of storing integer registers */
1774 4, /* cost of reg,reg fld/fst */
1775 {12, 12, 12}, /* cost of loading fp registers
1776 in SFmode, DFmode and XFmode */
1777 {6, 6, 8}, /* cost of storing fp registers
1778 in SFmode, DFmode and XFmode */
1779 2, /* cost of moving MMX register */
1780 {8, 8}, /* cost of loading MMX registers
1781 in SImode and DImode */
1782 {8, 8}, /* cost of storing MMX registers
1783 in SImode and DImode */
1784 2, /* cost of moving SSE register */
1785 {8, 8, 8}, /* cost of loading SSE registers
1786 in SImode, DImode and TImode */
1787 {8, 8, 8}, /* cost of storing SSE registers
1788 in SImode, DImode and TImode */
1789 5, /* MMX or SSE register to integer */
1790 64, /* size of l1 cache. */
1791 512, /* size of l2 cache. */
1792 64, /* size of prefetch block */
1793 6, /* number of parallel prefetches */
1794 /* FIXME perhaps more appropriate value is 5. */
1795 3, /* Branch cost */
1796 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1797 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1798 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1799 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1800 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1801 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1804 1, /* scalar_stmt_cost. */
1805 1, /* scalar load_cost. */
1806 1, /* scalar_store_cost. */
1807 1, /* vec_stmt_cost. */
1808 1, /* vec_to_scalar_cost. */
1809 1, /* scalar_to_vec_cost. */
1810 1, /* vec_align_load_cost. */
1811 2, /* vec_unalign_load_cost. */
1812 1, /* vec_store_cost. */
1813 3, /* cond_taken_branch_cost. */
1814 1, /* cond_not_taken_branch_cost. */
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  (NOTE(review): the tail of this comment was lost in
   extraction; the wording after "Nocona," is reconstructed — confirm
   against upstream.)  */
/* memcpy expansion strategy for the generic 32-bit tuning: entry [0]
   uses loop/rep-prefix/libcall thresholds, entry [1] is a dummy
   (unused for this tuning).  */
static stringop_algs generic32_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
/* memset expansion strategy for the generic 32-bit tuning; same layout
   and thresholds as generic32_memcpy above (entry [1] unused).  */
static stringop_algs generic32_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
1828 struct processor_costs generic32_cost
= {
1829 COSTS_N_INSNS (1), /* cost of an add instruction */
1830 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1831 COSTS_N_INSNS (1), /* variable shift costs */
1832 COSTS_N_INSNS (1), /* constant shift costs */
1833 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1834 COSTS_N_INSNS (4), /* HI */
1835 COSTS_N_INSNS (3), /* SI */
1836 COSTS_N_INSNS (4), /* DI */
1837 COSTS_N_INSNS (2)}, /* other */
1838 0, /* cost of multiply per each bit set */
1839 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1840 COSTS_N_INSNS (26), /* HI */
1841 COSTS_N_INSNS (42), /* SI */
1842 COSTS_N_INSNS (74), /* DI */
1843 COSTS_N_INSNS (74)}, /* other */
1844 COSTS_N_INSNS (1), /* cost of movsx */
1845 COSTS_N_INSNS (1), /* cost of movzx */
1846 8, /* "large" insn */
1847 17, /* MOVE_RATIO */
1848 4, /* cost for loading QImode using movzbl */
1849 {4, 4, 4}, /* cost of loading integer registers
1850 in QImode, HImode and SImode.
1851 Relative to reg-reg move (2). */
1852 {4, 4, 4}, /* cost of storing integer registers */
1853 4, /* cost of reg,reg fld/fst */
1854 {12, 12, 12}, /* cost of loading fp registers
1855 in SFmode, DFmode and XFmode */
1856 {6, 6, 8}, /* cost of storing fp registers
1857 in SFmode, DFmode and XFmode */
1858 2, /* cost of moving MMX register */
1859 {8, 8}, /* cost of loading MMX registers
1860 in SImode and DImode */
1861 {8, 8}, /* cost of storing MMX registers
1862 in SImode and DImode */
1863 2, /* cost of moving SSE register */
1864 {8, 8, 8}, /* cost of loading SSE registers
1865 in SImode, DImode and TImode */
1866 {8, 8, 8}, /* cost of storing SSE registers
1867 in SImode, DImode and TImode */
1868 5, /* MMX or SSE register to integer */
1869 32, /* size of l1 cache. */
1870 256, /* size of l2 cache. */
1871 64, /* size of prefetch block */
1872 6, /* number of parallel prefetches */
1873 3, /* Branch cost */
1874 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1875 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1876 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1877 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1878 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1879 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1882 1, /* scalar_stmt_cost. */
1883 1, /* scalar load_cost. */
1884 1, /* scalar_store_cost. */
1885 1, /* vec_stmt_cost. */
1886 1, /* vec_to_scalar_cost. */
1887 1, /* scalar_to_vec_cost. */
1888 1, /* vec_align_load_cost. */
1889 2, /* vec_unalign_load_cost. */
1890 1, /* vec_store_cost. */
1891 3, /* cond_taken_branch_cost. */
1892 1, /* cond_not_taken_branch_cost. */
/* Cost table selected by -mtune; initialized to pentium_cost and
   reassigned during option processing.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;
/* Cost table selected by -mtune or -Os; initialized to pentium_cost
   and reassigned during option processing.  */
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks.  Each single-CPU mask is one
   bit per PROCESSOR_* enumerator; the compound masks OR related CPUs
   together for use in the tuning tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
#define m_SLM (1<<PROCESSOR_SLM)

/* AMD (and Geode) processors.  */
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1939 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1941 #define DEF_TUNE(tune, name) name,
1942 #include "x86-tune.def"
/* Feature tests against the various tunings.  Indexed by X86_TUNE_*;
   filled in from initial_ix86_tune_features during option processing.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
1949 /* Feature tests against the various tunings used to create ix86_tune_features
1950 based on the processor mask. */
1951 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1952 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1953 negatively, so enabling for Generic64 seems like good code size
1954 tradeoff. We can't enable it for 32bit generic because it does not
1955 work well with PPro base chips. */
1956 m_386
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1958 /* X86_TUNE_PUSH_MEMORY */
1959 m_386
| m_P4_NOCONA
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1961 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1964 /* X86_TUNE_UNROLL_STRLEN */
1965 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_SLM
| m_CORE_ALL
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1967 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1968 on simulation result. But after P4 was made, no performance benefit
1969 was observed with branch hints. It also increases the code size.
1970 As a result, icc never generates branch hints. */
1973 /* X86_TUNE_DOUBLE_WITH_ADD */
1976 /* X86_TUNE_USE_SAHF */
1977 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1979 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1980 partial dependencies. */
1981 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1983 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1984 register stalls on Generic32 compilation setting as well. However
1985 in current implementation the partial register stalls are not eliminated
1986 very well - they can be introduced via subregs synthesized by combine
1987 and can happen in caller/callee saving sequences. Because this option
1988 pays back little on PPro based chips and is in conflict with partial reg
1989 dependencies used by Athlon/P4 based chips, it is better to leave it off
1990 for generic32 for now. */
1993 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1994 m_CORE_ALL
| m_GENERIC
,
1996 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1997 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1998 m_CORE_ALL
| m_GENERIC
,
2000 /* X86_TUNE_USE_HIMODE_FIOP */
2001 m_386
| m_486
| m_K6_GEODE
,
2003 /* X86_TUNE_USE_SIMODE_FIOP */
2004 ~(m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
),
2006 /* X86_TUNE_USE_MOV0 */
2009 /* X86_TUNE_USE_CLTD */
2010 ~(m_PENT
| m_ATOM
| m_SLM
| m_K6
),
2012 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2015 /* X86_TUNE_SPLIT_LONG_MOVES */
2018 /* X86_TUNE_READ_MODIFY_WRITE */
2021 /* X86_TUNE_READ_MODIFY */
2024 /* X86_TUNE_PROMOTE_QIMODE */
2025 m_386
| m_486
| m_PENT
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2027 /* X86_TUNE_FAST_PREFIX */
2028 ~(m_386
| m_486
| m_PENT
),
2030 /* X86_TUNE_SINGLE_STRINGOP */
2031 m_386
| m_P4_NOCONA
,
2033 /* X86_TUNE_QIMODE_MATH */
2036 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2037 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2038 might be considered for Generic32 if our scheme for avoiding partial
2039 stalls was more effective. */
2042 /* X86_TUNE_PROMOTE_QI_REGS */
2045 /* X86_TUNE_PROMOTE_HI_REGS */
2048 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2049 over esp addition. */
2050 m_386
| m_486
| m_PENT
| m_PPRO
,
2052 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2053 over esp addition. */
2056 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2057 over esp subtraction. */
2058 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2060 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2061 over esp subtraction. */
2062 m_PENT
| m_K6_GEODE
,
2064 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2065 for DFmode copies */
2066 ~(m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
),
2068 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2069 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
,
2071 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2072 conflict here in between PPro/Pentium4 based chips that thread 128bit
2073 SSE registers as single units versus K8 based chips that divide SSE
2074 registers to two 64bit halves. This knob promotes all store destinations
2075 to be 128bit to allow register renaming on 128bit SSE units, but usually
2076 results in one extra microop on 64bit SSE units. Experimental results
2077 shows that disabling this option on P4 brings over 20% SPECfp regression,
2078 while enabling it on K8 brings roughly 2.4% regression that can be partly
2079 masked by careful scheduling of moves. */
2080 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2082 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2083 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_SLM
,
2085 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2086 m_COREI7
| m_BDVER
| m_SLM
,
2088 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2091 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2092 are resolved on SSE register parts instead of whole registers, so we may
2093 maintain just lower part of scalar values in proper format leaving the
2094 upper part undefined. */
2097 /* X86_TUNE_SSE_TYPELESS_STORES */
2100 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2101 m_PPRO
| m_P4_NOCONA
,
2103 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2104 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
,
2106 /* X86_TUNE_PROLOGUE_USING_MOVE */
2107 m_PPRO
| m_ATHLON_K8
,
2109 /* X86_TUNE_EPILOGUE_USING_MOVE */
2110 m_PPRO
| m_ATHLON_K8
,
2112 /* X86_TUNE_SHIFT1 */
2115 /* X86_TUNE_USE_FFREEP */
2118 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */
2119 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2121 /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */
2124 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2125 ~(m_AMDFAM10
| m_BDVER
),
2127 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2128 than 4 branch instructions in the 16 byte window. */
2129 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
,
2131 /* X86_TUNE_SCHEDULE */
2132 m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2134 /* X86_TUNE_USE_BT */
2135 m_CORE_ALL
| m_ATOM
| m_SLM
| m_AMD_MULTIPLE
| m_GENERIC
,
2137 /* X86_TUNE_USE_INCDEC */
2138 ~(m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_GENERIC
),
2140 /* X86_TUNE_PAD_RETURNS */
2141 m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
,
2143 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
2146 /* X86_TUNE_EXT_80387_CONSTANTS */
2147 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_SLM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2149 /* X86_TUNE_AVOID_VECTOR_DECODE */
2150 m_CORE_ALL
| m_K8
| m_GENERIC64
,
2152 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2153 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2156 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2157 vector path on AMD machines. */
2158 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2160 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2162 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2164 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2168 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2169 but one byte longer. */
2172 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2173 operand that cannot be represented using a modRM byte. The XOR
2174 replacement is long decoded, so this split helps here as well. */
2177 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2179 m_CORE_ALL
| m_AMDFAM10
| m_GENERIC
,
2181 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2182 from integer to FP. */
2185 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2186 with a subsequent conditional jump instruction into a single
2187 compare-and-branch uop. */
2190 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2191 will impact LEA instruction selection. */
2194 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2198 /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2199 at -O3. For the moment, the prefetching seems badly tuned for Intel
2201 m_K6_GEODE
| m_AMD_MULTIPLE
,
2203 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2204 the auto-vectorizer. */
2207 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2208 during reassociation of integer computation. */
2211 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2212 during reassociation of fp computation. */
2213 m_ATOM
| m_SLM
| m_HASWELL
| m_BDVER1
| m_BDVER2
,
2215 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2216 regs instead of memory. */
2219 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2220 a conditional move. */
2223 /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
2224 fp converts to destination register. */
/* Feature tests against the various architecture variations.  Indexed
   by X86_ARCH_*; filled in from initial_ix86_arch_features during
   option processing.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];
2232 /* Feature tests against the various architecture variations, used to create
2233 ix86_arch_features based on the processor mask. */
2234 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2235 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2236 ~(m_386
| m_486
| m_PENT
| m_K6
),
2238 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2241 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2244 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2247 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Tunings for which accumulating outgoing arguments is preferred.
   NOTE(review): semantics inferred from the name — confirm against the
   option-override code that reads this mask.  */
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE
    | m_GENERIC;
/* Architectures where the x87 "fancy math" functions are assumed always
   available.  NOTE(review): semantics inferred from the name — confirm
   against the option-override code that reads this mask.  */
static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM
    | m_AMD_MULTIPLE | m_GENERIC;
/* Tunings for which unaligned 256-bit AVX loads are split into two
   128-bit halves (per the mask name — confirm at the use site).  */
static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;
/* Tunings for which unaligned 256-bit AVX stores are split into two
   128-bit halves (per the mask name — confirm at the use site).  */
static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  (NOTE(review): the end of this comment was lost in
   extraction; "epilogue code." is reconstructed — confirm upstream.)  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The *_REGISTER_NAMES initializer macros come from the target headers.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2273 /* Array of the smallest class containing reg number REGNO, indexed by
2274 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2276 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2278 /* ax, dx, cx, bx */
2279 AREG
, DREG
, CREG
, BREG
,
2280 /* si, di, bp, sp */
2281 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2283 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2284 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2287 /* flags, fpsr, fpcr, frame */
2288 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2290 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2293 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2296 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2297 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2298 /* SSE REX registers */
2299 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2303 /* The "default" register map used in 32bit mode. */
2305 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2307 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2308 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2309 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2310 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2311 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2312 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2313 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2316 /* The "default" register map used in 64bit mode. */
2318 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2320 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2321 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2322 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2323 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2324 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2325 8,9,10,11,12,13,14,15, /* extended integer registers */
2326 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2329 /* Define the register numbers to be used in Dwarf debugging information.
2330 The SVR4 reference port C compiler uses the following register numbers
2331 in its Dwarf output code:
2332 0 for %eax (gcc regno = 0)
2333 1 for %ecx (gcc regno = 2)
2334 2 for %edx (gcc regno = 1)
2335 3 for %ebx (gcc regno = 3)
2336 4 for %esp (gcc regno = 7)
2337 5 for %ebp (gcc regno = 6)
2338 6 for %esi (gcc regno = 4)
2339 7 for %edi (gcc regno = 5)
2340 The following three DWARF register numbers are never generated by
2341 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2342 believes these numbers have these meanings.
2343 8 for %eip (no gcc equivalent)
2344 9 for %eflags (gcc regno = 17)
2345 10 for %trapno (no gcc equivalent)
2346 It is not at all clear how we should number the FP stack registers
2347 for the x86 architecture. If the version of SDB on x86/svr4 were
2348 a bit less brain dead with respect to floating-point then we would
2349 have a precedent to follow with respect to DWARF register numbers
2350 for x86 FP registers, but the SDB on x86/svr4 is so completely
2351 broken with respect to FP registers that it is hardly worth thinking
2352 of it as something to strive for compatibility with.
2353 The version of x86/svr4 SDB I have at the moment does (partially)
2354 seem to believe that DWARF register number 11 is associated with
2355 the x86 register %st(0), but that's about all. Higher DWARF
2356 register numbers don't seem to be associated with anything in
2357 particular, and even for DWARF regno 11, SDB only seems to under-
2358 stand that it should say that a variable lives in %st(0) (when
2359 asked via an `=' command) if we said it was in DWARF regno 11,
2360 but SDB still prints garbage when asked for the value of the
2361 variable in question (via a `/' command).
2362 (Also note that the labels SDB prints for various FP stack regs
2363 when doing an `x' command are all wrong.)
2364 Note that these problems generally don't affect the native SVR4
2365 C compiler because it doesn't allow the use of -O with -g and
2366 because when it is *not* optimizing, it allocates a memory
2367 location for each floating-point variable, and the memory
2368 location is what gets described in the DWARF AT_location
2369 attribute for the variable in question.
2370 Regardless of the severe mental illness of the x86/svr4 SDB, we
2371 do something sensible here and we use the following DWARF
2372 register numbers. Note that these are all stack-top-relative
2374 11 for %st(0) (gcc regno = 8)
2375 12 for %st(1) (gcc regno = 9)
2376 13 for %st(2) (gcc regno = 10)
2377 14 for %st(3) (gcc regno = 11)
2378 15 for %st(4) (gcc regno = 12)
2379 16 for %st(5) (gcc regno = 13)
2380 17 for %st(6) (gcc regno = 14)
2381 18 for %st(7) (gcc regno = 15)
2383 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2385 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2386 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2387 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2388 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2389 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2391 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2394 /* Define parameter passing and return registers. */
2396 static int const x86_64_int_parameter_registers
[6] =
2398 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2401 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2403 CX_REG
, DX_REG
, R8_REG
, R9_REG
2406 static int const x86_64_int_return_registers
[4] =
2408 AX_REG
, DX_REG
, DI_REG
, SI_REG
2411 /* Additional registers that are clobbered by SYSV calls. */
2413 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2417 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2418 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2421 /* Define the structure for the machine field in struct function. */
2423 struct GTY(()) stack_local_entry
{
2424 unsigned short mode
;
2427 struct stack_local_entry
*next
;
2430 /* Structure describing stack frame layout.
2431 Stack grows downward:
2437 saved static chain if ix86_static_chain_on_stack
2439 saved frame pointer if frame_pointer_needed
2440 <- HARD_FRAME_POINTER
2446 <- sse_regs_save_offset
2449 [va_arg registers] |
2453 [padding2] | = to_allocate
2462 int outgoing_arguments_size
;
2464 /* The offsets relative to ARG_POINTER. */
2465 HOST_WIDE_INT frame_pointer_offset
;
2466 HOST_WIDE_INT hard_frame_pointer_offset
;
2467 HOST_WIDE_INT stack_pointer_offset
;
2468 HOST_WIDE_INT hfp_save_offset
;
2469 HOST_WIDE_INT reg_save_offset
;
2470 HOST_WIDE_INT sse_reg_save_offset
;
2472 /* When save_regs_using_mov is set, emit prologue using
2473 move instead of push instructions. */
2474 bool save_regs_using_mov
;
2477 /* Which cpu are we scheduling for. */
2478 enum attr_cpu ix86_schedule
;
2480 /* Which cpu are we optimizing for. */
2481 enum processor_type ix86_tune
;
2483 /* Which instruction set architecture to use. */
2484 enum processor_type ix86_arch
;
2486 /* True if processor has SSE prefetch instruction. */
2487 unsigned char x86_prefetch_sse
;
2489 /* -mstackrealign option */
2490 static const char ix86_force_align_arg_pointer_string
[]
2491 = "force_align_arg_pointer";
/* NOTE(review): "2493"-"2504" are fused source line numbers from a lossy
   export; each declaration below is one function-pointer variable whose
   parameter list has been split across lines.  */
/* Mode-dependent RTL generator hooks.  Each pointer selects between the
   SImode and DImode gen_* pattern at option-override time (presumably in
   ix86_option_override_internal, not visible here — TODO confirm), so the
   rest of the backend can emit e.g. an add or stack probe without testing
   TARGET_64BIT at every call site.  */
2493 static rtx (*ix86_gen_leave
) (void);
2494 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2495 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2496 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2497 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2498 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2499 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2500 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2501 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2502 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2503 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2504 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2506 /* Preferred alignment for stack boundary in bits. */
2507 unsigned int ix86_preferred_stack_boundary
;
2509 /* Alignment for incoming stack boundary in bits specified at
2511 static unsigned int ix86_user_incoming_stack_boundary
;
2513 /* Default alignment for incoming stack boundary in bits. */
2514 static unsigned int ix86_default_incoming_stack_boundary
;
2516 /* Alignment for incoming stack boundary in bits. */
2517 unsigned int ix86_incoming_stack_boundary
;
2519 /* Calling abi specific va_list type nodes. */
2520 static GTY(()) tree sysv_va_list_type_node
;
2521 static GTY(()) tree ms_va_list_type_node
;
2523 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2524 char internal_label_prefix
[16];
2525 int internal_label_prefix_len
;
2527 /* Fence to use after loop using movnt. */
2530 /* Register class used for passing given 64bit part of the argument.
2531 These represent classes as documented by the PS ABI, with the exception
2532 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2533 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2535 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2536 whenever possible (upper half does contain padding). */
2537 enum x86_64_reg_class
2540 X86_64_INTEGER_CLASS
,
2541 X86_64_INTEGERSI_CLASS
,
2548 X86_64_COMPLEX_X87_CLASS
,
2552 #define MAX_CLASSES 4
2554 /* Table of constants used by fldpi, fldln2, etc.... */
/* NOTE(review): "2555"/"2556" are fused source line numbers (lossy export).  */
/* Cache for the five extended-precision constants loadable by the x87
   fldpi/fldln2/fldlg2/fldl2e/fldl2t instructions (per the comment just
   above this span), filled lazily: _init records whether the table has
   been computed yet.  */
2555 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2556 static bool ext_80387_constants_init
= 0;
2559 static struct machine_function
* ix86_init_machine_status (void);
2560 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2561 static bool ix86_function_value_regno_p (const unsigned int);
2562 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2564 static rtx
ix86_static_chain (const_tree
, bool);
2565 static int ix86_function_regparm (const_tree
, const_tree
);
2566 static void ix86_compute_frame_layout (struct ix86_frame
*);
2567 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2569 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2570 static tree
ix86_canonical_va_list_type (tree
);
2571 static void predict_jump (int);
2572 static unsigned int split_stack_prologue_scratch_regno (void);
2573 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2575 enum ix86_function_specific_strings
2577 IX86_FUNCTION_SPECIFIC_ARCH
,
2578 IX86_FUNCTION_SPECIFIC_TUNE
,
2579 IX86_FUNCTION_SPECIFIC_MAX
2582 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2583 const char *, enum fpmath_unit
, bool);
2584 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2585 static void ix86_function_specific_save (struct cl_target_option
*);
2586 static void ix86_function_specific_restore (struct cl_target_option
*);
2587 static void ix86_function_specific_print (FILE *, int,
2588 struct cl_target_option
*);
2589 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2590 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2591 struct gcc_options
*);
2592 static bool ix86_can_inline_p (tree
, tree
);
2593 static void ix86_set_current_function (tree
);
2594 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2596 static enum calling_abi
ix86_function_abi (const_tree
);
2599 #ifndef SUBTARGET32_DEFAULT_CPU
2600 #define SUBTARGET32_DEFAULT_CPU "i386"
2603 /* Whether -mtune= or -march= were specified */
/* NOTE(review): "2604"/"2605" are fused source line numbers (lossy export).  */
/* Per the comment above: record whether -mtune= / -march= came from the
   user or were defaulted, so later diagnostics and target-attribute
   handling can tell the difference.  */
2604 static int ix86_tune_defaulted
;
2605 static int ix86_arch_specified
;
2607 /* Vectorization library interface and handlers. */
2608 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2610 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2611 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2613 /* Processor target table, indexed by processor number */
2616 const struct processor_costs
*cost
; /* Processor costs */
2617 const int align_loop
; /* Default alignments. */
2618 const int align_loop_max_skip
;
2619 const int align_jump
;
2620 const int align_jump_max_skip
;
2621 const int align_func
;
2624 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2626 {&i386_cost
, 4, 3, 4, 3, 4},
2627 {&i486_cost
, 16, 15, 16, 15, 16},
2628 {&pentium_cost
, 16, 7, 16, 7, 16},
2629 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2630 {&geode_cost
, 0, 0, 0, 0, 0},
2631 {&k6_cost
, 32, 7, 32, 7, 32},
2632 {&athlon_cost
, 16, 7, 16, 7, 16},
2633 {&pentium4_cost
, 0, 0, 0, 0, 0},
2634 {&k8_cost
, 16, 7, 16, 7, 16},
2635 {&nocona_cost
, 0, 0, 0, 0, 0},
2637 {&core_cost
, 16, 10, 16, 10, 16},
2639 {&core_cost
, 16, 10, 16, 10, 16},
2641 {&core_cost
, 16, 10, 16, 10, 16},
2642 {&generic32_cost
, 16, 7, 16, 7, 16},
2643 {&generic64_cost
, 16, 10, 16, 10, 16},
2644 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2645 {&bdver1_cost
, 16, 10, 16, 7, 11},
2646 {&bdver2_cost
, 16, 10, 16, 7, 11},
2647 {&bdver3_cost
, 16, 10, 16, 7, 11},
2648 {&btver1_cost
, 16, 10, 16, 7, 11},
2649 {&btver2_cost
, 16, 10, 16, 7, 11},
2650 {&atom_cost
, 16, 15, 16, 7, 16},
2651 {&slm_cost
, 16, 15, 16, 7, 16}
2654 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2689 gate_insert_vzeroupper (void)
2691 return TARGET_AVX
&& TARGET_VZEROUPPER
;
2695 rest_of_handle_insert_vzeroupper (void)
2699 /* vzeroupper instructions are inserted immediately after reload to
2700 account for possible spills from 256bit registers. The pass
2701 reuses mode switching infrastructure by re-running mode insertion
2702 pass, so disable entities that have already been processed. */
2703 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2704 ix86_optimize_mode_switching
[i
] = 0;
2706 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2708 /* Call optimize_mode_switching. */
2709 g
->get_passes ()->execute_pass_mode_switching ();
2715 const pass_data pass_data_insert_vzeroupper
=
2717 RTL_PASS
, /* type */
2718 "vzeroupper", /* name */
2719 OPTGROUP_NONE
, /* optinfo_flags */
2720 true, /* has_gate */
2721 true, /* has_execute */
2722 TV_NONE
, /* tv_id */
2723 0, /* properties_required */
2724 0, /* properties_provided */
2725 0, /* properties_destroyed */
2726 0, /* todo_flags_start */
2727 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2730 class pass_insert_vzeroupper
: public rtl_opt_pass
2733 pass_insert_vzeroupper(gcc::context
*ctxt
)
2734 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2737 /* opt_pass methods: */
2738 bool gate () { return gate_insert_vzeroupper (); }
2739 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2741 }; // class pass_insert_vzeroupper
2746 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2748 return new pass_insert_vzeroupper (ctxt
);
2751 /* Return true if a red-zone is in use. */
2754 ix86_using_red_zone (void)
2756 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2759 /* Return a string that documents the current -m options. The caller is
2760 responsible for freeing the string. */
2763 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2764 const char *tune
, enum fpmath_unit fpmath
,
2767 struct ix86_target_opts
2769 const char *option
; /* option string */
2770 HOST_WIDE_INT mask
; /* isa mask options */
2773 /* This table is ordered so that options like -msse4.2 that imply
2774 preceding options while match those first. */
2775 static struct ix86_target_opts isa_opts
[] =
2777 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2778 { "-mfma", OPTION_MASK_ISA_FMA
},
2779 { "-mxop", OPTION_MASK_ISA_XOP
},
2780 { "-mlwp", OPTION_MASK_ISA_LWP
},
2781 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2782 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2783 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2784 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2785 { "-msse3", OPTION_MASK_ISA_SSE3
},
2786 { "-msse2", OPTION_MASK_ISA_SSE2
},
2787 { "-msse", OPTION_MASK_ISA_SSE
},
2788 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2789 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2790 { "-mmmx", OPTION_MASK_ISA_MMX
},
2791 { "-mabm", OPTION_MASK_ISA_ABM
},
2792 { "-mbmi", OPTION_MASK_ISA_BMI
},
2793 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2794 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2795 { "-mhle", OPTION_MASK_ISA_HLE
},
2796 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2797 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2798 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2799 { "-madx", OPTION_MASK_ISA_ADX
},
2800 { "-mtbm", OPTION_MASK_ISA_TBM
},
2801 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2802 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2803 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2804 { "-maes", OPTION_MASK_ISA_AES
},
2805 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2806 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2807 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2808 { "-mf16c", OPTION_MASK_ISA_F16C
},
2809 { "-mrtm", OPTION_MASK_ISA_RTM
},
2810 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2811 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2815 static struct ix86_target_opts flag_opts
[] =
2817 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2818 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2819 { "-m80387", MASK_80387
},
2820 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2821 { "-malign-double", MASK_ALIGN_DOUBLE
},
2822 { "-mcld", MASK_CLD
},
2823 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2824 { "-mieee-fp", MASK_IEEE_FP
},
2825 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2826 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2827 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2828 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2829 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2830 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2831 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2832 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2833 { "-mrecip", MASK_RECIP
},
2834 { "-mrtd", MASK_RTD
},
2835 { "-msseregparm", MASK_SSEREGPARM
},
2836 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2837 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2838 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2839 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2840 { "-mvzeroupper", MASK_VZEROUPPER
},
2841 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2842 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2843 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2846 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2849 char target_other
[40];
2859 memset (opts
, '\0', sizeof (opts
));
2861 /* Add -march= option. */
2864 opts
[num
][0] = "-march=";
2865 opts
[num
++][1] = arch
;
2868 /* Add -mtune= option. */
2871 opts
[num
][0] = "-mtune=";
2872 opts
[num
++][1] = tune
;
2875 /* Add -m32/-m64/-mx32. */
2876 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2878 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2882 isa
&= ~ (OPTION_MASK_ISA_64BIT
2883 | OPTION_MASK_ABI_64
2884 | OPTION_MASK_ABI_X32
);
2888 opts
[num
++][0] = abi
;
2890 /* Pick out the options in isa options. */
2891 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2893 if ((isa
& isa_opts
[i
].mask
) != 0)
2895 opts
[num
++][0] = isa_opts
[i
].option
;
2896 isa
&= ~ isa_opts
[i
].mask
;
2900 if (isa
&& add_nl_p
)
2902 opts
[num
++][0] = isa_other
;
2903 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2907 /* Add flag options. */
2908 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2910 if ((flags
& flag_opts
[i
].mask
) != 0)
2912 opts
[num
++][0] = flag_opts
[i
].option
;
2913 flags
&= ~ flag_opts
[i
].mask
;
2917 if (flags
&& add_nl_p
)
2919 opts
[num
++][0] = target_other
;
2920 sprintf (target_other
, "(other flags: %#x)", flags
);
2923 /* Add -fpmath= option. */
2926 opts
[num
][0] = "-mfpmath=";
2927 switch ((int) fpmath
)
2930 opts
[num
++][1] = "387";
2934 opts
[num
++][1] = "sse";
2937 case FPMATH_387
| FPMATH_SSE
:
2938 opts
[num
++][1] = "sse+387";
2950 gcc_assert (num
< ARRAY_SIZE (opts
));
2952 /* Size the string. */
2954 sep_len
= (add_nl_p
) ? 3 : 1;
2955 for (i
= 0; i
< num
; i
++)
2958 for (j
= 0; j
< 2; j
++)
2960 len
+= strlen (opts
[i
][j
]);
2963 /* Build the string. */
2964 ret
= ptr
= (char *) xmalloc (len
);
2967 for (i
= 0; i
< num
; i
++)
2971 for (j
= 0; j
< 2; j
++)
2972 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2979 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2987 for (j
= 0; j
< 2; j
++)
2990 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2992 line_len
+= len2
[j
];
2997 gcc_assert (ret
+ len
>= ptr
);
3002 /* Return true, if profiling code should be emitted before
3003 prologue. Otherwise it returns false.
3004 Note: For x86 with "hotfix" it is sorried. */
3006 ix86_profile_before_prologue (void)
3008 return flag_fentry
!= 0;
3011 /* Function that is callable from the debugger to print the current
3014 ix86_debug_options (void)
3016 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
3017 ix86_arch_string
, ix86_tune_string
,
3022 fprintf (stderr
, "%s\n\n", opts
);
3026 fputs ("<no options>\n\n", stderr
);
3031 static const char *stringop_alg_names
[] = {
3033 #define DEF_ALG(alg, name) #name,
3034 #include "stringop.def"
3039 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
3040 The string is of the following form (or comma separated list of it):
3042 strategy_alg:max_size:[align|noalign]
3044 where the full size range for the strategy is either [0, max_size] or
3045 [min_size, max_size], in which min_size is the max_size + 1 of the
3046 preceding range. The last size range must have max_size == -1.
3051 -mmemcpy-strategy=libcall:-1:noalign
3053 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
3057 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
3059 This is to tell the compiler to use the following strategy for memset
3060 1) when the expected size is between [1, 16], use rep_8byte strategy;
3061 2) when the size is between [17, 2048], use vector_loop;
3062 3) when the size is > 2048, use libcall. */
3064 struct stringop_size_range
3073 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
3075 const struct stringop_algs
*default_algs
;
3076 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
3077 char *curr_range_str
, *next_range_str
;
3081 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
3083 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
3085 curr_range_str
= strategy_str
;
3093 next_range_str
= strchr (curr_range_str
, ',');
3095 *next_range_str
++ = '\0';
3097 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
3098 alg_name
, &maxs
, align
))
3100 error ("wrong arg %s to option %s", curr_range_str
,
3101 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3105 if (n
> 0 && (maxs
< (mins
= input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
3107 error ("size ranges of option %s should be increasing",
3108 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3112 for (i
= 0; i
< last_alg
; i
++)
3114 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
3116 alg
= (stringop_alg
) i
;
3123 error ("wrong stringop strategy name %s specified for option %s",
3125 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3129 input_ranges
[n
].min
= mins
;
3130 input_ranges
[n
].max
= maxs
;
3131 input_ranges
[n
].alg
= alg
;
3132 if (!strcmp (align
, "align"))
3133 input_ranges
[n
].noalign
= false;
3134 else if (!strcmp (align
, "noalign"))
3135 input_ranges
[n
].noalign
= true;
3138 error ("unknown alignment %s specified for option %s",
3139 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3143 curr_range_str
= next_range_str
;
3145 while (curr_range_str
);
3147 if (input_ranges
[n
- 1].max
!= -1)
3149 error ("the max value for the last size range should be -1"
3151 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3155 if (n
> MAX_STRINGOP_ALGS
)
3157 error ("too many size ranges specified in option %s",
3158 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3162 /* Now override the default algs array. */
3163 for (i
= 0; i
< n
; i
++)
3165 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
3166 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
3167 = input_ranges
[i
].alg
;
3168 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
3169 = input_ranges
[i
].noalign
;
3174 /* Override various settings based on options. If MAIN_ARGS_P, the
3175 options are from the command line, otherwise they are from
3179 ix86_option_override_internal (bool main_args_p
)
3182 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3183 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
3188 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3189 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3190 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3191 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3192 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3193 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3194 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3195 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3196 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3197 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3198 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3199 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3200 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3201 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3202 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3203 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3204 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3205 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3206 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3207 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3208 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3209 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3210 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3211 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3212 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3213 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3214 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3215 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3216 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3217 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3218 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3219 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3220 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3221 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3222 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3223 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3224 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3225 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3226 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3227 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3229 /* if this reaches 64, need to widen struct pta flags below */
3233 const char *const name
; /* processor name or nickname. */
3234 const enum processor_type processor
;
3235 const enum attr_cpu schedule
;
3236 const unsigned HOST_WIDE_INT flags
;
3238 const processor_alias_table
[] =
3240 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3241 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3242 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3243 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3244 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3245 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3246 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3247 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3248 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3249 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3250 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3251 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3252 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3253 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3254 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3255 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3256 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3257 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3258 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3259 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3260 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3261 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3262 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3263 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3264 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3265 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3266 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3267 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3268 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3269 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3270 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3271 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3272 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3273 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3274 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
3275 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3276 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3277 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3278 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3279 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
3280 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3281 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3282 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3283 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3284 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3285 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3286 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3287 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3288 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3289 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3291 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3292 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3293 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3294 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3295 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3296 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_MOVBE
3298 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3299 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3300 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3301 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3302 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3303 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3304 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3305 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3306 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3307 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3308 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3309 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3310 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3311 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3312 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3313 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3314 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3315 {"k8", PROCESSOR_K8
, CPU_K8
,
3316 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3317 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3318 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3319 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3320 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3321 {"opteron", PROCESSOR_K8
, CPU_K8
,
3322 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3323 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3324 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3325 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3326 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3327 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3328 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3329 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3330 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3331 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3332 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3333 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3334 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3335 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3336 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3337 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3338 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3339 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3340 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3341 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3342 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3343 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3344 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3345 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3346 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3347 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3348 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3349 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3350 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3351 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3352 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3353 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3354 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3355 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3356 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3357 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3358 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3359 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3360 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3361 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3362 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3363 | PTA_FXSR
| PTA_XSAVE
},
3364 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3365 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3366 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3367 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3368 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3369 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3371 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3372 PTA_HLE
/* flags are only used for -march switch. */ },
3373 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3375 | PTA_HLE
/* flags are only used for -march switch. */ },
3378 /* -mrecip options. */
3381 const char *string
; /* option name */
3382 unsigned int mask
; /* mask bits to set */
3384 const recip_options
[] =
3386 { "all", RECIP_MASK_ALL
},
3387 { "none", RECIP_MASK_NONE
},
3388 { "div", RECIP_MASK_DIV
},
3389 { "sqrt", RECIP_MASK_SQRT
},
3390 { "vec-div", RECIP_MASK_VEC_DIV
},
3391 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3394 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3396 /* Set up prefix/suffix so the error messages refer to either the command
3397 line argument, or the attribute(target). */
3406 prefix
= "option(\"";
3411 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3412 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3413 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3414 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3415 #ifdef TARGET_BI_ARCH
3418 #if TARGET_BI_ARCH == 1
3419 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3420 is on and OPTION_MASK_ABI_X32 is off. We turn off
3421 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3424 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3426 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3427 on and OPTION_MASK_ABI_64 is off. We turn off
3428 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3431 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3438 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3439 OPTION_MASK_ABI_64 for TARGET_X32. */
3440 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3441 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3443 else if (TARGET_LP64
)
3445 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3446 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3447 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3448 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3451 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3452 SUBTARGET_OVERRIDE_OPTIONS
;
3455 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3456 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3459 /* -fPIC is the default for x86_64. */
3460 if (TARGET_MACHO
&& TARGET_64BIT
)
3463 /* Need to check -mtune=generic first. */
3464 if (ix86_tune_string
)
3466 if (!strcmp (ix86_tune_string
, "generic")
3467 || !strcmp (ix86_tune_string
, "i686")
3468 /* As special support for cross compilers we read -mtune=native
3469 as -mtune=generic. With native compilers we won't see the
3470 -mtune=native, as it was changed by the driver. */
3471 || !strcmp (ix86_tune_string
, "native"))
3474 ix86_tune_string
= "generic64";
3476 ix86_tune_string
= "generic32";
3478 /* If this call is for setting the option attribute, allow the
3479 generic32/generic64 that was previously set. */
3480 else if (!main_args_p
3481 && (!strcmp (ix86_tune_string
, "generic32")
3482 || !strcmp (ix86_tune_string
, "generic64")))
3484 else if (!strncmp (ix86_tune_string
, "generic", 7))
3485 error ("bad value (%s) for %stune=%s %s",
3486 ix86_tune_string
, prefix
, suffix
, sw
);
3487 else if (!strcmp (ix86_tune_string
, "x86-64"))
3488 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3489 "%stune=k8%s or %stune=generic%s instead as appropriate",
3490 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3494 if (ix86_arch_string
)
3495 ix86_tune_string
= ix86_arch_string
;
3496 if (!ix86_tune_string
)
3498 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3499 ix86_tune_defaulted
= 1;
3502 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3503 need to use a sensible tune option. */
3504 if (!strcmp (ix86_tune_string
, "generic")
3505 || !strcmp (ix86_tune_string
, "x86-64")
3506 || !strcmp (ix86_tune_string
, "i686"))
3509 ix86_tune_string
= "generic64";
3511 ix86_tune_string
= "generic32";
3515 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3517 /* rep; movq isn't available in 32-bit code. */
3518 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3519 ix86_stringop_alg
= no_stringop
;
3522 if (!ix86_arch_string
)
3523 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3525 ix86_arch_specified
= 1;
3527 if (global_options_set
.x_ix86_pmode
)
3529 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3530 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3531 error ("address mode %qs not supported in the %s bit mode",
3532 TARGET_64BIT
? "short" : "long",
3533 TARGET_64BIT
? "64" : "32");
3536 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3538 if (!global_options_set
.x_ix86_abi
)
3539 ix86_abi
= DEFAULT_ABI
;
3541 if (global_options_set
.x_ix86_cmodel
)
3543 switch (ix86_cmodel
)
3548 ix86_cmodel
= CM_SMALL_PIC
;
3550 error ("code model %qs not supported in the %s bit mode",
3557 ix86_cmodel
= CM_MEDIUM_PIC
;
3559 error ("code model %qs not supported in the %s bit mode",
3561 else if (TARGET_X32
)
3562 error ("code model %qs not supported in x32 mode",
3569 ix86_cmodel
= CM_LARGE_PIC
;
3571 error ("code model %qs not supported in the %s bit mode",
3573 else if (TARGET_X32
)
3574 error ("code model %qs not supported in x32 mode",
3580 error ("code model %s does not support PIC mode", "32");
3582 error ("code model %qs not supported in the %s bit mode",
3589 error ("code model %s does not support PIC mode", "kernel");
3590 ix86_cmodel
= CM_32
;
3593 error ("code model %qs not supported in the %s bit mode",
3603 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3604 use of rip-relative addressing. This eliminates fixups that
3605 would otherwise be needed if this object is to be placed in a
3606 DLL, and is essentially just as efficient as direct addressing. */
3607 if (TARGET_64BIT
&& (TARGET_RDOS
|| TARGET_PECOFF
))
3608 ix86_cmodel
= CM_MEDIUM_PIC
, flag_pic
= 1;
3609 else if (TARGET_64BIT
)
3610 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3612 ix86_cmodel
= CM_32
;
3614 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3616 error ("-masm=intel not supported in this configuration");
3617 ix86_asm_dialect
= ASM_ATT
;
3619 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3620 sorry ("%i-bit mode not compiled in",
3621 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3623 for (i
= 0; i
< pta_size
; i
++)
3624 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3626 ix86_schedule
= processor_alias_table
[i
].schedule
;
3627 ix86_arch
= processor_alias_table
[i
].processor
;
3628 /* Default cpu tuning to the architecture. */
3629 ix86_tune
= ix86_arch
;
3631 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3632 error ("CPU you selected does not support x86-64 "
3635 if (processor_alias_table
[i
].flags
& PTA_MMX
3636 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3637 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3638 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3639 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3640 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3641 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3642 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3643 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3644 if (processor_alias_table
[i
].flags
& PTA_SSE
3645 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3646 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3647 if (processor_alias_table
[i
].flags
& PTA_SSE2
3648 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3649 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3650 if (processor_alias_table
[i
].flags
& PTA_SSE3
3651 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3652 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3653 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3654 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3655 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3656 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3657 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3658 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3659 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3660 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3661 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3662 if (processor_alias_table
[i
].flags
& PTA_AVX
3663 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3664 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3665 if (processor_alias_table
[i
].flags
& PTA_AVX2
3666 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3667 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3668 if (processor_alias_table
[i
].flags
& PTA_FMA
3669 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3670 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3671 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3672 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3673 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3674 if (processor_alias_table
[i
].flags
& PTA_FMA4
3675 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3676 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3677 if (processor_alias_table
[i
].flags
& PTA_XOP
3678 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3679 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3680 if (processor_alias_table
[i
].flags
& PTA_LWP
3681 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3682 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3683 if (processor_alias_table
[i
].flags
& PTA_ABM
3684 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3685 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3686 if (processor_alias_table
[i
].flags
& PTA_BMI
3687 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3688 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3689 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3690 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3691 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3692 if (processor_alias_table
[i
].flags
& PTA_TBM
3693 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3694 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3695 if (processor_alias_table
[i
].flags
& PTA_BMI2
3696 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3697 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3698 if (processor_alias_table
[i
].flags
& PTA_CX16
3699 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3700 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3701 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3702 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3703 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3704 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3705 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3706 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3707 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3708 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3709 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3710 if (processor_alias_table
[i
].flags
& PTA_AES
3711 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3712 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3713 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3714 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3715 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3716 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3717 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3718 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3719 if (processor_alias_table
[i
].flags
& PTA_RDRND
3720 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3721 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3722 if (processor_alias_table
[i
].flags
& PTA_F16C
3723 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3724 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3725 if (processor_alias_table
[i
].flags
& PTA_RTM
3726 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3727 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3728 if (processor_alias_table
[i
].flags
& PTA_HLE
3729 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3730 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3731 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3732 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3733 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3734 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3735 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3736 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3737 if (processor_alias_table
[i
].flags
& PTA_ADX
3738 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3739 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3740 if (processor_alias_table
[i
].flags
& PTA_FXSR
3741 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3742 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3743 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3744 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3745 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3746 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3747 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3748 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3749 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3750 x86_prefetch_sse
= true;
3755 if (!strcmp (ix86_arch_string
, "generic"))
3756 error ("generic CPU can be used only for %stune=%s %s",
3757 prefix
, suffix
, sw
);
3758 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3759 error ("bad value (%s) for %sarch=%s %s",
3760 ix86_arch_string
, prefix
, suffix
, sw
);
3762 ix86_arch_mask
= 1u << ix86_arch
;
3763 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3764 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3766 for (i
= 0; i
< pta_size
; i
++)
3767 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3769 ix86_schedule
= processor_alias_table
[i
].schedule
;
3770 ix86_tune
= processor_alias_table
[i
].processor
;
3773 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3775 if (ix86_tune_defaulted
)
3777 ix86_tune_string
= "x86-64";
3778 for (i
= 0; i
< pta_size
; i
++)
3779 if (! strcmp (ix86_tune_string
,
3780 processor_alias_table
[i
].name
))
3782 ix86_schedule
= processor_alias_table
[i
].schedule
;
3783 ix86_tune
= processor_alias_table
[i
].processor
;
3786 error ("CPU you selected does not support x86-64 "
3792 /* Adjust tuning when compiling for 32-bit ABI. */
3795 case PROCESSOR_GENERIC64
:
3796 ix86_tune
= PROCESSOR_GENERIC32
;
3797 ix86_schedule
= CPU_PENTIUMPRO
;
3804 /* Intel CPUs have always interpreted SSE prefetch instructions as
3805 NOPs; so, we can enable SSE prefetch instructions even when
3806 -mtune (rather than -march) points us to a processor that has them.
3807 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3808 higher processors. */
3810 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3811 x86_prefetch_sse
= true;
3815 if (ix86_tune_specified
&& i
== pta_size
)
3816 error ("bad value (%s) for %stune=%s %s",
3817 ix86_tune_string
, prefix
, suffix
, sw
);
3819 ix86_tune_mask
= 1u << ix86_tune
;
3820 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3821 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3823 if (ix86_tune_ctrl_string
)
3825 /* parse the tune ctrl string in the following form:
3826 [^]tune_name1,[^]tune_name2,..a */
3827 char *next_feature_string
= NULL
;
3828 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
3829 char *orig
= curr_feature_string
;
3833 next_feature_string
= strchr (curr_feature_string
, ',');
3834 if (next_feature_string
)
3835 *next_feature_string
++ = '\0';
3836 if (*curr_feature_string
== '^')
3838 curr_feature_string
++;
3841 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
3843 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
3845 ix86_tune_features
[i
] = !clear
;
3849 if (i
== X86_TUNE_LAST
)
3850 warning (0, "Unknown parameter to option -mtune-ctrl: %s",
3851 clear
? curr_feature_string
- 1 : curr_feature_string
);
3852 curr_feature_string
= next_feature_string
;
3853 } while (curr_feature_string
);
3857 #ifndef USE_IX86_FRAME_POINTER
3858 #define USE_IX86_FRAME_POINTER 0
3861 #ifndef USE_X86_64_FRAME_POINTER
3862 #define USE_X86_64_FRAME_POINTER 0
3865 /* Set the default values for switches whose default depends on TARGET_64BIT
3866 in case they weren't overwritten by command line options. */
3869 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3870 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3871 if (flag_asynchronous_unwind_tables
== 2)
3872 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3873 if (flag_pcc_struct_return
== 2)
3874 flag_pcc_struct_return
= 0;
3878 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3879 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3880 if (flag_asynchronous_unwind_tables
== 2)
3881 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3882 if (flag_pcc_struct_return
== 2)
3883 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3886 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3888 ix86_cost
= &ix86_size_cost
;
3890 ix86_cost
= ix86_tune_cost
;
3892 /* Arrange to set up i386_stack_locals for all functions. */
3893 init_machine_status
= ix86_init_machine_status
;
3895 /* Validate -mregparm= value. */
3896 if (global_options_set
.x_ix86_regparm
)
3899 warning (0, "-mregparm is ignored in 64-bit mode");
3900 if (ix86_regparm
> REGPARM_MAX
)
3902 error ("-mregparm=%d is not between 0 and %d",
3903 ix86_regparm
, REGPARM_MAX
);
3908 ix86_regparm
= REGPARM_MAX
;
3910 /* Default align_* from the processor table. */
3911 if (align_loops
== 0)
3913 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3914 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3916 if (align_jumps
== 0)
3918 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3919 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3921 if (align_functions
== 0)
3923 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3926 /* Provide default for -mbranch-cost= value. */
3927 if (!global_options_set
.x_ix86_branch_cost
)
3928 ix86_branch_cost
= ix86_cost
->branch_cost
;
3932 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3934 /* Enable by default the SSE and MMX builtins. Do allow the user to
3935 explicitly disable any of these. In particular, disabling SSE and
3936 MMX for kernel code is extremely useful. */
3937 if (!ix86_arch_specified
)
3939 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3940 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3943 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3947 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3949 if (!ix86_arch_specified
)
3951 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3953 /* i386 ABI does not specify red zone. It still makes sense to use it
3954 when programmer takes care to stack from being destroyed. */
3955 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3956 target_flags
|= MASK_NO_RED_ZONE
;
3959 /* Keep nonleaf frame pointers. */
3960 if (flag_omit_frame_pointer
)
3961 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3962 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3963 flag_omit_frame_pointer
= 1;
3965 /* If we're doing fast math, we don't care about comparison order
3966 wrt NaNs. This lets us use a shorter comparison sequence. */
3967 if (flag_finite_math_only
)
3968 target_flags
&= ~MASK_IEEE_FP
;
3970 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3971 since the insns won't need emulation. */
3972 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3973 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3975 /* Likewise, if the target doesn't have a 387, or we've specified
3976 software floating point, don't use 387 inline intrinsics. */
3978 target_flags
|= MASK_NO_FANCY_MATH_387
;
3980 /* Turn on MMX builtins for -msse. */
3982 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3984 /* Enable SSE prefetch. */
3985 if (TARGET_SSE
|| (TARGET_PRFCHW
&& !TARGET_3DNOW
))
3986 x86_prefetch_sse
= true;
3988 /* Enable prefetch{,w} instructions for -m3dnow. */
3990 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
& ~ix86_isa_flags_explicit
;
3992 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3993 if (TARGET_SSE4_2
|| TARGET_ABM
)
3994 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3996 /* Enable lzcnt instruction for -mabm. */
3998 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
4000 /* Validate -mpreferred-stack-boundary= value or default it to
4001 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4002 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
4003 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
4005 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
4006 int max
= (TARGET_SEH
? 4 : 12);
4008 if (ix86_preferred_stack_boundary_arg
< min
4009 || ix86_preferred_stack_boundary_arg
> max
)
4012 error ("-mpreferred-stack-boundary is not supported "
4015 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4016 ix86_preferred_stack_boundary_arg
, min
, max
);
4019 ix86_preferred_stack_boundary
4020 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
4023 /* Set the default value for -mstackrealign. */
4024 if (ix86_force_align_arg_pointer
== -1)
4025 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
4027 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
4029 /* Validate -mincoming-stack-boundary= value or default it to
4030 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4031 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
4032 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
4034 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
4035 || ix86_incoming_stack_boundary_arg
> 12)
4036 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4037 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
4040 ix86_user_incoming_stack_boundary
4041 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
4042 ix86_incoming_stack_boundary
4043 = ix86_user_incoming_stack_boundary
;
4047 /* Accept -msseregparm only if at least SSE support is enabled. */
4048 if (TARGET_SSEREGPARM
4050 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
4052 if (global_options_set
.x_ix86_fpmath
)
4054 if (ix86_fpmath
& FPMATH_SSE
)
4058 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4059 ix86_fpmath
= FPMATH_387
;
4061 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
4063 warning (0, "387 instruction set disabled, using SSE arithmetics");
4064 ix86_fpmath
= FPMATH_SSE
;
4069 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
4071 /* If the i387 is disabled, then do not return values in it. */
4073 target_flags
&= ~MASK_FLOAT_RETURNS
;
4075 /* Use external vectorized library in vectorizing intrinsics. */
4076 if (global_options_set
.x_ix86_veclibabi_type
)
4077 switch (ix86_veclibabi_type
)
4079 case ix86_veclibabi_type_svml
:
4080 ix86_veclib_handler
= ix86_veclibabi_svml
;
4083 case ix86_veclibabi_type_acml
:
4084 ix86_veclib_handler
= ix86_veclibabi_acml
;
4091 if ((!USE_IX86_FRAME_POINTER
4092 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
4093 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
4095 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
4097 /* ??? Unwind info is not correct around the CFG unless either a frame
4098 pointer is present or M_A_O_A is set. Fixing this requires rewriting
4099 unwind info generation to be aware of the CFG and propagating states
4101 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
4102 || flag_exceptions
|| flag_non_call_exceptions
)
4103 && flag_omit_frame_pointer
4104 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
4106 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
4107 warning (0, "unwind tables currently require either a frame pointer "
4108 "or %saccumulate-outgoing-args%s for correctness",
4110 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
4113 /* If stack probes are required, the space used for large function
4114 arguments on the stack must also be probed, so enable
4115 -maccumulate-outgoing-args so this happens in the prologue. */
4116 if (TARGET_STACK_PROBE
4117 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
4119 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
4120 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4121 "for correctness", prefix
, suffix
);
4122 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
4125 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4128 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
4129 p
= strchr (internal_label_prefix
, 'X');
4130 internal_label_prefix_len
= p
- internal_label_prefix
;
4134 /* When scheduling description is not available, disable scheduler pass
4135 so it won't slow down the compilation and make x87 code slower. */
4136 if (!TARGET_SCHEDULE
)
4137 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
4139 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
4140 ix86_tune_cost
->simultaneous_prefetches
,
4141 global_options
.x_param_values
,
4142 global_options_set
.x_param_values
);
4143 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
4144 ix86_tune_cost
->prefetch_block
,
4145 global_options
.x_param_values
,
4146 global_options_set
.x_param_values
);
4147 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
4148 ix86_tune_cost
->l1_cache_size
,
4149 global_options
.x_param_values
,
4150 global_options_set
.x_param_values
);
4151 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
4152 ix86_tune_cost
->l2_cache_size
,
4153 global_options
.x_param_values
,
4154 global_options_set
.x_param_values
);
4156 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4157 if (flag_prefetch_loop_arrays
< 0
4159 && (optimize
>= 3 || flag_profile_use
)
4160 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
4161 flag_prefetch_loop_arrays
= 1;
4163 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4164 can be optimized to ap = __builtin_next_arg (0). */
4165 if (!TARGET_64BIT
&& !flag_split_stack
)
4166 targetm
.expand_builtin_va_start
= NULL
;
4170 ix86_gen_leave
= gen_leave_rex64
;
4171 if (Pmode
== DImode
)
4173 ix86_gen_monitor
= gen_sse3_monitor64_di
;
4174 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
4175 ix86_gen_tls_local_dynamic_base_64
4176 = gen_tls_local_dynamic_base_64_di
;
4180 ix86_gen_monitor
= gen_sse3_monitor64_si
;
4181 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
4182 ix86_gen_tls_local_dynamic_base_64
4183 = gen_tls_local_dynamic_base_64_si
;
4188 ix86_gen_leave
= gen_leave
;
4189 ix86_gen_monitor
= gen_sse3_monitor
;
4192 if (Pmode
== DImode
)
4194 ix86_gen_add3
= gen_adddi3
;
4195 ix86_gen_sub3
= gen_subdi3
;
4196 ix86_gen_sub3_carry
= gen_subdi3_carry
;
4197 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
4198 ix86_gen_andsp
= gen_anddi3
;
4199 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
4200 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
4201 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
4205 ix86_gen_add3
= gen_addsi3
;
4206 ix86_gen_sub3
= gen_subsi3
;
4207 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4208 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4209 ix86_gen_andsp
= gen_andsi3
;
4210 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4211 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4212 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4216 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4218 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
4221 if (!TARGET_64BIT
&& flag_pic
)
4223 if (flag_fentry
> 0)
4224 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4228 else if (TARGET_SEH
)
4230 if (flag_fentry
== 0)
4231 sorry ("-mno-fentry isn%'t compatible with SEH");
4234 else if (flag_fentry
< 0)
4236 #if defined(PROFILE_BEFORE_PROLOGUE)
4243 /* When not optimize for size, enable vzeroupper optimization for
4244 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4245 AVX unaligned load/store. */
4248 if (flag_expensive_optimizations
4249 && !(target_flags_explicit
& MASK_VZEROUPPER
))
4250 target_flags
|= MASK_VZEROUPPER
;
4251 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
4252 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4253 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4254 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
4255 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4256 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4257 /* Enable 128-bit AVX instruction generation
4258 for the auto-vectorizer. */
4259 if (TARGET_AVX128_OPTIMAL
4260 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
4261 target_flags
|= MASK_PREFER_AVX128
;
4264 if (ix86_recip_name
)
4266 char *p
= ASTRDUP (ix86_recip_name
);
4268 unsigned int mask
, i
;
4271 while ((q
= strtok (p
, ",")) != NULL
)
4282 if (!strcmp (q
, "default"))
4283 mask
= RECIP_MASK_ALL
;
4286 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4287 if (!strcmp (q
, recip_options
[i
].string
))
4289 mask
= recip_options
[i
].mask
;
4293 if (i
== ARRAY_SIZE (recip_options
))
4295 error ("unknown option for -mrecip=%s", q
);
4297 mask
= RECIP_MASK_NONE
;
4301 recip_mask_explicit
|= mask
;
4303 recip_mask
&= ~mask
;
4310 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
4311 else if (target_flags_explicit
& MASK_RECIP
)
4312 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
4314 /* Default long double to 64-bit for Bionic. */
4315 if (TARGET_HAS_BIONIC
4316 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
4317 target_flags
|= MASK_LONG_DOUBLE_64
;
4319 /* Save the initial options in case the user does function specific
4322 target_option_default_node
= target_option_current_node
4323 = build_target_option_node ();
4325 /* Handle stack protector */
4326 if (!global_options_set
.x_ix86_stack_protector_guard
)
4327 ix86_stack_protector_guard
= TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4329 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4330 if (ix86_tune_memcpy_strategy
)
4332 char *str
= xstrdup (ix86_tune_memcpy_strategy
);
4333 ix86_parse_stringop_strategy_string (str
, false);
4337 if (ix86_tune_memset_strategy
)
4339 char *str
= xstrdup (ix86_tune_memset_strategy
);
4340 ix86_parse_stringop_strategy_string (str
, true);
4345 /* Implement the TARGET_OPTION_OVERRIDE hook.
   Registers the vzeroupper-insertion pass to run right after "reload"
   and then performs the main option-override work via
   ix86_option_override_internal (true).
   NOTE(review): the extraction dropped original lines here (numbering
   jumps 4345->4348->4350...), so the return type, braces and part of the
   pass-info initializer are missing from this view.  */
4348 ix86_option_override (void)
4350 opt_pass
*pass_insert_vzeroupper
= make_pass_insert_vzeroupper (g
);
4351 static struct register_pass_info insert_vzeroupper_info
4352 = { pass_insert_vzeroupper
, "reload",
4353 1, PASS_POS_INSERT_AFTER
4356 ix86_option_override_internal (true);
4359 /* This needs to be done at start up. It's convenient to do it here. */
4360 register_pass (&insert_vzeroupper_info
);
4363 /* Update register usage after having seen the compiler flags.
   Fixes the PIC register; squashes register sets that the selected
   target cannot use (REX integer/SSE regs, MMX, SSE, x87); and
   recomputes CLOBBERED_REGS from the ABI-dependent call-clobber mask.
   NOTE(review): extraction dropped lines (numbering gaps); loop variable
   declarations and some braces are not visible in this view.  */
4366 ix86_conditional_register_usage (void)
4371 /* The PIC register, if it exists, is fixed. */
4372 j
= PIC_OFFSET_TABLE_REGNUM
;
4373 if (j
!= INVALID_REGNUM
)
4374 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4376 /* For 32-bit targets, squash the REX registers (r8-r15, xmm8-xmm15):
   they only exist in 64-bit mode.  Clearing reg_names hides them from
   assembly output and debug dumps.  */
4379 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4380 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4381 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4382 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4385 /* See the definition of CALL_USED_REGISTERS in i386.h.
   c_mask selects which conditional call_used_regs entries apply:
   bit 3 for the 64-bit MS ABI, bit 2 for 64-bit SysV (other cases
   are on lines not visible here).  */
4386 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4387 : TARGET_64BIT
? (1 << 2)
4390 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4392 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4394 /* Set/reset conditionally defined registers from
4395 CALL_USED_REGISTERS initializer.  Values > 1 encode a per-ABI
   bitmask rather than a plain boolean.  */
4396 if (call_used_regs
[i
] > 1)
4397 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4399 /* Calculate registers of CLOBBERED_REGS register set
4400 as call used registers from GENERAL_REGS register set. */
4401 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4402 && call_used_regs
[i
])
4403 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4406 /* If MMX is disabled, squash the registers. */
4408 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4409 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4410 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4412 /* If SSE is disabled, squash the registers. */
4414 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4415 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4416 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4418 /* If the FPU is disabled, squash the registers. */
4419 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4420 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4421 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4422 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4426 /* Save the current options
   Copies the current ix86 global option state into *PTR (a
   cl_target_option), for later restore by
   ix86_function_specific_restore.  Several destination fields are
   narrow (char); the gcc_asserts below verify the stored values
   round-trip without truncation.  */
4429 ix86_function_specific_save (struct cl_target_option
*ptr
)
4431 ptr
->arch
= ix86_arch
;
4432 ptr
->schedule
= ix86_schedule
;
4433 ptr
->tune
= ix86_tune
;
4434 ptr
->branch_cost
= ix86_branch_cost
;
4435 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4436 ptr
->arch_specified
= ix86_arch_specified
;
4437 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4438 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4439 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4441 /* The fields are char but the variables are not; make sure the
4442 values fit in the fields. */
4443 gcc_assert (ptr
->arch
== ix86_arch
);
4444 gcc_assert (ptr
->schedule
== ix86_schedule
);
4445 gcc_assert (ptr
->tune
== ix86_tune
);
4446 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4449 /* Restore the current options
   Inverse of ix86_function_specific_save: copies option state from
   *PTR back into the ix86 globals, then rebuilds the cached
   per-arch / per-tune feature bitmaps, but only when the arch or
   tune actually changed (the recompute loops are O(X86_*_LAST)).  */
4452 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4454 enum processor_type old_tune
= ix86_tune
;
4455 enum processor_type old_arch
= ix86_arch
;
4456 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4459 ix86_arch
= (enum processor_type
) ptr
->arch
;
4460 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4461 ix86_tune
= (enum processor_type
) ptr
->tune
;
4462 ix86_branch_cost
= ptr
->branch_cost
;
4463 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4464 ix86_arch_specified
= ptr
->arch_specified
;
4465 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4466 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4467 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4469 /* Recreate the arch feature tests if the arch changed */
4470 if (old_arch
!= ix86_arch
)
4472 ix86_arch_mask
= 1u << ix86_arch
;
4473 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4474 ix86_arch_features
[i
]
4475 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4478 /* Recreate the tune optimization tests */
4479 if (old_tune
!= ix86_tune
)
4481 ix86_tune_mask
= 1u << ix86_tune
;
4482 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4483 ix86_tune_features
[i
]
4484 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4488 /* Print the current options
   Debug helper: dumps the saved target options in *PTR to FILE,
   indented by INDENT columns (the "%*s" + indent/"" idiom).
   target_string is heap-allocated by ix86_target_string and freed
   at the end.  NOTE(review): lines are missing from this extract
   (e.g. the target_string declaration and NULL checks).  */
4491 ix86_function_specific_print (FILE *file
, int indent
,
4492 struct cl_target_option
*ptr
)
4495 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4496 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4498 fprintf (file
, "%*sarch = %d (%s)\n",
4501 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4502 ? cpu_names
[ptr
->arch
]
4505 fprintf (file
, "%*stune = %d (%s)\n",
4508 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4509 ? cpu_names
[ptr
->tune
]
4512 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4516 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4517 free (target_string
);
4522 /* Inner function to process the attribute((target(...))), take an argument and
4523 set the current options from the argument. If we have a list, recursively go
   over the list.
   Each comma-separated token of the attribute string is matched against
   the attrs[] table below and applied as an ISA option, a yes/no
   target_flags mask, a string option (saved into P_STRINGS for arch=/
   tune=), or an enum option (fpmath=).  ENUM_OPTS_SET records which
   enum options were explicitly set.
   NOTE(review): the extract has large gaps in the original line
   numbering; declarations (attrs[], opt, opt_set_p, ch, orig_p, ...)
   and most braces/returns are missing from this view.  */
4527 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4528 struct gcc_options
*enum_opts_set
)
4533 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4534 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4535 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4536 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4537 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4553 enum ix86_opt_type type
;
4558 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4559 IX86_ATTR_ISA ("abm", OPT_mabm
),
4560 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4561 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4562 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4563 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4564 IX86_ATTR_ISA ("aes", OPT_maes
),
4565 IX86_ATTR_ISA ("avx", OPT_mavx
),
4566 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4567 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4568 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4569 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4570 IX86_ATTR_ISA ("sse", OPT_msse
),
4571 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4572 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4573 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4574 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4575 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4576 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4577 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4578 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4579 IX86_ATTR_ISA ("fma", OPT_mfma
),
4580 IX86_ATTR_ISA ("xop", OPT_mxop
),
4581 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4582 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4583 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4584 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4585 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4586 IX86_ATTR_ISA ("hle", OPT_mhle
),
4587 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4588 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4589 IX86_ATTR_ISA ("adx", OPT_madx
),
4590 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4591 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4592 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4595 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4597 /* string options */
4598 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4599 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4602 IX86_ATTR_YES ("cld",
4606 IX86_ATTR_NO ("fancy-math-387",
4607 OPT_mfancy_math_387
,
4608 MASK_NO_FANCY_MATH_387
),
4610 IX86_ATTR_YES ("ieee-fp",
4614 IX86_ATTR_YES ("inline-all-stringops",
4615 OPT_minline_all_stringops
,
4616 MASK_INLINE_ALL_STRINGOPS
),
4618 IX86_ATTR_YES ("inline-stringops-dynamically",
4619 OPT_minline_stringops_dynamically
,
4620 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4622 IX86_ATTR_NO ("align-stringops",
4623 OPT_mno_align_stringops
,
4624 MASK_NO_ALIGN_STRINGOPS
),
4626 IX86_ATTR_YES ("recip",
4632 /* If this is a list, recurse to get the options. */
4633 if (TREE_CODE (args
) == TREE_LIST
)
4637 for (; args
; args
= TREE_CHAIN (args
))
4638 if (TREE_VALUE (args
)
4639 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4640 p_strings
, enum_opts_set
))
4646 else if (TREE_CODE (args
) != STRING_CST
)
4648 error ("attribute %<target%> argument not a string");
4652 /* Handle multiple arguments separated by commas. */
4653 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4655 while (next_optstr
&& *next_optstr
!= '\0')
4657 char *p
= next_optstr
;
4659 char *comma
= strchr (next_optstr
, ',');
4660 const char *opt_string
;
4661 size_t len
, opt_len
;
4666 enum ix86_opt_type type
= ix86_opt_unknown
;
4672 len
= comma
- next_optstr
;
4673 next_optstr
= comma
+ 1;
4681 /* Recognize no-xxx.  A "no-" prefix inverts the option's sense.  */
4682 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4691 /* Find the option.  Linear scan of attrs[]; first-character check
   (ch) is a cheap filter before the memcmp.  */
4694 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4696 type
= attrs
[i
].type
;
4697 opt_len
= attrs
[i
].len
;
4698 if (ch
== attrs
[i
].string
[0]
4699 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4702 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4705 mask
= attrs
[i
].mask
;
4706 opt_string
= attrs
[i
].string
;
4711 /* Process the option. */
4714 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4718 else if (type
== ix86_opt_isa
)
4720 struct cl_decoded_option decoded
;
4722 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4723 ix86_handle_option (&global_options
, &global_options_set
,
4724 &decoded
, input_location
);
4727 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4729 if (type
== ix86_opt_no
)
4730 opt_set_p
= !opt_set_p
;
4733 target_flags
|= mask
;
4735 target_flags
&= ~mask
;
4738 else if (type
== ix86_opt_str
)
4742 error ("option(\"%s\") was already specified", opt_string
);
4746 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4749 else if (type
== ix86_opt_enum
)
4754 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4756 set_option (&global_options
, enum_opts_set
, opt
, value
,
4757 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4761 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4773 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.
   Parses ARGS via ix86_valid_target_attribute_inner_p, and when the
   resulting options differ from the defaults, re-runs
   ix86_option_override_internal and builds a target-option node.
   The original arch=/tune=/fpmath state is saved up front and restored
   before returning, so this routine has no lasting effect on the
   global option state.  Returns error_mark_node on a parse error.
   NOTE(review): extraction dropped lines; declarations of t and i, and
   several braces/returns are missing from this view.  */
4776 ix86_valid_target_attribute_tree (tree args
)
4778 const char *orig_arch_string
= ix86_arch_string
;
4779 const char *orig_tune_string
= ix86_tune_string
;
4780 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4781 int orig_tune_defaulted
= ix86_tune_defaulted
;
4782 int orig_arch_specified
= ix86_arch_specified
;
4783 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4786 struct cl_target_option
*def
4787 = TREE_TARGET_OPTION (target_option_default_node
);
4788 struct gcc_options enum_opts_set
;
4790 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4792 /* Process each of the options on the chain. */
4793 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4795 return error_mark_node
;
4797 /* If the changed options are different from the default, rerun
4798 ix86_option_override_internal, and then save the options away.
4799 The string options are attribute options, and will be undone
4800 when we copy the save structure. */
4801 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4802 || target_flags
!= def
->x_target_flags
4803 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4804 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4805 || enum_opts_set
.x_ix86_fpmath
)
4807 /* If we are using the default tune= or arch=, undo the string assigned,
4808 and use the default. */
4809 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4810 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4811 else if (!orig_arch_specified
)
4812 ix86_arch_string
= NULL
;
4814 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4815 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4816 else if (orig_tune_defaulted
)
4817 ix86_tune_string
= NULL
;
4819 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4820 if (enum_opts_set
.x_ix86_fpmath
)
4821 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4822 else if (!TARGET_64BIT
&& TARGET_SSE
)
4824 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4825 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4828 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4829 ix86_option_override_internal (false);
4831 /* Add any builtin functions with the new isa if any. */
4832 ix86_add_new_builtins (ix86_isa_flags
);
4834 /* Save the current options unless we are validating options for
4836 t
= build_target_option_node ();
4838 ix86_arch_string
= orig_arch_string
;
4839 ix86_tune_string
= orig_tune_string
;
4840 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4842 /* Free up memory allocated to hold the strings */
4843 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4844 free (option_strings
[i
]);
4850 /* Hook to validate attribute((target("string"))).
   Temporarily installs the function's optimization options, builds a
   target-option node for ARGS, attaches it (and any changed
   optimization node) to FNDECL, and then restores the previous global
   target/optimization state.  attribute((target("default"))) is
   accepted as a no-op (multi-versioning marker).
   NOTE(review): lines are missing from this extract (the args
   parameter declaration, return statements, braces).  */
4853 ix86_valid_target_attribute_p (tree fndecl
,
4854 tree
ARG_UNUSED (name
),
4856 int ARG_UNUSED (flags
))
4858 struct cl_target_option cur_target
;
4861 /* attribute((target("default"))) does nothing, beyond
4862 affecting multi-versioning. */
4863 if (TREE_VALUE (args
)
4864 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4865 && TREE_CHAIN (args
) == NULL_TREE
4866 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4869 tree old_optimize
= build_optimization_node ();
4870 tree new_target
, new_optimize
;
4871 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4873 /* If the function changed the optimization levels as well as setting target
4874 options, start with the optimizations specified. */
4875 if (func_optimize
&& func_optimize
!= old_optimize
)
4876 cl_optimization_restore (&global_options
,
4877 TREE_OPTIMIZATION (func_optimize
));
4879 /* The target attributes may also change some optimization flags, so update
4880 the optimization options if necessary. */
4881 cl_target_option_save (&cur_target
, &global_options
);
4882 new_target
= ix86_valid_target_attribute_tree (args
);
4883 new_optimize
= build_optimization_node ();
4885 if (new_target
== error_mark_node
)
4888 else if (fndecl
&& new_target
)
4890 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4892 if (old_optimize
!= new_optimize
)
4893 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4896 cl_target_option_restore (&global_options
, &cur_target
);
4898 if (old_optimize
!= new_optimize
)
4899 cl_optimization_restore (&global_options
,
4900 TREE_OPTIMIZATION (old_optimize
))
4906 /* Hook to determine if one function can safely inline another.
   Compares the per-function target options of CALLER and CALLEE:
   the callee's ISA flags must be a subset of the caller's, and the
   non-ISA target flags, arch, tune, fpmath and branch_cost must
   match.  NOTE(review): the boolean result assignments/returns are
   on lines missing from this extract.  */
4909 ix86_can_inline_p (tree caller
, tree callee
)
4912 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4913 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4915 /* If callee has no option attributes, then it is ok to inline. */
4919 /* If caller has no option attributes, but callee does then it is not ok to
4921 else if (!caller_tree
)
4926 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4927 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4929 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 function
4930 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4932 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4933 != callee_opts
->x_ix86_isa_flags
)
4936 /* See if we have the same non-isa options. */
4937 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4940 /* See if arch, tune, etc. are the same. */
4941 else if (caller_opts
->arch
!= callee_opts
->arch
)
4944 else if (caller_opts
->tune
!= callee_opts
->tune
)
4947 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4950 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4961 /* Remember the last target of ix86_set_current_function.
   GTY(()) marks it as a GC root so the cached FNDECL tree is kept
   alive across garbage collections.  */
4962 static GTY(()) tree ix86_previous_fndecl
;
4964 /* Invalidate ix86_previous_fndecl cache.  Called when the cached
   decl's target options may have changed behind our back, forcing
   ix86_set_current_function to re-apply options next time.  */
4966 ix86_reset_previous_fndecl (void)
4968 ix86_previous_fndecl
= NULL_TREE
;
4971 /* Establish appropriate back-end context for processing the function
4972 FNDECL. The argument might be NULL to indicate processing at top
4973 level, outside of any function scope. */
4975 ix86_set_current_function (tree fndecl
)
4977 /* Only change the context if the function changes. This hook is called
4978 several times in the course of compiling a function, and we don't want to
4979 slow things down too much or call target_reinit when it isn't safe. */
4980 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4982 tree old_tree
= (ix86_previous_fndecl
4983 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4986 tree new_tree
= (fndecl
4987 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4990 ix86_previous_fndecl
= fndecl
;
4991 if (old_tree
== new_tree
)
/* NOTE(review): extraction dropped lines here; the branches that
   restore either the new function's saved target options or the
   current defaults follow.  */
4996 cl_target_option_restore (&global_options
,
4997 TREE_TARGET_OPTION (new_tree
));
5003 struct cl_target_option
*def
5004 = TREE_TARGET_OPTION (target_option_current_node
);
5006 cl_target_option_restore (&global_options
, def
);
5013 /* Return true if this goes in large data/bss.
   Only relevant for the medium code models (CM_MEDIUM /
   CM_MEDIUM_PIC); a decl is "large" when it is explicitly placed in
   .ldata/.lbss or its size exceeds ix86_section_threshold.
   NOTE(review): return statements are on lines missing from this
   extract.  */
5016 ix86_in_large_data_p (tree exp
)
5018 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
5021 /* Functions are never large data. */
5022 if (TREE_CODE (exp
) == FUNCTION_DECL
)
5025 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
5027 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
5028 if (strcmp (section
, ".ldata") == 0
5029 || strcmp (section
, ".lbss") == 0)
5035 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
5037 /* If this is an incomplete type with size 0, then we can't put it
5038 in data because it might be too big when completed. */
5039 if (!size
|| size
> ix86_section_threshold
)
5046 /* Switch to the appropriate section for output of DECL.
5047 DECL is either a `VAR_DECL' node or a constant of some sort.
5048 RELOC indicates whether forming the initial value of DECL requires
5049 link-time relocations.
   For medium-model large data, maps the decl's section category to the
   corresponding ".l*" large-data section; otherwise defers to
   default_elf_select_section.  */
5051 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
5055 x86_64_elf_select_section (tree decl
, int reloc
,
5056 unsigned HOST_WIDE_INT align
)
5058 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5059 && ix86_in_large_data_p (decl
))
5061 const char *sname
= NULL
;
5062 unsigned int flags
= SECTION_WRITE
;
5063 switch (categorize_decl_for_section (decl
, reloc
))
/* NOTE(review): several case labels and break statements are on lines
   missing from this extract (numbering gaps).  */
5068 case SECCAT_DATA_REL
:
5069 sname
= ".ldata.rel";
5071 case SECCAT_DATA_REL_LOCAL
:
5072 sname
= ".ldata.rel.local";
5074 case SECCAT_DATA_REL_RO
:
5075 sname
= ".ldata.rel.ro";
5077 case SECCAT_DATA_REL_RO_LOCAL
:
5078 sname
= ".ldata.rel.ro.local";
5082 flags
|= SECTION_BSS
;
5085 case SECCAT_RODATA_MERGE_STR
:
5086 case SECCAT_RODATA_MERGE_STR_INIT
:
5087 case SECCAT_RODATA_MERGE_CONST
:
5091 case SECCAT_SRODATA
:
5098 /* We don't split these for medium model. Place them into
5099 default sections and hope for best. */
5104 /* We might get called with string constants, but get_named_section
5105 doesn't like them as they are not DECLs. Also, we need to set
5106 flags in that case. */
5108 return get_section (sname
, flags
, NULL
);
5109 return get_named_section (decl
, sname
, reloc
);
5112 return default_elf_select_section (decl
, reloc
, align
);
5115 /* Build up a unique section name, expressed as a
5116 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5117 RELOC indicates whether the initial value of EXP requires
5118 link-time relocations.
   Large-data counterpart of default_unique_section: picks a ".l*"
   prefix by section category, prepends ".gnu.linkonce" for one-only
   decls when COMDAT groups are unavailable, and appends the decl's
   stripped assembler name.  */
5120 static void ATTRIBUTE_UNUSED
5121 x86_64_elf_unique_section (tree decl
, int reloc
)
5123 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5124 && ix86_in_large_data_p (decl
))
5126 const char *prefix
= NULL
;
5127 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5128 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
5130 switch (categorize_decl_for_section (decl
, reloc
))
5133 case SECCAT_DATA_REL
:
5134 case SECCAT_DATA_REL_LOCAL
:
5135 case SECCAT_DATA_REL_RO
:
5136 case SECCAT_DATA_REL_RO_LOCAL
:
5137 prefix
= one_only
? ".ld" : ".ldata";
5140 prefix
= one_only
? ".lb" : ".lbss";
5143 case SECCAT_RODATA_MERGE_STR
:
5144 case SECCAT_RODATA_MERGE_STR_INIT
:
5145 case SECCAT_RODATA_MERGE_CONST
:
5146 prefix
= one_only
? ".lr" : ".lrodata";
5148 case SECCAT_SRODATA
:
5155 /* We don't split these for medium model. Place them into
5156 default sections and hope for best. */
5161 const char *name
, *linkonce
;
/* NOTE(review): the declaration of `string` is on a line missing from
   this extract.  */
5164 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
5165 name
= targetm
.strip_name_encoding (name
);
5167 /* If we're using one_only, then there needs to be a .gnu.linkonce
5168 prefix to the section name. */
5169 linkonce
= one_only
? ".gnu.linkonce" : "";
5171 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
5173 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
5177 default_unique_section (decl
, reloc
);
5180 #ifdef COMMON_ASM_OP
5181 /* This says how to output assembler code to declare an
5182 uninitialized external linkage data object.
5184 For medium model x86-64 we need to use .largecomm opcode for
   large objects (above ix86_section_threshold); otherwise the
   target's normal COMMON_ASM_OP directive is emitted.  */
5187 x86_elf_aligned_common (FILE *file
,
5188 const char *name
, unsigned HOST_WIDE_INT size
,
5191 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5192 && size
> (unsigned int)ix86_section_threshold
)
5193 fputs (".largecomm\t", file
);
5195 fputs (COMMON_ASM_OP
, file
);
5196 assemble_name (file
, name
);
/* Emit ",SIZE,ALIGN" with alignment converted from bits to bytes.  */
5197 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
5198 size
, align
/ BITS_PER_UNIT
);
5202 /* Utility function for targets to use in implementing
5203 ASM_OUTPUT_ALIGNED_BSS.
   Chooses .lbss for medium-model large objects, otherwise the regular
   bss section, then emits alignment, the object's label/declaration,
   and SIZE bytes of space (at least 1).  */
5206 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
5207 const char *name
, unsigned HOST_WIDE_INT size
,
5210 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5211 && size
> (unsigned int)ix86_section_threshold
)
5212 switch_to_section (get_named_section (decl
, ".lbss", 0));
5214 switch_to_section (bss_section
);
5215 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
5216 #ifdef ASM_DECLARE_OBJECT_NAME
5217 last_assemble_variable_decl
= decl
;
5218 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
5220 /* Standard thing is just output label for the object. */
5221 ASM_OUTPUT_LABEL (file
, name
);
5222 #endif /* ASM_DECLARE_OBJECT_NAME */
5223 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
5226 /* Decide whether we must probe the stack before any space allocation
5227 on this target. It's essentially TARGET_STACK_PROBE except when
5228 -fstack-check causes the stack to be already probed differently. */
5231 ix86_target_stack_probe (void)
5233 /* Do not probe the stack twice if static stack checking is enabled. */
5234 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5237 return TARGET_STACK_PROBE
;
5240 /* Decide whether we can make a sibling call to a function. DECL is the
5241 declaration of the function being targeted by the call and EXP is the
5242 CALL_EXPR representing the call.
   Rejects sibcalls when PIC/PLT constraints, stack-alignment needs,
   return-value location mismatches, MS->SysV ABI transitions, or
   register pressure for indirect calls make them unsafe.
   NOTE(review): extraction dropped lines (return statements, the a/b
   declarations, some conditions are incomplete in this view).  */
5245 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5247 tree type
, decl_or_type
;
5250 /* If we are generating position-independent code, we cannot sibcall
5251 optimize any indirect call, or a direct call to a global function,
5252 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5256 && (!decl
|| !targetm
.binds_local_p (decl
)))
5259 /* If we need to align the outgoing stack, then sibcalling would
5260 unalign the stack, which may break the called function. */
5261 if (ix86_minimum_incoming_stack_boundary (true)
5262 < PREFERRED_STACK_BOUNDARY
)
5267 decl_or_type
= decl
;
5268 type
= TREE_TYPE (decl
);
5272 /* We're looking at the CALL_EXPR, we need the type of the function. */
5273 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5274 type
= TREE_TYPE (type
); /* pointer type */
5275 type
= TREE_TYPE (type
); /* function type */
5276 decl_or_type
= type
;
5279 /* Check that the return value locations are the same. Like
5280 if we are returning floats on the 80387 register stack, we cannot
5281 make a sibcall from a function that doesn't return a float to a
5282 function that does or, conversely, from a function that does return
5283 a float to a function that doesn't; the necessary stack adjustment
5284 would not be executed. This is also the place we notice
5285 differences in the return value ABI. Note that it is ok for one
5286 of the functions to have void return type as long as the return
5287 value of the other is passed in a register. */
5288 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5289 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5291 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5293 if (!rtx_equal_p (a
, b
))
5296 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5298 else if (!rtx_equal_p (a
, b
))
5303 /* The SYSV ABI has more call-clobbered registers;
5304 disallow sibcalls from MS to SYSV. */
5305 if (cfun
->machine
->call_abi
== MS_ABI
5306 && ix86_function_type_abi (type
) == SYSV_ABI
)
5311 /* If this call is indirect, we'll need to be able to use a
5312 call-clobbered register for the address of the target function.
5313 Make sure that all such registers are not used for passing
5314 parameters. Note that DLLIMPORT functions are indirect. */
5316 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5318 if (ix86_function_regparm (type
, NULL
) >= 3)
5320 /* ??? Need to count the actual number of registers to be used,
5321 not the possible number of registers. Fix later. */
5327 /* Otherwise okay. That also includes certain types of indirect calls. */
5331 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5332 and "sseregparm" calling convention attributes;
5333 arguments as in struct attribute_spec.handler.
   Validates attribute placement and rejects incompatible combinations
   with error().  NOTE(review): two diagnostic strings below contain
   apparent typos carried from upstream ("regparam" for "regparm",
   "none class-method" for "non class-method"); they are user-visible
   strings and are left byte-identical here — fix upstream if desired.
   Extraction also dropped lines (braces, returns, 64-bit early-outs).  */
5336 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5338 int flags ATTRIBUTE_UNUSED
,
5341 if (TREE_CODE (*node
) != FUNCTION_TYPE
5342 && TREE_CODE (*node
) != METHOD_TYPE
5343 && TREE_CODE (*node
) != FIELD_DECL
5344 && TREE_CODE (*node
) != TYPE_DECL
)
5346 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5348 *no_add_attrs
= true;
5352 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5353 if (is_attribute_p ("regparm", name
))
5357 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5359 error ("fastcall and regparm attributes are not compatible");
5362 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5364 error ("regparam and thiscall attributes are not compatible");
5367 cst
= TREE_VALUE (args
);
5368 if (TREE_CODE (cst
) != INTEGER_CST
)
5370 warning (OPT_Wattributes
,
5371 "%qE attribute requires an integer constant argument",
5373 *no_add_attrs
= true;
5375 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5377 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5379 *no_add_attrs
= true;
5387 /* Do not warn when emulating the MS ABI. */
5388 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5389 && TREE_CODE (*node
) != METHOD_TYPE
)
5390 || ix86_function_type_abi (*node
) != MS_ABI
)
5391 warning (OPT_Wattributes
, "%qE attribute ignored",
5393 *no_add_attrs
= true;
5397 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5398 if (is_attribute_p ("fastcall", name
))
5400 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5402 error ("fastcall and cdecl attributes are not compatible");
5404 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5406 error ("fastcall and stdcall attributes are not compatible");
5408 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5410 error ("fastcall and regparm attributes are not compatible");
5412 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5414 error ("fastcall and thiscall attributes are not compatible");
5418 /* Can combine stdcall with fastcall (redundant), regparm and
5420 else if (is_attribute_p ("stdcall", name
))
5422 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5424 error ("stdcall and cdecl attributes are not compatible");
5426 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5428 error ("stdcall and fastcall attributes are not compatible");
5430 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5432 error ("stdcall and thiscall attributes are not compatible");
5436 /* Can combine cdecl with regparm and sseregparm. */
5437 else if (is_attribute_p ("cdecl", name
))
5439 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5441 error ("stdcall and cdecl attributes are not compatible");
5443 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5445 error ("fastcall and cdecl attributes are not compatible");
5447 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5449 error ("cdecl and thiscall attributes are not compatible");
5452 else if (is_attribute_p ("thiscall", name
))
5454 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5455 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5457 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5459 error ("stdcall and thiscall attributes are not compatible");
5461 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5463 error ("fastcall and thiscall attributes are not compatible");
5465 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5467 error ("cdecl and thiscall attributes are not compatible");
5471 /* Can combine sseregparm with all attributes. */
5476 /* The transactional memory builtins are implicitly regparm or fastcall
5477 depending on the ABI. Override the generic do-nothing attribute that
5478 these builtins were declared with, and replace it with one of the two
5479 attributes that we expect elsewhere.
   Builds an attribute list ALT — "fastcall" for 32-bit Windows-like
   targets (CHECK_STACK_LIMIT > 0), otherwise "regparm(2)" — and
   applies it via decl_attributes.  */
5482 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5483 tree args ATTRIBUTE_UNUSED
,
5484 int flags ATTRIBUTE_UNUSED
,
5489 /* In no case do we want to add the placeholder attribute. */
5490 *no_add_attrs
= true;
5492 /* The 64-bit ABI is unchanged for transactional memory. */
5496 /* ??? Is there a better way to validate 32-bit windows? We have
5497 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5498 if (CHECK_STACK_LIMIT
> 0)
5499 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5502 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5503 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5505 decl_attributes (node
, alt
, flags
);
5510 /* This function determines from TYPE the calling-convention.
   Returns a bitmask of IX86_CALLCVT_* flags derived from the type's
   attributes; when no explicit base convention is attached, falls
   back to stdcall under -mrtd (non-stdarg), thiscall for MS-ABI
   methods, and cdecl otherwise.  */
5513 ix86_get_callcvt (const_tree type
)
5515 unsigned int ret
= 0;
5520 return IX86_CALLCVT_CDECL
;
5522 attrs
= TYPE_ATTRIBUTES (type
);
5523 if (attrs
!= NULL_TREE
)
5525 if (lookup_attribute ("cdecl", attrs
))
5526 ret
|= IX86_CALLCVT_CDECL
;
5527 else if (lookup_attribute ("stdcall", attrs
))
5528 ret
|= IX86_CALLCVT_STDCALL
;
5529 else if (lookup_attribute ("fastcall", attrs
))
5530 ret
|= IX86_CALLCVT_FASTCALL
;
5531 else if (lookup_attribute ("thiscall", attrs
))
5532 ret
|= IX86_CALLCVT_THISCALL
;
5534 /* Regparam isn't allowed for thiscall and fastcall. */
5535 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5537 if (lookup_attribute ("regparm", attrs
))
5538 ret
|= IX86_CALLCVT_REGPARM
;
5539 if (lookup_attribute ("sseregparm", attrs
))
5540 ret
|= IX86_CALLCVT_SSEREGPARM
;
5543 if (IX86_BASE_CALLCVT(ret
) != 0)
/* No explicit base convention attribute: pick the default.  */
5547 is_stdarg
= stdarg_p (type
);
5548 if (TARGET_RTD
&& !is_stdarg
)
5549 return IX86_CALLCVT_STDCALL
| ret
;
5553 || TREE_CODE (type
) != METHOD_TYPE
5554 || ix86_function_type_abi (type
) != MS_ABI
)
5555 return IX86_CALLCVT_CDECL
| ret
;
5557 return IX86_CALLCVT_THISCALL
;
5560 /* Return 0 if the attributes for two types are incompatible, 1 if they
5561 are compatible, and 2 if they are nearly compatible (which causes a
5562 warning to be generated). */
5565 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5567 unsigned int ccvt1
, ccvt2
;
5569 if (TREE_CODE (type1
) != FUNCTION_TYPE
5570 && TREE_CODE (type1
) != METHOD_TYPE
)
5573 ccvt1
= ix86_get_callcvt (type1
);
5574 ccvt2
= ix86_get_callcvt (type2
);
5577 if (ix86_function_regparm (type1
, NULL
)
5578 != ix86_function_regparm (type2
, NULL
))
5584 /* Return the regparm value for a function with the indicated TYPE and DECL.
5585 DECL may be NULL when calling function indirectly
5586 or considering a libcall. */
5589 ix86_function_regparm (const_tree type
, const_tree decl
)
5596 return (ix86_function_type_abi (type
) == SYSV_ABI
5597 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5598 ccvt
= ix86_get_callcvt (type
);
5599 regparm
= ix86_regparm
;
5601 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5603 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5606 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5610 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5612 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5615 /* Use register calling convention for local functions when possible. */
5617 && TREE_CODE (decl
) == FUNCTION_DECL
5619 && !(profile_flag
&& !flag_fentry
))
5621 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5622 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5623 if (i
&& i
->local
&& i
->can_change_signature
)
5625 int local_regparm
, globals
= 0, regno
;
5627 /* Make sure no regparm register is taken by a
5628 fixed register variable. */
5629 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5630 if (fixed_regs
[local_regparm
])
5633 /* We don't want to use regparm(3) for nested functions as
5634 these use a static chain pointer in the third argument. */
5635 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5638 /* In 32-bit mode save a register for the split stack. */
5639 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5642 /* Each fixed register usage increases register pressure,
5643 so less registers should be used for argument passing.
5644 This functionality can be overriden by an explicit
5646 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5647 if (fixed_regs
[regno
])
5651 = globals
< local_regparm
? local_regparm
- globals
: 0;
5653 if (local_regparm
> regparm
)
5654 regparm
= local_regparm
;
5661 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5662 DFmode (2) arguments in SSE registers for a function with the
5663 indicated TYPE and DECL. DECL may be NULL when calling function
5664 indirectly or considering a libcall. Otherwise return 0. */
5667 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5669 gcc_assert (!TARGET_64BIT
);
5671 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5672 by the sseregparm attribute. */
5673 if (TARGET_SSEREGPARM
5674 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5681 error ("calling %qD with attribute sseregparm without "
5682 "SSE/SSE2 enabled", decl
);
5684 error ("calling %qT with attribute sseregparm without "
5685 "SSE/SSE2 enabled", type
);
5693 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5694 (and DFmode for SSE2) arguments in SSE registers. */
5695 if (decl
&& TARGET_SSE_MATH
&& optimize
5696 && !(profile_flag
&& !flag_fentry
))
5698 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5699 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5700 if (i
&& i
->local
&& i
->can_change_signature
)
5701 return TARGET_SSE2
? 2 : 1;
5707 /* Return true if EAX is live at the start of the function. Used by
5708 ix86_expand_prologue to determine if we need special help before
5709 calling allocate_stack_worker. */
5712 ix86_eax_live_at_start_p (void)
5714 /* Cheat. Don't bother working forward from ix86_function_regparm
5715 to the function type to whether an actual argument is located in
5716 eax. Instead just look at cfg info, which is still close enough
5717 to correct at this point. This gives false positives for broken
5718 functions that might use uninitialized data that happens to be
5719 allocated in eax, but who cares? */
5720 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5724 ix86_keep_aggregate_return_pointer (tree fntype
)
5730 attr
= lookup_attribute ("callee_pop_aggregate_return",
5731 TYPE_ATTRIBUTES (fntype
));
5733 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5735 /* For 32-bit MS-ABI the default is to keep aggregate
5737 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5740 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5743 /* Value is the number of bytes of arguments automatically
5744 popped when returning from a subroutine call.
5745 FUNDECL is the declaration node of the function (as a tree),
5746 FUNTYPE is the data type of the function (as a tree),
5747 or for a library call it is an identifier node for the subroutine name.
5748 SIZE is the number of bytes of arguments passed on the stack.
5750 On the 80386, the RTD insn may be used to pop them if the number
5751 of args is fixed, but if the number is variable then the caller
5752 must pop them all. RTD can't be used for library calls now
5753 because the library is compiled with the Unix compiler.
5754 Use of RTD is a selectable option, since it is incompatible with
5755 standard Unix calling sequences. If the option is not selected,
5756 the caller must always pop the args.
5758 The attribute stdcall is equivalent to RTD on a per module basis. */
5761 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5765 /* None of the 64-bit ABIs pop arguments. */
5769 ccvt
= ix86_get_callcvt (funtype
);
5771 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5772 | IX86_CALLCVT_THISCALL
)) != 0
5773 && ! stdarg_p (funtype
))
5776 /* Lose any fake structure return argument if it is passed on the stack. */
5777 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5778 && !ix86_keep_aggregate_return_pointer (funtype
))
5780 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5782 return GET_MODE_SIZE (Pmode
);
5788 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5791 ix86_legitimate_combined_insn (rtx insn
)
5793 /* Check operand constraints in case hard registers were propagated
5794 into insn pattern. This check prevents combine pass from
5795 generating insn patterns with invalid hard register operands.
5796 These invalid insns can eventually confuse reload to error out
5797 with a spill failure. See also PRs 46829 and 46843. */
5798 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5802 extract_insn (insn
);
5803 preprocess_constraints ();
5805 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5807 rtx op
= recog_data
.operand
[i
];
5808 enum machine_mode mode
= GET_MODE (op
);
5809 struct operand_alternative
*op_alt
;
5814 /* A unary operator may be accepted by the predicate, but it
5815 is irrelevant for matching constraints. */
5819 if (GET_CODE (op
) == SUBREG
)
5821 if (REG_P (SUBREG_REG (op
))
5822 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5823 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5824 GET_MODE (SUBREG_REG (op
)),
5827 op
= SUBREG_REG (op
);
5830 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5833 op_alt
= recog_op_alt
[i
];
5835 /* Operand has no constraints, anything is OK. */
5836 win
= !recog_data
.n_alternatives
;
5838 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5840 if (op_alt
[j
].anything_ok
5841 || (op_alt
[j
].matches
!= -1
5843 (recog_data
.operand
[i
],
5844 recog_data
.operand
[op_alt
[j
].matches
]))
5845 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5860 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5862 static unsigned HOST_WIDE_INT
5863 ix86_asan_shadow_offset (void)
5865 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
5866 : HOST_WIDE_INT_C (0x7fff8000))
5867 : (HOST_WIDE_INT_1
<< 29);
5870 /* Argument support functions. */
5872 /* Return true when register may be used to pass function parameters. */
5874 ix86_function_arg_regno_p (int regno
)
5877 const int *parm_regs
;
5882 return (regno
< REGPARM_MAX
5883 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5885 return (regno
< REGPARM_MAX
5886 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5887 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5888 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5889 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5894 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5899 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5900 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5904 /* TODO: The function should depend on current function ABI but
5905 builtins.c would need updating then. Therefore we use the
5908 /* RAX is used as hidden argument to va_arg functions. */
5909 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5912 if (ix86_abi
== MS_ABI
)
5913 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5915 parm_regs
= x86_64_int_parameter_registers
;
5916 for (i
= 0; i
< (ix86_abi
== MS_ABI
5917 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5918 if (regno
== parm_regs
[i
])
5923 /* Return if we do not know how to pass TYPE solely in registers. */
5926 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5928 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5931 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5932 The layout_type routine is crafty and tries to trick us into passing
5933 currently unsupported vector types on the stack by using TImode. */
5934 return (!TARGET_64BIT
&& mode
== TImode
5935 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5938 /* It returns the size, in bytes, of the area reserved for arguments passed
5939 in registers for the function represented by fndecl dependent to the used
5942 ix86_reg_parm_stack_space (const_tree fndecl
)
5944 enum calling_abi call_abi
= SYSV_ABI
;
5945 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5946 call_abi
= ix86_function_abi (fndecl
);
5948 call_abi
= ix86_function_type_abi (fndecl
);
5949 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5954 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5957 ix86_function_type_abi (const_tree fntype
)
5959 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5961 enum calling_abi abi
= ix86_abi
;
5962 if (abi
== SYSV_ABI
)
5964 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5967 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5975 ix86_function_ms_hook_prologue (const_tree fn
)
5977 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5979 if (decl_function_context (fn
) != NULL_TREE
)
5980 error_at (DECL_SOURCE_LOCATION (fn
),
5981 "ms_hook_prologue is not compatible with nested function");
5988 static enum calling_abi
5989 ix86_function_abi (const_tree fndecl
)
5993 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5996 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5999 ix86_cfun_abi (void)
6003 return cfun
->machine
->call_abi
;
6006 /* Write the extra assembler code needed to declare a function properly. */
6009 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
6012 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
6016 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
6017 unsigned int filler_cc
= 0xcccccccc;
6019 for (i
= 0; i
< filler_count
; i
+= 4)
6020 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
6023 #ifdef SUBTARGET_ASM_UNWIND_INIT
6024 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
6027 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
6029 /* Output magic byte marker, if hot-patch attribute is set. */
6034 /* leaq [%rsp + 0], %rsp */
6035 asm_fprintf (asm_out_file
, ASM_BYTE
6036 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6040 /* movl.s %edi, %edi
6042 movl.s %esp, %ebp */
6043 asm_fprintf (asm_out_file
, ASM_BYTE
6044 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6050 extern void init_regs (void);
6052 /* Implementation of call abi switching target hook. Specific to FNDECL
6053 the specific call register sets are set. See also
6054 ix86_conditional_register_usage for more details. */
6056 ix86_call_abi_override (const_tree fndecl
)
6058 if (fndecl
== NULL_TREE
)
6059 cfun
->machine
->call_abi
= ix86_abi
;
6061 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
6064 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6065 expensive re-initialization of init_regs each time we switch function context
6066 since this is needed only during RTL expansion. */
6068 ix86_maybe_switch_abi (void)
6071 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
6075 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6076 for a call to a function whose data type is FNTYPE.
6077 For a library call, FNTYPE is 0. */
6080 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
6081 tree fntype
, /* tree ptr for function decl */
6082 rtx libname
, /* SYMBOL_REF of library name or 0 */
6086 struct cgraph_local_info
*i
;
6088 memset (cum
, 0, sizeof (*cum
));
6092 i
= cgraph_local_info (fndecl
);
6093 cum
->call_abi
= ix86_function_abi (fndecl
);
6098 cum
->call_abi
= ix86_function_type_abi (fntype
);
6101 cum
->caller
= caller
;
6103 /* Set up the number of registers to use for passing arguments. */
6105 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
6106 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
6107 "or subtarget optimization implying it");
6108 cum
->nregs
= ix86_regparm
;
6111 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
6112 ? X86_64_REGPARM_MAX
6113 : X86_64_MS_REGPARM_MAX
);
6117 cum
->sse_nregs
= SSE_REGPARM_MAX
;
6120 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
6121 ? X86_64_SSE_REGPARM_MAX
6122 : X86_64_MS_SSE_REGPARM_MAX
);
6126 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
6127 cum
->warn_avx
= true;
6128 cum
->warn_sse
= true;
6129 cum
->warn_mmx
= true;
6131 /* Because type might mismatch in between caller and callee, we need to
6132 use actual type of function for local calls.
6133 FIXME: cgraph_analyze can be told to actually record if function uses
6134 va_start so for local functions maybe_vaarg can be made aggressive
6136 FIXME: once typesytem is fixed, we won't need this code anymore. */
6137 if (i
&& i
->local
&& i
->can_change_signature
)
6138 fntype
= TREE_TYPE (fndecl
);
6139 cum
->maybe_vaarg
= (fntype
6140 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
6145 /* If there are variable arguments, then we won't pass anything
6146 in registers in 32-bit mode. */
6147 if (stdarg_p (fntype
))
6158 /* Use ecx and edx registers if function has fastcall attribute,
6159 else look for regparm information. */
6162 unsigned int ccvt
= ix86_get_callcvt (fntype
);
6163 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
6166 cum
->fastcall
= 1; /* Same first register as in fastcall. */
6168 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
6174 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
6177 /* Set up the number of SSE registers used for passing SFmode
6178 and DFmode arguments. Warn for mismatching ABI. */
6179 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
6183 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6184 But in the case of vector types, it is some vector mode.
6186 When we have only some of our vector isa extensions enabled, then there
6187 are some modes for which vector_mode_supported_p is false. For these
6188 modes, the generic vector support in gcc will choose some non-vector mode
6189 in order to implement the type. By computing the natural mode, we'll
6190 select the proper ABI location for the operand and not depend on whatever
6191 the middle-end decides to do with these vector types.
6193 The midde-end can't deal with the vector types > 16 bytes. In this
6194 case, we return the original mode and warn ABI change if CUM isn't
6197 static enum machine_mode
6198 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
6200 enum machine_mode mode
= TYPE_MODE (type
);
6202 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
6204 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6205 if ((size
== 8 || size
== 16 || size
== 32)
6206 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6207 && TYPE_VECTOR_SUBPARTS (type
) > 1)
6209 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
6211 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
6212 mode
= MIN_MODE_VECTOR_FLOAT
;
6214 mode
= MIN_MODE_VECTOR_INT
;
6216 /* Get the mode which has this inner mode and number of units. */
6217 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
6218 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
6219 && GET_MODE_INNER (mode
) == innermode
)
6221 if (size
== 32 && !TARGET_AVX
)
6223 static bool warnedavx
;
6230 warning (0, "AVX vector argument without AVX "
6231 "enabled changes the ABI");
6233 return TYPE_MODE (type
);
6235 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
6237 static bool warnedsse
;
6244 warning (0, "SSE vector argument without SSE "
6245 "enabled changes the ABI");
6260 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6261 this may not agree with the mode that the type system has chosen for the
6262 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6263 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6266 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6271 if (orig_mode
!= BLKmode
)
6272 tmp
= gen_rtx_REG (orig_mode
, regno
);
6275 tmp
= gen_rtx_REG (mode
, regno
);
6276 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6277 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6283 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6284 of this code is to classify each 8bytes of incoming argument by the register
6285 class and assign registers accordingly. */
6287 /* Return the union class of CLASS1 and CLASS2.
6288 See the x86-64 PS ABI for details. */
6290 static enum x86_64_reg_class
6291 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6293 /* Rule #1: If both classes are equal, this is the resulting class. */
6294 if (class1
== class2
)
6297 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6299 if (class1
== X86_64_NO_CLASS
)
6301 if (class2
== X86_64_NO_CLASS
)
6304 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6305 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6306 return X86_64_MEMORY_CLASS
;
6308 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6309 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6310 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6311 return X86_64_INTEGERSI_CLASS
;
6312 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6313 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6314 return X86_64_INTEGER_CLASS
;
6316 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6318 if (class1
== X86_64_X87_CLASS
6319 || class1
== X86_64_X87UP_CLASS
6320 || class1
== X86_64_COMPLEX_X87_CLASS
6321 || class2
== X86_64_X87_CLASS
6322 || class2
== X86_64_X87UP_CLASS
6323 || class2
== X86_64_COMPLEX_X87_CLASS
)
6324 return X86_64_MEMORY_CLASS
;
6326 /* Rule #6: Otherwise class SSE is used. */
6327 return X86_64_SSE_CLASS
;
6330 /* Classify the argument of type TYPE and mode MODE.
6331 CLASSES will be filled by the register class used to pass each word
6332 of the operand. The number of words is returned. In case the parameter
6333 should be passed in memory, 0 is returned. As a special case for zero
6334 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6336 BIT_OFFSET is used internally for handling records and specifies offset
6337 of the offset in bits modulo 256 to avoid overflow cases.
6339 See the x86-64 PS ABI for details.
6343 classify_argument (enum machine_mode mode
, const_tree type
,
6344 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6346 HOST_WIDE_INT bytes
=
6347 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6349 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6351 /* Variable sized entities are always passed/returned in memory. */
6355 if (mode
!= VOIDmode
6356 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6359 if (type
&& AGGREGATE_TYPE_P (type
))
6363 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6365 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6369 for (i
= 0; i
< words
; i
++)
6370 classes
[i
] = X86_64_NO_CLASS
;
6372 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6373 signalize memory class, so handle it as special case. */
6376 classes
[0] = X86_64_NO_CLASS
;
6380 /* Classify each field of record and merge classes. */
6381 switch (TREE_CODE (type
))
6384 /* And now merge the fields of structure. */
6385 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6387 if (TREE_CODE (field
) == FIELD_DECL
)
6391 if (TREE_TYPE (field
) == error_mark_node
)
6394 /* Bitfields are always classified as integer. Handle them
6395 early, since later code would consider them to be
6396 misaligned integers. */
6397 if (DECL_BIT_FIELD (field
))
6399 for (i
= (int_bit_position (field
)
6400 + (bit_offset
% 64)) / 8 / 8;
6401 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6402 + tree_low_cst (DECL_SIZE (field
), 0)
6405 merge_classes (X86_64_INTEGER_CLASS
,
6412 type
= TREE_TYPE (field
);
6414 /* Flexible array member is ignored. */
6415 if (TYPE_MODE (type
) == BLKmode
6416 && TREE_CODE (type
) == ARRAY_TYPE
6417 && TYPE_SIZE (type
) == NULL_TREE
6418 && TYPE_DOMAIN (type
) != NULL_TREE
6419 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6424 if (!warned
&& warn_psabi
)
6427 inform (input_location
,
6428 "the ABI of passing struct with"
6429 " a flexible array member has"
6430 " changed in GCC 4.4");
6434 num
= classify_argument (TYPE_MODE (type
), type
,
6436 (int_bit_position (field
)
6437 + bit_offset
) % 256);
6440 pos
= (int_bit_position (field
)
6441 + (bit_offset
% 64)) / 8 / 8;
6442 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6444 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6451 /* Arrays are handled as small records. */
6454 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6455 TREE_TYPE (type
), subclasses
, bit_offset
);
6459 /* The partial classes are now full classes. */
6460 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6461 subclasses
[0] = X86_64_SSE_CLASS
;
6462 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6463 && !((bit_offset
% 64) == 0 && bytes
== 4))
6464 subclasses
[0] = X86_64_INTEGER_CLASS
;
6466 for (i
= 0; i
< words
; i
++)
6467 classes
[i
] = subclasses
[i
% num
];
6472 case QUAL_UNION_TYPE
:
6473 /* Unions are similar to RECORD_TYPE but offset is always 0.
6475 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6477 if (TREE_CODE (field
) == FIELD_DECL
)
6481 if (TREE_TYPE (field
) == error_mark_node
)
6484 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6485 TREE_TYPE (field
), subclasses
,
6489 for (i
= 0; i
< num
; i
++)
6490 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6501 /* When size > 16 bytes, if the first one isn't
6502 X86_64_SSE_CLASS or any other ones aren't
6503 X86_64_SSEUP_CLASS, everything should be passed in
6505 if (classes
[0] != X86_64_SSE_CLASS
)
6508 for (i
= 1; i
< words
; i
++)
6509 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6513 /* Final merger cleanup. */
6514 for (i
= 0; i
< words
; i
++)
6516 /* If one class is MEMORY, everything should be passed in
6518 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6521 /* The X86_64_SSEUP_CLASS should be always preceded by
6522 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6523 if (classes
[i
] == X86_64_SSEUP_CLASS
6524 && classes
[i
- 1] != X86_64_SSE_CLASS
6525 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6527 /* The first one should never be X86_64_SSEUP_CLASS. */
6528 gcc_assert (i
!= 0);
6529 classes
[i
] = X86_64_SSE_CLASS
;
6532 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6533 everything should be passed in memory. */
6534 if (classes
[i
] == X86_64_X87UP_CLASS
6535 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6539 /* The first one should never be X86_64_X87UP_CLASS. */
6540 gcc_assert (i
!= 0);
6541 if (!warned
&& warn_psabi
)
6544 inform (input_location
,
6545 "the ABI of passing union with long double"
6546 " has changed in GCC 4.4");
6554 /* Compute alignment needed. We align all types to natural boundaries with
6555 exception of XFmode that is aligned to 64bits. */
6556 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6558 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6561 mode_alignment
= 128;
6562 else if (mode
== XCmode
)
6563 mode_alignment
= 256;
6564 if (COMPLEX_MODE_P (mode
))
6565 mode_alignment
/= 2;
6566 /* Misaligned fields are always returned in memory. */
6567 if (bit_offset
% mode_alignment
)
6571 /* for V1xx modes, just use the base mode */
6572 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6573 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6574 mode
= GET_MODE_INNER (mode
);
6576 /* Classification of atomic types. */
6581 classes
[0] = X86_64_SSE_CLASS
;
6584 classes
[0] = X86_64_SSE_CLASS
;
6585 classes
[1] = X86_64_SSEUP_CLASS
;
6595 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6599 classes
[0] = X86_64_INTEGERSI_CLASS
;
6602 else if (size
<= 64)
6604 classes
[0] = X86_64_INTEGER_CLASS
;
6607 else if (size
<= 64+32)
6609 classes
[0] = X86_64_INTEGER_CLASS
;
6610 classes
[1] = X86_64_INTEGERSI_CLASS
;
6613 else if (size
<= 64+64)
6615 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6623 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6627 /* OImode shouldn't be used directly. */
6632 if (!(bit_offset
% 64))
6633 classes
[0] = X86_64_SSESF_CLASS
;
6635 classes
[0] = X86_64_SSE_CLASS
;
6638 classes
[0] = X86_64_SSEDF_CLASS
;
6641 classes
[0] = X86_64_X87_CLASS
;
6642 classes
[1] = X86_64_X87UP_CLASS
;
6645 classes
[0] = X86_64_SSE_CLASS
;
6646 classes
[1] = X86_64_SSEUP_CLASS
;
6649 classes
[0] = X86_64_SSE_CLASS
;
6650 if (!(bit_offset
% 64))
6656 if (!warned
&& warn_psabi
)
6659 inform (input_location
,
6660 "the ABI of passing structure with complex float"
6661 " member has changed in GCC 4.4");
6663 classes
[1] = X86_64_SSESF_CLASS
;
6667 classes
[0] = X86_64_SSEDF_CLASS
;
6668 classes
[1] = X86_64_SSEDF_CLASS
;
6671 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6674 /* This modes is larger than 16 bytes. */
6682 classes
[0] = X86_64_SSE_CLASS
;
6683 classes
[1] = X86_64_SSEUP_CLASS
;
6684 classes
[2] = X86_64_SSEUP_CLASS
;
6685 classes
[3] = X86_64_SSEUP_CLASS
;
6693 classes
[0] = X86_64_SSE_CLASS
;
6694 classes
[1] = X86_64_SSEUP_CLASS
;
6702 classes
[0] = X86_64_SSE_CLASS
;
6708 gcc_assert (VECTOR_MODE_P (mode
));
6713 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6715 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6716 classes
[0] = X86_64_INTEGERSI_CLASS
;
6718 classes
[0] = X86_64_INTEGER_CLASS
;
6719 classes
[1] = X86_64_INTEGER_CLASS
;
6720 return 1 + (bytes
> 8);
6724 /* Examine the argument and return set number of register required in each
6725 class. Return 0 iff parameter should be passed in memory. */
6727 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6728 int *int_nregs
, int *sse_nregs
)
6730 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6731 int n
= classify_argument (mode
, type
, regclass
, 0);
6737 for (n
--; n
>= 0; n
--)
6738 switch (regclass
[n
])
6740 case X86_64_INTEGER_CLASS
:
6741 case X86_64_INTEGERSI_CLASS
:
6744 case X86_64_SSE_CLASS
:
6745 case X86_64_SSESF_CLASS
:
6746 case X86_64_SSEDF_CLASS
:
6749 case X86_64_NO_CLASS
:
6750 case X86_64_SSEUP_CLASS
:
6752 case X86_64_X87_CLASS
:
6753 case X86_64_X87UP_CLASS
:
6757 case X86_64_COMPLEX_X87_CLASS
:
6758 return in_return
? 2 : 0;
6759 case X86_64_MEMORY_CLASS
:
6765 /* Construct container for the argument used by GCC interface. See
6766 FUNCTION_ARG for the detailed description. */
6769 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6770 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6771 const int *intreg
, int sse_regno
)
6773 /* The following variables hold the static issued_error state. */
6774 static bool issued_sse_arg_error
;
6775 static bool issued_sse_ret_error
;
6776 static bool issued_x87_ret_error
;
6778 enum machine_mode tmpmode
;
6780 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6781 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6785 int needed_sseregs
, needed_intregs
;
6786 rtx exp
[MAX_CLASSES
];
6789 n
= classify_argument (mode
, type
, regclass
, 0);
6792 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6795 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6798 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6799 some less clueful developer tries to use floating-point anyway. */
6800 if (needed_sseregs
&& !TARGET_SSE
)
6804 if (!issued_sse_ret_error
)
6806 error ("SSE register return with SSE disabled");
6807 issued_sse_ret_error
= true;
6810 else if (!issued_sse_arg_error
)
6812 error ("SSE register argument with SSE disabled");
6813 issued_sse_arg_error
= true;
6818 /* Likewise, error if the ABI requires us to return values in the
6819 x87 registers and the user specified -mno-80387. */
6820 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6821 for (i
= 0; i
< n
; i
++)
6822 if (regclass
[i
] == X86_64_X87_CLASS
6823 || regclass
[i
] == X86_64_X87UP_CLASS
6824 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6826 if (!issued_x87_ret_error
)
6828 error ("x87 register return with x87 disabled");
6829 issued_x87_ret_error
= true;
6834 /* First construct simple cases. Avoid SCmode, since we want to use
6835 single register to pass this type. */
6836 if (n
== 1 && mode
!= SCmode
)
6837 switch (regclass
[0])
6839 case X86_64_INTEGER_CLASS
:
6840 case X86_64_INTEGERSI_CLASS
:
6841 return gen_rtx_REG (mode
, intreg
[0]);
6842 case X86_64_SSE_CLASS
:
6843 case X86_64_SSESF_CLASS
:
6844 case X86_64_SSEDF_CLASS
:
6845 if (mode
!= BLKmode
)
6846 return gen_reg_or_parallel (mode
, orig_mode
,
6847 SSE_REGNO (sse_regno
));
6849 case X86_64_X87_CLASS
:
6850 case X86_64_COMPLEX_X87_CLASS
:
6851 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6852 case X86_64_NO_CLASS
:
6853 /* Zero sized array, struct or class. */
6859 && regclass
[0] == X86_64_SSE_CLASS
6860 && regclass
[1] == X86_64_SSEUP_CLASS
6862 return gen_reg_or_parallel (mode
, orig_mode
,
6863 SSE_REGNO (sse_regno
));
6865 && regclass
[0] == X86_64_SSE_CLASS
6866 && regclass
[1] == X86_64_SSEUP_CLASS
6867 && regclass
[2] == X86_64_SSEUP_CLASS
6868 && regclass
[3] == X86_64_SSEUP_CLASS
6870 return gen_reg_or_parallel (mode
, orig_mode
,
6871 SSE_REGNO (sse_regno
));
6873 && regclass
[0] == X86_64_X87_CLASS
6874 && regclass
[1] == X86_64_X87UP_CLASS
)
6875 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6878 && regclass
[0] == X86_64_INTEGER_CLASS
6879 && regclass
[1] == X86_64_INTEGER_CLASS
6880 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6881 && intreg
[0] + 1 == intreg
[1])
6882 return gen_rtx_REG (mode
, intreg
[0]);
6884 /* Otherwise figure out the entries of the PARALLEL. */
6885 for (i
= 0; i
< n
; i
++)
6889 switch (regclass
[i
])
6891 case X86_64_NO_CLASS
:
6893 case X86_64_INTEGER_CLASS
:
6894 case X86_64_INTEGERSI_CLASS
:
6895 /* Merge TImodes on aligned occasions here too. */
6896 if (i
* 8 + 8 > bytes
)
6898 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6899 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6903 /* We've requested 24 bytes we
6904 don't have mode for. Use DImode. */
6905 if (tmpmode
== BLKmode
)
6908 = gen_rtx_EXPR_LIST (VOIDmode
,
6909 gen_rtx_REG (tmpmode
, *intreg
),
6913 case X86_64_SSESF_CLASS
:
6915 = gen_rtx_EXPR_LIST (VOIDmode
,
6916 gen_rtx_REG (SFmode
,
6917 SSE_REGNO (sse_regno
)),
6921 case X86_64_SSEDF_CLASS
:
6923 = gen_rtx_EXPR_LIST (VOIDmode
,
6924 gen_rtx_REG (DFmode
,
6925 SSE_REGNO (sse_regno
)),
6929 case X86_64_SSE_CLASS
:
6937 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6947 && regclass
[1] == X86_64_SSEUP_CLASS
6948 && regclass
[2] == X86_64_SSEUP_CLASS
6949 && regclass
[3] == X86_64_SSEUP_CLASS
);
6957 = gen_rtx_EXPR_LIST (VOIDmode
,
6958 gen_rtx_REG (tmpmode
,
6959 SSE_REGNO (sse_regno
)),
6968 /* Empty aligned struct, union or class. */
6972 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6973 for (i
= 0; i
< nexps
; i
++)
6974 XVECEXP (ret
, 0, i
) = exp
[i
];
6978 /* Update the data in CUM to advance over an argument of mode MODE
6979 and data type TYPE. (TYPE is null for libcalls where that information
6980 may not be available.) */
6983 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6984 const_tree type
, HOST_WIDE_INT bytes
,
6985 HOST_WIDE_INT words
)
7001 cum
->words
+= words
;
7002 cum
->nregs
-= words
;
7003 cum
->regno
+= words
;
7005 if (cum
->nregs
<= 0)
7013 /* OImode shouldn't be used directly. */
7017 if (cum
->float_in_sse
< 2)
7020 if (cum
->float_in_sse
< 1)
7037 if (!type
|| !AGGREGATE_TYPE_P (type
))
7039 cum
->sse_words
+= words
;
7040 cum
->sse_nregs
-= 1;
7041 cum
->sse_regno
+= 1;
7042 if (cum
->sse_nregs
<= 0)
7056 if (!type
|| !AGGREGATE_TYPE_P (type
))
7058 cum
->mmx_words
+= words
;
7059 cum
->mmx_nregs
-= 1;
7060 cum
->mmx_regno
+= 1;
7061 if (cum
->mmx_nregs
<= 0)
7072 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7073 const_tree type
, HOST_WIDE_INT words
, bool named
)
7075 int int_nregs
, sse_nregs
;
7077 /* Unnamed 256bit vector mode parameters are passed on stack. */
7078 if (!named
&& VALID_AVX256_REG_MODE (mode
))
7081 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
7082 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
7084 cum
->nregs
-= int_nregs
;
7085 cum
->sse_nregs
-= sse_nregs
;
7086 cum
->regno
+= int_nregs
;
7087 cum
->sse_regno
+= sse_nregs
;
7091 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
7092 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
7093 cum
->words
+= words
;
7098 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
7099 HOST_WIDE_INT words
)
7101 /* Otherwise, this should be passed indirect. */
7102 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
7104 cum
->words
+= words
;
7112 /* Update the data in CUM to advance over an argument of mode MODE and
7113 data type TYPE. (TYPE is null for libcalls where that information
7114 may not be available.) */
7117 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
7118 const_tree type
, bool named
)
7120 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7121 HOST_WIDE_INT bytes
, words
;
7123 if (mode
== BLKmode
)
7124 bytes
= int_size_in_bytes (type
);
7126 bytes
= GET_MODE_SIZE (mode
);
7127 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7130 mode
= type_natural_mode (type
, NULL
);
7132 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7133 function_arg_advance_ms_64 (cum
, bytes
, words
);
7134 else if (TARGET_64BIT
)
7135 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
7137 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
7140 /* Define where to put the arguments to a function.
7141 Value is zero to push the argument on the stack,
7142 or a hard register in which to store the argument.
7144 MODE is the argument's machine mode.
7145 TYPE is the data type of the argument (as a tree).
7146 This is null for libcalls where that information may
7148 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7149 the preceding args and about the function being called.
7150 NAMED is nonzero if this argument is a named parameter
7151 (otherwise it is an extra parameter matching an ellipsis). */
7154 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7155 enum machine_mode orig_mode
, const_tree type
,
7156 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
7158 static bool warnedsse
, warnedmmx
;
7160 /* Avoid the AL settings for the Unix64 ABI. */
7161 if (mode
== VOIDmode
)
7177 if (words
<= cum
->nregs
)
7179 int regno
= cum
->regno
;
7181 /* Fastcall allocates the first two DWORD (SImode) or
7182 smaller arguments to ECX and EDX if it isn't an
7188 || (type
&& AGGREGATE_TYPE_P (type
)))
7191 /* ECX not EAX is the first allocated register. */
7192 if (regno
== AX_REG
)
7195 return gen_rtx_REG (mode
, regno
);
7200 if (cum
->float_in_sse
< 2)
7203 if (cum
->float_in_sse
< 1)
7207 /* In 32bit, we pass TImode in xmm registers. */
7214 if (!type
|| !AGGREGATE_TYPE_P (type
))
7216 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
7219 warning (0, "SSE vector argument without SSE enabled "
7223 return gen_reg_or_parallel (mode
, orig_mode
,
7224 cum
->sse_regno
+ FIRST_SSE_REG
);
7229 /* OImode shouldn't be used directly. */
7238 if (!type
|| !AGGREGATE_TYPE_P (type
))
7241 return gen_reg_or_parallel (mode
, orig_mode
,
7242 cum
->sse_regno
+ FIRST_SSE_REG
);
7252 if (!type
|| !AGGREGATE_TYPE_P (type
))
7254 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7257 warning (0, "MMX vector argument without MMX enabled "
7261 return gen_reg_or_parallel (mode
, orig_mode
,
7262 cum
->mmx_regno
+ FIRST_MMX_REG
);
7271 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7272 enum machine_mode orig_mode
, const_tree type
, bool named
)
7274 /* Handle a hidden AL argument containing number of registers
7275 for varargs x86-64 functions. */
7276 if (mode
== VOIDmode
)
7277 return GEN_INT (cum
->maybe_vaarg
7278 ? (cum
->sse_nregs
< 0
7279 ? X86_64_SSE_REGPARM_MAX
7294 /* Unnamed 256bit vector mode parameters are passed on stack. */
7300 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7302 &x86_64_int_parameter_registers
[cum
->regno
],
7307 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7308 enum machine_mode orig_mode
, bool named
,
7309 HOST_WIDE_INT bytes
)
7313 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7314 We use value of -2 to specify that current function call is MSABI. */
7315 if (mode
== VOIDmode
)
7316 return GEN_INT (-2);
7318 /* If we've run out of registers, it goes on the stack. */
7319 if (cum
->nregs
== 0)
7322 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7324 /* Only floating point modes are passed in anything but integer regs. */
7325 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7328 regno
= cum
->regno
+ FIRST_SSE_REG
;
7333 /* Unnamed floating parameters are passed in both the
7334 SSE and integer registers. */
7335 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7336 t2
= gen_rtx_REG (mode
, regno
);
7337 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7338 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7339 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7342 /* Handle aggregated types passed in register. */
7343 if (orig_mode
== BLKmode
)
7345 if (bytes
> 0 && bytes
<= 8)
7346 mode
= (bytes
> 4 ? DImode
: SImode
);
7347 if (mode
== BLKmode
)
7351 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7354 /* Return where to put the arguments to a function.
7355 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7357 MODE is the argument's machine mode. TYPE is the data type of the
7358 argument. It is null for libcalls where that information may not be
7359 available. CUM gives information about the preceding args and about
7360 the function being called. NAMED is nonzero if this argument is a
7361 named parameter (otherwise it is an extra parameter matching an
7365 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7366 const_tree type
, bool named
)
7368 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7369 enum machine_mode mode
= omode
;
7370 HOST_WIDE_INT bytes
, words
;
7373 if (mode
== BLKmode
)
7374 bytes
= int_size_in_bytes (type
);
7376 bytes
= GET_MODE_SIZE (mode
);
7377 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7379 /* To simplify the code below, represent vector types with a vector mode
7380 even if MMX/SSE are not active. */
7381 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7382 mode
= type_natural_mode (type
, cum
);
7384 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7385 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7386 else if (TARGET_64BIT
)
7387 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7389 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7394 /* A C expression that indicates when an argument must be passed by
7395 reference. If nonzero for an argument, a copy of that argument is
7396 made in memory and a pointer to the argument is passed instead of
7397 the argument itself. The pointer is passed in whatever way is
7398 appropriate for passing a pointer to that type. */
7401 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7402 enum machine_mode mode ATTRIBUTE_UNUSED
,
7403 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7405 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7407 /* See Windows x64 Software Convention. */
7408 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7410 int msize
= (int) GET_MODE_SIZE (mode
);
7413 /* Arrays are passed by reference. */
7414 if (TREE_CODE (type
) == ARRAY_TYPE
)
7417 if (AGGREGATE_TYPE_P (type
))
7419 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7420 are passed by reference. */
7421 msize
= int_size_in_bytes (type
);
7425 /* __m128 is passed by reference. */
7427 case 1: case 2: case 4: case 8:
7433 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7439 /* Return true when TYPE should be 128bit aligned for 32bit argument
7440 passing ABI. XXX: This function is obsolete and is only used for
7441 checking psABI compatibility with previous versions of GCC. */
7444 ix86_compat_aligned_value_p (const_tree type
)
7446 enum machine_mode mode
= TYPE_MODE (type
);
7447 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7451 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7453 if (TYPE_ALIGN (type
) < 128)
7456 if (AGGREGATE_TYPE_P (type
))
7458 /* Walk the aggregates recursively. */
7459 switch (TREE_CODE (type
))
7463 case QUAL_UNION_TYPE
:
7467 /* Walk all the structure fields. */
7468 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7470 if (TREE_CODE (field
) == FIELD_DECL
7471 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7478 /* Just for use if some languages passes arrays by value. */
7479 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7490 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7491 XXX: This function is obsolete and is only used for checking psABI
7492 compatibility with previous versions of GCC. */
7495 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7496 const_tree type
, unsigned int align
)
7498 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7499 natural boundaries. */
7500 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7502 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7503 make an exception for SSE modes since these require 128bit
7506 The handling here differs from field_alignment. ICC aligns MMX
7507 arguments to 4 byte boundaries, while structure fields are aligned
7508 to 8 byte boundaries. */
7511 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7512 align
= PARM_BOUNDARY
;
7516 if (!ix86_compat_aligned_value_p (type
))
7517 align
= PARM_BOUNDARY
;
7520 if (align
> BIGGEST_ALIGNMENT
)
7521 align
= BIGGEST_ALIGNMENT
;
7525 /* Return true when TYPE should be 128bit aligned for 32bit argument
7529 ix86_contains_aligned_value_p (const_tree type
)
7531 enum machine_mode mode
= TYPE_MODE (type
);
7533 if (mode
== XFmode
|| mode
== XCmode
)
7536 if (TYPE_ALIGN (type
) < 128)
7539 if (AGGREGATE_TYPE_P (type
))
7541 /* Walk the aggregates recursively. */
7542 switch (TREE_CODE (type
))
7546 case QUAL_UNION_TYPE
:
7550 /* Walk all the structure fields. */
7551 for (field
= TYPE_FIELDS (type
);
7553 field
= DECL_CHAIN (field
))
7555 if (TREE_CODE (field
) == FIELD_DECL
7556 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7563 /* Just for use if some languages passes arrays by value. */
7564 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7573 return TYPE_ALIGN (type
) >= 128;
7578 /* Gives the alignment boundary, in bits, of an argument with the
7579 specified mode and type. */
7582 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7587 /* Since the main variant type is used for call, we convert it to
7588 the main variant type. */
7589 type
= TYPE_MAIN_VARIANT (type
);
7590 align
= TYPE_ALIGN (type
);
7593 align
= GET_MODE_ALIGNMENT (mode
);
7594 if (align
< PARM_BOUNDARY
)
7595 align
= PARM_BOUNDARY
;
7599 unsigned int saved_align
= align
;
7603 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7606 if (mode
== XFmode
|| mode
== XCmode
)
7607 align
= PARM_BOUNDARY
;
7609 else if (!ix86_contains_aligned_value_p (type
))
7610 align
= PARM_BOUNDARY
;
7613 align
= PARM_BOUNDARY
;
7618 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7622 inform (input_location
,
7623 "The ABI for passing parameters with %d-byte"
7624 " alignment has changed in GCC 4.6",
7625 align
/ BITS_PER_UNIT
);
7632 /* Return true if N is a possible register number of function value. */
7635 ix86_function_value_regno_p (const unsigned int regno
)
7642 case FIRST_FLOAT_REG
:
7643 /* TODO: The function should depend on current function ABI but
7644 builtins.c would need updating then. Therefore we use the
7646 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7648 return TARGET_FLOAT_RETURNS_IN_80387
;
7654 if (TARGET_MACHO
|| TARGET_64BIT
)
7662 /* Define how to find the value returned by a function.
7663 VALTYPE is the data type of the value (as a tree).
7664 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7665 otherwise, FUNC is 0. */
7668 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7669 const_tree fntype
, const_tree fn
)
7673 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7674 we normally prevent this case when mmx is not available. However
7675 some ABIs may require the result to be returned like DImode. */
7676 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7677 regno
= FIRST_MMX_REG
;
7679 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7680 we prevent this case when sse is not available. However some ABIs
7681 may require the result to be returned like integer TImode. */
7682 else if (mode
== TImode
7683 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7684 regno
= FIRST_SSE_REG
;
7686 /* 32-byte vector modes in %ymm0. */
7687 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7688 regno
= FIRST_SSE_REG
;
7690 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7691 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7692 regno
= FIRST_FLOAT_REG
;
7694 /* Most things go in %eax. */
7697 /* Override FP return register with %xmm0 for local functions when
7698 SSE math is enabled or for functions with sseregparm attribute. */
7699 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7701 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7702 if ((sse_level
>= 1 && mode
== SFmode
)
7703 || (sse_level
== 2 && mode
== DFmode
))
7704 regno
= FIRST_SSE_REG
;
7707 /* OImode shouldn't be used directly. */
7708 gcc_assert (mode
!= OImode
);
7710 return gen_rtx_REG (orig_mode
, regno
);
7714 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7719 /* Handle libcalls, which don't provide a type node. */
7720 if (valtype
== NULL
)
7734 regno
= FIRST_SSE_REG
;
7738 regno
= FIRST_FLOAT_REG
;
7746 return gen_rtx_REG (mode
, regno
);
7748 else if (POINTER_TYPE_P (valtype
))
7750 /* Pointers are always returned in word_mode. */
7754 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7755 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7756 x86_64_int_return_registers
, 0);
7758 /* For zero sized structures, construct_container returns NULL, but we
7759 need to keep rest of compiler happy by returning meaningful value. */
7761 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7767 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7770 unsigned int regno
= AX_REG
;
7774 switch (GET_MODE_SIZE (mode
))
7777 if (valtype
!= NULL_TREE
7778 && !VECTOR_INTEGER_TYPE_P (valtype
)
7779 && !VECTOR_INTEGER_TYPE_P (valtype
)
7780 && !INTEGRAL_TYPE_P (valtype
)
7781 && !VECTOR_FLOAT_TYPE_P (valtype
))
7783 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7784 && !COMPLEX_MODE_P (mode
))
7785 regno
= FIRST_SSE_REG
;
7789 if (mode
== SFmode
|| mode
== DFmode
)
7790 regno
= FIRST_SSE_REG
;
7796 return gen_rtx_REG (orig_mode
, regno
);
7800 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7801 enum machine_mode orig_mode
, enum machine_mode mode
)
7803 const_tree fn
, fntype
;
7806 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7807 fn
= fntype_or_decl
;
7808 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7810 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7811 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7812 else if (TARGET_64BIT
)
7813 return function_value_64 (orig_mode
, mode
, valtype
);
7815 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7819 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7820 bool outgoing ATTRIBUTE_UNUSED
)
7822 enum machine_mode mode
, orig_mode
;
7824 orig_mode
= TYPE_MODE (valtype
);
7825 mode
= type_natural_mode (valtype
, NULL
);
7826 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7829 /* Pointer function arguments and return values are promoted to
7832 static enum machine_mode
7833 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7834 int *punsignedp
, const_tree fntype
,
7837 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7839 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7842 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7846 /* Return true if a structure, union or array with MODE containing FIELD
7847 should be accessed using BLKmode. */
7850 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7852 /* Union with XFmode must be in BLKmode. */
7853 return (mode
== XFmode
7854 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7855 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7859 ix86_libcall_value (enum machine_mode mode
)
7861 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7864 /* Return true iff type is returned in memory. */
7866 static bool ATTRIBUTE_UNUSED
7867 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7871 if (mode
== BLKmode
)
7874 size
= int_size_in_bytes (type
);
7876 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7879 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7881 /* User-created vectors small enough to fit in EAX. */
7885 /* MMX/3dNow values are returned in MM0,
7886 except when it doesn't exits or the ABI prescribes otherwise. */
7888 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7890 /* SSE values are returned in XMM0, except when it doesn't exist. */
7894 /* AVX values are returned in YMM0, except when it doesn't exist. */
7905 /* OImode shouldn't be used directly. */
7906 gcc_assert (mode
!= OImode
);
7911 static bool ATTRIBUTE_UNUSED
7912 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7914 int needed_intregs
, needed_sseregs
;
7915 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7918 static bool ATTRIBUTE_UNUSED
7919 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7921 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7923 /* __m128 is returned in xmm0. */
7924 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7925 || VECTOR_FLOAT_TYPE_P (type
))
7926 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7927 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7930 /* Otherwise, the size must be exactly in [1248]. */
7931 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7935 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7937 #ifdef SUBTARGET_RETURN_IN_MEMORY
7938 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7940 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7944 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7945 return return_in_memory_ms_64 (type
, mode
);
7947 return return_in_memory_64 (type
, mode
);
7950 return return_in_memory_32 (type
, mode
);
7954 /* When returning SSE vector types, we have a choice of either
7955 (1) being abi incompatible with a -march switch, or
7956 (2) generating an error.
7957 Given no good solution, I think the safest thing is one warning.
7958 The user won't be able to use -Werror, but....
7960 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7961 called in response to actually generating a caller or callee that
7962 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7963 via aggregate_value_p for general type probing from tree-ssa. */
7966 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7968 static bool warnedsse
, warnedmmx
;
7970 if (!TARGET_64BIT
&& type
)
7972 /* Look at the return type of the function, not the function type. */
7973 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7975 if (!TARGET_SSE
&& !warnedsse
)
7978 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7981 warning (0, "SSE vector return without SSE enabled "
7986 if (!TARGET_MMX
&& !warnedmmx
)
7988 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7991 warning (0, "MMX vector return without MMX enabled "
8001 /* Create the va_list data type. */
8003 /* Returns the calling convention specific va_list date type.
8004 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8007 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
8009 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
8011 /* For i386 we use plain pointer to argument area. */
8012 if (!TARGET_64BIT
|| abi
== MS_ABI
)
8013 return build_pointer_type (char_type_node
);
8015 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
8016 type_decl
= build_decl (BUILTINS_LOCATION
,
8017 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
8019 f_gpr
= build_decl (BUILTINS_LOCATION
,
8020 FIELD_DECL
, get_identifier ("gp_offset"),
8021 unsigned_type_node
);
8022 f_fpr
= build_decl (BUILTINS_LOCATION
,
8023 FIELD_DECL
, get_identifier ("fp_offset"),
8024 unsigned_type_node
);
8025 f_ovf
= build_decl (BUILTINS_LOCATION
,
8026 FIELD_DECL
, get_identifier ("overflow_arg_area"),
8028 f_sav
= build_decl (BUILTINS_LOCATION
,
8029 FIELD_DECL
, get_identifier ("reg_save_area"),
8032 va_list_gpr_counter_field
= f_gpr
;
8033 va_list_fpr_counter_field
= f_fpr
;
8035 DECL_FIELD_CONTEXT (f_gpr
) = record
;
8036 DECL_FIELD_CONTEXT (f_fpr
) = record
;
8037 DECL_FIELD_CONTEXT (f_ovf
) = record
;
8038 DECL_FIELD_CONTEXT (f_sav
) = record
;
8040 TYPE_STUB_DECL (record
) = type_decl
;
8041 TYPE_NAME (record
) = type_decl
;
8042 TYPE_FIELDS (record
) = f_gpr
;
8043 DECL_CHAIN (f_gpr
) = f_fpr
;
8044 DECL_CHAIN (f_fpr
) = f_ovf
;
8045 DECL_CHAIN (f_ovf
) = f_sav
;
8047 layout_type (record
);
8049 /* The correct type is an array type of one element. */
8050 return build_array_type (record
, build_index_type (size_zero_node
));
8053 /* Setup the builtin va_list data type and for 64-bit the additional
8054 calling convention specific va_list data types. */
8057 ix86_build_builtin_va_list (void)
8059 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
8061 /* Initialize abi specific va_list builtin types. */
8065 if (ix86_abi
== MS_ABI
)
8067 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
8068 if (TREE_CODE (t
) != RECORD_TYPE
)
8069 t
= build_variant_type_copy (t
);
8070 sysv_va_list_type_node
= t
;
8075 if (TREE_CODE (t
) != RECORD_TYPE
)
8076 t
= build_variant_type_copy (t
);
8077 sysv_va_list_type_node
= t
;
8079 if (ix86_abi
!= MS_ABI
)
8081 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
8082 if (TREE_CODE (t
) != RECORD_TYPE
)
8083 t
= build_variant_type_copy (t
);
8084 ms_va_list_type_node
= t
;
8089 if (TREE_CODE (t
) != RECORD_TYPE
)
8090 t
= build_variant_type_copy (t
);
8091 ms_va_list_type_node
= t
;
8098 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8101 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
8107 /* GPR size of varargs save area. */
8108 if (cfun
->va_list_gpr_size
)
8109 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
8111 ix86_varargs_gpr_size
= 0;
8113 /* FPR size of varargs save area. We don't need it if we don't pass
8114 anything in SSE registers. */
8115 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8116 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
8118 ix86_varargs_fpr_size
= 0;
8120 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
8123 save_area
= frame_pointer_rtx
;
8124 set
= get_varargs_alias_set ();
8126 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
8127 if (max
> X86_64_REGPARM_MAX
)
8128 max
= X86_64_REGPARM_MAX
;
8130 for (i
= cum
->regno
; i
< max
; i
++)
8132 mem
= gen_rtx_MEM (word_mode
,
8133 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
8134 MEM_NOTRAP_P (mem
) = 1;
8135 set_mem_alias_set (mem
, set
);
8136 emit_move_insn (mem
,
8137 gen_rtx_REG (word_mode
,
8138 x86_64_int_parameter_registers
[i
]));
8141 if (ix86_varargs_fpr_size
)
8143 enum machine_mode smode
;
8146 /* Now emit code to save SSE registers. The AX parameter contains number
8147 of SSE parameter registers used to call this function, though all we
8148 actually check here is the zero/non-zero status. */
8150 label
= gen_label_rtx ();
8151 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
8152 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
8155 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8156 we used movdqa (i.e. TImode) instead? Perhaps even better would
8157 be if we could determine the real mode of the data, via a hook
8158 into pass_stdarg. Ignore all that for now. */
8160 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
8161 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
8163 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
8164 if (max
> X86_64_SSE_REGPARM_MAX
)
8165 max
= X86_64_SSE_REGPARM_MAX
;
8167 for (i
= cum
->sse_regno
; i
< max
; ++i
)
8169 mem
= plus_constant (Pmode
, save_area
,
8170 i
* 16 + ix86_varargs_gpr_size
);
8171 mem
= gen_rtx_MEM (smode
, mem
);
8172 MEM_NOTRAP_P (mem
) = 1;
8173 set_mem_alias_set (mem
, set
);
8174 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
8176 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
8184 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
8186 alias_set_type set
= get_varargs_alias_set ();
8189 /* Reset to zero, as there might be a sysv vaarg used
8191 ix86_varargs_gpr_size
= 0;
8192 ix86_varargs_fpr_size
= 0;
8194 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
8198 mem
= gen_rtx_MEM (Pmode
,
8199 plus_constant (Pmode
, virtual_incoming_args_rtx
,
8200 i
* UNITS_PER_WORD
));
8201 MEM_NOTRAP_P (mem
) = 1;
8202 set_mem_alias_set (mem
, set
);
8204 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
8205 emit_move_insn (mem
, reg
);
8210 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
8211 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8214 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8215 CUMULATIVE_ARGS next_cum
;
8218 /* This argument doesn't appear to be used anymore. Which is good,
8219 because the old code here didn't suppress rtl generation. */
8220 gcc_assert (!no_rtl
);
8225 fntype
= TREE_TYPE (current_function_decl
);
8227 /* For varargs, we do not want to skip the dummy va_dcl argument.
8228 For stdargs, we do want to skip the last named argument. */
8230 if (stdarg_p (fntype
))
8231 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
8234 if (cum
->call_abi
== MS_ABI
)
8235 setup_incoming_varargs_ms_64 (&next_cum
);
8237 setup_incoming_varargs_64 (&next_cum
);
8240 /* Checks if TYPE is of kind va_list char *. */
8243 is_va_list_char_pointer (tree type
)
8247 /* For 32-bit it is always true. */
8250 canonic
= ix86_canonical_va_list_type (type
);
8251 return (canonic
== ms_va_list_type_node
8252 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8255 /* Implement va_start. */
8258 ix86_va_start (tree valist
, rtx nextarg
)
8260 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8261 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8262 tree gpr
, fpr
, ovf
, sav
, t
;
8266 if (flag_split_stack
8267 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8269 unsigned int scratch_regno
;
8271 /* When we are splitting the stack, we can't refer to the stack
8272 arguments using internal_arg_pointer, because they may be on
8273 the old stack. The split stack prologue will arrange to
8274 leave a pointer to the old stack arguments in a scratch
8275 register, which we here copy to a pseudo-register. The split
8276 stack prologue can't set the pseudo-register directly because
8277 it (the prologue) runs before any registers have been saved. */
8279 scratch_regno
= split_stack_prologue_scratch_regno ();
8280 if (scratch_regno
!= INVALID_REGNUM
)
8284 reg
= gen_reg_rtx (Pmode
);
8285 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8288 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8292 push_topmost_sequence ();
8293 emit_insn_after (seq
, entry_of_function ());
8294 pop_topmost_sequence ();
8298 /* Only 64bit target needs something special. */
8299 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8301 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8302 std_expand_builtin_va_start (valist
, nextarg
);
8307 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8308 next
= expand_binop (ptr_mode
, add_optab
,
8309 cfun
->machine
->split_stack_varargs_pointer
,
8310 crtl
->args
.arg_offset_rtx
,
8311 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8312 convert_move (va_r
, next
, 0);
8317 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8318 f_fpr
= DECL_CHAIN (f_gpr
);
8319 f_ovf
= DECL_CHAIN (f_fpr
);
8320 f_sav
= DECL_CHAIN (f_ovf
);
8322 valist
= build_simple_mem_ref (valist
);
8323 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8324 /* The following should be folded into the MEM_REF offset. */
8325 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8327 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8329 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8331 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8334 /* Count number of gp and fp argument registers used. */
8335 words
= crtl
->args
.info
.words
;
8336 n_gpr
= crtl
->args
.info
.regno
;
8337 n_fpr
= crtl
->args
.info
.sse_regno
;
8339 if (cfun
->va_list_gpr_size
)
8341 type
= TREE_TYPE (gpr
);
8342 t
= build2 (MODIFY_EXPR
, type
,
8343 gpr
, build_int_cst (type
, n_gpr
* 8));
8344 TREE_SIDE_EFFECTS (t
) = 1;
8345 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8348 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8350 type
= TREE_TYPE (fpr
);
8351 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8352 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8353 TREE_SIDE_EFFECTS (t
) = 1;
8354 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8357 /* Find the overflow area. */
8358 type
= TREE_TYPE (ovf
);
8359 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8360 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8362 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8363 t
= make_tree (type
, ovf_rtx
);
8365 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8366 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8367 TREE_SIDE_EFFECTS (t
) = 1;
8368 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8370 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8372 /* Find the register save area.
8373 Prologue of the function save it right above stack frame. */
8374 type
= TREE_TYPE (sav
);
8375 t
= make_tree (type
, frame_pointer_rtx
);
8376 if (!ix86_varargs_gpr_size
)
8377 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8378 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8379 TREE_SIDE_EFFECTS (t
) = 1;
8380 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8384 /* Implement va_arg. */
8387 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8390 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8391 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8392 tree gpr
, fpr
, ovf
, sav
, t
;
8394 tree lab_false
, lab_over
= NULL_TREE
;
8399 enum machine_mode nat_mode
;
8400 unsigned int arg_boundary
;
8402 /* Only 64bit target needs something special. */
8403 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8404 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8406 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8407 f_fpr
= DECL_CHAIN (f_gpr
);
8408 f_ovf
= DECL_CHAIN (f_fpr
);
8409 f_sav
= DECL_CHAIN (f_ovf
);
8411 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8412 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8413 valist
= build_va_arg_indirect_ref (valist
);
8414 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8415 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8416 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8418 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8420 type
= build_pointer_type (type
);
8421 size
= int_size_in_bytes (type
);
8422 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8424 nat_mode
= type_natural_mode (type
, NULL
);
8433 /* Unnamed 256bit vector mode parameters are passed on stack. */
8434 if (!TARGET_64BIT_MS_ABI
)
8441 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8442 type
, 0, X86_64_REGPARM_MAX
,
8443 X86_64_SSE_REGPARM_MAX
, intreg
,
8448 /* Pull the value out of the saved registers. */
8450 addr
= create_tmp_var (ptr_type_node
, "addr");
8454 int needed_intregs
, needed_sseregs
;
8456 tree int_addr
, sse_addr
;
8458 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8459 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8461 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8463 need_temp
= (!REG_P (container
)
8464 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8465 || TYPE_ALIGN (type
) > 128));
8467 /* In case we are passing structure, verify that it is consecutive block
8468 on the register save area. If not we need to do moves. */
8469 if (!need_temp
&& !REG_P (container
))
8471 /* Verify that all registers are strictly consecutive */
8472 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8476 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8478 rtx slot
= XVECEXP (container
, 0, i
);
8479 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8480 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8488 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8490 rtx slot
= XVECEXP (container
, 0, i
);
8491 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8492 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8504 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8505 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8508 /* First ensure that we fit completely in registers. */
8511 t
= build_int_cst (TREE_TYPE (gpr
),
8512 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8513 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8514 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8515 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8516 gimplify_and_add (t
, pre_p
);
8520 t
= build_int_cst (TREE_TYPE (fpr
),
8521 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8522 + X86_64_REGPARM_MAX
* 8);
8523 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8524 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8525 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8526 gimplify_and_add (t
, pre_p
);
8529 /* Compute index to start of area used for integer regs. */
8532 /* int_addr = gpr + sav; */
8533 t
= fold_build_pointer_plus (sav
, gpr
);
8534 gimplify_assign (int_addr
, t
, pre_p
);
8538 /* sse_addr = fpr + sav; */
8539 t
= fold_build_pointer_plus (sav
, fpr
);
8540 gimplify_assign (sse_addr
, t
, pre_p
);
8544 int i
, prev_size
= 0;
8545 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8548 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8549 gimplify_assign (addr
, t
, pre_p
);
8551 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8553 rtx slot
= XVECEXP (container
, 0, i
);
8554 rtx reg
= XEXP (slot
, 0);
8555 enum machine_mode mode
= GET_MODE (reg
);
8561 tree dest_addr
, dest
;
8562 int cur_size
= GET_MODE_SIZE (mode
);
8564 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8565 prev_size
= INTVAL (XEXP (slot
, 1));
8566 if (prev_size
+ cur_size
> size
)
8568 cur_size
= size
- prev_size
;
8569 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8570 if (mode
== BLKmode
)
8573 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8574 if (mode
== GET_MODE (reg
))
8575 addr_type
= build_pointer_type (piece_type
);
8577 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8579 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8582 if (SSE_REGNO_P (REGNO (reg
)))
8584 src_addr
= sse_addr
;
8585 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8589 src_addr
= int_addr
;
8590 src_offset
= REGNO (reg
) * 8;
8592 src_addr
= fold_convert (addr_type
, src_addr
);
8593 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8595 dest_addr
= fold_convert (daddr_type
, addr
);
8596 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8597 if (cur_size
== GET_MODE_SIZE (mode
))
8599 src
= build_va_arg_indirect_ref (src_addr
);
8600 dest
= build_va_arg_indirect_ref (dest_addr
);
8602 gimplify_assign (dest
, src
, pre_p
);
8607 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8608 3, dest_addr
, src_addr
,
8609 size_int (cur_size
));
8610 gimplify_and_add (copy
, pre_p
);
8612 prev_size
+= cur_size
;
8618 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8619 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8620 gimplify_assign (gpr
, t
, pre_p
);
8625 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8626 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8627 gimplify_assign (fpr
, t
, pre_p
);
8630 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8632 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8635 /* ... otherwise out of the overflow area. */
8637 /* When we align parameter on stack for caller, if the parameter
8638 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8639 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8640 here with caller. */
8641 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8642 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8643 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8645 /* Care for on-stack alignment if needed. */
8646 if (arg_boundary
<= 64 || size
== 0)
8650 HOST_WIDE_INT align
= arg_boundary
/ 8;
8651 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8652 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8653 build_int_cst (TREE_TYPE (t
), -align
));
8656 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8657 gimplify_assign (addr
, t
, pre_p
);
8659 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8660 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8663 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8665 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8666 addr
= fold_convert (ptrtype
, addr
);
8669 addr
= build_va_arg_indirect_ref (addr
);
8670 return build_va_arg_indirect_ref (addr
);
8673 /* Return true if OPNUM's MEM should be matched
8674 in movabs* patterns. */
8677 ix86_check_movabs (rtx insn
, int opnum
)
8681 set
= PATTERN (insn
);
8682 if (GET_CODE (set
) == PARALLEL
)
8683 set
= XVECEXP (set
, 0, 0);
8684 gcc_assert (GET_CODE (set
) == SET
);
8685 mem
= XEXP (set
, opnum
);
8686 while (GET_CODE (mem
) == SUBREG
)
8687 mem
= SUBREG_REG (mem
);
8688 gcc_assert (MEM_P (mem
));
8689 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8692 /* Initialize the table of extra 80387 mathematical constants. */
8695 init_ext_80387_constants (void)
8697 static const char * cst
[5] =
8699 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8700 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8701 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8702 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8703 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8707 for (i
= 0; i
< 5; i
++)
8709 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8710 /* Ensure each constant is rounded to XFmode precision. */
8711 real_convert (&ext_80387_constants_table
[i
],
8712 XFmode
, &ext_80387_constants_table
[i
]);
8715 ext_80387_constants_init
= 1;
8718 /* Return non-zero if the constant is something that
8719 can be loaded with a special instruction. */
8722 standard_80387_constant_p (rtx x
)
8724 enum machine_mode mode
= GET_MODE (x
);
8728 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8731 if (x
== CONST0_RTX (mode
))
8733 if (x
== CONST1_RTX (mode
))
8736 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8738 /* For XFmode constants, try to find a special 80387 instruction when
8739 optimizing for size or on those CPUs that benefit from them. */
8741 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8745 if (! ext_80387_constants_init
)
8746 init_ext_80387_constants ();
8748 for (i
= 0; i
< 5; i
++)
8749 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8753 /* Load of the constant -0.0 or -1.0 will be split as
8754 fldz;fchs or fld1;fchs sequence. */
8755 if (real_isnegzero (&r
))
8757 if (real_identical (&r
, &dconstm1
))
8763 /* Return the opcode of the special instruction to be used to load
8767 standard_80387_constant_opcode (rtx x
)
8769 switch (standard_80387_constant_p (x
))
8793 /* Return the CONST_DOUBLE representing the 80387 constant that is
8794 loaded by the specified special instruction. The argument IDX
8795 matches the return value from standard_80387_constant_p. */
8798 standard_80387_constant_rtx (int idx
)
8802 if (! ext_80387_constants_init
)
8803 init_ext_80387_constants ();
8819 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8823 /* Return 1 if X is all 0s and 2 if x is all 1s
8824 in supported SSE/AVX vector mode. */
8827 standard_sse_constant_p (rtx x
)
8829 enum machine_mode mode
= GET_MODE (x
);
8831 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8833 if (vector_all_ones_operand (x
, mode
))
8855 /* Return the opcode of the special instruction to be used to load
8859 standard_sse_constant_opcode (rtx insn
, rtx x
)
8861 switch (standard_sse_constant_p (x
))
8864 switch (get_attr_mode (insn
))
8867 return "%vpxor\t%0, %d0";
8869 return "%vxorpd\t%0, %d0";
8871 return "%vxorps\t%0, %d0";
8874 return "vpxor\t%x0, %x0, %x0";
8876 return "vxorpd\t%x0, %x0, %x0";
8878 return "vxorps\t%x0, %x0, %x0";
8886 return "vpcmpeqd\t%0, %0, %0";
8888 return "pcmpeqd\t%0, %0";
8896 /* Returns true if OP contains a symbol reference */
8899 symbolic_reference_mentioned_p (rtx op
)
8904 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8907 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8908 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8914 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8915 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8919 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8926 /* Return true if it is appropriate to emit `ret' instructions in the
8927 body of a function. Do this only if the epilogue is simple, needing a
8928 couple of insns. Prior to reloading, we can't tell how many registers
8929 must be saved, so return false then. Return false if there is no frame
8930 marker to de-allocate. */
8933 ix86_can_use_return_insn_p (void)
8935 struct ix86_frame frame
;
8937 if (! reload_completed
|| frame_pointer_needed
)
8940 /* Don't allow more than 32k pop, since that's all we can do
8941 with one instruction. */
8942 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8945 ix86_compute_frame_layout (&frame
);
8946 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8947 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8950 /* Value should be nonzero if functions must have frame pointers.
8951 Zero means the frame pointer need not be set up (and parms may
8952 be accessed via the stack pointer) in functions that seem suitable. */
8955 ix86_frame_pointer_required (void)
8957 /* If we accessed previous frames, then the generated code expects
8958 to be able to access the saved ebp value in our frame. */
8959 if (cfun
->machine
->accesses_prev_frame
)
8962 /* Several x86 os'es need a frame pointer for other reasons,
8963 usually pertaining to setjmp. */
8964 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8967 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8968 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8971 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8972 allocation is 4GB. */
8973 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8976 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8977 turns off the frame pointer by default. Turn it back on now if
8978 we've not got a leaf function. */
8979 if (TARGET_OMIT_LEAF_FRAME_POINTER
8981 || ix86_current_function_calls_tls_descriptor
))
8984 if (crtl
->profile
&& !flag_fentry
)
8990 /* Record that the current function accesses previous call frames. */
8993 ix86_setup_frame_addresses (void)
8995 cfun
->machine
->accesses_prev_frame
= 1;
/* Prefer a hidden, link-once (COMDAT) definition for the PC thunks when
   the assembler/linker support it; otherwise fall back to plain local
   labels.  */
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Bitmask of general registers for which a get-PC thunk label has been
   emitted (bit N set => thunk for register N is needed/used).  */
static int pic_labels_used;
9008 /* Fills in the label name that should be used for a pc thunk for
9009 the given register. */
9012 get_pc_thunk_name (char name
[32], unsigned int regno
)
9014 gcc_assert (!TARGET_64BIT
);
9016 if (USE_HIDDEN_LINKONCE
)
9017 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
9019 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
9023 /* This function generates code for -fpic that loads %ebx with
9024 the return address of the caller and then returns. */
9027 ix86_code_end (void)
9032 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
9037 if (!(pic_labels_used
& (1 << regno
)))
9040 get_pc_thunk_name (name
, regno
);
9042 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
9043 get_identifier (name
),
9044 build_function_type_list (void_type_node
, NULL_TREE
));
9045 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
9046 NULL_TREE
, void_type_node
);
9047 TREE_PUBLIC (decl
) = 1;
9048 TREE_STATIC (decl
) = 1;
9049 DECL_IGNORED_P (decl
) = 1;
9054 switch_to_section (darwin_sections
[text_coal_section
]);
9055 fputs ("\t.weak_definition\t", asm_out_file
);
9056 assemble_name (asm_out_file
, name
);
9057 fputs ("\n\t.private_extern\t", asm_out_file
);
9058 assemble_name (asm_out_file
, name
);
9059 putc ('\n', asm_out_file
);
9060 ASM_OUTPUT_LABEL (asm_out_file
, name
);
9061 DECL_WEAK (decl
) = 1;
9065 if (USE_HIDDEN_LINKONCE
)
9067 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
9069 targetm
.asm_out
.unique_section (decl
, 0);
9070 switch_to_section (get_named_section (decl
, NULL
, 0));
9072 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
9073 fputs ("\t.hidden\t", asm_out_file
);
9074 assemble_name (asm_out_file
, name
);
9075 putc ('\n', asm_out_file
);
9076 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
9080 switch_to_section (text_section
);
9081 ASM_OUTPUT_LABEL (asm_out_file
, name
);
9084 DECL_INITIAL (decl
) = make_node (BLOCK
);
9085 current_function_decl
= decl
;
9086 init_function_start (decl
);
9087 first_function_block_is_cold
= false;
9088 /* Make sure unwind info is emitted for the thunk if needed. */
9089 final_start_function (emit_barrier (), asm_out_file
, 1);
9091 /* Pad stack IP move with 4 instructions (two NOPs count
9092 as one instruction). */
9093 if (TARGET_PAD_SHORT_FUNCTION
)
9098 fputs ("\tnop\n", asm_out_file
);
9101 xops
[0] = gen_rtx_REG (Pmode
, regno
);
9102 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
9103 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
9104 fputs ("\tret\n", asm_out_file
);
9105 final_end_function ();
9106 init_insn_lengths ();
9107 free_after_compilation (cfun
);
9109 current_function_decl
= NULL
;
9112 if (flag_split_stack
)
9113 file_end_indicate_split_stack ();
9116 /* Emit code for the SET_GOT patterns. */
9119 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
9125 if (TARGET_VXWORKS_RTP
&& flag_pic
)
9127 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9128 xops
[2] = gen_rtx_MEM (Pmode
,
9129 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
9130 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
9132 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9133 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9134 an unadorned address. */
9135 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
9136 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
9137 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
9141 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
9146 /* We don't need a pic base, we're not producing pic. */
9149 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
9150 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
9151 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9152 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
9157 get_pc_thunk_name (name
, REGNO (dest
));
9158 pic_labels_used
|= 1 << REGNO (dest
);
9160 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
9161 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
9162 output_asm_insn ("call\t%X2", xops
);
9165 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9166 This is what will be referenced by the Mach-O PIC subsystem. */
9167 if (machopic_should_output_picbase_label () || !label
)
9168 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
9170 /* When we are restoring the pic base at the site of a nonlocal label,
9171 and we decided to emit the pic base above, we will still output a
9172 local label used for calculating the correction offset (even though
9173 the offset will be 0 in that case). */
9175 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9176 CODE_LABEL_NUMBER (label
));
9181 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
9186 /* Generate an "push" pattern for input ARG. */
9191 struct machine_function
*m
= cfun
->machine
;
9193 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9194 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
9195 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
9197 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9198 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9200 return gen_rtx_SET (VOIDmode
,
9201 gen_rtx_MEM (word_mode
,
9202 gen_rtx_PRE_DEC (Pmode
,
9203 stack_pointer_rtx
)),
9207 /* Generate an "pop" pattern for input ARG. */
9212 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9213 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9215 return gen_rtx_SET (VOIDmode
,
9217 gen_rtx_MEM (word_mode
,
9218 gen_rtx_POST_INC (Pmode
,
9219 stack_pointer_rtx
)));
9222 /* Return >= 0 if there is an unused call-clobbered register available
9223 for the entire function. */
9226 ix86_select_alt_pic_regnum (void)
9230 && !ix86_current_function_calls_tls_descriptor
)
9233 /* Can't use the same register for both PIC and DRAP. */
9235 drap
= REGNO (crtl
->drap_reg
);
9238 for (i
= 2; i
>= 0; --i
)
9239 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9243 return INVALID_REGNUM
;
9246 /* Return TRUE if we need to save REGNO. */
9249 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9251 if (pic_offset_table_rtx
9252 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9253 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9255 || crtl
->calls_eh_return
9256 || crtl
->uses_const_pool
9257 || cfun
->has_nonlocal_label
))
9258 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9260 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9265 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9266 if (test
== INVALID_REGNUM
)
9273 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9276 return (df_regs_ever_live_p (regno
)
9277 && !call_used_regs
[regno
]
9278 && !fixed_regs
[regno
]
9279 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9282 /* Return number of saved general prupose registers. */
9285 ix86_nsaved_regs (void)
9290 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9291 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9296 /* Return number of saved SSE registrers. */
9299 ix86_nsaved_sseregs (void)
9304 if (!TARGET_64BIT_MS_ABI
)
9306 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9307 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9312 /* Given FROM and TO register numbers, say whether this elimination is
9313 allowed. If stack alignment is needed, we can only replace argument
9314 pointer with hard frame pointer, or replace frame pointer with stack
9315 pointer. Otherwise, frame pointer elimination is automatically
9316 handled and all other eliminations are valid. */
9319 ix86_can_eliminate (const int from
, const int to
)
9321 if (stack_realign_fp
)
9322 return ((from
== ARG_POINTER_REGNUM
9323 && to
== HARD_FRAME_POINTER_REGNUM
)
9324 || (from
== FRAME_POINTER_REGNUM
9325 && to
== STACK_POINTER_REGNUM
));
9327 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9330 /* Return the offset between two registers, one to be eliminated, and the other
9331 its replacement, at the start of a routine. */
9334 ix86_initial_elimination_offset (int from
, int to
)
9336 struct ix86_frame frame
;
9337 ix86_compute_frame_layout (&frame
);
9339 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9340 return frame
.hard_frame_pointer_offset
;
9341 else if (from
== FRAME_POINTER_REGNUM
9342 && to
== HARD_FRAME_POINTER_REGNUM
)
9343 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9346 gcc_assert (to
== STACK_POINTER_REGNUM
);
9348 if (from
== ARG_POINTER_REGNUM
)
9349 return frame
.stack_pointer_offset
;
9351 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9352 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9356 /* In a dynamically-aligned function, we can't know the offset from
9357 stack pointer to frame pointer, so we must ensure that setjmp
9358 eliminates fp against the hard fp (%ebp) rather than trying to
9359 index from %esp up to the top of the frame across a gap that is
9360 of unknown (at compile-time) size. */
9362 ix86_builtin_setjmp_frame_value (void)
9364 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
9373 /* Fill structure ix86_frame about frame of currently computed function. */
9376 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9378 unsigned HOST_WIDE_INT stack_alignment_needed
;
9379 HOST_WIDE_INT offset
;
9380 unsigned HOST_WIDE_INT preferred_alignment
;
9381 HOST_WIDE_INT size
= get_frame_size ();
9382 HOST_WIDE_INT to_allocate
;
9384 frame
->nregs
= ix86_nsaved_regs ();
9385 frame
->nsseregs
= ix86_nsaved_sseregs ();
9387 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9388 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9390 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9391 function prologues and leaf. */
9392 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9393 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9394 || ix86_current_function_calls_tls_descriptor
))
9396 preferred_alignment
= 16;
9397 stack_alignment_needed
= 16;
9398 crtl
->preferred_stack_boundary
= 128;
9399 crtl
->stack_alignment_needed
= 128;
9402 gcc_assert (!size
|| stack_alignment_needed
);
9403 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9404 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9406 /* For SEH we have to limit the amount of code movement into the prologue.
9407 At present we do this via a BLOCKAGE, at which point there's very little
9408 scheduling that can be done, which means that there's very little point
9409 in doing anything except PUSHs. */
9411 cfun
->machine
->use_fast_prologue_epilogue
= false;
9413 /* During reload iteration the amount of registers saved can change.
9414 Recompute the value as needed. Do not recompute when amount of registers
9415 didn't change as reload does multiple calls to the function and does not
9416 expect the decision to change within single iteration. */
9417 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR
)
9418 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9420 int count
= frame
->nregs
;
9421 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9423 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9425 /* The fast prologue uses move instead of push to save registers. This
9426 is significantly longer, but also executes faster as modern hardware
9427 can execute the moves in parallel, but can't do that for push/pop.
9429 Be careful about choosing what prologue to emit: When function takes
9430 many instructions to execute we may use slow version as well as in
9431 case function is known to be outside hot spot (this is known with
9432 feedback only). Weight the size of function by number of registers
9433 to save as it is cheap to use one or two push instructions but very
9434 slow to use many of them. */
9436 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9437 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9438 || (flag_branch_probabilities
9439 && node
->frequency
< NODE_FREQUENCY_HOT
))
9440 cfun
->machine
->use_fast_prologue_epilogue
= false;
9442 cfun
->machine
->use_fast_prologue_epilogue
9443 = !expensive_function_p (count
);
9446 frame
->save_regs_using_mov
9447 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9448 /* If static stack checking is enabled and done with probes,
9449 the registers need to be saved before allocating the frame. */
9450 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9452 /* Skip return address. */
9453 offset
= UNITS_PER_WORD
;
9455 /* Skip pushed static chain. */
9456 if (ix86_static_chain_on_stack
)
9457 offset
+= UNITS_PER_WORD
;
9459 /* Skip saved base pointer. */
9460 if (frame_pointer_needed
)
9461 offset
+= UNITS_PER_WORD
;
9462 frame
->hfp_save_offset
= offset
;
9464 /* The traditional frame pointer location is at the top of the frame. */
9465 frame
->hard_frame_pointer_offset
= offset
;
9467 /* Register save area */
9468 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9469 frame
->reg_save_offset
= offset
;
9471 /* On SEH target, registers are pushed just before the frame pointer
9474 frame
->hard_frame_pointer_offset
= offset
;
9476 /* Align and set SSE register save area. */
9477 if (frame
->nsseregs
)
9479 /* The only ABI that has saved SSE registers (Win64) also has a
9480 16-byte aligned default stack, and thus we don't need to be
9481 within the re-aligned local stack frame to save them. */
9482 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9483 offset
= (offset
+ 16 - 1) & -16;
9484 offset
+= frame
->nsseregs
* 16;
9486 frame
->sse_reg_save_offset
= offset
;
9488 /* The re-aligned stack starts here. Values before this point are not
9489 directly comparable with values below this point. In order to make
9490 sure that no value happens to be the same before and after, force
9491 the alignment computation below to add a non-zero value. */
9492 if (stack_realign_fp
)
9493 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9496 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9497 offset
+= frame
->va_arg_size
;
9499 /* Align start of frame for local function. */
9500 if (stack_realign_fp
9501 || offset
!= frame
->sse_reg_save_offset
9504 || cfun
->calls_alloca
9505 || ix86_current_function_calls_tls_descriptor
)
9506 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9508 /* Frame pointer points here. */
9509 frame
->frame_pointer_offset
= offset
;
9513 /* Add outgoing arguments area. Can be skipped if we eliminated
9514 all the function calls as dead code.
9515 Skipping is however impossible when function calls alloca. Alloca
9516 expander assumes that last crtl->outgoing_args_size
9517 of stack frame are unused. */
9518 if (ACCUMULATE_OUTGOING_ARGS
9519 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9520 || ix86_current_function_calls_tls_descriptor
))
9522 offset
+= crtl
->outgoing_args_size
;
9523 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9526 frame
->outgoing_arguments_size
= 0;
9528 /* Align stack boundary. Only needed if we're calling another function
9530 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9531 || ix86_current_function_calls_tls_descriptor
)
9532 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9534 /* We've reached end of stack frame. */
9535 frame
->stack_pointer_offset
= offset
;
9537 /* Size prologue needs to allocate. */
9538 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9540 if ((!to_allocate
&& frame
->nregs
<= 1)
9541 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9542 frame
->save_regs_using_mov
= false;
9544 if (ix86_using_red_zone ()
9545 && crtl
->sp_is_unchanging
9547 && !ix86_current_function_calls_tls_descriptor
)
9549 frame
->red_zone_size
= to_allocate
;
9550 if (frame
->save_regs_using_mov
)
9551 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9552 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9553 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9556 frame
->red_zone_size
= 0;
9557 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9559 /* The SEH frame pointer location is near the bottom of the frame.
9560 This is enforced by the fact that the difference between the
9561 stack pointer and the frame pointer is limited to 240 bytes in
9562 the unwind data structure. */
9567 /* If we can leave the frame pointer where it is, do so. Also, returns
9568 the establisher frame for __builtin_frame_address (0). */
9569 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9570 if (diff
<= SEH_MAX_FRAME_SIZE
9571 && (diff
> 240 || (diff
& 15) != 0)
9572 && !crtl
->accesses_prior_frames
)
9574 /* Ideally we'd determine what portion of the local stack frame
9575 (within the constraint of the lowest 240) is most heavily used.
9576 But without that complication, simply bias the frame pointer
9577 by 128 bytes so as to maximize the amount of the local stack
9578 frame that is addressable with 8-bit offsets. */
9579 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9584 /* This is semi-inlined memory_address_length, but simplified
9585 since we know that we're always dealing with reg+offset, and
9586 to avoid having to create and discard all that rtl. */
9589 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9595 /* EBP and R13 cannot be encoded without an offset. */
9596 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9598 else if (IN_RANGE (offset
, -128, 127))
9601 /* ESP and R12 must be encoded with a SIB byte. */
9602 if (regno
== SP_REG
|| regno
== R12_REG
)
9608 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9609 The valid base registers are taken from CFUN->MACHINE->FS. */
9612 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9614 const struct machine_function
*m
= cfun
->machine
;
9615 rtx base_reg
= NULL
;
9616 HOST_WIDE_INT base_offset
= 0;
9618 if (m
->use_fast_prologue_epilogue
)
9620 /* Choose the base register most likely to allow the most scheduling
9621 opportunities. Generally FP is valid throughout the function,
9622 while DRAP must be reloaded within the epilogue. But choose either
9623 over the SP due to increased encoding size. */
9627 base_reg
= hard_frame_pointer_rtx
;
9628 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9630 else if (m
->fs
.drap_valid
)
9632 base_reg
= crtl
->drap_reg
;
9633 base_offset
= 0 - cfa_offset
;
9635 else if (m
->fs
.sp_valid
)
9637 base_reg
= stack_pointer_rtx
;
9638 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9643 HOST_WIDE_INT toffset
;
9646 /* Choose the base register with the smallest address encoding.
9647 With a tie, choose FP > DRAP > SP. */
9650 base_reg
= stack_pointer_rtx
;
9651 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9652 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9654 if (m
->fs
.drap_valid
)
9656 toffset
= 0 - cfa_offset
;
9657 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9660 base_reg
= crtl
->drap_reg
;
9661 base_offset
= toffset
;
9667 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9668 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9671 base_reg
= hard_frame_pointer_rtx
;
9672 base_offset
= toffset
;
9677 gcc_assert (base_reg
!= NULL
);
9679 return plus_constant (Pmode
, base_reg
, base_offset
);
9682 /* Emit code to save registers in the prologue. */
9685 ix86_emit_save_regs (void)
9690 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9691 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9693 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9694 RTX_FRAME_RELATED_P (insn
) = 1;
9698 /* Emit a single register save at CFA - CFA_OFFSET. */
9701 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9702 HOST_WIDE_INT cfa_offset
)
9704 struct machine_function
*m
= cfun
->machine
;
9705 rtx reg
= gen_rtx_REG (mode
, regno
);
9706 rtx mem
, addr
, base
, insn
;
9708 addr
= choose_baseaddr (cfa_offset
);
9709 mem
= gen_frame_mem (mode
, addr
);
9711 /* For SSE saves, we need to indicate the 128-bit alignment. */
9712 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9714 insn
= emit_move_insn (mem
, reg
);
9715 RTX_FRAME_RELATED_P (insn
) = 1;
9718 if (GET_CODE (base
) == PLUS
)
9719 base
= XEXP (base
, 0);
9720 gcc_checking_assert (REG_P (base
));
9722 /* When saving registers into a re-aligned local stack frame, avoid
9723 any tricky guessing by dwarf2out. */
9724 if (m
->fs
.realigned
)
9726 gcc_checking_assert (stack_realign_drap
);
9728 if (regno
== REGNO (crtl
->drap_reg
))
9730 /* A bit of a hack. We force the DRAP register to be saved in
9731 the re-aligned stack frame, which provides us with a copy
9732 of the CFA that will last past the prologue. Install it. */
9733 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9734 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9735 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9736 mem
= gen_rtx_MEM (mode
, addr
);
9737 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9741 /* The frame pointer is a stable reference within the
9742 aligned frame. Use it. */
9743 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9744 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9745 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9746 mem
= gen_rtx_MEM (mode
, addr
);
9747 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9748 gen_rtx_SET (VOIDmode
, mem
, reg
));
9752 /* The memory may not be relative to the current CFA register,
9753 which means that we may need to generate a new pattern for
9754 use by the unwind info. */
9755 else if (base
!= m
->fs
.cfa_reg
)
9757 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9758 m
->fs
.cfa_offset
- cfa_offset
);
9759 mem
= gen_rtx_MEM (mode
, addr
);
9760 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9764 /* Emit code to save registers using MOV insns.
9765 First register is stored at CFA - CFA_OFFSET. */
9767 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9771 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9772 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9774 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9775 cfa_offset
-= UNITS_PER_WORD
;
9779 /* Emit code to save SSE registers using MOV insns.
9780 First register is stored at CFA - CFA_OFFSET. */
9782 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9786 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9787 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9789 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9794 static GTY(()) rtx queued_cfa_restores
;
9796 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9797 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9798 Don't add the note if the previously saved value will be left untouched
9799 within stack red-zone till return, as unwinders can find the same value
9800 in the register and on the stack. */
9803 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9805 if (!crtl
->shrink_wrapped
9806 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9811 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9812 RTX_FRAME_RELATED_P (insn
) = 1;
9816 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9819 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9822 ix86_add_queued_cfa_restore_notes (rtx insn
)
9825 if (!queued_cfa_restores
)
9827 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9829 XEXP (last
, 1) = REG_NOTES (insn
);
9830 REG_NOTES (insn
) = queued_cfa_restores
;
9831 queued_cfa_restores
= NULL_RTX
;
9832 RTX_FRAME_RELATED_P (insn
) = 1;
9835 /* Expand prologue or epilogue stack adjustment.
9836 The pattern exist to put a dependency on all ebp-based memory accesses.
9837 STYLE should be negative if instructions should be marked as frame related,
9838 zero if %r11 register is live and cannot be freely used and positive
9842 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9843 int style
, bool set_cfa
)
9845 struct machine_function
*m
= cfun
->machine
;
9847 bool add_frame_related_expr
= false;
9849 if (Pmode
== SImode
)
9850 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9851 else if (x86_64_immediate_operand (offset
, DImode
))
9852 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9856 /* r11 is used by indirect sibcall return as well, set before the
9857 epilogue and used after the epilogue. */
9859 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9862 gcc_assert (src
!= hard_frame_pointer_rtx
9863 && dest
!= hard_frame_pointer_rtx
);
9864 tmp
= hard_frame_pointer_rtx
;
9866 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9868 add_frame_related_expr
= true;
9870 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9873 insn
= emit_insn (insn
);
9875 ix86_add_queued_cfa_restore_notes (insn
);
9881 gcc_assert (m
->fs
.cfa_reg
== src
);
9882 m
->fs
.cfa_offset
+= INTVAL (offset
);
9883 m
->fs
.cfa_reg
= dest
;
9885 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9886 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9887 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9888 RTX_FRAME_RELATED_P (insn
) = 1;
9892 RTX_FRAME_RELATED_P (insn
) = 1;
9893 if (add_frame_related_expr
)
9895 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9896 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9897 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9901 if (dest
== stack_pointer_rtx
)
9903 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9904 bool valid
= m
->fs
.sp_valid
;
9906 if (src
== hard_frame_pointer_rtx
)
9908 valid
= m
->fs
.fp_valid
;
9909 ooffset
= m
->fs
.fp_offset
;
9911 else if (src
== crtl
->drap_reg
)
9913 valid
= m
->fs
.drap_valid
;
9918 /* Else there are two possibilities: SP itself, which we set
9919 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9920 taken care of this by hand along the eh_return path. */
9921 gcc_checking_assert (src
== stack_pointer_rtx
9922 || offset
== const0_rtx
);
9925 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9926 m
->fs
.sp_valid
= valid
;
9930 /* Find an available register to be used as dynamic realign argument
9931 pointer regsiter. Such a register will be written in prologue and
9932 used in begin of body, so it must not be
9933 1. parameter passing register.
9935 We reuse static-chain register if it is available. Otherwise, we
9936 use DI for i386 and R13 for x86-64. We chose R13 since it has
9939 Return: the regno of chosen register. */
9942 find_drap_reg (void)
9944 tree decl
= cfun
->decl
;
9948 /* Use R13 for nested function or function need static chain.
9949 Since function with tail call may use any caller-saved
9950 registers in epilogue, DRAP must not use caller-saved
9951 register in such case. */
9952 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9959 /* Use DI for nested function or function need static chain.
9960 Since function with tail call may use any caller-saved
9961 registers in epilogue, DRAP must not use caller-saved
9962 register in such case. */
9963 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9966 /* Reuse static chain register if it isn't used for parameter
9968 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9970 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9971 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9978 /* Return minimum incoming stack alignment. */
9981 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9983 unsigned int incoming_stack_boundary
;
9985 /* Prefer the one specified at command line. */
9986 if (ix86_user_incoming_stack_boundary
)
9987 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9988 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9989 if -mstackrealign is used, it isn't used for sibcall check and
9990 estimated stack alignment is 128bit. */
9993 && ix86_force_align_arg_pointer
9994 && crtl
->stack_alignment_estimated
== 128)
9995 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9997 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9999 /* Incoming stack alignment can be changed on individual functions
10000 via force_align_arg_pointer attribute. We use the smallest
10001 incoming stack boundary. */
10002 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
10003 && lookup_attribute (ix86_force_align_arg_pointer_string
,
10004 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
10005 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
10007 /* The incoming stack frame has to be aligned at least at
10008 parm_stack_boundary. */
10009 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
10010 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
10012 /* Stack at entrance of main is aligned by runtime. We use the
10013 smallest incoming stack boundary. */
10014 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
10015 && DECL_NAME (current_function_decl
)
10016 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
10017 && DECL_FILE_SCOPE_P (current_function_decl
))
10018 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
10020 return incoming_stack_boundary
;
10023 /* Update incoming stack boundary and estimated stack alignment. */
10026 ix86_update_stack_boundary (void)
10028 ix86_incoming_stack_boundary
10029 = ix86_minimum_incoming_stack_boundary (false);
10031 /* x86_64 vararg needs 16byte stack alignment for register save
10035 && crtl
->stack_alignment_estimated
< 128)
10036 crtl
->stack_alignment_estimated
= 128;
10039 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10040 needed or an rtx for DRAP otherwise. */
10043 ix86_get_drap_rtx (void)
10045 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
10046 crtl
->need_drap
= true;
10048 if (stack_realign_drap
)
10050 /* Assign DRAP to vDRAP and returns vDRAP */
10051 unsigned int regno
= find_drap_reg ();
10056 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
10057 crtl
->drap_reg
= arg_ptr
;
10060 drap_vreg
= copy_to_reg (arg_ptr
);
10061 seq
= get_insns ();
10064 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
10067 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
10068 RTX_FRAME_RELATED_P (insn
) = 1;
10076 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10079 ix86_internal_arg_pointer (void)
10081 return virtual_incoming_args_rtx
;
10084 struct scratch_reg
{
10089 /* Return a short-lived scratch register for use on function entry.
10090 In 32-bit mode, it is valid only after the registers are saved
10091 in the prologue. This register must be released by means of
10092 release_scratch_register_on_entry once it is dead. */
10095 get_scratch_register_on_entry (struct scratch_reg
*sr
)
10103 /* We always use R11 in 64-bit mode. */
10108 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
10110 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10112 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10113 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
10114 int regparm
= ix86_function_regparm (fntype
, decl
);
10116 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
10118 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10119 for the static chain register. */
10120 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
10121 && drap_regno
!= AX_REG
)
10123 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10124 for the static chain register. */
10125 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
10127 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
10129 /* ecx is the static chain register. */
10130 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
10132 && drap_regno
!= CX_REG
)
10134 else if (ix86_save_reg (BX_REG
, true))
10136 /* esi is the static chain register. */
10137 else if (!(regparm
== 3 && static_chain_p
)
10138 && ix86_save_reg (SI_REG
, true))
10140 else if (ix86_save_reg (DI_REG
, true))
10144 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
10149 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
10152 rtx insn
= emit_insn (gen_push (sr
->reg
));
10153 RTX_FRAME_RELATED_P (insn
) = 1;
10157 /* Release a scratch register obtained from the preceding function. */
10160 release_scratch_register_on_entry (struct scratch_reg
*sr
)
10164 struct machine_function
*m
= cfun
->machine
;
10165 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
10167 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10168 RTX_FRAME_RELATED_P (insn
) = 1;
10169 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
10170 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10171 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
10172 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10176 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10178 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10181 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
10183 /* We skip the probe for the first interval + a small dope of 4 words and
10184 probe that many bytes past the specified size to maintain a protection
10185 area at the botton of the stack. */
10186 const int dope
= 4 * UNITS_PER_WORD
;
10187 rtx size_rtx
= GEN_INT (size
), last
;
10189 /* See if we have a constant small number of probes to generate. If so,
10190 that's the easy case. The run-time loop is made up of 11 insns in the
10191 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10192 for n # of intervals. */
10193 if (size
<= 5 * PROBE_INTERVAL
)
10195 HOST_WIDE_INT i
, adjust
;
10196 bool first_probe
= true;
10198 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10199 values of N from 1 until it exceeds SIZE. If only one probe is
10200 needed, this will not generate any code. Then adjust and probe
10201 to PROBE_INTERVAL + SIZE. */
10202 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10206 adjust
= 2 * PROBE_INTERVAL
+ dope
;
10207 first_probe
= false;
10210 adjust
= PROBE_INTERVAL
;
10212 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10213 plus_constant (Pmode
, stack_pointer_rtx
,
10215 emit_stack_probe (stack_pointer_rtx
);
10219 adjust
= size
+ PROBE_INTERVAL
+ dope
;
10221 adjust
= size
+ PROBE_INTERVAL
- i
;
10223 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10224 plus_constant (Pmode
, stack_pointer_rtx
,
10226 emit_stack_probe (stack_pointer_rtx
);
10228 /* Adjust back to account for the additional first interval. */
10229 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10230 plus_constant (Pmode
, stack_pointer_rtx
,
10231 PROBE_INTERVAL
+ dope
)));
10234 /* Otherwise, do the same as above, but in a loop. Note that we must be
10235 extra careful with variables wrapping around because we might be at
10236 the very top (or the very bottom) of the address space and we have
10237 to be able to handle this case properly; in particular, we use an
10238 equality test for the loop condition. */
10241 HOST_WIDE_INT rounded_size
;
10242 struct scratch_reg sr
;
10244 get_scratch_register_on_entry (&sr
);
10247 /* Step 1: round SIZE to the previous multiple of the interval. */
10249 rounded_size
= size
& -PROBE_INTERVAL
;
10252 /* Step 2: compute initial and final value of the loop counter. */
10254 /* SP = SP_0 + PROBE_INTERVAL. */
10255 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10256 plus_constant (Pmode
, stack_pointer_rtx
,
10257 - (PROBE_INTERVAL
+ dope
))));
10259 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10260 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10261 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10262 gen_rtx_PLUS (Pmode
, sr
.reg
,
10263 stack_pointer_rtx
)));
10266 /* Step 3: the loop
10268 while (SP != LAST_ADDR)
10270 SP = SP + PROBE_INTERVAL
10274 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10275 values of N from 1 until it is equal to ROUNDED_SIZE. */
10277 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10280 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10281 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10283 if (size
!= rounded_size
)
10285 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10286 plus_constant (Pmode
, stack_pointer_rtx
,
10287 rounded_size
- size
)));
10288 emit_stack_probe (stack_pointer_rtx
);
10291 /* Adjust back to account for the additional first interval. */
10292 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10293 plus_constant (Pmode
, stack_pointer_rtx
,
10294 PROBE_INTERVAL
+ dope
)));
10296 release_scratch_register_on_entry (&sr
);
10299 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10301 /* Even if the stack pointer isn't the CFA register, we need to correctly
10302 describe the adjustments made to it, in particular differentiate the
10303 frame-related ones from the frame-unrelated ones. */
10306 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10307 XVECEXP (expr
, 0, 0)
10308 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10309 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10310 XVECEXP (expr
, 0, 1)
10311 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10312 plus_constant (Pmode
, stack_pointer_rtx
,
10313 PROBE_INTERVAL
+ dope
+ size
));
10314 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10315 RTX_FRAME_RELATED_P (last
) = 1;
10317 cfun
->machine
->fs
.sp_offset
+= size
;
10320 /* Make sure nothing is scheduled before we are done. */
10321 emit_insn (gen_blockage ());
10324 /* Adjust the stack pointer up to REG while probing it. */
10327 output_adjust_stack_and_probe (rtx reg
)
10329 static int labelno
= 0;
10330 char loop_lab
[32], end_lab
[32];
10333 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10334 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10336 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10338 /* Jump to END_LAB if SP == LAST_ADDR. */
10339 xops
[0] = stack_pointer_rtx
;
10341 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10342 fputs ("\tje\t", asm_out_file
);
10343 assemble_name_raw (asm_out_file
, end_lab
);
10344 fputc ('\n', asm_out_file
);
10346 /* SP = SP + PROBE_INTERVAL. */
10347 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10348 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10351 xops
[1] = const0_rtx
;
10352 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10354 fprintf (asm_out_file
, "\tjmp\t");
10355 assemble_name_raw (asm_out_file
, loop_lab
);
10356 fputc ('\n', asm_out_file
);
10358 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10363 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10364 inclusive. These are offsets from the current stack pointer. */
10367 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10369 /* See if we have a constant small number of probes to generate. If so,
10370 that's the easy case. The run-time loop is made up of 7 insns in the
10371 generic case while the compile-time loop is made up of n insns for n #
10373 if (size
<= 7 * PROBE_INTERVAL
)
10377 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10378 it exceeds SIZE. If only one probe is needed, this will not
10379 generate any code. Then probe at FIRST + SIZE. */
10380 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10381 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10384 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10388 /* Otherwise, do the same as above, but in a loop. Note that we must be
10389 extra careful with variables wrapping around because we might be at
10390 the very top (or the very bottom) of the address space and we have
10391 to be able to handle this case properly; in particular, we use an
10392 equality test for the loop condition. */
10395 HOST_WIDE_INT rounded_size
, last
;
10396 struct scratch_reg sr
;
10398 get_scratch_register_on_entry (&sr
);
10401 /* Step 1: round SIZE to the previous multiple of the interval. */
10403 rounded_size
= size
& -PROBE_INTERVAL
;
10406 /* Step 2: compute initial and final value of the loop counter. */
10408 /* TEST_OFFSET = FIRST. */
10409 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10411 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10412 last
= first
+ rounded_size
;
10415 /* Step 3: the loop
10417 while (TEST_ADDR != LAST_ADDR)
10419 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10423 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10424 until it is equal to ROUNDED_SIZE. */
10426 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10429 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10430 that SIZE is equal to ROUNDED_SIZE. */
10432 if (size
!= rounded_size
)
10433 emit_stack_probe (plus_constant (Pmode
,
10434 gen_rtx_PLUS (Pmode
,
10437 rounded_size
- size
));
10439 release_scratch_register_on_entry (&sr
);
10442 /* Make sure nothing is scheduled before we are done. */
10443 emit_insn (gen_blockage ());
10446 /* Probe a range of stack addresses from REG to END, inclusive. These are
10447 offsets from the current stack pointer. */
10450 output_probe_stack_range (rtx reg
, rtx end
)
10452 static int labelno
= 0;
10453 char loop_lab
[32], end_lab
[32];
10456 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10457 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10459 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10461 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10464 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10465 fputs ("\tje\t", asm_out_file
);
10466 assemble_name_raw (asm_out_file
, end_lab
);
10467 fputc ('\n', asm_out_file
);
10469 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10470 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10471 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10473 /* Probe at TEST_ADDR. */
10474 xops
[0] = stack_pointer_rtx
;
10476 xops
[2] = const0_rtx
;
10477 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10479 fprintf (asm_out_file
, "\tjmp\t");
10480 assemble_name_raw (asm_out_file
, loop_lab
);
10481 fputc ('\n', asm_out_file
);
10483 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10488 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10489 to be generated in correct form. */
10491 ix86_finalize_stack_realign_flags (void)
10493 /* Check if stack realign is really needed after reload, and
10494 stores result in cfun */
10495 unsigned int incoming_stack_boundary
10496 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10497 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10498 unsigned int stack_realign
= (incoming_stack_boundary
10500 ? crtl
->max_used_stack_slot_alignment
10501 : crtl
->stack_alignment_needed
));
10503 if (crtl
->stack_realign_finalized
)
10505 /* After stack_realign_needed is finalized, we can't no longer
10507 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10511 /* If the only reason for frame_pointer_needed is that we conservatively
10512 assumed stack realignment might be needed, but in the end nothing that
10513 needed the stack alignment had been spilled, clear frame_pointer_needed
10514 and say we don't need stack realignment. */
10516 && !crtl
->need_drap
10517 && frame_pointer_needed
10519 && flag_omit_frame_pointer
10520 && crtl
->sp_is_unchanging
10521 && !ix86_current_function_calls_tls_descriptor
10522 && !crtl
->accesses_prior_frames
10523 && !cfun
->calls_alloca
10524 && !crtl
->calls_eh_return
10525 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10526 && !ix86_frame_pointer_required ()
10527 && get_frame_size () == 0
10528 && ix86_nsaved_sseregs () == 0
10529 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10531 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10534 CLEAR_HARD_REG_SET (prologue_used
);
10535 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10536 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10537 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10538 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10539 HARD_FRAME_POINTER_REGNUM
);
10543 FOR_BB_INSNS (bb
, insn
)
10544 if (NONDEBUG_INSN_P (insn
)
10545 && requires_stack_frame_p (insn
, prologue_used
,
10546 set_up_by_prologue
))
10548 crtl
->stack_realign_needed
= stack_realign
;
10549 crtl
->stack_realign_finalized
= true;
10554 frame_pointer_needed
= false;
10555 stack_realign
= false;
10556 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10557 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10558 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10559 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10560 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10561 df_finish_pass (true);
10562 df_scan_alloc (NULL
);
10564 df_compute_regs_ever_live (true);
10568 crtl
->stack_realign_needed
= stack_realign
;
10569 crtl
->stack_realign_finalized
= true;
10572 /* Expand the prologue into a bunch of separate insns. */
10575 ix86_expand_prologue (void)
10577 struct machine_function
*m
= cfun
->machine
;
10580 struct ix86_frame frame
;
10581 HOST_WIDE_INT allocate
;
10582 bool int_registers_saved
;
10583 bool sse_registers_saved
;
10585 ix86_finalize_stack_realign_flags ();
10587 /* DRAP should not coexist with stack_realign_fp */
10588 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10590 memset (&m
->fs
, 0, sizeof (m
->fs
));
10592 /* Initialize CFA state for before the prologue. */
10593 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10594 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10596 /* Track SP offset to the CFA. We continue tracking this after we've
10597 swapped the CFA register away from SP. In the case of re-alignment
10598 this is fudged; we're interested to offsets within the local frame. */
10599 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10600 m
->fs
.sp_valid
= true;
10602 ix86_compute_frame_layout (&frame
);
10604 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10606 /* We should have already generated an error for any use of
10607 ms_hook on a nested function. */
10608 gcc_checking_assert (!ix86_static_chain_on_stack
);
10610 /* Check if profiling is active and we shall use profiling before
10611 prologue variant. If so sorry. */
10612 if (crtl
->profile
&& flag_fentry
!= 0)
10613 sorry ("ms_hook_prologue attribute isn%'t compatible "
10614 "with -mfentry for 32-bit");
10616 /* In ix86_asm_output_function_label we emitted:
10617 8b ff movl.s %edi,%edi
10619 8b ec movl.s %esp,%ebp
10621 This matches the hookable function prologue in Win32 API
10622 functions in Microsoft Windows XP Service Pack 2 and newer.
10623 Wine uses this to enable Windows apps to hook the Win32 API
10624 functions provided by Wine.
10626 What that means is that we've already set up the frame pointer. */
10628 if (frame_pointer_needed
10629 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10633 /* We've decided to use the frame pointer already set up.
10634 Describe this to the unwinder by pretending that both
10635 push and mov insns happen right here.
10637 Putting the unwind info here at the end of the ms_hook
10638 is done so that we can make absolutely certain we get
10639 the required byte sequence at the start of the function,
10640 rather than relying on an assembler that can produce
10641 the exact encoding required.
10643 However it does mean (in the unpatched case) that we have
10644 a 1 insn window where the asynchronous unwind info is
10645 incorrect. However, if we placed the unwind info at
10646 its correct location we would have incorrect unwind info
10647 in the patched case. Which is probably all moot since
10648 I don't expect Wine generates dwarf2 unwind info for the
10649 system libraries that use this feature. */
10651 insn
= emit_insn (gen_blockage ());
10653 push
= gen_push (hard_frame_pointer_rtx
);
10654 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10655 stack_pointer_rtx
);
10656 RTX_FRAME_RELATED_P (push
) = 1;
10657 RTX_FRAME_RELATED_P (mov
) = 1;
10659 RTX_FRAME_RELATED_P (insn
) = 1;
10660 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10661 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10663 /* Note that gen_push incremented m->fs.cfa_offset, even
10664 though we didn't emit the push insn here. */
10665 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10666 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10667 m
->fs
.fp_valid
= true;
10671 /* The frame pointer is not needed so pop %ebp again.
10672 This leaves us with a pristine state. */
10673 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10677 /* The first insn of a function that accepts its static chain on the
10678 stack is to push the register that would be filled in by a direct
10679 call. This insn will be skipped by the trampoline. */
10680 else if (ix86_static_chain_on_stack
)
10682 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10683 emit_insn (gen_blockage ());
10685 /* We don't want to interpret this push insn as a register save,
10686 only as a stack adjustment. The real copy of the register as
10687 a save will be done later, if needed. */
10688 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10689 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10690 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10691 RTX_FRAME_RELATED_P (insn
) = 1;
10694 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10695 of DRAP is needed and stack realignment is really needed after reload */
10696 if (stack_realign_drap
)
10698 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10700 /* Only need to push parameter pointer reg if it is caller saved. */
10701 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10703 /* Push arg pointer reg */
10704 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10705 RTX_FRAME_RELATED_P (insn
) = 1;
10708 /* Grab the argument pointer. */
10709 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10710 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10711 RTX_FRAME_RELATED_P (insn
) = 1;
10712 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10713 m
->fs
.cfa_offset
= 0;
10715 /* Align the stack. */
10716 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10718 GEN_INT (-align_bytes
)));
10719 RTX_FRAME_RELATED_P (insn
) = 1;
10721 /* Replicate the return address on the stack so that return
10722 address can be reached via (argp - 1) slot. This is needed
10723 to implement macro RETURN_ADDR_RTX and intrinsic function
10724 expand_builtin_return_addr etc. */
10725 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10726 t
= gen_frame_mem (word_mode
, t
);
10727 insn
= emit_insn (gen_push (t
));
10728 RTX_FRAME_RELATED_P (insn
) = 1;
10730 /* For the purposes of frame and register save area addressing,
10731 we've started over with a new frame. */
10732 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10733 m
->fs
.realigned
= true;
10736 int_registers_saved
= (frame
.nregs
== 0);
10737 sse_registers_saved
= (frame
.nsseregs
== 0);
10739 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10741 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10742 slower on all targets. Also sdb doesn't like it. */
10743 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10744 RTX_FRAME_RELATED_P (insn
) = 1;
10746 /* Push registers now, before setting the frame pointer
10748 if (!int_registers_saved
10750 && !frame
.save_regs_using_mov
)
10752 ix86_emit_save_regs ();
10753 int_registers_saved
= true;
10754 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10757 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10759 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10760 RTX_FRAME_RELATED_P (insn
) = 1;
10762 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10763 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10764 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10765 m
->fs
.fp_valid
= true;
10769 if (!int_registers_saved
)
10771 /* If saving registers via PUSH, do so now. */
10772 if (!frame
.save_regs_using_mov
)
10774 ix86_emit_save_regs ();
10775 int_registers_saved
= true;
10776 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10779 /* When using red zone we may start register saving before allocating
10780 the stack frame saving one cycle of the prologue. However, avoid
10781 doing this if we have to probe the stack; at least on x86_64 the
10782 stack probe can turn into a call that clobbers a red zone location. */
10783 else if (ix86_using_red_zone ()
10784 && (! TARGET_STACK_PROBE
10785 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10787 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10788 int_registers_saved
= true;
10792 if (stack_realign_fp
)
10794 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10795 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10797 /* The computation of the size of the re-aligned stack frame means
10798 that we must allocate the size of the register save area before
10799 performing the actual alignment. Otherwise we cannot guarantee
10800 that there's enough storage above the realignment point. */
10801 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10802 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10803 GEN_INT (m
->fs
.sp_offset
10804 - frame
.sse_reg_save_offset
),
10807 /* Align the stack. */
10808 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10810 GEN_INT (-align_bytes
)));
10812 /* For the purposes of register save area addressing, the stack
10813 pointer is no longer valid. As for the value of sp_offset,
10814 see ix86_compute_frame_layout, which we need to match in order
10815 to pass verification of stack_pointer_offset at the end. */
10816 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10817 m
->fs
.sp_valid
= false;
10820 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10822 if (flag_stack_usage_info
)
10824 /* We start to count from ARG_POINTER. */
10825 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10827 /* If it was realigned, take into account the fake frame. */
10828 if (stack_realign_drap
)
10830 if (ix86_static_chain_on_stack
)
10831 stack_size
+= UNITS_PER_WORD
;
10833 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10834 stack_size
+= UNITS_PER_WORD
;
10836 /* This over-estimates by 1 minimal-stack-alignment-unit but
10837 mitigates that by counting in the new return address slot. */
10838 current_function_dynamic_stack_size
10839 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10842 current_function_static_stack_size
= stack_size
;
10845 /* On SEH target with very large frame size, allocate an area to save
10846 SSE registers (as the very large allocation won't be described). */
10848 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10849 && !sse_registers_saved
)
10851 HOST_WIDE_INT sse_size
=
10852 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10854 gcc_assert (int_registers_saved
);
10856 /* No need to do stack checking as the area will be immediately
10858 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10859 GEN_INT (-sse_size
), -1,
10860 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10861 allocate
-= sse_size
;
10862 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10863 sse_registers_saved
= true;
10866 /* The stack has already been decremented by the instruction calling us
10867 so probe if the size is non-negative to preserve the protection area. */
10868 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10870 /* We expect the registers to be saved when probes are used. */
10871 gcc_assert (int_registers_saved
);
10873 if (STACK_CHECK_MOVING_SP
)
10875 ix86_adjust_stack_and_probe (allocate
);
10880 HOST_WIDE_INT size
= allocate
;
10882 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10883 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10885 if (TARGET_STACK_PROBE
)
10886 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10888 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10894 else if (!ix86_target_stack_probe ()
10895 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10897 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10898 GEN_INT (-allocate
), -1,
10899 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10903 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10905 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10906 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10907 bool eax_live
= false;
10908 bool r10_live
= false;
10911 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10912 if (!TARGET_64BIT_MS_ABI
)
10913 eax_live
= ix86_eax_live_at_start_p ();
10915 /* Note that SEH directives need to continue tracking the stack
10916 pointer even after the frame pointer has been set up. */
10919 insn
= emit_insn (gen_push (eax
));
10920 allocate
-= UNITS_PER_WORD
;
10921 if (sp_is_cfa_reg
|| TARGET_SEH
)
10924 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10925 RTX_FRAME_RELATED_P (insn
) = 1;
10931 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10932 insn
= emit_insn (gen_push (r10
));
10933 allocate
-= UNITS_PER_WORD
;
10934 if (sp_is_cfa_reg
|| TARGET_SEH
)
10937 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10938 RTX_FRAME_RELATED_P (insn
) = 1;
10942 emit_move_insn (eax
, GEN_INT (allocate
));
10943 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10945 /* Use the fact that AX still contains ALLOCATE. */
10946 adjust_stack_insn
= (Pmode
== DImode
10947 ? gen_pro_epilogue_adjust_stack_di_sub
10948 : gen_pro_epilogue_adjust_stack_si_sub
);
10950 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10951 stack_pointer_rtx
, eax
));
10953 if (sp_is_cfa_reg
|| TARGET_SEH
)
10956 m
->fs
.cfa_offset
+= allocate
;
10957 RTX_FRAME_RELATED_P (insn
) = 1;
10958 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10959 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10960 plus_constant (Pmode
, stack_pointer_rtx
,
10963 m
->fs
.sp_offset
+= allocate
;
10965 if (r10_live
&& eax_live
)
10967 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10968 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10969 gen_frame_mem (word_mode
, t
));
10970 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10971 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10972 gen_frame_mem (word_mode
, t
));
10974 else if (eax_live
|| r10_live
)
10976 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10977 emit_move_insn (gen_rtx_REG (word_mode
,
10978 (eax_live
? AX_REG
: R10_REG
)),
10979 gen_frame_mem (word_mode
, t
));
10982 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10984 /* If we havn't already set up the frame pointer, do so now. */
10985 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10987 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10988 GEN_INT (frame
.stack_pointer_offset
10989 - frame
.hard_frame_pointer_offset
));
10990 insn
= emit_insn (insn
);
10991 RTX_FRAME_RELATED_P (insn
) = 1;
10992 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10994 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10995 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10996 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10997 m
->fs
.fp_valid
= true;
11000 if (!int_registers_saved
)
11001 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
11002 if (!sse_registers_saved
)
11003 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
11005 pic_reg_used
= false;
11006 /* We don't use pic-register for pe-coff target. */
11007 if (pic_offset_table_rtx
11009 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
11012 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
11014 if (alt_pic_reg_used
!= INVALID_REGNUM
)
11015 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
11017 pic_reg_used
= true;
11024 if (ix86_cmodel
== CM_LARGE_PIC
)
11026 rtx label
, tmp_reg
;
11028 gcc_assert (Pmode
== DImode
);
11029 label
= gen_label_rtx ();
11030 emit_label (label
);
11031 LABEL_PRESERVE_P (label
) = 1;
11032 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
11033 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
11034 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
11036 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
11037 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
11038 pic_offset_table_rtx
, tmp_reg
));
11041 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
11045 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
11046 RTX_FRAME_RELATED_P (insn
) = 1;
11047 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
11051 /* In the pic_reg_used case, make sure that the got load isn't deleted
11052 when mcount needs it. Blockage to avoid call movement across mcount
11053 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11055 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
11056 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
11058 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
11060 /* vDRAP is setup but after reload it turns out stack realign
11061 isn't necessary, here we will emit prologue to setup DRAP
11062 without stack realign adjustment */
11063 t
= choose_baseaddr (0);
11064 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
11067 /* Prevent instructions from being scheduled into register save push
11068 sequence when access to the redzone area is done through frame pointer.
11069 The offset between the frame pointer and the stack pointer is calculated
11070 relative to the value of the stack pointer at the end of the function
11071 prologue, and moving instructions that access redzone area via frame
11072 pointer inside push sequence violates this assumption. */
11073 if (frame_pointer_needed
&& frame
.red_zone_size
)
11074 emit_insn (gen_memory_blockage ());
11076 /* Emit cld instruction if stringops are used in the function. */
11077 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
11078 emit_insn (gen_cld ());
11080 /* SEH requires that the prologue end within 256 bytes of the start of
11081 the function. Prevent instruction schedules that would extend that.
11082 Further, prevent alloca modifications to the stack pointer from being
11083 combined with prologue modifications. */
11085 emit_insn (gen_prologue_use (stack_pointer_rtx
));
11088 /* Emit code to restore REG using a POP insn. */
/* NOTE(review): this text is an extraction-mangled copy of GCC's i386.c.
   Each original line is split across several physical lines and structural
   lines (function's "static void" header, braces, "return;") appear to have
   been dropped by the extractor — compare against the upstream file before
   relying on control flow shown here.  */
/* Pop REG off the stack and update the machine_function frame-state
   bookkeeping (m->fs) plus the CFI notes that describe the pop to the
   unwinder.  */
11091 ix86_emit_restore_reg_using_pop (rtx reg
)
11093 struct machine_function
*m
= cfun
->machine
;
/* Emit the actual pop insn; the frame state is updated to match below.  */
11094 rtx insn
= emit_insn (gen_pop (reg
));
11096 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
/* A pop raises the stack pointer by one word.  */
11097 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
/* Case 1: we just popped the DRAP register while it is the CFA register.  */
11099 if (m
->fs
.cfa_reg
== crtl
->drap_reg
11100 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
11102 /* Previously we'd represented the CFA as an expression
11103 like *(%ebp - 8). We've just popped that value from
11104 the stack, which means we need to reset the CFA to
11105 the drap register. This will remain until we restore
11106 the stack pointer. */
11107 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11108 RTX_FRAME_RELATED_P (insn
) = 1;
11110 /* This means that the DRAP register is valid for addressing too. */
11111 m
->fs
.drap_valid
= true;
/* Case 2: the stack pointer is the CFA register, so the pop changed the
   CFA offset by one word — record that with a REG_CFA_ADJUST_CFA note.  */
11115 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
11117 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
)
;
11118 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
11119 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
11120 RTX_FRAME_RELATED_P (insn
) = 1;
11122 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11125 /* When the frame pointer is the CFA, and we pop it, we are
11126 swapping back to the stack pointer as the CFA. This happens
11127 for stack frames that don't allocate other data, so we assume
11128 the stack pointer is now pointing at the return address, i.e.
11129 the function entry state, which makes the offset be 1 word. */
11130 if (reg
== hard_frame_pointer_rtx
)
/* Popping %ebp/%rbp invalidates the frame pointer for addressing.  */
11132 m
->fs
.fp_valid
= false;
11133 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11135 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11136 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
/* Re-express the CFA as sp + cfa_offset now that fp is gone.  */
11138 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11139 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11140 GEN_INT (m
->fs
.cfa_offset
)));
11141 RTX_FRAME_RELATED_P (insn
) = 1;
11146 /* Emit code to restore saved registers using POP insns. */
/* NOTE(review): extraction-mangled text — function header/braces were
   dropped by the extractor; code tokens below are reproduced verbatim.  */
/* Walk every hard register; each non-SSE register that the prologue saved
   (per ix86_save_reg with maybe_eh_return == false) is restored with a POP
   via ix86_emit_restore_reg_using_pop.  */
11149 ix86_emit_restore_regs_using_pop (void)
11151 unsigned int regno
;
11153 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11154 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
/* word_mode: pops are always full-word on this target.  */
11155 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
11158 /* Emit code and notes for the LEAVE instruction. */
/* NOTE(review): extraction-mangled text; several trailing argument lines of
   the add_reg_note/ix86_add_cfa_restore_note calls at the end appear to be
   missing from this copy — verify against upstream.  */
/* LEAVE restores %esp from %ebp and pops %ebp, so afterwards the stack
   pointer is valid again and the frame pointer is not; update m->fs and the
   CFA notes accordingly.  */
11161 ix86_emit_leave (void)
11163 struct machine_function
*m
= cfun
->machine
;
11164 rtx insn
= emit_insn (ix86_gen_leave ());
/* Attach any CFA-restore notes queued by earlier restores to this insn.  */
11166 ix86_add_queued_cfa_restore_notes (insn
);
/* LEAVE only makes sense while the frame pointer is valid.  */
11168 gcc_assert (m
->fs
.fp_valid
);
11169 m
->fs
.sp_valid
= true;
/* sp now points one word above where fp was saved (the pop consumed it).  */
11170 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
11171 m
->fs
.fp_valid
= false;
/* If the CFA was being tracked via the frame pointer, switch it to the
   stack pointer and emit a new CFA definition note.  */
11173 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11175 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11176 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
11178 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11179 plus_constant (Pmode
, stack_pointer_rtx
,
11181 RTX_FRAME_RELATED_P (insn
) = 1;
/* Record that the saved frame pointer has been restored.  */
11183 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
11187 /* Emit code to restore saved registers using MOV insns.
11188 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): extraction-mangled text; the declarations of the local
   variables `mem' and `insn' and the surrounding braces are missing from
   this copy — verify against upstream.  */
/* For each saved general-purpose register, load it back from its stack
   slot (addressed via choose_baseaddr) instead of popping; used when the
   stack pointer is not positioned for pops (eh_return, realigned frames).
   CFA_OFFSET walks downward one word per restored register.  */
11190 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11191 bool maybe_eh_return
)
11193 struct machine_function
*m
= cfun
->machine
;
11194 unsigned int regno
;
11196 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11197 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11199 rtx reg
= gen_rtx_REG (word_mode
, regno
);
/* Build the frame-memory reference for this register's save slot.  */
11202 mem
= choose_baseaddr (cfa_offset
);
11203 mem
= gen_frame_mem (word_mode
, mem
);
11204 insn
= emit_move_insn (reg
, mem
);
/* Same DRAP special case as in ix86_emit_restore_reg_using_pop: once the
   DRAP register itself is reloaded, it becomes the CFA register again.  */
11206 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
11208 /* Previously we'd represented the CFA as an expression
11209 like *(%ebp - 8). We've just popped that value from
11210 the stack, which means we need to reset the CFA to
11211 the drap register. This will remain until we restore
11212 the stack pointer. */
11213 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11214 RTX_FRAME_RELATED_P (insn
) = 1;
11216 /* This means that the DRAP register is valid for addressing. */
11217 m
->fs
.drap_valid
= true;
/* Queue the restore note (NULL_RTX => attach later in bulk).  */
11220 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11222 cfa_offset
-= UNITS_PER_WORD
;
11226 /* Emit code to restore saved registers using MOV insns.
11227 First register is restored from CFA - CFA_OFFSET. */
/* NOTE(review): extraction-mangled text; the `mem' declaration, braces and
   the per-iteration cfa_offset decrement (presumably by 16 bytes, the size
   of a V4SF slot — TODO confirm upstream) are missing from this copy.  */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: reload each saved
   SSE register from its 16-byte-aligned stack slot with a V4SFmode move.
   There is no POP for SSE registers, so moves are the only option.  */
11229 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11230 bool maybe_eh_return
)
11232 unsigned int regno
;
11234 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11235 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11237 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
11240 mem
= choose_baseaddr (cfa_offset
);
11241 mem
= gen_rtx_MEM (V4SFmode
, mem
);
/* The save area was laid out 128-bit aligned; say so for movaps.  */
11242 set_mem_align (mem
, 128);
11243 emit_move_insn (reg
, mem
);
/* Queue the restore note for later attachment.  */
11245 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11251 /* Restore function stack, frame, and registers. */
/* NOTE(review): extraction-mangled copy.  Many structural lines (braces,
   `else', `return', TARGET_SEH / TARGET_MACHO conditionals, and several
   call-argument continuation lines) were dropped by the extractor, so the
   control flow visible here is incomplete — verify every branch against the
   upstream i386.c before editing.  */
/* STYLE is the epilogue flavor: per the code below, style == 2 marks the
   eh_return path and the sibcall path emits no return insn.  On entry m->fs
   describes the frame state left by the prologue; it is restored from
   frame_state_save on exit so the next epilogue starts clean.  */
11254 ix86_expand_epilogue (int style
)
11256 struct machine_function
*m
= cfun
->machine
;
/* Snapshot the frame state; restored at the end of this function.  */
11257 struct machine_frame_state frame_state_save
= m
->fs
;
11258 struct ix86_frame frame
;
11259 bool restore_regs_via_mov
;
11262 ix86_finalize_stack_realign_flags ();
11263 ix86_compute_frame_layout (&frame
);
/* The stack pointer is only usable for addressing if no frame pointer is
   needed, or sp provably never moved and the frame isn't sp-realigned.  */
11265 m
->fs
.sp_valid
= (!frame_pointer_needed
11266 || (crtl
->sp_is_unchanging
11267 && !stack_realign_fp
));
11268 gcc_assert (!m
->fs
.sp_valid
11269 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11271 /* The FP must be valid if the frame pointer is present. */
11272 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11273 gcc_assert (!m
->fs
.fp_valid
11274 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11276 /* We must have *some* valid pointer to the stack frame. */
11277 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11279 /* The DRAP is never valid at this point. */
11280 gcc_assert (!m
->fs
.drap_valid
);
11282 /* See the comment about red zone and frame
11283 pointer usage in ix86_expand_prologue. */
11284 if (frame_pointer_needed
&& frame
.red_zone_size
)
11285 emit_insn (gen_memory_blockage ());
11287 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11288 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11290 /* Determine the CFA offset of the end of the red-zone. */
11291 m
->fs
.red_zone_offset
= 0;
11292 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11294 /* The red-zone begins below the return address. */
11295 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11297 /* When the register save area is in the aligned portion of
11298 the stack, determine the maximum runtime displacement that
11299 matches up with the aligned frame. */
11300 if (stack_realign_drap
)
11301 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11305 /* Special care must be taken for the normal return case of a function
11306 using eh_return: the eax and edx registers are marked as saved, but
11307 not restored along this path. Adjust the save location to match. */
11308 if (crtl
->calls_eh_return
&& style
!= 2)
11309 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
/* Decide between restoring registers with MOV loads vs POP insns; the
   chain of conditions below picks MOV for eh_return, misaligned sp, or
   when the tuning flags say moves are cheaper.  */
11311 /* EH_RETURN requires the use of moves to function properly. */
11312 if (crtl
->calls_eh_return
)
11313 restore_regs_via_mov
= true;
11314 /* SEH requires the use of pops to identify the epilogue. */
11315 else if (TARGET_SEH
)
11316 restore_regs_via_mov
= false;
11317 /* If we're only restoring one register and sp is not valid then
11318 using a move instruction to restore the register since it's
11319 less work than reloading sp and popping the register. */
11320 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11321 restore_regs_via_mov
= true;
11322 else if (TARGET_EPILOGUE_USING_MOVE
11323 && cfun
->machine
->use_fast_prologue_epilogue
11324 && (frame
.nregs
> 1
11325 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11326 restore_regs_via_mov
= true;
11327 else if (frame_pointer_needed
11329 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11330 restore_regs_via_mov
= true;
11331 else if (frame_pointer_needed
11332 && TARGET_USE_LEAVE
11333 && cfun
->machine
->use_fast_prologue_epilogue
11334 && frame
.nregs
== 1)
11335 restore_regs_via_mov
= true;
/* Default case (an `else' line appears to be missing here).  */
11337 restore_regs_via_mov
= false;
11339 if (restore_regs_via_mov
|| frame
.nsseregs
)
11341 /* Ensure that the entire register save area is addressable via
11342 the stack pointer, if we will restore via sp. */
/* (The leading condition line of this `if' is missing from this copy.)  */
11344 && m
->fs
.sp_offset
> 0x7fffffff
11345 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11346 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11348 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11349 GEN_INT (m
->fs
.sp_offset
11350 - frame
.sse_reg_save_offset
),
11352 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11356 /* If there are any SSE registers to restore, then we have to do it
11357 via moves, since there's obviously no pop for SSE regs. */
11358 if (frame
.nsseregs
)
11359 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11362 if (restore_regs_via_mov
)
11367 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11369 /* eh_return epilogues need %ecx added to the stack pointer. */
/* eh_return path (the `if (style == 2)' guard line is presumably missing
   from this copy — confirm upstream).  SA holds the stack adjustment.  */
11372 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11374 /* Stack align doesn't work with eh_return. */
11375 gcc_assert (!stack_realign_drap
);
11376 /* Neither does regparm nested functions. */
11377 gcc_assert (!ix86_static_chain_on_stack
);
11379 if (frame_pointer_needed
)
11381 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11382 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11383 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11385 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11386 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11388 /* Note that we use SA as a temporary CFA, as the return
11389 address is at the proper place relative to it. We
11390 pretend this happens at the FP restore insn because
11391 prior to this insn the FP would be stored at the wrong
11392 offset relative to SA, and after this insn we have no
11393 other reasonable register to use for the CFA. We don't
11394 bother resetting the CFA to the SP for the duration of
11395 the return insn. */
11396 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11397 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11398 ix86_add_queued_cfa_restore_notes (insn
);
11399 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11400 RTX_FRAME_RELATED_P (insn
) = 1;
11402 m
->fs
.cfa_reg
= sa
;
11403 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11404 m
->fs
.fp_valid
= false;
11406 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11407 const0_rtx
, style
, false);
/* No-frame-pointer eh_return variant: add SA directly into sp.  */
11411 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11412 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11413 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11414 ix86_add_queued_cfa_restore_notes (insn
);
11416 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11417 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11419 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11420 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11421 plus_constant (Pmode
, stack_pointer_rtx
,
11423 RTX_FRAME_RELATED_P (insn
) = 1;
11426 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11427 m
->fs
.sp_valid
= true;
11432 /* SEH requires that the function end with (1) a stack adjustment
11433 if necessary, (2) a sequence of pops, and (3) a return or
11434 jump instruction. Prevent insns from the function body from
11435 being scheduled into this sequence. */
11438 /* Prevent a catch region from being adjacent to the standard
11439 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11440 several other flags that would be interesting to test are
11442 if (flag_non_call_exceptions
)
11443 emit_insn (gen_nops (const1_rtx
));
11445 emit_insn (gen_blockage ());
11448 /* First step is to deallocate the stack frame so that we can
11449 pop the registers. Also do it on SEH target for very large
11450 frame as the emitted instructions aren't allowed by the ABI in
11452 if (!m
->fs
.sp_valid
11454 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11455 >= SEH_MAX_FRAME_SIZE
)))
/* sp unusable: rebase it from the hard frame pointer.  */
11457 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11458 GEN_INT (m
->fs
.fp_offset
11459 - frame
.reg_save_offset
),
11462 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11464 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11465 GEN_INT (m
->fs
.sp_offset
11466 - frame
.reg_save_offset
),
11468 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11471 ix86_emit_restore_regs_using_pop ();
11474 /* If we used a stack pointer and haven't already got rid of it,
11476 if (m
->fs
.fp_valid
)
11478 /* If the stack pointer is valid and pointing at the frame
11479 pointer store address, then we only need a pop. */
11480 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11481 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11482 /* Leave results in shorter dependency chains on CPUs that are
11483 able to grok it fast. */
11484 else if (TARGET_USE_LEAVE
11485 || optimize_bb_for_size_p (EXIT_BLOCK_PTR
)
11486 || !cfun
->machine
->use_fast_prologue_epilogue
)
11487 ix86_emit_leave ();
/* Fallback (its `else' line is missing here): copy fp into sp, then pop.  */
11490 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11491 hard_frame_pointer_rtx
,
11492 const0_rtx
, style
, !using_drap
)
;
11493 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
/* DRAP teardown (the `if (using_drap)' guard line is presumably missing
   from this copy — the gcc_assert below implies a realigned-DRAP frame).  */
11499 int param_ptr_offset
= UNITS_PER_WORD
;
11502 gcc_assert (stack_realign_drap
);
11504 if (ix86_static_chain_on_stack
)
11505 param_ptr_offset
+= UNITS_PER_WORD
;
11506 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11507 param_ptr_offset
+= UNITS_PER_WORD
;
/* Recompute sp from the DRAP register, undoing the realignment.  */
11509 insn
= emit_insn (gen_rtx_SET
11510 (VOIDmode
, stack_pointer_rtx
,
11511 gen_rtx_PLUS (Pmode
,
11513 GEN_INT (-param_ptr_offset
))));
11514 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11515 m
->fs
.cfa_offset
= param_ptr_offset
;
11516 m
->fs
.sp_offset
= param_ptr_offset
;
11517 m
->fs
.realigned
= false;
11519 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11520 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11521 GEN_INT (param_ptr_offset
)));
11522 RTX_FRAME_RELATED_P (insn
) = 1;
11524 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11525 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11528 /* At this point the stack pointer must be valid, and we must have
11529 restored all of the registers. We may not have deallocated the
11530 entire stack frame. We've delayed this until now because it may
11531 be possible to merge the local stack deallocation with the
11532 deallocation forced by ix86_static_chain_on_stack. */
11533 gcc_assert (m
->fs
.sp_valid
);
11534 gcc_assert (!m
->fs
.fp_valid
);
11535 gcc_assert (!m
->fs
.realigned
);
11536 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11538 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11539 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11543 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11545 /* Sibcall epilogues don't want a return instruction. */
/* Sibcall early-exit: restore the saved frame state and (presumably, per
   the comment above) return without emitting a return insn.  */
11548 m
->fs
= frame_state_save
;
/* Stdarg-pop return: functions that pop their own arguments.  */
11552 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11554 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11556 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11557 address, do explicit add, and jump indirectly to the caller. */
11559 if (crtl
->args
.pops_args
>= 65536)
11561 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11564 /* There is no "pascal" calling convention in any 64bit ABI. */
11565 gcc_assert (!TARGET_64BIT
);
/* Pop the return address into %ecx so we can add to sp and jump back.  */
11567 insn
= emit_insn (gen_pop (ecx
));
11568 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11569 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11571 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11572 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11573 add_reg_note (insn
, REG_CFA_REGISTER
,
11574 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11575 RTX_FRAME_RELATED_P (insn
) = 1;
11577 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11579 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
/* <= 64K argument pop: a single `ret N'.  */
11582 emit_jump_insn (gen_simple_return_pop_internal (popc
));
/* Plain return.  */
11585 emit_jump_insn (gen_simple_return_internal ());
11587 /* Restore the state back to the state from the prologue,
11588 so that it's correct for the next epilogue. */
11589 m
->fs
= frame_state_save
;
11592 /* Reset from the function's potential modifications. */
/* NOTE(review): extraction-mangled text; the `static void' header, the
   TARGET_MACHO guard around the trailing-label logic, the `while' header of
   the insn-walking loop and the big `if' condition before the fputs appear
   to be missing from this copy — verify against upstream.  */
/* Hook run after the epilogue is output: restore the PIC register number
   the function may have changed, and (for Mach-O, per the comment below)
   emit a trailing nop when the object would otherwise end on a label.  */
11595 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11596 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11598 if (pic_offset_table_rtx
)
11599 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11601 /* Mach-O doesn't support labels at the end of objects, so if
11602 it looks like we might want one, insert a NOP. */
11604 rtx insn
= get_last_insn ();
11605 rtx deleted_debug_label
= NULL_RTX
;
/* Scan backwards over trailing notes (loop header missing here).  */
11608 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11610 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11611 notes only, instead set their CODE_LABEL_NUMBER to -1,
11612 otherwise there would be code generation differences
11613 in between -g and -g0. */
11614 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11615 deleted_debug_label
= insn
;
11616 insn
= PREV_INSN (insn
);
/* If the last real thing in the object is a (deleted) label, pad with a
   nop so the label does not sit at the end of the object.  */
11621 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11622 fputs ("\tnop\n", file
);
11623 else if (deleted_debug_label
)
11624 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11625 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11626 CODE_LABEL_NUMBER (insn
) = -1;
11632 /* Return a scratch register to use in the split stack prologue. The
11633 split stack prologue is used for -fsplit-stack. It is the first
11634 instructions in the function, even before the regular prologue.
11635 The scratch register can be any caller-saved register which is not
11636 used for parameters or for the static chain. */
11638 static unsigned int
11639 split_stack_prologue_scratch_regno (void)
11645 bool is_fastcall
, is_thiscall
;
11648 is_fastcall
= (lookup_attribute ("fastcall",
11649 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11651 is_thiscall
= (lookup_attribute ("thiscall",
11652 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11654 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11658 if (DECL_STATIC_CHAIN (cfun
->decl
))
11660 sorry ("-fsplit-stack does not support fastcall with "
11661 "nested function");
11662 return INVALID_REGNUM
;
11666 else if (is_thiscall
)
11668 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11672 else if (regparm
< 3)
11674 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11680 sorry ("-fsplit-stack does not support 2 register "
11681 " parameters for a nested function");
11682 return INVALID_REGNUM
;
11689 /* FIXME: We could make this work by pushing a register
11690 around the addition and comparison. */
11691 sorry ("-fsplit-stack does not support 3 register parameters");
11692 return INVALID_REGNUM
;
11697 /* A SYMBOL_REF for the function which allocates new stackspace for
11700 static GTY(()) rtx split_stack_fn
;
11702 /* A SYMBOL_REF for the more stack function when using the large
11705 static GTY(()) rtx split_stack_fn_large
;
11707 /* Handle -fsplit-stack. These are the first instructions in the
11708 function, even before the regular prologue. */
11711 ix86_expand_split_stack_prologue (void)
11713 struct ix86_frame frame
;
11714 HOST_WIDE_INT allocate
;
11715 unsigned HOST_WIDE_INT args_size
;
11716 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11717 rtx scratch_reg
= NULL_RTX
;
11718 rtx varargs_label
= NULL_RTX
;
11721 gcc_assert (flag_split_stack
&& reload_completed
);
11723 ix86_finalize_stack_realign_flags ();
11724 ix86_compute_frame_layout (&frame
);
11725 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11727 /* This is the label we will branch to if we have enough stack
11728 space. We expect the basic block reordering pass to reverse this
11729 branch if optimizing, so that we branch in the unlikely case. */
11730 label
= gen_label_rtx ();
11732 /* We need to compare the stack pointer minus the frame size with
11733 the stack boundary in the TCB. The stack boundary always gives
11734 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11735 can compare directly. Otherwise we need to do an addition. */
11737 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11738 UNSPEC_STACK_CHECK
);
11739 limit
= gen_rtx_CONST (Pmode
, limit
);
11740 limit
= gen_rtx_MEM (Pmode
, limit
);
11741 if (allocate
< SPLIT_STACK_AVAILABLE
)
11742 current
= stack_pointer_rtx
;
11745 unsigned int scratch_regno
;
11748 /* We need a scratch register to hold the stack pointer minus
11749 the required frame size. Since this is the very start of the
11750 function, the scratch register can be any caller-saved
11751 register which is not used for parameters. */
11752 offset
= GEN_INT (- allocate
);
11753 scratch_regno
= split_stack_prologue_scratch_regno ();
11754 if (scratch_regno
== INVALID_REGNUM
)
11756 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11757 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11759 /* We don't use ix86_gen_add3 in this case because it will
11760 want to split to lea, but when not optimizing the insn
11761 will not be split after this point. */
11762 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11763 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11768 emit_move_insn (scratch_reg
, offset
);
11769 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11770 stack_pointer_rtx
));
11772 current
= scratch_reg
;
11775 ix86_expand_branch (GEU
, current
, limit
, label
);
11776 jump_insn
= get_last_insn ();
11777 JUMP_LABEL (jump_insn
) = label
;
11779 /* Mark the jump as very likely to be taken. */
11780 add_reg_note (jump_insn
, REG_BR_PROB
,
11781 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11783 if (split_stack_fn
== NULL_RTX
)
11784 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11785 fn
= split_stack_fn
;
11787 /* Get more stack space. We pass in the desired stack space and the
11788 size of the arguments to copy to the new stack. In 32-bit mode
11789 we push the parameters; __morestack will return on a new stack
11790 anyhow. In 64-bit mode we pass the parameters in r10 and
11792 allocate_rtx
= GEN_INT (allocate
);
11793 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11794 call_fusage
= NULL_RTX
;
11799 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11800 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11802 /* If this function uses a static chain, it will be in %r10.
11803 Preserve it across the call to __morestack. */
11804 if (DECL_STATIC_CHAIN (cfun
->decl
))
11808 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11809 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11810 use_reg (&call_fusage
, rax
);
11813 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11816 HOST_WIDE_INT argval
;
11818 gcc_assert (Pmode
== DImode
);
11819 /* When using the large model we need to load the address
11820 into a register, and we've run out of registers. So we
11821 switch to a different calling convention, and we call a
11822 different function: __morestack_large. We pass the
11823 argument size in the upper 32 bits of r10 and pass the
11824 frame size in the lower 32 bits. */
11825 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11826 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11828 if (split_stack_fn_large
== NULL_RTX
)
11829 split_stack_fn_large
=
11830 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11832 if (ix86_cmodel
== CM_LARGE_PIC
)
11836 label
= gen_label_rtx ();
11837 emit_label (label
);
11838 LABEL_PRESERVE_P (label
) = 1;
11839 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11840 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11841 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11842 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11844 x
= gen_rtx_CONST (Pmode
, x
);
11845 emit_move_insn (reg11
, x
);
11846 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11847 x
= gen_const_mem (Pmode
, x
);
11848 emit_move_insn (reg11
, x
);
11851 emit_move_insn (reg11
, split_stack_fn_large
);
11855 argval
= ((args_size
<< 16) << 16) + allocate
;
11856 emit_move_insn (reg10
, GEN_INT (argval
));
11860 emit_move_insn (reg10
, allocate_rtx
);
11861 emit_move_insn (reg11
, GEN_INT (args_size
));
11862 use_reg (&call_fusage
, reg11
);
11865 use_reg (&call_fusage
, reg10
);
11869 emit_insn (gen_push (GEN_INT (args_size
)));
11870 emit_insn (gen_push (allocate_rtx
));
11872 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11873 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11875 add_function_usage_to (call_insn
, call_fusage
);
11877 /* In order to make call/return prediction work right, we now need
11878 to execute a return instruction. See
11879 libgcc/config/i386/morestack.S for the details on how this works.
11881 For flow purposes gcc must not see this as a return
11882 instruction--we need control flow to continue at the subsequent
11883 label. Therefore, we use an unspec. */
11884 gcc_assert (crtl
->args
.pops_args
< 65536);
11885 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11887 /* If we are in 64-bit mode and this function uses a static chain,
11888 we saved %r10 in %rax before calling _morestack. */
11889 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11890 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11891 gen_rtx_REG (word_mode
, AX_REG
));
11893 /* If this function calls va_start, we need to store a pointer to
11894 the arguments on the old stack, because they may not have been
11895 all copied to the new stack. At this point the old stack can be
11896 found at the frame pointer value used by __morestack, because
11897 __morestack has set that up before calling back to us. Here we
11898 store that pointer in a scratch register, and in
11899 ix86_expand_prologue we store the scratch register in a stack
11901 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11903 unsigned int scratch_regno
;
11907 scratch_regno
= split_stack_prologue_scratch_regno ();
11908 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11909 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11913 return address within this function
11914 return address of caller of this function
11916 So we add three words to get to the stack arguments.
11920 return address within this function
11921 first argument to __morestack
11922 second argument to __morestack
11923 return address of caller of this function
11925 So we add five words to get to the stack arguments.
11927 words
= TARGET_64BIT
? 3 : 5;
11928 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11929 gen_rtx_PLUS (Pmode
, frame_reg
,
11930 GEN_INT (words
* UNITS_PER_WORD
))));
11932 varargs_label
= gen_label_rtx ();
11933 emit_jump_insn (gen_jump (varargs_label
));
11934 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11939 emit_label (label
);
11940 LABEL_NUSES (label
) = 1;
11942 /* If this function calls va_start, we now have to set the scratch
11943 register for the case where we do not call __morestack. In this
11944 case we need to set it based on the stack pointer. */
11945 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11947 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11948 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11949 GEN_INT (UNITS_PER_WORD
))));
11951 emit_label (varargs_label
);
11952 LABEL_NUSES (varargs_label
) = 1;
11956 /* We may have to tell the dataflow pass that the split stack prologue
11957 is initializing a scratch register. */
11960 ix86_live_on_entry (bitmap regs
)
11962 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11964 gcc_assert (flag_split_stack
);
11965 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11969 /* Determine if op is suitable SUBREG RTX for address. */
11972 ix86_address_subreg_operand (rtx op
)
11974 enum machine_mode mode
;
11979 mode
= GET_MODE (op
);
11981 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11984 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11985 failures when the register is one word out of a two word structure. */
11986 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11989 /* Allow only SUBREGs of non-eliminable hard registers. */
11990 return register_no_elim_operand (op
, mode
);
11993 /* Extract the parts of an RTL expression that is a valid memory address
11994 for an instruction. Return 0 if the structure of the address is
11995 grossly off. Return -1 if the address contains ASHIFT, so it is not
11996 strictly valid, but still used for computing length of lea instruction. */
11999 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
12001 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
12002 rtx base_reg
, index_reg
;
12003 HOST_WIDE_INT scale
= 1;
12004 rtx scale_rtx
= NULL_RTX
;
12007 enum ix86_address_seg seg
= SEG_DEFAULT
;
12009 /* Allow zero-extended SImode addresses,
12010 they will be emitted with addr32 prefix. */
12011 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
12013 if (GET_CODE (addr
) == ZERO_EXTEND
12014 && GET_MODE (XEXP (addr
, 0)) == SImode
)
12016 addr
= XEXP (addr
, 0);
12017 if (CONST_INT_P (addr
))
12020 else if (GET_CODE (addr
) == AND
12021 && const_32bit_mask (XEXP (addr
, 1), DImode
))
12023 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
12024 if (addr
== NULL_RTX
)
12027 if (CONST_INT_P (addr
))
12032 /* Allow SImode subregs of DImode addresses,
12033 they will be emitted with addr32 prefix. */
12034 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
12036 if (GET_CODE (addr
) == SUBREG
12037 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
12039 addr
= SUBREG_REG (addr
);
12040 if (CONST_INT_P (addr
))
12047 else if (GET_CODE (addr
) == SUBREG
)
12049 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
12054 else if (GET_CODE (addr
) == PLUS
)
12056 rtx addends
[4], op
;
12064 addends
[n
++] = XEXP (op
, 1);
12067 while (GET_CODE (op
) == PLUS
);
12072 for (i
= n
; i
>= 0; --i
)
12075 switch (GET_CODE (op
))
12080 index
= XEXP (op
, 0);
12081 scale_rtx
= XEXP (op
, 1);
12087 index
= XEXP (op
, 0);
12088 tmp
= XEXP (op
, 1);
12089 if (!CONST_INT_P (tmp
))
12091 scale
= INTVAL (tmp
);
12092 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12094 scale
= 1 << scale
;
12099 if (GET_CODE (op
) != UNSPEC
)
12104 if (XINT (op
, 1) == UNSPEC_TP
12105 && TARGET_TLS_DIRECT_SEG_REFS
12106 && seg
== SEG_DEFAULT
)
12107 seg
= DEFAULT_TLS_SEG_REG
;
12113 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
12140 else if (GET_CODE (addr
) == MULT
)
12142 index
= XEXP (addr
, 0); /* index*scale */
12143 scale_rtx
= XEXP (addr
, 1);
12145 else if (GET_CODE (addr
) == ASHIFT
)
12147 /* We're called for lea too, which implements ashift on occasion. */
12148 index
= XEXP (addr
, 0);
12149 tmp
= XEXP (addr
, 1);
12150 if (!CONST_INT_P (tmp
))
12152 scale
= INTVAL (tmp
);
12153 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12155 scale
= 1 << scale
;
12158 else if (CONST_INT_P (addr
))
12160 if (!x86_64_immediate_operand (addr
, VOIDmode
))
12163 /* Constant addresses are sign extended to 64bit, we have to
12164 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
12166 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
12172 disp
= addr
; /* displacement */
12178 else if (GET_CODE (index
) == SUBREG
12179 && ix86_address_subreg_operand (SUBREG_REG (index
)))
12185 /* Address override works only on the (%reg) part of %fs:(%reg). */
12186 if (seg
!= SEG_DEFAULT
12187 && ((base
&& GET_MODE (base
) != word_mode
)
12188 || (index
&& GET_MODE (index
) != word_mode
)))
12191 /* Extract the integral value of scale. */
12194 if (!CONST_INT_P (scale_rtx
))
12196 scale
= INTVAL (scale_rtx
);
12199 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
12200 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
12202 /* Avoid useless 0 displacement. */
12203 if (disp
== const0_rtx
&& (base
|| index
))
12206 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12207 if (base_reg
&& index_reg
&& scale
== 1
12208 && (index_reg
== arg_pointer_rtx
12209 || index_reg
== frame_pointer_rtx
12210 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
12213 tmp
= base
, base
= index
, index
= tmp
;
12214 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
12217 /* Special case: %ebp cannot be encoded as a base without a displacement.
12221 && (base_reg
== hard_frame_pointer_rtx
12222 || base_reg
== frame_pointer_rtx
12223 || base_reg
== arg_pointer_rtx
12224 || (REG_P (base_reg
)
12225 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
12226 || REGNO (base_reg
) == R13_REG
))))
12229 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12230 Avoid this by transforming to [%esi+0].
12231 Reload calls address legitimization without cfun defined, so we need
12232 to test cfun for being non-NULL. */
12233 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
12234 && base_reg
&& !index_reg
&& !disp
12235 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
12238 /* Special case: encode reg+reg instead of reg*2. */
12239 if (!base
&& index
&& scale
== 2)
12240 base
= index
, base_reg
= index_reg
, scale
= 1;
12242 /* Special case: scaling cannot be encoded without base or displacement. */
12243 if (!base
&& !disp
&& index
&& scale
!= 1)
12247 out
->index
= index
;
12249 out
->scale
= scale
;
12255 /* Return cost of the memory address x.
12256 For i386, it is better to use a complex address than let gcc copy
12257 the address into a reg and make a new pseudo. But not if the address
12258 requires to two regs - that would mean more pseudos with longer
12261 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12262 addr_space_t as ATTRIBUTE_UNUSED
,
12263 bool speed ATTRIBUTE_UNUSED
)
12265 struct ix86_address parts
;
12267 int ok
= ix86_decompose_address (x
, &parts
);
12271 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12272 parts
.base
= SUBREG_REG (parts
.base
);
12273 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12274 parts
.index
= SUBREG_REG (parts
.index
);
12276 /* Attempt to minimize number of registers in the address. */
12278 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12280 && (!REG_P (parts
.index
)
12281 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12285 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12287 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12288 && parts
.base
!= parts
.index
)
12291 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12292 since it's predecode logic can't detect the length of instructions
12293 and it degenerates to vector decoded. Increase cost of such
12294 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12295 to split such addresses or even refuse such addresses at all.
12297 Following addressing modes are affected:
12302 The first and last case may be avoidable by explicitly coding the zero in
12303 memory address, but I don't have AMD-K6 machine handy to check this
12307 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12308 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12309 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12315 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12316 this is used for to form addresses to local data when -fPIC is in
12320 darwin_local_data_pic (rtx disp
)
12322 return (GET_CODE (disp
) == UNSPEC
12323 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12326 /* Determine if a given RTX is a valid constant. We already know this
12327 satisfies CONSTANT_P. */
12330 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12332 switch (GET_CODE (x
))
12337 if (GET_CODE (x
) == PLUS
)
12339 if (!CONST_INT_P (XEXP (x
, 1)))
12344 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12347 /* Only some unspecs are valid as "constants". */
12348 if (GET_CODE (x
) == UNSPEC
)
12349 switch (XINT (x
, 1))
12352 case UNSPEC_GOTOFF
:
12353 case UNSPEC_PLTOFF
:
12354 return TARGET_64BIT
;
12356 case UNSPEC_NTPOFF
:
12357 x
= XVECEXP (x
, 0, 0);
12358 return (GET_CODE (x
) == SYMBOL_REF
12359 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12360 case UNSPEC_DTPOFF
:
12361 x
= XVECEXP (x
, 0, 0);
12362 return (GET_CODE (x
) == SYMBOL_REF
12363 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12368 /* We must have drilled down to a symbol. */
12369 if (GET_CODE (x
) == LABEL_REF
)
12371 if (GET_CODE (x
) != SYMBOL_REF
)
12376 /* TLS symbols are never valid. */
12377 if (SYMBOL_REF_TLS_MODEL (x
))
12380 /* DLLIMPORT symbols are never valid. */
12381 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12382 && SYMBOL_REF_DLLIMPORT_P (x
))
12386 /* mdynamic-no-pic */
12387 if (MACHO_DYNAMIC_NO_PIC_P
)
12388 return machopic_symbol_defined_p (x
);
12393 if (GET_MODE (x
) == TImode
12394 && x
!= CONST0_RTX (TImode
)
12400 if (!standard_sse_constant_p (x
))
12407 /* Otherwise we handle everything else in the move patterns. */
12411 /* Determine if it's legal to put X into the constant pool. This
12412 is not possible for the address of thread-local symbols, which
12413 is checked above. */
12416 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12418 /* We can always put integral constants and vectors in memory. */
12419 switch (GET_CODE (x
))
12429 return !ix86_legitimate_constant_p (mode
, x
);
12432 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12436 is_imported_p (rtx x
)
12438 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12439 || GET_CODE (x
) != SYMBOL_REF
)
12442 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
12446 /* Nonzero if the constant value X is a legitimate general operand
12447 when generating PIC code. It is given that flag_pic is on and
12448 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12451 legitimate_pic_operand_p (rtx x
)
12455 switch (GET_CODE (x
))
12458 inner
= XEXP (x
, 0);
12459 if (GET_CODE (inner
) == PLUS
12460 && CONST_INT_P (XEXP (inner
, 1)))
12461 inner
= XEXP (inner
, 0);
12463 /* Only some unspecs are valid as "constants". */
12464 if (GET_CODE (inner
) == UNSPEC
)
12465 switch (XINT (inner
, 1))
12468 case UNSPEC_GOTOFF
:
12469 case UNSPEC_PLTOFF
:
12470 return TARGET_64BIT
;
12472 x
= XVECEXP (inner
, 0, 0);
12473 return (GET_CODE (x
) == SYMBOL_REF
12474 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12475 case UNSPEC_MACHOPIC_OFFSET
:
12476 return legitimate_pic_address_disp_p (x
);
12484 return legitimate_pic_address_disp_p (x
);
12491 /* Determine if a given CONST RTX is a valid memory displacement
12495 legitimate_pic_address_disp_p (rtx disp
)
12499 /* In 64bit mode we can allow direct addresses of symbols and labels
12500 when they are not dynamic symbols. */
12503 rtx op0
= disp
, op1
;
12505 switch (GET_CODE (disp
))
12511 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12513 op0
= XEXP (XEXP (disp
, 0), 0);
12514 op1
= XEXP (XEXP (disp
, 0), 1);
12515 if (!CONST_INT_P (op1
)
12516 || INTVAL (op1
) >= 16*1024*1024
12517 || INTVAL (op1
) < -16*1024*1024)
12519 if (GET_CODE (op0
) == LABEL_REF
)
12521 if (GET_CODE (op0
) == CONST
12522 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12523 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12525 if (GET_CODE (op0
) == UNSPEC
12526 && XINT (op0
, 1) == UNSPEC_PCREL
)
12528 if (GET_CODE (op0
) != SYMBOL_REF
)
12533 /* TLS references should always be enclosed in UNSPEC.
12534 The dllimported symbol needs always to be resolved. */
12535 if (SYMBOL_REF_TLS_MODEL (op0
)
12536 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12541 if (is_imported_p (op0
))
12544 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12545 || !SYMBOL_REF_LOCAL_P (op0
))
12548 /* Function-symbols need to be resolved only for
12550 For the small-model we don't need to resolve anything
12552 if ((ix86_cmodel
!= CM_LARGE_PIC
12553 && SYMBOL_REF_FUNCTION_P (op0
))
12554 || ix86_cmodel
== CM_SMALL_PIC
)
12556 /* Non-external symbols don't need to be resolved for
12557 large, and medium-model. */
12558 if ((ix86_cmodel
== CM_LARGE_PIC
12559 || ix86_cmodel
== CM_MEDIUM_PIC
)
12560 && !SYMBOL_REF_EXTERNAL_P (op0
))
12563 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12564 && SYMBOL_REF_LOCAL_P (op0
)
12565 && ix86_cmodel
!= CM_LARGE_PIC
)
12573 if (GET_CODE (disp
) != CONST
)
12575 disp
= XEXP (disp
, 0);
12579 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12580 of GOT tables. We should not need these anyway. */
12581 if (GET_CODE (disp
) != UNSPEC
12582 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12583 && XINT (disp
, 1) != UNSPEC_GOTOFF
12584 && XINT (disp
, 1) != UNSPEC_PCREL
12585 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12588 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12589 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12595 if (GET_CODE (disp
) == PLUS
)
12597 if (!CONST_INT_P (XEXP (disp
, 1)))
12599 disp
= XEXP (disp
, 0);
12603 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12606 if (GET_CODE (disp
) != UNSPEC
)
12609 switch (XINT (disp
, 1))
12614 /* We need to check for both symbols and labels because VxWorks loads
12615 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12617 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12618 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12619 case UNSPEC_GOTOFF
:
12620 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12621 While ABI specify also 32bit relocation but we don't produce it in
12622 small PIC model at all. */
12623 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12624 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12626 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12628 case UNSPEC_GOTTPOFF
:
12629 case UNSPEC_GOTNTPOFF
:
12630 case UNSPEC_INDNTPOFF
:
12633 disp
= XVECEXP (disp
, 0, 0);
12634 return (GET_CODE (disp
) == SYMBOL_REF
12635 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12636 case UNSPEC_NTPOFF
:
12637 disp
= XVECEXP (disp
, 0, 0);
12638 return (GET_CODE (disp
) == SYMBOL_REF
12639 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12640 case UNSPEC_DTPOFF
:
12641 disp
= XVECEXP (disp
, 0, 0);
12642 return (GET_CODE (disp
) == SYMBOL_REF
12643 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12649 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12650 replace the input X, or the original X if no replacement is called for.
12651 The output parameter *WIN is 1 if the calling macro should goto WIN,
12652 0 if it should not. */
12655 ix86_legitimize_reload_address (rtx x
,
12656 enum machine_mode mode ATTRIBUTE_UNUSED
,
12657 int opnum
, int type
,
12658 int ind_levels ATTRIBUTE_UNUSED
)
12660 /* Reload can generate:
12662 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12666 This RTX is rejected from ix86_legitimate_address_p due to
12667 non-strictness of base register 97. Following this rejection,
12668 reload pushes all three components into separate registers,
12669 creating invalid memory address RTX.
12671 Following code reloads only the invalid part of the
12672 memory address RTX. */
12674 if (GET_CODE (x
) == PLUS
12675 && REG_P (XEXP (x
, 1))
12676 && GET_CODE (XEXP (x
, 0)) == PLUS
12677 && REG_P (XEXP (XEXP (x
, 0), 1)))
12680 bool something_reloaded
= false;
12682 base
= XEXP (XEXP (x
, 0), 1);
12683 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12685 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12686 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12687 opnum
, (enum reload_type
) type
);
12688 something_reloaded
= true;
12691 index
= XEXP (x
, 1);
12692 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12694 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12695 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12696 opnum
, (enum reload_type
) type
);
12697 something_reloaded
= true;
12700 gcc_assert (something_reloaded
);
12707 /* Recognizes RTL expressions that are valid memory addresses for an
12708 instruction. The MODE argument is the machine mode for the MEM
12709 expression that wants to use this address.
12711 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12712 convert common non-canonical forms to canonical form so that they will
12716 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12717 rtx addr
, bool strict
)
12719 struct ix86_address parts
;
12720 rtx base
, index
, disp
;
12721 HOST_WIDE_INT scale
;
12723 if (ix86_decompose_address (addr
, &parts
) <= 0)
12724 /* Decomposition failed. */
12728 index
= parts
.index
;
12730 scale
= parts
.scale
;
12732 /* Validate base register. */
12739 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12740 reg
= SUBREG_REG (base
);
12742 /* Base is not a register. */
12745 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12748 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12749 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12750 /* Base is not valid. */
12754 /* Validate index register. */
12761 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12762 reg
= SUBREG_REG (index
);
12764 /* Index is not a register. */
12767 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12770 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12771 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12772 /* Index is not valid. */
12776 /* Index and base should have the same mode. */
12778 && GET_MODE (base
) != GET_MODE (index
))
12781 /* Validate scale factor. */
12785 /* Scale without index. */
12788 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12789 /* Scale is not a valid multiplier. */
12793 /* Validate displacement. */
12796 if (GET_CODE (disp
) == CONST
12797 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12798 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12799 switch (XINT (XEXP (disp
, 0), 1))
12801 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12802 used. While ABI specify also 32bit relocations, we don't produce
12803 them at all and use IP relative instead. */
12805 case UNSPEC_GOTOFF
:
12806 gcc_assert (flag_pic
);
12808 goto is_legitimate_pic
;
12810 /* 64bit address unspec. */
12813 case UNSPEC_GOTPCREL
:
12815 gcc_assert (flag_pic
);
12816 goto is_legitimate_pic
;
12818 case UNSPEC_GOTTPOFF
:
12819 case UNSPEC_GOTNTPOFF
:
12820 case UNSPEC_INDNTPOFF
:
12821 case UNSPEC_NTPOFF
:
12822 case UNSPEC_DTPOFF
:
12825 case UNSPEC_STACK_CHECK
:
12826 gcc_assert (flag_split_stack
);
12830 /* Invalid address unspec. */
12834 else if (SYMBOLIC_CONST (disp
)
12838 && MACHOPIC_INDIRECT
12839 && !machopic_operand_p (disp
)
12845 if (TARGET_64BIT
&& (index
|| base
))
12847 /* foo@dtpoff(%rX) is ok. */
12848 if (GET_CODE (disp
) != CONST
12849 || GET_CODE (XEXP (disp
, 0)) != PLUS
12850 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12851 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12852 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12853 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12854 /* Non-constant pic memory reference. */
12857 else if ((!TARGET_MACHO
|| flag_pic
)
12858 && ! legitimate_pic_address_disp_p (disp
))
12859 /* Displacement is an invalid pic construct. */
12862 else if (MACHO_DYNAMIC_NO_PIC_P
12863 && !ix86_legitimate_constant_p (Pmode
, disp
))
12864 /* displacment must be referenced via non_lazy_pointer */
12868 /* This code used to verify that a symbolic pic displacement
12869 includes the pic_offset_table_rtx register.
12871 While this is good idea, unfortunately these constructs may
12872 be created by "adds using lea" optimization for incorrect
12881 This code is nonsensical, but results in addressing
12882 GOT table with pic_offset_table_rtx base. We can't
12883 just refuse it easily, since it gets matched by
12884 "addsi3" pattern, that later gets split to lea in the
12885 case output register differs from input. While this
12886 can be handled by separate addsi pattern for this case
12887 that never results in lea, this seems to be easier and
12888 correct fix for crash to disable this test. */
12890 else if (GET_CODE (disp
) != LABEL_REF
12891 && !CONST_INT_P (disp
)
12892 && (GET_CODE (disp
) != CONST
12893 || !ix86_legitimate_constant_p (Pmode
, disp
))
12894 && (GET_CODE (disp
) != SYMBOL_REF
12895 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12896 /* Displacement is not constant. */
12898 else if (TARGET_64BIT
12899 && !x86_64_immediate_operand (disp
, VOIDmode
))
12900 /* Displacement is out of range. */
12904 /* Everything looks valid. */
12908 /* Determine if a given RTX is a valid constant address. */
12911 constant_address_p (rtx x
)
12913 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12916 /* Return a unique alias set for the GOT. */
12918 static alias_set_type
12919 ix86_GOT_alias_set (void)
12921 static alias_set_type set
= -1;
12923 set
= new_alias_set ();
12927 /* Return a legitimate reference for ORIG (an address) using the
12928 register REG. If REG is 0, a new pseudo is generated.
12930 There are two types of references that must be handled:
12932 1. Global data references must load the address from the GOT, via
12933 the PIC reg. An insn is emitted to do this load, and the reg is
12936 2. Static data references, constant pool addresses, and code labels
12937 compute the address as an offset from the GOT, whose base is in
12938 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12939 differentiate them from global data objects. The returned
12940 address is the PIC reg + an unspec constant.
12942 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12943 reg also appears in the address. */
12946 legitimize_pic_address (rtx orig
, rtx reg
)
12949 rtx new_rtx
= orig
;
12952 if (TARGET_MACHO
&& !TARGET_64BIT
)
12955 reg
= gen_reg_rtx (Pmode
);
12956 /* Use the generic Mach-O PIC machinery. */
12957 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12961 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12963 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12968 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12970 else if (TARGET_64BIT
&& !TARGET_PECOFF
12971 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12974 /* This symbol may be referenced via a displacement from the PIC
12975 base address (@GOTOFF). */
12977 if (reload_in_progress
)
12978 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12979 if (GET_CODE (addr
) == CONST
)
12980 addr
= XEXP (addr
, 0);
12981 if (GET_CODE (addr
) == PLUS
)
12983 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12985 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12988 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12989 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12991 tmpreg
= gen_reg_rtx (Pmode
);
12994 emit_move_insn (tmpreg
, new_rtx
);
12998 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12999 tmpreg
, 1, OPTAB_DIRECT
);
13003 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
13005 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
13007 /* This symbol may be referenced via a displacement from the PIC
13008 base address (@GOTOFF). */
13010 if (reload_in_progress
)
13011 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13012 if (GET_CODE (addr
) == CONST
)
13013 addr
= XEXP (addr
, 0);
13014 if (GET_CODE (addr
) == PLUS
)
13016 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
13018 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
13021 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
13022 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13023 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13027 emit_move_insn (reg
, new_rtx
);
13031 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
13032 /* We can't use @GOTOFF for text labels on VxWorks;
13033 see gotoff_operand. */
13034 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
13036 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
13040 /* For x64 PE-COFF there is no GOT table. So we use address
13042 if (TARGET_64BIT
&& TARGET_PECOFF
)
13044 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
13045 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13048 reg
= gen_reg_rtx (Pmode
);
13049 emit_move_insn (reg
, new_rtx
);
13052 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
13054 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
13055 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13056 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
13057 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
13060 reg
= gen_reg_rtx (Pmode
);
13061 /* Use directly gen_movsi, otherwise the address is loaded
13062 into register for CSE. We don't want to CSE this addresses,
13063 instead we CSE addresses from the GOT table, so skip this. */
13064 emit_insn (gen_movsi (reg
, new_rtx
));
13069 /* This symbol must be referenced via a load from the
13070 Global Offset Table (@GOT). */
13072 if (reload_in_progress
)
13073 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13074 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
13075 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13077 new_rtx
= force_reg (Pmode
, new_rtx
);
13078 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13079 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
13080 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
13083 reg
= gen_reg_rtx (Pmode
);
13084 emit_move_insn (reg
, new_rtx
);
13090 if (CONST_INT_P (addr
)
13091 && !x86_64_immediate_operand (addr
, VOIDmode
))
13095 emit_move_insn (reg
, addr
);
13099 new_rtx
= force_reg (Pmode
, addr
);
13101 else if (GET_CODE (addr
) == CONST
)
13103 addr
= XEXP (addr
, 0);
13105 /* We must match stuff we generate before. Assume the only
13106 unspecs that can get here are ours. Not that we could do
13107 anything with them anyway.... */
13108 if (GET_CODE (addr
) == UNSPEC
13109 || (GET_CODE (addr
) == PLUS
13110 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
13112 gcc_assert (GET_CODE (addr
) == PLUS
);
13114 if (GET_CODE (addr
) == PLUS
)
13116 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
13118 /* Check first to see if this is a constant offset from a @GOTOFF
13119 symbol reference. */
13120 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
13121 && CONST_INT_P (op1
))
13125 if (reload_in_progress
)
13126 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13127 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
13129 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
13130 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13131 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13135 emit_move_insn (reg
, new_rtx
);
13141 if (INTVAL (op1
) < -16*1024*1024
13142 || INTVAL (op1
) >= 16*1024*1024)
13144 if (!x86_64_immediate_operand (op1
, Pmode
))
13145 op1
= force_reg (Pmode
, op1
);
13146 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
13152 rtx base
= legitimize_pic_address (op0
, reg
);
13153 enum machine_mode mode
= GET_MODE (base
);
13155 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
13157 if (CONST_INT_P (new_rtx
))
13159 if (INTVAL (new_rtx
) < -16*1024*1024
13160 || INTVAL (new_rtx
) >= 16*1024*1024)
13162 if (!x86_64_immediate_operand (new_rtx
, mode
))
13163 new_rtx
= force_reg (mode
, new_rtx
);
13165 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
13168 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
13172 if (GET_CODE (new_rtx
) == PLUS
13173 && CONSTANT_P (XEXP (new_rtx
, 1)))
13175 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
13176 new_rtx
= XEXP (new_rtx
, 1);
13178 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
13186 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13189 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
13191 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
13193 if (GET_MODE (tp
) != tp_mode
)
13195 gcc_assert (GET_MODE (tp
) == SImode
);
13196 gcc_assert (tp_mode
== DImode
);
13198 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
13202 tp
= copy_to_mode_reg (tp_mode
, tp
);
13207 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13209 static GTY(()) rtx ix86_tls_symbol
;
13212 ix86_tls_get_addr (void)
13214 if (!ix86_tls_symbol
)
13217 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
13218 ? "___tls_get_addr" : "__tls_get_addr");
13220 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
13223 return ix86_tls_symbol
;
13226 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13228 static GTY(()) rtx ix86_tls_module_base_symbol
;
13231 ix86_tls_module_base (void)
13233 if (!ix86_tls_module_base_symbol
)
13235 ix86_tls_module_base_symbol
13236 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
13238 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13239 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13242 return ix86_tls_module_base_symbol
;
13245 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13246 false if we expect this to be used for a memory address and true if
13247 we expect to load the address into a register. */
13250 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
13252 rtx dest
, base
, off
;
13253 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
13254 enum machine_mode tp_mode
= Pmode
;
13259 case TLS_MODEL_GLOBAL_DYNAMIC
:
13260 dest
= gen_reg_rtx (Pmode
);
13264 if (flag_pic
&& !TARGET_PECOFF
)
13265 pic
= pic_offset_table_rtx
;
13268 pic
= gen_reg_rtx (Pmode
);
13269 emit_insn (gen_set_got (pic
));
13273 if (TARGET_GNU2_TLS
)
13276 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13278 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13280 tp
= get_thread_pointer (Pmode
, true);
13281 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13283 if (GET_MODE (x
) != Pmode
)
13284 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13286 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13290 rtx caddr
= ix86_tls_get_addr ();
13294 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13299 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13300 insns
= get_insns ();
13303 if (GET_MODE (x
) != Pmode
)
13304 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13306 RTL_CONST_CALL_P (insns
) = 1;
13307 emit_libcall_block (insns
, dest
, rax
, x
);
13310 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13314 case TLS_MODEL_LOCAL_DYNAMIC
:
13315 base
= gen_reg_rtx (Pmode
);
13320 pic
= pic_offset_table_rtx
;
13323 pic
= gen_reg_rtx (Pmode
);
13324 emit_insn (gen_set_got (pic
));
13328 if (TARGET_GNU2_TLS
)
13330 rtx tmp
= ix86_tls_module_base ();
13333 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13335 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13337 tp
= get_thread_pointer (Pmode
, true);
13338 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13339 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13343 rtx caddr
= ix86_tls_get_addr ();
13347 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13352 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13353 insns
= get_insns ();
13356 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13357 share the LD_BASE result with other LD model accesses. */
13358 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13359 UNSPEC_TLS_LD_BASE
);
13361 RTL_CONST_CALL_P (insns
) = 1;
13362 emit_libcall_block (insns
, base
, rax
, eqv
);
13365 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13368 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13369 off
= gen_rtx_CONST (Pmode
, off
);
13371 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13373 if (TARGET_GNU2_TLS
)
13375 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13377 if (GET_MODE (x
) != Pmode
)
13378 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13380 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13384 case TLS_MODEL_INITIAL_EXEC
:
13387 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13389 /* The Sun linker took the AMD64 TLS spec literally
13390 and can only handle %rax as destination of the
13391 initial executable code sequence. */
13393 dest
= gen_reg_rtx (DImode
);
13394 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13398 /* Generate DImode references to avoid %fs:(%reg32)
13399 problems and linker IE->LE relaxation bug. */
13402 type
= UNSPEC_GOTNTPOFF
;
13406 if (reload_in_progress
)
13407 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13408 pic
= pic_offset_table_rtx
;
13409 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13411 else if (!TARGET_ANY_GNU_TLS
)
13413 pic
= gen_reg_rtx (Pmode
);
13414 emit_insn (gen_set_got (pic
));
13415 type
= UNSPEC_GOTTPOFF
;
13420 type
= UNSPEC_INDNTPOFF
;
13423 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13424 off
= gen_rtx_CONST (tp_mode
, off
);
13426 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13427 off
= gen_const_mem (tp_mode
, off
);
13428 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13430 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13432 base
= get_thread_pointer (tp_mode
,
13433 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13434 off
= force_reg (tp_mode
, off
);
13435 return gen_rtx_PLUS (tp_mode
, base
, off
);
13439 base
= get_thread_pointer (Pmode
, true);
13440 dest
= gen_reg_rtx (Pmode
);
13441 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13445 case TLS_MODEL_LOCAL_EXEC
:
13446 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13447 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13448 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13449 off
= gen_rtx_CONST (Pmode
, off
);
13451 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13453 base
= get_thread_pointer (Pmode
,
13454 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13455 return gen_rtx_PLUS (Pmode
, base
, off
);
13459 base
= get_thread_pointer (Pmode
, true);
13460 dest
= gen_reg_rtx (Pmode
);
13461 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13466 gcc_unreachable ();
13472 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13473 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13474 unique refptr-DECL symbol corresponding to symbol DECL. */
13476 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13477 htab_t dllimport_map
;
13480 get_dllimport_decl (tree decl
, bool beimport
)
13482 struct tree_map
*h
, in
;
13485 const char *prefix
;
13486 size_t namelen
, prefixlen
;
13491 if (!dllimport_map
)
13492 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13494 in
.hash
= htab_hash_pointer (decl
);
13495 in
.base
.from
= decl
;
13496 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13497 h
= (struct tree_map
*) *loc
;
13501 *loc
= h
= ggc_alloc_tree_map ();
13503 h
->base
.from
= decl
;
13504 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13505 VAR_DECL
, NULL
, ptr_type_node
);
13506 DECL_ARTIFICIAL (to
) = 1;
13507 DECL_IGNORED_P (to
) = 1;
13508 DECL_EXTERNAL (to
) = 1;
13509 TREE_READONLY (to
) = 1;
13511 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13512 name
= targetm
.strip_name_encoding (name
);
13514 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13515 ? "*__imp_" : "*__imp__";
13517 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13518 namelen
= strlen (name
);
13519 prefixlen
= strlen (prefix
);
13520 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13521 memcpy (imp_name
, prefix
, prefixlen
);
13522 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13524 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13525 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13526 SET_SYMBOL_REF_DECL (rtl
, to
);
13527 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13530 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13531 #ifdef SUB_TARGET_RECORD_STUB
13532 SUB_TARGET_RECORD_STUB (name
);
13536 rtl
= gen_const_mem (Pmode
, rtl
);
13537 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13539 SET_DECL_RTL (to
, rtl
);
13540 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13545 /* Expand SYMBOL into its corresponding far-addresse symbol.
13546 WANT_REG is true if we require the result be a register. */
13549 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13554 gcc_assert (SYMBOL_REF_DECL (symbol
));
13555 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13557 x
= DECL_RTL (imp_decl
);
13559 x
= force_reg (Pmode
, x
);
13563 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13564 true if we require the result be a register. */
13567 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13572 gcc_assert (SYMBOL_REF_DECL (symbol
));
13573 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13575 x
= DECL_RTL (imp_decl
);
13577 x
= force_reg (Pmode
, x
);
13581 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13582 is true if we require the result be a register. */
13585 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13587 if (!TARGET_PECOFF
)
13590 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13592 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13593 return legitimize_dllimport_symbol (addr
, inreg
);
13594 if (GET_CODE (addr
) == CONST
13595 && GET_CODE (XEXP (addr
, 0)) == PLUS
13596 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13597 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13599 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13600 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13604 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13606 if (GET_CODE (addr
) == SYMBOL_REF
13607 && !is_imported_p (addr
)
13608 && SYMBOL_REF_EXTERNAL_P (addr
)
13609 && SYMBOL_REF_DECL (addr
))
13610 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13612 if (GET_CODE (addr
) == CONST
13613 && GET_CODE (XEXP (addr
, 0)) == PLUS
13614 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13615 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13616 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13617 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13619 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13620 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13625 /* Try machine-dependent ways of modifying an illegitimate address
13626 to be legitimate. If we find one, return the new, valid address.
13627 This macro is used in only one place: `memory_address' in explow.c.
13629 OLDX is the address as it was before break_out_memory_refs was called.
13630 In some cases it is useful to look at this to decide what needs to be done.
13632 It is always safe for this macro to do nothing. It exists to recognize
13633 opportunities to optimize the output.
13635 For the 80386, we handle X+REG by loading X into a register R and
13636 using R+REG. R will go in a general reg and indexing will be used.
13637 However, if REG is a broken-out memory address or multiplication,
13638 nothing needs to be done because REG can certainly go in a general reg.
13640 When -fpic is used, special handling is needed for symbolic references.
13641 See comments by legitimize_pic_address in i386.c for details. */
13644 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13645 enum machine_mode mode
)
13650 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13652 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13653 if (GET_CODE (x
) == CONST
13654 && GET_CODE (XEXP (x
, 0)) == PLUS
13655 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13656 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13658 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13659 (enum tls_model
) log
, false);
13660 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13663 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13665 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13670 if (flag_pic
&& SYMBOLIC_CONST (x
))
13671 return legitimize_pic_address (x
, 0);
13674 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13675 return machopic_indirect_data_reference (x
, 0);
13678 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13679 if (GET_CODE (x
) == ASHIFT
13680 && CONST_INT_P (XEXP (x
, 1))
13681 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13684 log
= INTVAL (XEXP (x
, 1));
13685 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13686 GEN_INT (1 << log
));
13689 if (GET_CODE (x
) == PLUS
)
13691 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13693 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13694 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13695 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13698 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13699 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13700 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13701 GEN_INT (1 << log
));
13704 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13705 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13706 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13709 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13710 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13711 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13712 GEN_INT (1 << log
));
13715 /* Put multiply first if it isn't already. */
13716 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13718 rtx tmp
= XEXP (x
, 0);
13719 XEXP (x
, 0) = XEXP (x
, 1);
13724 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13725 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13726 created by virtual register instantiation, register elimination, and
13727 similar optimizations. */
13728 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13731 x
= gen_rtx_PLUS (Pmode
,
13732 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13733 XEXP (XEXP (x
, 1), 0)),
13734 XEXP (XEXP (x
, 1), 1));
13738 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13739 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13740 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13741 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13742 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13743 && CONSTANT_P (XEXP (x
, 1)))
13746 rtx other
= NULL_RTX
;
13748 if (CONST_INT_P (XEXP (x
, 1)))
13750 constant
= XEXP (x
, 1);
13751 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13753 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13755 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13756 other
= XEXP (x
, 1);
13764 x
= gen_rtx_PLUS (Pmode
,
13765 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13766 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13767 plus_constant (Pmode
, other
,
13768 INTVAL (constant
)));
13772 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13775 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13778 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13781 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13784 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13788 && REG_P (XEXP (x
, 1))
13789 && REG_P (XEXP (x
, 0)))
13792 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13795 x
= legitimize_pic_address (x
, 0);
13798 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13801 if (REG_P (XEXP (x
, 0)))
13803 rtx temp
= gen_reg_rtx (Pmode
);
13804 rtx val
= force_operand (XEXP (x
, 1), temp
);
13807 val
= convert_to_mode (Pmode
, val
, 1);
13808 emit_move_insn (temp
, val
);
13811 XEXP (x
, 1) = temp
;
13815 else if (REG_P (XEXP (x
, 1)))
13817 rtx temp
= gen_reg_rtx (Pmode
);
13818 rtx val
= force_operand (XEXP (x
, 0), temp
);
13821 val
= convert_to_mode (Pmode
, val
, 1);
13822 emit_move_insn (temp
, val
);
13825 XEXP (x
, 0) = temp
;
13833 /* Print an integer constant expression in assembler syntax. Addition
13834 and subtraction are the only arithmetic that may appear in these
13835 expressions. FILE is the stdio stream to write to, X is the rtx, and
13836 CODE is the operand print code from the output string. */
13839 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13843 switch (GET_CODE (x
))
13846 gcc_assert (flag_pic
);
13851 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13852 output_addr_const (file
, x
);
13855 const char *name
= XSTR (x
, 0);
13857 /* Mark the decl as referenced so that cgraph will
13858 output the function. */
13859 if (SYMBOL_REF_DECL (x
))
13860 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13863 if (MACHOPIC_INDIRECT
13864 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13865 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13867 assemble_name (file
, name
);
13869 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13870 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13871 fputs ("@PLT", file
);
13878 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13879 assemble_name (asm_out_file
, buf
);
13883 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13887 /* This used to output parentheses around the expression,
13888 but that does not work on the 386 (either ATT or BSD assembler). */
13889 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13893 if (GET_MODE (x
) == VOIDmode
)
13895 /* We can use %d if the number is <32 bits and positive. */
13896 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13897 fprintf (file
, "0x%lx%08lx",
13898 (unsigned long) CONST_DOUBLE_HIGH (x
),
13899 (unsigned long) CONST_DOUBLE_LOW (x
));
13901 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13904 /* We can't handle floating point constants;
13905 TARGET_PRINT_OPERAND must handle them. */
13906 output_operand_lossage ("floating constant misused");
13910 /* Some assemblers need integer constants to appear first. */
13911 if (CONST_INT_P (XEXP (x
, 0)))
13913 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13915 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13919 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13920 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13922 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13928 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13929 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13931 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13933 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13937 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13939 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13944 gcc_assert (XVECLEN (x
, 0) == 1);
13945 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13946 switch (XINT (x
, 1))
13949 fputs ("@GOT", file
);
13951 case UNSPEC_GOTOFF
:
13952 fputs ("@GOTOFF", file
);
13954 case UNSPEC_PLTOFF
:
13955 fputs ("@PLTOFF", file
);
13958 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13959 "(%rip)" : "[rip]", file
);
13961 case UNSPEC_GOTPCREL
:
13962 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13963 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13965 case UNSPEC_GOTTPOFF
:
13966 /* FIXME: This might be @TPOFF in Sun ld too. */
13967 fputs ("@gottpoff", file
);
13970 fputs ("@tpoff", file
);
13972 case UNSPEC_NTPOFF
:
13974 fputs ("@tpoff", file
);
13976 fputs ("@ntpoff", file
);
13978 case UNSPEC_DTPOFF
:
13979 fputs ("@dtpoff", file
);
13981 case UNSPEC_GOTNTPOFF
:
13983 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13984 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13986 fputs ("@gotntpoff", file
);
13988 case UNSPEC_INDNTPOFF
:
13989 fputs ("@indntpoff", file
);
13992 case UNSPEC_MACHOPIC_OFFSET
:
13994 machopic_output_function_base_name (file
);
13998 output_operand_lossage ("invalid UNSPEC as operand");
14004 output_operand_lossage ("invalid expression as operand");
14008 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14009 We need to emit DTP-relative relocations. */
14011 static void ATTRIBUTE_UNUSED
14012 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
14014 fputs (ASM_LONG
, file
);
14015 output_addr_const (file
, x
);
14016 fputs ("@dtpoff", file
);
14022 fputs (", 0", file
);
14025 gcc_unreachable ();
14029 /* Return true if X is a representation of the PIC register. This copes
14030 with calls from ix86_find_base_term, where the register might have
14031 been replaced by a cselib value. */
14034 ix86_pic_register_p (rtx x
)
14036 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
14037 return (pic_offset_table_rtx
14038 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
14040 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
14043 /* Helper function for ix86_delegitimize_address.
14044 Attempt to delegitimize TLS local-exec accesses. */
14047 ix86_delegitimize_tls_address (rtx orig_x
)
14049 rtx x
= orig_x
, unspec
;
14050 struct ix86_address addr
;
14052 if (!TARGET_TLS_DIRECT_SEG_REFS
)
14056 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
14058 if (ix86_decompose_address (x
, &addr
) == 0
14059 || addr
.seg
!= DEFAULT_TLS_SEG_REG
14060 || addr
.disp
== NULL_RTX
14061 || GET_CODE (addr
.disp
) != CONST
)
14063 unspec
= XEXP (addr
.disp
, 0);
14064 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
14065 unspec
= XEXP (unspec
, 0);
14066 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
14068 x
= XVECEXP (unspec
, 0, 0);
14069 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
14070 if (unspec
!= XEXP (addr
.disp
, 0))
14071 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
14074 rtx idx
= addr
.index
;
14075 if (addr
.scale
!= 1)
14076 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
14077 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
14080 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
14081 if (MEM_P (orig_x
))
14082 x
= replace_equiv_address_nv (orig_x
, x
);
14086 /* In the name of slightly smaller debug output, and to cater to
14087 general assembler lossage, recognize PIC+GOTOFF and turn it back
14088 into a direct symbol reference.
14090 On Darwin, this is necessary to avoid a crash, because Darwin
14091 has a different PIC label for each routine but the DWARF debugging
14092 information is not associated with any particular routine, so it's
14093 necessary to remove references to the PIC label from RTL stored by
14094 the DWARF output code. */
14097 ix86_delegitimize_address (rtx x
)
14099 rtx orig_x
= delegitimize_mem_from_attrs (x
);
14100 /* addend is NULL or some rtx if x is something+GOTOFF where
14101 something doesn't include the PIC register. */
14102 rtx addend
= NULL_RTX
;
14103 /* reg_addend is NULL or a multiple of some register. */
14104 rtx reg_addend
= NULL_RTX
;
14105 /* const_addend is NULL or a const_int. */
14106 rtx const_addend
= NULL_RTX
;
14107 /* This is the result, or NULL. */
14108 rtx result
= NULL_RTX
;
14117 if (GET_CODE (x
) == CONST
14118 && GET_CODE (XEXP (x
, 0)) == PLUS
14119 && GET_MODE (XEXP (x
, 0)) == Pmode
14120 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
14121 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
14122 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
14124 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
14125 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
14126 if (MEM_P (orig_x
))
14127 x
= replace_equiv_address_nv (orig_x
, x
);
14130 if (GET_CODE (x
) != CONST
14131 || GET_CODE (XEXP (x
, 0)) != UNSPEC
14132 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
14133 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
14134 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
14135 return ix86_delegitimize_tls_address (orig_x
);
14136 x
= XVECEXP (XEXP (x
, 0), 0, 0);
14137 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
14139 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
14147 if (GET_CODE (x
) != PLUS
14148 || GET_CODE (XEXP (x
, 1)) != CONST
)
14149 return ix86_delegitimize_tls_address (orig_x
);
14151 if (ix86_pic_register_p (XEXP (x
, 0)))
14152 /* %ebx + GOT/GOTOFF */
14154 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14156 /* %ebx + %reg * scale + GOT/GOTOFF */
14157 reg_addend
= XEXP (x
, 0);
14158 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
14159 reg_addend
= XEXP (reg_addend
, 1);
14160 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
14161 reg_addend
= XEXP (reg_addend
, 0);
14164 reg_addend
= NULL_RTX
;
14165 addend
= XEXP (x
, 0);
14169 addend
= XEXP (x
, 0);
14171 x
= XEXP (XEXP (x
, 1), 0);
14172 if (GET_CODE (x
) == PLUS
14173 && CONST_INT_P (XEXP (x
, 1)))
14175 const_addend
= XEXP (x
, 1);
14179 if (GET_CODE (x
) == UNSPEC
14180 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
14181 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
14182 result
= XVECEXP (x
, 0, 0);
14184 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
14185 && !MEM_P (orig_x
))
14186 result
= XVECEXP (x
, 0, 0);
14189 return ix86_delegitimize_tls_address (orig_x
);
14192 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
14194 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
14197 /* If the rest of original X doesn't involve the PIC register, add
14198 addend and subtract pic_offset_table_rtx. This can happen e.g.
14200 leal (%ebx, %ecx, 4), %ecx
14202 movl foo@GOTOFF(%ecx), %edx
14203 in which case we return (%ecx - %ebx) + foo. */
14204 if (pic_offset_table_rtx
)
14205 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
14206 pic_offset_table_rtx
),
14211 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
14213 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
14214 if (result
== NULL_RTX
)
14220 /* If X is a machine specific address (i.e. a symbol or label being
14221 referenced as a displacement from the GOT implemented using an
14222 UNSPEC), then return the base term. Otherwise return X. */
14225 ix86_find_base_term (rtx x
)
14231 if (GET_CODE (x
) != CONST
)
14233 term
= XEXP (x
, 0);
14234 if (GET_CODE (term
) == PLUS
14235 && (CONST_INT_P (XEXP (term
, 1))
14236 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
14237 term
= XEXP (term
, 0);
14238 if (GET_CODE (term
) != UNSPEC
14239 || (XINT (term
, 1) != UNSPEC_GOTPCREL
14240 && XINT (term
, 1) != UNSPEC_PCREL
))
14243 return XVECEXP (term
, 0, 0);
14246 return ix86_delegitimize_address (x
);
14250 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14251 bool fp
, FILE *file
)
14253 const char *suffix
;
14255 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14257 code
= ix86_fp_compare_code_to_integer (code
);
14261 code
= reverse_condition (code
);
14312 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14316 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14317 Those same assemblers have the same but opposite lossage on cmov. */
14318 if (mode
== CCmode
)
14319 suffix
= fp
? "nbe" : "a";
14320 else if (mode
== CCCmode
)
14323 gcc_unreachable ();
14339 gcc_unreachable ();
14343 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14360 gcc_unreachable ();
14364 /* ??? As above. */
14365 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14366 suffix
= fp
? "nb" : "ae";
14369 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14373 /* ??? As above. */
14374 if (mode
== CCmode
)
14376 else if (mode
== CCCmode
)
14377 suffix
= fp
? "nb" : "ae";
14379 gcc_unreachable ();
14382 suffix
= fp
? "u" : "p";
14385 suffix
= fp
? "nu" : "np";
14388 gcc_unreachable ();
14390 fputs (suffix
, file
);
14393 /* Print the name of register X to FILE based on its machine mode and number.
14394 If CODE is 'w', pretend the mode is HImode.
14395 If CODE is 'b', pretend the mode is QImode.
14396 If CODE is 'k', pretend the mode is SImode.
14397 If CODE is 'q', pretend the mode is DImode.
14398 If CODE is 'x', pretend the mode is V4SFmode.
14399 If CODE is 't', pretend the mode is V8SFmode.
14400 If CODE is 'h', pretend the reg is the 'high' byte register.
14401 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14402 If CODE is 'd', duplicate the operand for AVX instruction.
14406 print_reg (rtx x
, int code
, FILE *file
)
14409 unsigned int regno
;
14410 bool duplicated
= code
== 'd' && TARGET_AVX
;
14412 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14417 gcc_assert (TARGET_64BIT
);
14418 fputs ("rip", file
);
14422 regno
= true_regnum (x
);
14423 gcc_assert (regno
!= ARG_POINTER_REGNUM
14424 && regno
!= FRAME_POINTER_REGNUM
14425 && regno
!= FLAGS_REG
14426 && regno
!= FPSR_REG
14427 && regno
!= FPCR_REG
);
14429 if (code
== 'w' || MMX_REG_P (x
))
14431 else if (code
== 'b')
14433 else if (code
== 'k')
14435 else if (code
== 'q')
14437 else if (code
== 'y')
14439 else if (code
== 'h')
14441 else if (code
== 'x')
14443 else if (code
== 't')
14446 code
= GET_MODE_SIZE (GET_MODE (x
));
14448 /* Irritatingly, AMD extended registers use different naming convention
14449 from the normal registers: "r%d[bwd]" */
14450 if (REX_INT_REGNO_P (regno
))
14452 gcc_assert (TARGET_64BIT
);
14454 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14458 error ("extended registers have no high halves");
14473 error ("unsupported operand size for extended register");
14483 if (STACK_TOP_P (x
))
14492 if (! ANY_FP_REG_P (x
))
14493 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14498 reg
= hi_reg_name
[regno
];
14501 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14503 reg
= qi_reg_name
[regno
];
14506 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14508 reg
= qi_high_reg_name
[regno
];
14513 gcc_assert (!duplicated
);
14515 fputs (hi_reg_name
[regno
] + 1, file
);
14520 gcc_unreachable ();
14526 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14527 fprintf (file
, ", %%%s", reg
);
14529 fprintf (file
, ", %s", reg
);
14533 /* Locate some local-dynamic symbol still in use by this function
14534 so that we can print its name in some tls_local_dynamic_base
14538 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14542 if (GET_CODE (x
) == SYMBOL_REF
14543 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14545 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14552 static const char *
14553 get_some_local_dynamic_name (void)
14557 if (cfun
->machine
->some_ld_name
)
14558 return cfun
->machine
->some_ld_name
;
14560 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14561 if (NONDEBUG_INSN_P (insn
)
14562 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14563 return cfun
->machine
->some_ld_name
;
14568 /* Meaning of CODE:
14569 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14570 C -- print opcode suffix for set/cmov insn.
14571 c -- like C, but print reversed condition
14572 F,f -- likewise, but for floating-point.
14573 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14575 R -- print the prefix for register names.
14576 z -- print the opcode suffix for the size of the current operand.
14577 Z -- likewise, with special suffixes for x87 instructions.
14578 * -- print a star (in certain assembler syntax)
14579 A -- print an absolute memory reference.
14580 E -- print address with DImode register names if TARGET_64BIT.
14581 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14582 s -- print a shift double count, followed by the assemblers argument
14584 b -- print the QImode name of the register for the indicated operand.
14585 %b0 would print %al if operands[0] is reg 0.
14586 w -- likewise, print the HImode name of the register.
14587 k -- likewise, print the SImode name of the register.
14588 q -- likewise, print the DImode name of the register.
14589 x -- likewise, print the V4SFmode name of the register.
14590 t -- likewise, print the V8SFmode name of the register.
14591 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14592 y -- print "st(0)" instead of "st" as a register.
14593 d -- print duplicated register operand for AVX instruction.
14594 D -- print condition for SSE cmp instruction.
14595 P -- if PIC, print an @PLT suffix.
14596 p -- print raw symbol name.
14597 X -- don't print any sort of PIC '@' suffix for a symbol.
14598 & -- print some in-use local-dynamic symbol name.
14599 H -- print a memory address offset by 8; used for sse high-parts
14600 Y -- print condition for XOP pcom* instruction.
14601 + -- print a branch hint as 'cs' or 'ds' prefix
14602 ; -- print a semicolon (after prefixes due to bug in older gas).
14603 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14604 @ -- print a segment register of thread base pointer load
14605 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14609 ix86_print_operand (FILE *file
, rtx x
, int code
)
14616 switch (ASSEMBLER_DIALECT
)
14623 /* Intel syntax. For absolute addresses, registers should not
14624 be surrounded by braces. */
14628 ix86_print_operand (file
, x
, 0);
14635 gcc_unreachable ();
14638 ix86_print_operand (file
, x
, 0);
14642 /* Wrap address in an UNSPEC to declare special handling. */
14644 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14646 output_address (x
);
14650 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14655 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14665 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14675 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14680 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14681 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14684 switch (GET_MODE_SIZE (GET_MODE (x
)))
14699 output_operand_lossage
14700 ("invalid operand size for operand code 'O'");
14709 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14711 /* Opcodes don't get size suffixes if using Intel opcodes. */
14712 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14715 switch (GET_MODE_SIZE (GET_MODE (x
)))
14734 output_operand_lossage
14735 ("invalid operand size for operand code 'z'");
14740 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14742 (0, "non-integer operand used with operand code 'z'");
14746 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14747 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14750 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14752 switch (GET_MODE_SIZE (GET_MODE (x
)))
14755 #ifdef HAVE_AS_IX86_FILDS
14765 #ifdef HAVE_AS_IX86_FILDQ
14768 fputs ("ll", file
);
14776 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14778 /* 387 opcodes don't get size suffixes
14779 if the operands are registers. */
14780 if (STACK_REG_P (x
))
14783 switch (GET_MODE_SIZE (GET_MODE (x
)))
14804 output_operand_lossage
14805 ("invalid operand type used with operand code 'Z'");
14809 output_operand_lossage
14810 ("invalid operand size for operand code 'Z'");
14828 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14830 ix86_print_operand (file
, x
, 0);
14831 fputs (", ", file
);
14836 switch (GET_CODE (x
))
14839 fputs ("neq", file
);
14842 fputs ("eq", file
);
14846 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14850 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14854 fputs ("le", file
);
14858 fputs ("lt", file
);
14861 fputs ("unord", file
);
14864 fputs ("ord", file
);
14867 fputs ("ueq", file
);
14870 fputs ("nlt", file
);
14873 fputs ("nle", file
);
14876 fputs ("ule", file
);
14879 fputs ("ult", file
);
14882 fputs ("une", file
);
14885 output_operand_lossage ("operand is not a condition code, "
14886 "invalid operand code 'Y'");
14892 /* Little bit of braindamage here. The SSE compare instructions
14893 does use completely different names for the comparisons that the
14894 fp conditional moves. */
14895 switch (GET_CODE (x
))
14900 fputs ("eq_us", file
);
14904 fputs ("eq", file
);
14909 fputs ("nge", file
);
14913 fputs ("lt", file
);
14918 fputs ("ngt", file
);
14922 fputs ("le", file
);
14925 fputs ("unord", file
);
14930 fputs ("neq_oq", file
);
14934 fputs ("neq", file
);
14939 fputs ("ge", file
);
14943 fputs ("nlt", file
);
14948 fputs ("gt", file
);
14952 fputs ("nle", file
);
14955 fputs ("ord", file
);
14958 output_operand_lossage ("operand is not a condition code, "
14959 "invalid operand code 'D'");
14966 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14967 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14973 if (!COMPARISON_P (x
))
14975 output_operand_lossage ("operand is not a condition code, "
14976 "invalid operand code '%c'", code
);
14979 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14980 code
== 'c' || code
== 'f',
14981 code
== 'F' || code
== 'f',
14986 if (!offsettable_memref_p (x
))
14988 output_operand_lossage ("operand is not an offsettable memory "
14989 "reference, invalid operand code 'H'");
14992 /* It doesn't actually matter what mode we use here, as we're
14993 only going to use this for printing. */
14994 x
= adjust_address_nv (x
, DImode
, 8);
14995 /* Output 'qword ptr' for intel assembler dialect. */
14996 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
15001 gcc_assert (CONST_INT_P (x
));
15003 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
15004 #ifdef HAVE_AS_IX86_HLE
15005 fputs ("xacquire ", file
);
15007 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
15009 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
15010 #ifdef HAVE_AS_IX86_HLE
15011 fputs ("xrelease ", file
);
15013 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
15015 /* We do not want to print value of the operand. */
15019 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15025 const char *name
= get_some_local_dynamic_name ();
15027 output_operand_lossage ("'%%&' used without any "
15028 "local dynamic TLS references");
15030 assemble_name (file
, name
);
15039 || optimize_function_for_size_p (cfun
)
15040 || !TARGET_BRANCH_PREDICTION_HINTS
)
15043 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
15046 int pred_val
= INTVAL (XEXP (x
, 0));
15048 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
15049 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
15051 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
15053 = final_forward_branch_p (current_output_insn
) == 0;
15055 /* Emit hints only in the case default branch prediction
15056 heuristics would fail. */
15057 if (taken
!= cputaken
)
15059 /* We use 3e (DS) prefix for taken branches and
15060 2e (CS) prefix for not taken branches. */
15062 fputs ("ds ; ", file
);
15064 fputs ("cs ; ", file
);
15072 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15078 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15081 /* The kernel uses a different segment register for performance
15082 reasons; a system call would not have to trash the userspace
15083 segment register, which would be expensive. */
15084 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
15085 fputs ("fs", file
);
15087 fputs ("gs", file
);
15091 putc (TARGET_AVX2
? 'i' : 'f', file
);
15095 if (TARGET_64BIT
&& Pmode
!= word_mode
)
15096 fputs ("addr32 ", file
);
15100 output_operand_lossage ("invalid operand code '%c'", code
);
15105 print_reg (x
, code
, file
);
15107 else if (MEM_P (x
))
15109 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15110 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
15111 && GET_MODE (x
) != BLKmode
)
15114 switch (GET_MODE_SIZE (GET_MODE (x
)))
15116 case 1: size
= "BYTE"; break;
15117 case 2: size
= "WORD"; break;
15118 case 4: size
= "DWORD"; break;
15119 case 8: size
= "QWORD"; break;
15120 case 12: size
= "TBYTE"; break;
15122 if (GET_MODE (x
) == XFmode
)
15127 case 32: size
= "YMMWORD"; break;
15129 gcc_unreachable ();
15132 /* Check for explicit size override (codes 'b', 'w', 'k',
15136 else if (code
== 'w')
15138 else if (code
== 'k')
15140 else if (code
== 'q')
15142 else if (code
== 'x')
15145 fputs (size
, file
);
15146 fputs (" PTR ", file
);
15150 /* Avoid (%rip) for call operands. */
15151 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
15152 && !CONST_INT_P (x
))
15153 output_addr_const (file
, x
);
15154 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
15155 output_operand_lossage ("invalid constraints for operand");
15157 output_address (x
);
15160 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
15165 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15166 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
15168 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15170 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15172 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
15173 (unsigned long long) (int) l
);
15175 fprintf (file
, "0x%08x", (unsigned int) l
);
15178 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
15183 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15184 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
15186 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15188 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
15191 /* These float cases don't actually occur as immediate operands. */
15192 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
15196 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
15197 fputs (dstr
, file
);
15202 /* We have patterns that allow zero sets of memory, for instance.
15203 In 64-bit mode, we should probably support all 8-byte vectors,
15204 since we can in fact encode that into an immediate. */
15205 if (GET_CODE (x
) == CONST_VECTOR
)
15207 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
15211 if (code
!= 'P' && code
!= 'p')
15213 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
15215 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15218 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
15219 || GET_CODE (x
) == LABEL_REF
)
15221 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15224 fputs ("OFFSET FLAT:", file
);
15227 if (CONST_INT_P (x
))
15228 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15229 else if (flag_pic
|| MACHOPIC_INDIRECT
)
15230 output_pic_addr_const (file
, x
, code
);
15232 output_addr_const (file
, x
);
15237 ix86_print_operand_punct_valid_p (unsigned char code
)
15239 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
15240 || code
== ';' || code
== '~' || code
== '^');
15243 /* Print a memory operand whose address is ADDR. */
15246 ix86_print_operand_address (FILE *file
, rtx addr
)
15248 struct ix86_address parts
;
15249 rtx base
, index
, disp
;
15255 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15257 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15258 gcc_assert (parts
.index
== NULL_RTX
);
15259 parts
.index
= XVECEXP (addr
, 0, 1);
15260 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15261 addr
= XVECEXP (addr
, 0, 0);
15264 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15266 gcc_assert (TARGET_64BIT
);
15267 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15271 ok
= ix86_decompose_address (addr
, &parts
);
15276 index
= parts
.index
;
15278 scale
= parts
.scale
;
15286 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15288 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15291 gcc_unreachable ();
15294 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15295 if (TARGET_64BIT
&& !base
&& !index
)
15299 if (GET_CODE (disp
) == CONST
15300 && GET_CODE (XEXP (disp
, 0)) == PLUS
15301 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15302 symbol
= XEXP (XEXP (disp
, 0), 0);
15304 if (GET_CODE (symbol
) == LABEL_REF
15305 || (GET_CODE (symbol
) == SYMBOL_REF
15306 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15309 if (!base
&& !index
)
15311 /* Displacement only requires special attention. */
15313 if (CONST_INT_P (disp
))
15315 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15316 fputs ("ds:", file
);
15317 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15320 output_pic_addr_const (file
, disp
, 0);
15322 output_addr_const (file
, disp
);
15326 /* Print SImode register names to force addr32 prefix. */
15327 if (SImode_address_operand (addr
, VOIDmode
))
15329 #ifdef ENABLE_CHECKING
15330 gcc_assert (TARGET_64BIT
);
15331 switch (GET_CODE (addr
))
15334 gcc_assert (GET_MODE (addr
) == SImode
);
15335 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15339 gcc_assert (GET_MODE (addr
) == DImode
);
15342 gcc_unreachable ();
15345 gcc_assert (!code
);
15351 && CONST_INT_P (disp
)
15352 && INTVAL (disp
) < -16*1024*1024)
15354 /* X32 runs in 64-bit mode, where displacement, DISP, in
15355 address DISP(%r64), is encoded as 32-bit immediate sign-
15356 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15357 address is %r64 + 0xffffffffbffffd00. When %r64 <
15358 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15359 which is invalid for x32. The correct address is %r64
15360 - 0x40000300 == 0xf7ffdd64. To properly encode
15361 -0x40000300(%r64) for x32, we zero-extend negative
15362 displacement by forcing addr32 prefix which truncates
15363 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15364 zero-extend all negative displacements, including -1(%rsp).
15365 However, for small negative displacements, sign-extension
15366 won't cause overflow. We only zero-extend negative
15367 displacements if they < -16*1024*1024, which is also used
15368 to check legitimate address displacements for PIC. */
15372 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15377 output_pic_addr_const (file
, disp
, 0);
15378 else if (GET_CODE (disp
) == LABEL_REF
)
15379 output_asm_label (disp
);
15381 output_addr_const (file
, disp
);
15386 print_reg (base
, code
, file
);
15390 print_reg (index
, vsib
? 0 : code
, file
);
15391 if (scale
!= 1 || vsib
)
15392 fprintf (file
, ",%d", scale
);
15398 rtx offset
= NULL_RTX
;
15402 /* Pull out the offset of a symbol; print any symbol itself. */
15403 if (GET_CODE (disp
) == CONST
15404 && GET_CODE (XEXP (disp
, 0)) == PLUS
15405 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15407 offset
= XEXP (XEXP (disp
, 0), 1);
15408 disp
= gen_rtx_CONST (VOIDmode
,
15409 XEXP (XEXP (disp
, 0), 0));
15413 output_pic_addr_const (file
, disp
, 0);
15414 else if (GET_CODE (disp
) == LABEL_REF
)
15415 output_asm_label (disp
);
15416 else if (CONST_INT_P (disp
))
15419 output_addr_const (file
, disp
);
15425 print_reg (base
, code
, file
);
15428 if (INTVAL (offset
) >= 0)
15430 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15434 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15441 print_reg (index
, vsib
? 0 : code
, file
);
15442 if (scale
!= 1 || vsib
)
15443 fprintf (file
, "*%d", scale
);
15450 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15453 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15457 if (GET_CODE (x
) != UNSPEC
)
15460 op
= XVECEXP (x
, 0, 0);
15461 switch (XINT (x
, 1))
15463 case UNSPEC_GOTTPOFF
:
15464 output_addr_const (file
, op
);
15465 /* FIXME: This might be @TPOFF in Sun ld. */
15466 fputs ("@gottpoff", file
);
15469 output_addr_const (file
, op
);
15470 fputs ("@tpoff", file
);
15472 case UNSPEC_NTPOFF
:
15473 output_addr_const (file
, op
);
15475 fputs ("@tpoff", file
);
15477 fputs ("@ntpoff", file
);
15479 case UNSPEC_DTPOFF
:
15480 output_addr_const (file
, op
);
15481 fputs ("@dtpoff", file
);
15483 case UNSPEC_GOTNTPOFF
:
15484 output_addr_const (file
, op
);
15486 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15487 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15489 fputs ("@gotntpoff", file
);
15491 case UNSPEC_INDNTPOFF
:
15492 output_addr_const (file
, op
);
15493 fputs ("@indntpoff", file
);
15496 case UNSPEC_MACHOPIC_OFFSET
:
15497 output_addr_const (file
, op
);
15499 machopic_output_function_base_name (file
);
15503 case UNSPEC_STACK_CHECK
:
15507 gcc_assert (flag_split_stack
);
15509 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15510 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15512 gcc_unreachable ();
15515 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15526 /* Split one or more double-mode RTL references into pairs of half-mode
15527 references. The RTL can be REG, offsettable MEM, integer constant, or
15528 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15529 split and "num" is its length. lo_half and hi_half are output arrays
15530 that parallel "operands". */
15533 split_double_mode (enum machine_mode mode
, rtx operands
[],
15534 int num
, rtx lo_half
[], rtx hi_half
[])
15536 enum machine_mode half_mode
;
15542 half_mode
= DImode
;
15545 half_mode
= SImode
;
15548 gcc_unreachable ();
15551 byte
= GET_MODE_SIZE (half_mode
);
15555 rtx op
= operands
[num
];
15557 /* simplify_subreg refuse to split volatile memory addresses,
15558 but we still have to handle it. */
15561 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15562 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15566 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15567 GET_MODE (op
) == VOIDmode
15568 ? mode
: GET_MODE (op
), 0);
15569 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15570 GET_MODE (op
) == VOIDmode
15571 ? mode
: GET_MODE (op
), byte
);
15576 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15577 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15578 is the expression of the binary operation. The output may either be
15579 emitted here, or returned to the caller, like all output_* functions.
15581 There is no guarantee that the operands are the same mode, as they
15582 might be within FLOAT or FLOAT_EXTEND expressions. */
15584 #ifndef SYSV386_COMPAT
15585 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15586 wants to fix the assemblers because that causes incompatibility
15587 with gcc. No-one wants to fix gcc because that causes
15588 incompatibility with assemblers... You can use the option of
15589 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15590 #define SYSV386_COMPAT 1
15594 output_387_binary_op (rtx insn
, rtx
*operands
)
15596 static char buf
[40];
15599 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15601 #ifdef ENABLE_CHECKING
15602 /* Even if we do not want to check the inputs, this documents input
15603 constraints. Which helps in understanding the following code. */
15604 if (STACK_REG_P (operands
[0])
15605 && ((REG_P (operands
[1])
15606 && REGNO (operands
[0]) == REGNO (operands
[1])
15607 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15608 || (REG_P (operands
[2])
15609 && REGNO (operands
[0]) == REGNO (operands
[2])
15610 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15611 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15614 gcc_assert (is_sse
);
15617 switch (GET_CODE (operands
[3]))
15620 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15621 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15629 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15630 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15638 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15639 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15647 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15648 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15656 gcc_unreachable ();
15663 strcpy (buf
, ssep
);
15664 if (GET_MODE (operands
[0]) == SFmode
)
15665 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15667 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15671 strcpy (buf
, ssep
+ 1);
15672 if (GET_MODE (operands
[0]) == SFmode
)
15673 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15675 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15681 switch (GET_CODE (operands
[3]))
15685 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15687 rtx temp
= operands
[2];
15688 operands
[2] = operands
[1];
15689 operands
[1] = temp
;
15692 /* know operands[0] == operands[1]. */
15694 if (MEM_P (operands
[2]))
15700 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15702 if (STACK_TOP_P (operands
[0]))
15703 /* How is it that we are storing to a dead operand[2]?
15704 Well, presumably operands[1] is dead too. We can't
15705 store the result to st(0) as st(0) gets popped on this
15706 instruction. Instead store to operands[2] (which I
15707 think has to be st(1)). st(1) will be popped later.
15708 gcc <= 2.8.1 didn't have this check and generated
15709 assembly code that the Unixware assembler rejected. */
15710 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15712 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15716 if (STACK_TOP_P (operands
[0]))
15717 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15719 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15724 if (MEM_P (operands
[1]))
15730 if (MEM_P (operands
[2]))
15736 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15739 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15740 derived assemblers, confusingly reverse the direction of
15741 the operation for fsub{r} and fdiv{r} when the
15742 destination register is not st(0). The Intel assembler
15743 doesn't have this brain damage. Read !SYSV386_COMPAT to
15744 figure out what the hardware really does. */
15745 if (STACK_TOP_P (operands
[0]))
15746 p
= "{p\t%0, %2|rp\t%2, %0}";
15748 p
= "{rp\t%2, %0|p\t%0, %2}";
15750 if (STACK_TOP_P (operands
[0]))
15751 /* As above for fmul/fadd, we can't store to st(0). */
15752 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15754 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15759 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15762 if (STACK_TOP_P (operands
[0]))
15763 p
= "{rp\t%0, %1|p\t%1, %0}";
15765 p
= "{p\t%1, %0|rp\t%0, %1}";
15767 if (STACK_TOP_P (operands
[0]))
15768 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15770 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15775 if (STACK_TOP_P (operands
[0]))
15777 if (STACK_TOP_P (operands
[1]))
15778 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15780 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15783 else if (STACK_TOP_P (operands
[1]))
15786 p
= "{\t%1, %0|r\t%0, %1}";
15788 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15794 p
= "{r\t%2, %0|\t%0, %2}";
15796 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15802 gcc_unreachable ();
15809 /* Check if a 256bit AVX register is referenced inside of EXP. */
15812 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15816 if (GET_CODE (exp
) == SUBREG
)
15817 exp
= SUBREG_REG (exp
);
15820 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15826 /* Return needed mode for entity in optimize_mode_switching pass. */
15829 ix86_avx_u128_mode_needed (rtx insn
)
15835 /* Needed mode is set to AVX_U128_CLEAN if there are
15836 no 256bit modes used in function arguments. */
15837 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15839 link
= XEXP (link
, 1))
15841 if (GET_CODE (XEXP (link
, 0)) == USE
)
15843 rtx arg
= XEXP (XEXP (link
, 0), 0);
15845 if (ix86_check_avx256_register (&arg
, NULL
))
15846 return AVX_U128_ANY
;
15850 return AVX_U128_CLEAN
;
15853 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15854 changes state only when a 256bit register is written to, but we need
15855 to prevent the compiler from moving optimal insertion point above
15856 eventual read from 256bit register. */
15857 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15858 return AVX_U128_DIRTY
;
15860 return AVX_U128_ANY
;
15863 /* Return mode that i387 must be switched into
15864 prior to the execution of insn. */
15867 ix86_i387_mode_needed (int entity
, rtx insn
)
15869 enum attr_i387_cw mode
;
15871 /* The mode UNINITIALIZED is used to store control word after a
15872 function call or ASM pattern. The mode ANY specify that function
15873 has no requirements on the control word and make no changes in the
15874 bits we are interested in. */
15877 || (NONJUMP_INSN_P (insn
)
15878 && (asm_noperands (PATTERN (insn
)) >= 0
15879 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15880 return I387_CW_UNINITIALIZED
;
15882 if (recog_memoized (insn
) < 0)
15883 return I387_CW_ANY
;
15885 mode
= get_attr_i387_cw (insn
);
15890 if (mode
== I387_CW_TRUNC
)
15895 if (mode
== I387_CW_FLOOR
)
15900 if (mode
== I387_CW_CEIL
)
15905 if (mode
== I387_CW_MASK_PM
)
15910 gcc_unreachable ();
15913 return I387_CW_ANY
;
15916 /* Return mode that entity must be switched into
15917 prior to the execution of insn. */
15920 ix86_mode_needed (int entity
, rtx insn
)
15925 return ix86_avx_u128_mode_needed (insn
);
15930 return ix86_i387_mode_needed (entity
, insn
);
15932 gcc_unreachable ();
15937 /* Check if a 256bit AVX register is referenced in stores. */
15940 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15942 if (ix86_check_avx256_register (&dest
, NULL
))
15944 bool *used
= (bool *) data
;
15949 /* Calculate mode of upper 128bit AVX registers after the insn. */
15952 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15954 rtx pat
= PATTERN (insn
);
15956 if (vzeroupper_operation (pat
, VOIDmode
)
15957 || vzeroall_operation (pat
, VOIDmode
))
15958 return AVX_U128_CLEAN
;
15960 /* We know that state is clean after CALL insn if there are no
15961 256bit registers used in the function return register. */
15964 bool avx_reg256_found
= false;
15965 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15966 if (!avx_reg256_found
)
15967 return AVX_U128_CLEAN
;
15970 /* Otherwise, return current mode. Remember that if insn
15971 references AVX 256bit registers, the mode was already changed
15972 to DIRTY from MODE_NEEDED. */
15976 /* Return the mode that an insn results in. */
15979 ix86_mode_after (int entity
, int mode
, rtx insn
)
15984 return ix86_avx_u128_mode_after (mode
, insn
);
15991 gcc_unreachable ();
15996 ix86_avx_u128_mode_entry (void)
16000 /* Entry mode is set to AVX_U128_DIRTY if there are
16001 256bit modes used in function arguments. */
16002 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
16003 arg
= TREE_CHAIN (arg
))
16005 rtx incoming
= DECL_INCOMING_RTL (arg
);
16007 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
16008 return AVX_U128_DIRTY
;
16011 return AVX_U128_CLEAN
;
16014 /* Return a mode that ENTITY is assumed to be
16015 switched to at function entry. */
16018 ix86_mode_entry (int entity
)
16023 return ix86_avx_u128_mode_entry ();
16028 return I387_CW_ANY
;
16030 gcc_unreachable ();
16035 ix86_avx_u128_mode_exit (void)
16037 rtx reg
= crtl
->return_rtx
;
16039 /* Exit mode is set to AVX_U128_DIRTY if there are
16040 256bit modes used in the function return register. */
16041 if (reg
&& ix86_check_avx256_register (®
, NULL
))
16042 return AVX_U128_DIRTY
;
16044 return AVX_U128_CLEAN
;
16047 /* Return a mode that ENTITY is assumed to be
16048 switched to at function exit. */
16051 ix86_mode_exit (int entity
)
16056 return ix86_avx_u128_mode_exit ();
16061 return I387_CW_ANY
;
16063 gcc_unreachable ();
16067 /* Output code to initialize control word copies used by trunc?f?i and
16068 rounding patterns. CURRENT_MODE is set to current control word,
16069 while NEW_MODE is set to new control word. */
16072 emit_i387_cw_initialization (int mode
)
16074 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
16077 enum ix86_stack_slot slot
;
16079 rtx reg
= gen_reg_rtx (HImode
);
16081 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
16082 emit_move_insn (reg
, copy_rtx (stored_mode
));
16084 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
16085 || optimize_insn_for_size_p ())
16089 case I387_CW_TRUNC
:
16090 /* round toward zero (truncate) */
16091 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
16092 slot
= SLOT_CW_TRUNC
;
16095 case I387_CW_FLOOR
:
16096 /* round down toward -oo */
16097 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16098 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
16099 slot
= SLOT_CW_FLOOR
;
16103 /* round up toward +oo */
16104 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16105 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
16106 slot
= SLOT_CW_CEIL
;
16109 case I387_CW_MASK_PM
:
16110 /* mask precision exception for nearbyint() */
16111 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16112 slot
= SLOT_CW_MASK_PM
;
16116 gcc_unreachable ();
16123 case I387_CW_TRUNC
:
16124 /* round toward zero (truncate) */
16125 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
16126 slot
= SLOT_CW_TRUNC
;
16129 case I387_CW_FLOOR
:
16130 /* round down toward -oo */
16131 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
16132 slot
= SLOT_CW_FLOOR
;
16136 /* round up toward +oo */
16137 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
16138 slot
= SLOT_CW_CEIL
;
16141 case I387_CW_MASK_PM
:
16142 /* mask precision exception for nearbyint() */
16143 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16144 slot
= SLOT_CW_MASK_PM
;
16148 gcc_unreachable ();
16152 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
16154 new_mode
= assign_386_stack_local (HImode
, slot
);
16155 emit_move_insn (new_mode
, reg
);
16158 /* Emit vzeroupper. */
16161 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
16165 /* Cancel automatic vzeroupper insertion if there are
16166 live call-saved SSE registers at the insertion point. */
16168 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
16169 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16173 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
16174 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16177 emit_insn (gen_avx_vzeroupper ());
16180 /* Generate one or more insns to set ENTITY to MODE. */
16183 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
16188 if (mode
== AVX_U128_CLEAN
)
16189 ix86_avx_emit_vzeroupper (regs_live
);
16195 if (mode
!= I387_CW_ANY
16196 && mode
!= I387_CW_UNINITIALIZED
)
16197 emit_i387_cw_initialization (mode
);
16200 gcc_unreachable ();
16204 /* Output code for INSN to convert a float to a signed int. OPERANDS
16205 are the insn operands. The output may be [HSD]Imode and the input
16206 operand may be [SDX]Fmode. */
16209 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
16211 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16212 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
16213 int round_mode
= get_attr_i387_cw (insn
);
16215 /* Jump through a hoop or two for DImode, since the hardware has no
16216 non-popping instruction. We used to do this a different way, but
16217 that was somewhat fragile and broke with post-reload splitters. */
16218 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
16219 output_asm_insn ("fld\t%y1", operands
);
16221 gcc_assert (STACK_TOP_P (operands
[1]));
16222 gcc_assert (MEM_P (operands
[0]));
16223 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
16226 output_asm_insn ("fisttp%Z0\t%0", operands
);
16229 if (round_mode
!= I387_CW_ANY
)
16230 output_asm_insn ("fldcw\t%3", operands
);
16231 if (stack_top_dies
|| dimode_p
)
16232 output_asm_insn ("fistp%Z0\t%0", operands
);
16234 output_asm_insn ("fist%Z0\t%0", operands
);
16235 if (round_mode
!= I387_CW_ANY
)
16236 output_asm_insn ("fldcw\t%2", operands
);
16242 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16243 have the values zero or one, indicates the ffreep insn's operand
16244 from the OPERANDS array. */
16246 static const char *
16247 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16249 if (TARGET_USE_FFREEP
)
16250 #ifdef HAVE_AS_IX86_FFREEP
16251 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
16254 static char retval
[32];
16255 int regno
= REGNO (operands
[opno
]);
16257 gcc_assert (STACK_REGNO_P (regno
));
16259 regno
-= FIRST_STACK_REG
;
16261 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16266 return opno
? "fstp\t%y1" : "fstp\t%y0";
16270 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16271 should be used. UNORDERED_P is true when fucom should be used. */
16274 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16276 int stack_top_dies
;
16277 rtx cmp_op0
, cmp_op1
;
16278 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16282 cmp_op0
= operands
[0];
16283 cmp_op1
= operands
[1];
16287 cmp_op0
= operands
[1];
16288 cmp_op1
= operands
[2];
16293 if (GET_MODE (operands
[0]) == SFmode
)
16295 return "%vucomiss\t{%1, %0|%0, %1}";
16297 return "%vcomiss\t{%1, %0|%0, %1}";
16300 return "%vucomisd\t{%1, %0|%0, %1}";
16302 return "%vcomisd\t{%1, %0|%0, %1}";
16305 gcc_assert (STACK_TOP_P (cmp_op0
));
16307 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16309 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16311 if (stack_top_dies
)
16313 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16314 return output_387_ffreep (operands
, 1);
16317 return "ftst\n\tfnstsw\t%0";
16320 if (STACK_REG_P (cmp_op1
)
16322 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16323 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16325 /* If both the top of the 387 stack dies, and the other operand
16326 is also a stack register that dies, then this must be a
16327 `fcompp' float compare */
16331 /* There is no double popping fcomi variant. Fortunately,
16332 eflags is immune from the fstp's cc clobbering. */
16334 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16336 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16337 return output_387_ffreep (operands
, 0);
16342 return "fucompp\n\tfnstsw\t%0";
16344 return "fcompp\n\tfnstsw\t%0";
16349 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16351 static const char * const alt
[16] =
16353 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16354 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16355 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16356 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16358 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16359 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16363 "fcomi\t{%y1, %0|%0, %y1}",
16364 "fcomip\t{%y1, %0|%0, %y1}",
16365 "fucomi\t{%y1, %0|%0, %y1}",
16366 "fucomip\t{%y1, %0|%0, %y1}",
16377 mask
= eflags_p
<< 3;
16378 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16379 mask
|= unordered_p
<< 1;
16380 mask
|= stack_top_dies
;
16382 gcc_assert (mask
< 16);
16391 ix86_output_addr_vec_elt (FILE *file
, int value
)
16393 const char *directive
= ASM_LONG
;
16397 directive
= ASM_QUAD
;
16399 gcc_assert (!TARGET_64BIT
);
16402 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16406 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16408 const char *directive
= ASM_LONG
;
16411 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16412 directive
= ASM_QUAD
;
16414 gcc_assert (!TARGET_64BIT
);
16416 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16417 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16418 fprintf (file
, "%s%s%d-%s%d\n",
16419 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16420 else if (HAVE_AS_GOTOFF_IN_DATA
)
16421 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16423 else if (TARGET_MACHO
)
16425 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16426 machopic_output_function_base_name (file
);
16431 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16432 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16435 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16439 ix86_expand_clear (rtx dest
)
16443 /* We play register width games, which are only valid after reload. */
16444 gcc_assert (reload_completed
);
16446 /* Avoid HImode and its attendant prefix byte. */
16447 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16448 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16449 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16451 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16452 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16454 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16455 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16461 /* X is an unchanging MEM. If it is a constant pool reference, return
16462 the constant pool rtx, else NULL. */
16465 maybe_get_pool_constant (rtx x
)
16467 x
= ix86_delegitimize_address (XEXP (x
, 0));
16469 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16470 return get_pool_constant (x
);
16476 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16479 enum tls_model model
;
16484 if (GET_CODE (op1
) == SYMBOL_REF
)
16488 model
= SYMBOL_REF_TLS_MODEL (op1
);
16491 op1
= legitimize_tls_address (op1
, model
, true);
16492 op1
= force_operand (op1
, op0
);
16495 op1
= convert_to_mode (mode
, op1
, 1);
16497 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16500 else if (GET_CODE (op1
) == CONST
16501 && GET_CODE (XEXP (op1
, 0)) == PLUS
16502 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16504 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16505 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16508 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16510 tmp
= legitimize_tls_address (symbol
, model
, true);
16512 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16516 tmp
= force_operand (tmp
, NULL
);
16517 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16518 op0
, 1, OPTAB_DIRECT
);
16521 op1
= convert_to_mode (mode
, tmp
, 1);
16525 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16526 && symbolic_operand (op1
, mode
))
16528 if (TARGET_MACHO
&& !TARGET_64BIT
)
16531 /* dynamic-no-pic */
16532 if (MACHOPIC_INDIRECT
)
16534 rtx temp
= ((reload_in_progress
16535 || ((op0
&& REG_P (op0
))
16537 ? op0
: gen_reg_rtx (Pmode
));
16538 op1
= machopic_indirect_data_reference (op1
, temp
);
16540 op1
= machopic_legitimize_pic_address (op1
, mode
,
16541 temp
== op1
? 0 : temp
);
16543 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16545 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16549 if (GET_CODE (op0
) == MEM
)
16550 op1
= force_reg (Pmode
, op1
);
16554 if (GET_CODE (temp
) != REG
)
16555 temp
= gen_reg_rtx (Pmode
);
16556 temp
= legitimize_pic_address (op1
, temp
);
16561 /* dynamic-no-pic */
16567 op1
= force_reg (mode
, op1
);
16568 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16570 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16571 op1
= legitimize_pic_address (op1
, reg
);
16574 op1
= convert_to_mode (mode
, op1
, 1);
16581 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16582 || !push_operand (op0
, mode
))
16584 op1
= force_reg (mode
, op1
);
16586 if (push_operand (op0
, mode
)
16587 && ! general_no_elim_operand (op1
, mode
))
16588 op1
= copy_to_mode_reg (mode
, op1
);
16590 /* Force large constants in 64bit compilation into register
16591 to get them CSEed. */
16592 if (can_create_pseudo_p ()
16593 && (mode
== DImode
) && TARGET_64BIT
16594 && immediate_operand (op1
, mode
)
16595 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16596 && !register_operand (op0
, mode
)
16598 op1
= copy_to_mode_reg (mode
, op1
);
16600 if (can_create_pseudo_p ()
16601 && FLOAT_MODE_P (mode
)
16602 && GET_CODE (op1
) == CONST_DOUBLE
)
16604 /* If we are loading a floating point constant to a register,
16605 force the value to memory now, since we'll get better code
16606 out the back end. */
16608 op1
= validize_mem (force_const_mem (mode
, op1
));
16609 if (!register_operand (op0
, mode
))
16611 rtx temp
= gen_reg_rtx (mode
);
16612 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16613 emit_move_insn (op0
, temp
);
16619 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16623 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16625 rtx op0
= operands
[0], op1
= operands
[1];
16626 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16628 /* Force constants other than zero into memory. We do not know how
16629 the instructions used to build constants modify the upper 64 bits
16630 of the register, once we have that information we may be able
16631 to handle some of them more efficiently. */
16632 if (can_create_pseudo_p ()
16633 && register_operand (op0
, mode
)
16634 && (CONSTANT_P (op1
)
16635 || (GET_CODE (op1
) == SUBREG
16636 && CONSTANT_P (SUBREG_REG (op1
))))
16637 && !standard_sse_constant_p (op1
))
16638 op1
= validize_mem (force_const_mem (mode
, op1
));
16640 /* We need to check memory alignment for SSE mode since attribute
16641 can make operands unaligned. */
16642 if (can_create_pseudo_p ()
16643 && SSE_REG_MODE_P (mode
)
16644 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16645 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16649 /* ix86_expand_vector_move_misalign() does not like constants ... */
16650 if (CONSTANT_P (op1
)
16651 || (GET_CODE (op1
) == SUBREG
16652 && CONSTANT_P (SUBREG_REG (op1
))))
16653 op1
= validize_mem (force_const_mem (mode
, op1
));
16655 /* ... nor both arguments in memory. */
16656 if (!register_operand (op0
, mode
)
16657 && !register_operand (op1
, mode
))
16658 op1
= force_reg (mode
, op1
);
16660 tmp
[0] = op0
; tmp
[1] = op1
;
16661 ix86_expand_vector_move_misalign (mode
, tmp
);
16665 /* Make operand1 a register if it isn't already. */
16666 if (can_create_pseudo_p ()
16667 && !register_operand (op0
, mode
)
16668 && !register_operand (op1
, mode
))
16670 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16674 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16677 /* Split 32-byte AVX unaligned load and store if needed. */
16680 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16683 rtx (*extract
) (rtx
, rtx
, rtx
);
16684 rtx (*load_unaligned
) (rtx
, rtx
);
16685 rtx (*store_unaligned
) (rtx
, rtx
);
16686 enum machine_mode mode
;
16688 switch (GET_MODE (op0
))
16691 gcc_unreachable ();
16693 extract
= gen_avx_vextractf128v32qi
;
16694 load_unaligned
= gen_avx_loaddqu256
;
16695 store_unaligned
= gen_avx_storedqu256
;
16699 extract
= gen_avx_vextractf128v8sf
;
16700 load_unaligned
= gen_avx_loadups256
;
16701 store_unaligned
= gen_avx_storeups256
;
16705 extract
= gen_avx_vextractf128v4df
;
16706 load_unaligned
= gen_avx_loadupd256
;
16707 store_unaligned
= gen_avx_storeupd256
;
16714 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16716 rtx r
= gen_reg_rtx (mode
);
16717 m
= adjust_address (op1
, mode
, 0);
16718 emit_move_insn (r
, m
);
16719 m
= adjust_address (op1
, mode
, 16);
16720 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16721 emit_move_insn (op0
, r
);
16724 emit_insn (load_unaligned (op0
, op1
));
16726 else if (MEM_P (op0
))
16728 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16730 m
= adjust_address (op0
, mode
, 0);
16731 emit_insn (extract (m
, op1
, const0_rtx
));
16732 m
= adjust_address (op0
, mode
, 16);
16733 emit_insn (extract (m
, op1
, const1_rtx
));
16736 emit_insn (store_unaligned (op0
, op1
));
16739 gcc_unreachable ();
16742 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16743 straight to ix86_expand_vector_move. */
16744 /* Code generation for scalar reg-reg moves of single and double precision data:
16745 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16749 if (x86_sse_partial_reg_dependency == true)
16754 Code generation for scalar loads of double precision data:
16755 if (x86_sse_split_regs == true)
16756 movlpd mem, reg (gas syntax)
16760 Code generation for unaligned packed loads of single precision data
16761 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16762 if (x86_sse_unaligned_move_optimal)
16765 if (x86_sse_partial_reg_dependency == true)
16777 Code generation for unaligned packed loads of double precision data
16778 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16779 if (x86_sse_unaligned_move_optimal)
16782 if (x86_sse_split_regs == true)
16795 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16803 && GET_MODE_SIZE (mode
) == 32)
16805 switch (GET_MODE_CLASS (mode
))
16807 case MODE_VECTOR_INT
:
16809 op0
= gen_lowpart (V32QImode
, op0
);
16810 op1
= gen_lowpart (V32QImode
, op1
);
16813 case MODE_VECTOR_FLOAT
:
16814 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16818 gcc_unreachable ();
16826 /* ??? If we have typed data, then it would appear that using
16827 movdqu is the only way to get unaligned data loaded with
16829 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16831 op0
= gen_lowpart (V16QImode
, op0
);
16832 op1
= gen_lowpart (V16QImode
, op1
);
16833 /* We will eventually emit movups based on insn attributes. */
16834 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16836 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16841 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16842 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16843 || optimize_insn_for_size_p ())
16845 /* We will eventually emit movups based on insn attributes. */
16846 emit_insn (gen_sse2_loadupd (op0
, op1
));
16850 /* When SSE registers are split into halves, we can avoid
16851 writing to the top half twice. */
16852 if (TARGET_SSE_SPLIT_REGS
)
16854 emit_clobber (op0
);
16859 /* ??? Not sure about the best option for the Intel chips.
16860 The following would seem to satisfy; the register is
16861 entirely cleared, breaking the dependency chain. We
16862 then store to the upper half, with a dependency depth
16863 of one. A rumor has it that Intel recommends two movsd
16864 followed by an unpacklpd, but this is unconfirmed. And
16865 given that the dependency depth of the unpacklpd would
16866 still be one, I'm not sure why this would be better. */
16867 zero
= CONST0_RTX (V2DFmode
);
16870 m
= adjust_address (op1
, DFmode
, 0);
16871 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16872 m
= adjust_address (op1
, DFmode
, 8);
16873 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16878 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16879 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16880 || optimize_insn_for_size_p ())
16882 op0
= gen_lowpart (V4SFmode
, op0
);
16883 op1
= gen_lowpart (V4SFmode
, op1
);
16884 emit_insn (gen_sse_loadups (op0
, op1
));
16888 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16889 emit_move_insn (op0
, CONST0_RTX (mode
));
16891 emit_clobber (op0
);
16893 if (mode
!= V4SFmode
)
16894 op0
= gen_lowpart (V4SFmode
, op0
);
16896 m
= adjust_address (op1
, V2SFmode
, 0);
16897 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16898 m
= adjust_address (op1
, V2SFmode
, 8);
16899 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16902 else if (MEM_P (op0
))
16904 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16906 op0
= gen_lowpart (V16QImode
, op0
);
16907 op1
= gen_lowpart (V16QImode
, op1
);
16908 /* We will eventually emit movups based on insn attributes. */
16909 emit_insn (gen_sse2_storedqu (op0
, op1
));
16911 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16914 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16915 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16916 || optimize_insn_for_size_p ())
16917 /* We will eventually emit movups based on insn attributes. */
16918 emit_insn (gen_sse2_storeupd (op0
, op1
));
16921 m
= adjust_address (op0
, DFmode
, 0);
16922 emit_insn (gen_sse2_storelpd (m
, op1
));
16923 m
= adjust_address (op0
, DFmode
, 8);
16924 emit_insn (gen_sse2_storehpd (m
, op1
));
16929 if (mode
!= V4SFmode
)
16930 op1
= gen_lowpart (V4SFmode
, op1
);
16933 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16934 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16935 || optimize_insn_for_size_p ())
16937 op0
= gen_lowpart (V4SFmode
, op0
);
16938 emit_insn (gen_sse_storeups (op0
, op1
));
16942 m
= adjust_address (op0
, V2SFmode
, 0);
16943 emit_insn (gen_sse_storelps (m
, op1
));
16944 m
= adjust_address (op0
, V2SFmode
, 8);
16945 emit_insn (gen_sse_storehps (m
, op1
));
16950 gcc_unreachable ();
16953 /* Expand a push in MODE. This is some mode for which we do not support
16954 proper push instructions, at least from the registers that we expect
16955 the value to live in. */
16958 ix86_expand_push (enum machine_mode mode
, rtx x
)
16962 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16963 GEN_INT (-GET_MODE_SIZE (mode
)),
16964 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16965 if (tmp
!= stack_pointer_rtx
)
16966 emit_move_insn (stack_pointer_rtx
, tmp
);
16968 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16970 /* When we push an operand onto stack, it has to be aligned at least
16971 at the function argument boundary. However since we don't have
16972 the argument type, we can't determine the actual argument
16974 emit_move_insn (tmp
, x
);
16977 /* Helper function of ix86_fixup_binary_operands to canonicalize
16978 operand order. Returns true if the operands should be swapped. */
16981 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16984 rtx dst
= operands
[0];
16985 rtx src1
= operands
[1];
16986 rtx src2
= operands
[2];
16988 /* If the operation is not commutative, we can't do anything. */
16989 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16992 /* Highest priority is that src1 should match dst. */
16993 if (rtx_equal_p (dst
, src1
))
16995 if (rtx_equal_p (dst
, src2
))
16998 /* Next highest priority is that immediate constants come second. */
16999 if (immediate_operand (src2
, mode
))
17001 if (immediate_operand (src1
, mode
))
17004 /* Lowest priority is that memory references should come second. */
17014 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17015 destination to use for the operation. If different from the true
17016 destination in operands[0], a copy operation will be required. */
17019 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
17022 rtx dst
= operands
[0];
17023 rtx src1
= operands
[1];
17024 rtx src2
= operands
[2];
17026 /* Canonicalize operand order. */
17027 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17031 /* It is invalid to swap operands of different modes. */
17032 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
17039 /* Both source operands cannot be in memory. */
17040 if (MEM_P (src1
) && MEM_P (src2
))
17042 /* Optimization: Only read from memory once. */
17043 if (rtx_equal_p (src1
, src2
))
17045 src2
= force_reg (mode
, src2
);
17049 src2
= force_reg (mode
, src2
);
17052 /* If the destination is memory, and we do not have matching source
17053 operands, do things in registers. */
17054 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17055 dst
= gen_reg_rtx (mode
);
17057 /* Source 1 cannot be a constant. */
17058 if (CONSTANT_P (src1
))
17059 src1
= force_reg (mode
, src1
);
17061 /* Source 1 cannot be a non-matching memory. */
17062 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17063 src1
= force_reg (mode
, src1
);
17065 /* Improve address combine. */
17067 && GET_MODE_CLASS (mode
) == MODE_INT
17069 src2
= force_reg (mode
, src2
);
17071 operands
[1] = src1
;
17072 operands
[2] = src2
;
17076 /* Similarly, but assume that the destination has already been
17077 set up properly. */
17080 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
17081 enum machine_mode mode
, rtx operands
[])
17083 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17084 gcc_assert (dst
== operands
[0]);
17087 /* Attempt to expand a binary operator. Make the expansion closer to the
17088 actual machine, then just general_operand, which will allow 3 separate
17089 memory references (one output, two input) in a single insn. */
17092 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
17095 rtx src1
, src2
, dst
, op
, clob
;
17097 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17098 src1
= operands
[1];
17099 src2
= operands
[2];
17101 /* Emit the instruction. */
17103 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
17104 if (reload_in_progress
)
17106 /* Reload doesn't know about the flags register, and doesn't know that
17107 it doesn't want to clobber it. We can only do this with PLUS. */
17108 gcc_assert (code
== PLUS
);
17111 else if (reload_completed
17113 && !rtx_equal_p (dst
, src1
))
17115 /* This is going to be an LEA; avoid splitting it later. */
17120 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17121 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17124 /* Fix up the destination if needed. */
17125 if (dst
!= operands
[0])
17126 emit_move_insn (operands
[0], dst
);
17129 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17130 the given OPERANDS. */
17133 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
17136 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
17137 if (GET_CODE (operands
[1]) == SUBREG
)
17142 else if (GET_CODE (operands
[2]) == SUBREG
)
17147 /* Optimize (__m128i) d | (__m128i) e and similar code
17148 when d and e are float vectors into float vector logical
17149 insn. In C/C++ without using intrinsics there is no other way
17150 to express vector logical operation on float vectors than
17151 to cast them temporarily to integer vectors. */
17153 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17154 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
17155 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
17156 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
17157 && SUBREG_BYTE (op1
) == 0
17158 && (GET_CODE (op2
) == CONST_VECTOR
17159 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
17160 && SUBREG_BYTE (op2
) == 0))
17161 && can_create_pseudo_p ())
17164 switch (GET_MODE (SUBREG_REG (op1
)))
17170 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
17171 if (GET_CODE (op2
) == CONST_VECTOR
)
17173 op2
= gen_lowpart (GET_MODE (dst
), op2
);
17174 op2
= force_reg (GET_MODE (dst
), op2
);
17179 op2
= SUBREG_REG (operands
[2]);
17180 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
17181 op2
= force_reg (GET_MODE (dst
), op2
);
17183 op1
= SUBREG_REG (op1
);
17184 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
17185 op1
= force_reg (GET_MODE (dst
), op1
);
17186 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
17187 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
17189 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
17195 if (!nonimmediate_operand (operands
[1], mode
))
17196 operands
[1] = force_reg (mode
, operands
[1]);
17197 if (!nonimmediate_operand (operands
[2], mode
))
17198 operands
[2] = force_reg (mode
, operands
[2]);
17199 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
17200 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
17201 gen_rtx_fmt_ee (code
, mode
, operands
[1],
17205 /* Return TRUE or FALSE depending on whether the binary operator meets the
17206 appropriate constraints. */
17209 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
17212 rtx dst
= operands
[0];
17213 rtx src1
= operands
[1];
17214 rtx src2
= operands
[2];
17216 /* Both source operands cannot be in memory. */
17217 if (MEM_P (src1
) && MEM_P (src2
))
17220 /* Canonicalize operand order for commutative operators. */
17221 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17228 /* If the destination is memory, we must have a matching source operand. */
17229 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17232 /* Source 1 cannot be a constant. */
17233 if (CONSTANT_P (src1
))
17236 /* Source 1 cannot be a non-matching memory. */
17237 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17238 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17239 return (code
== AND
17242 || (TARGET_64BIT
&& mode
== DImode
))
17243 && satisfies_constraint_L (src2
));
17248 /* Attempt to expand a unary operator. Make the expansion closer to the
17249 actual machine, then just general_operand, which will allow 2 separate
17250 memory references (one output, one input) in a single insn. */
17253 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17256 int matching_memory
;
17257 rtx src
, dst
, op
, clob
;
17262 /* If the destination is memory, and we do not have matching source
17263 operands, do things in registers. */
17264 matching_memory
= 0;
17267 if (rtx_equal_p (dst
, src
))
17268 matching_memory
= 1;
17270 dst
= gen_reg_rtx (mode
);
17273 /* When source operand is memory, destination must match. */
17274 if (MEM_P (src
) && !matching_memory
)
17275 src
= force_reg (mode
, src
);
17277 /* Emit the instruction. */
17279 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17280 if (reload_in_progress
|| code
== NOT
)
17282 /* Reload doesn't know about the flags register, and doesn't know that
17283 it doesn't want to clobber it. */
17284 gcc_assert (code
== NOT
);
17289 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17290 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17293 /* Fix up the destination if needed. */
17294 if (dst
!= operands
[0])
17295 emit_move_insn (operands
[0], dst
);
17298 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17299 divisor are within the range [0-255]. */
17302 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17305 rtx end_label
, qimode_label
;
17306 rtx insn
, div
, mod
;
17307 rtx scratch
, tmp0
, tmp1
, tmp2
;
17308 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17309 rtx (*gen_zero_extend
) (rtx
, rtx
);
17310 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17315 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17316 gen_test_ccno_1
= gen_testsi_ccno_1
;
17317 gen_zero_extend
= gen_zero_extendqisi2
;
17320 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17321 gen_test_ccno_1
= gen_testdi_ccno_1
;
17322 gen_zero_extend
= gen_zero_extendqidi2
;
17325 gcc_unreachable ();
17328 end_label
= gen_label_rtx ();
17329 qimode_label
= gen_label_rtx ();
17331 scratch
= gen_reg_rtx (mode
);
17333 /* Use 8bit unsigned divimod if dividend and divisor are within
17334 the range [0-255]. */
17335 emit_move_insn (scratch
, operands
[2]);
17336 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17337 scratch
, 1, OPTAB_DIRECT
);
17338 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17339 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17340 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17341 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17342 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17344 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
17345 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17346 JUMP_LABEL (insn
) = qimode_label
;
17348 /* Generate original signed/unsigned divimod. */
17349 div
= gen_divmod4_1 (operands
[0], operands
[1],
17350 operands
[2], operands
[3]);
17353 /* Branch to the end. */
17354 emit_jump_insn (gen_jump (end_label
));
17357 /* Generate 8bit unsigned divide. */
17358 emit_label (qimode_label
);
17359 /* Don't use operands[0] for result of 8bit divide since not all
17360 registers support QImode ZERO_EXTRACT. */
17361 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17362 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17363 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17364 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
17368 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17369 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17373 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17374 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17377 /* Extract remainder from AH. */
17378 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17379 if (REG_P (operands
[1]))
17380 insn
= emit_move_insn (operands
[1], tmp1
);
17383 /* Need a new scratch register since the old one has result
17385 scratch
= gen_reg_rtx (mode
);
17386 emit_move_insn (scratch
, tmp1
);
17387 insn
= emit_move_insn (operands
[1], scratch
);
17389 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17391 /* Zero extend quotient from AL. */
17392 tmp1
= gen_lowpart (QImode
, tmp0
);
17393 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17394 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17396 emit_label (end_label
);
17399 #define LEA_MAX_STALL (3)
17400 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17402 /* Increase given DISTANCE in half-cycles according to
17403 dependencies between PREV and NEXT instructions.
17404 Add 1 half-cycle if there is no dependency and
17405 go to next cycle if there is some dependecy. */
17407 static unsigned int
17408 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
17413 if (!prev
|| !next
)
17414 return distance
+ (distance
& 1) + 2;
17416 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17417 return distance
+ 1;
17419 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17420 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17421 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17422 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17423 return distance
+ (distance
& 1) + 2;
17425 return distance
+ 1;
17428 /* Function checks if instruction INSN defines register number
17429 REGNO1 or REGNO2. */
17432 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17437 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17438 if (DF_REF_REG_DEF_P (*def_rec
)
17439 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17440 && (regno1
== DF_REF_REGNO (*def_rec
)
17441 || regno2
== DF_REF_REGNO (*def_rec
)))
17449 /* Function checks if instruction INSN uses register number
17450 REGNO as a part of address expression. */
17453 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17457 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17458 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17464 /* Search backward for non-agu definition of register number REGNO1
17465 or register number REGNO2 in basic block starting from instruction
17466 START up to head of basic block or instruction INSN.
17468 Function puts true value into *FOUND var if definition was found
17469 and false otherwise.
17471 Distance in half-cycles between START and found instruction or head
17472 of BB is added to DISTANCE and returned. */
17475 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17476 rtx insn
, int distance
,
17477 rtx start
, bool *found
)
17479 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17487 && distance
< LEA_SEARCH_THRESHOLD
)
17489 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17491 distance
= increase_distance (prev
, next
, distance
);
17492 if (insn_defines_reg (regno1
, regno2
, prev
))
17494 if (recog_memoized (prev
) < 0
17495 || get_attr_type (prev
) != TYPE_LEA
)
17504 if (prev
== BB_HEAD (bb
))
17507 prev
= PREV_INSN (prev
);
17513 /* Search backward for non-agu definition of register number REGNO1
17514 or register number REGNO2 in INSN's basic block until
17515 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17516 2. Reach neighbour BBs boundary, or
17517 3. Reach agu definition.
17518 Returns the distance between the non-agu definition point and INSN.
17519 If no definition point, returns -1. */
17522 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17525 basic_block bb
= BLOCK_FOR_INSN (insn
);
17527 bool found
= false;
17529 if (insn
!= BB_HEAD (bb
))
17530 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17531 distance
, PREV_INSN (insn
),
17534 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17538 bool simple_loop
= false;
17540 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17543 simple_loop
= true;
17548 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17550 BB_END (bb
), &found
);
17553 int shortest_dist
= -1;
17554 bool found_in_bb
= false;
17556 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17559 = distance_non_agu_define_in_bb (regno1
, regno2
,
17565 if (shortest_dist
< 0)
17566 shortest_dist
= bb_dist
;
17567 else if (bb_dist
> 0)
17568 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17574 distance
= shortest_dist
;
17578 /* get_attr_type may modify recog data. We want to make sure
17579 that recog data is valid for instruction INSN, on which
17580 distance_non_agu_define is called. INSN is unchanged here. */
17581 extract_insn_cached (insn
);
17586 return distance
>> 1;
17589 /* Return the distance in half-cycles between INSN and the next
17590 insn that uses register number REGNO in memory address added
17591 to DISTANCE. Return -1 if REGNO0 is set.
17593 Put true value into *FOUND if register usage was found and
17595 Put true value into *REDEFINED if register redefinition was
17596 found and false otherwise. */
17599 distance_agu_use_in_bb (unsigned int regno
,
17600 rtx insn
, int distance
, rtx start
,
17601 bool *found
, bool *redefined
)
17603 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17608 *redefined
= false;
17612 && distance
< LEA_SEARCH_THRESHOLD
)
17614 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17616 distance
= increase_distance(prev
, next
, distance
);
17617 if (insn_uses_reg_mem (regno
, next
))
17619 /* Return DISTANCE if OP0 is used in memory
17620 address in NEXT. */
17625 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17627 /* Return -1 if OP0 is set in NEXT. */
17635 if (next
== BB_END (bb
))
17638 next
= NEXT_INSN (next
);
17644 /* Return the distance between INSN and the next insn that uses
17645 register number REGNO0 in memory address. Return -1 if no such
17646 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17649 distance_agu_use (unsigned int regno0
, rtx insn
)
17651 basic_block bb
= BLOCK_FOR_INSN (insn
);
17653 bool found
= false;
17654 bool redefined
= false;
17656 if (insn
!= BB_END (bb
))
17657 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17659 &found
, &redefined
);
17661 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17665 bool simple_loop
= false;
17667 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17670 simple_loop
= true;
17675 distance
= distance_agu_use_in_bb (regno0
, insn
,
17676 distance
, BB_HEAD (bb
),
17677 &found
, &redefined
);
17680 int shortest_dist
= -1;
17681 bool found_in_bb
= false;
17682 bool redefined_in_bb
= false;
17684 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17687 = distance_agu_use_in_bb (regno0
, insn
,
17688 distance
, BB_HEAD (e
->dest
),
17689 &found_in_bb
, &redefined_in_bb
);
17692 if (shortest_dist
< 0)
17693 shortest_dist
= bb_dist
;
17694 else if (bb_dist
> 0)
17695 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17701 distance
= shortest_dist
;
17705 if (!found
|| redefined
)
17708 return distance
>> 1;
17711 /* Define this macro to tune LEA priority vs ADD, it take effect when
17712 there is a dilemma of choicing LEA or ADD
17713 Negative value: ADD is more preferred than LEA
17715 Positive value: LEA is more preferred than ADD*/
17716 #define IX86_LEA_PRIORITY 0
17718 /* Return true if usage of lea INSN has performance advantage
17719 over a sequence of instructions. Instructions sequence has
17720 SPLIT_COST cycles higher latency than lea latency. */
17723 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17724 unsigned int regno2
, int split_cost
, bool has_scale
)
17726 int dist_define
, dist_use
;
17728 /* For Silvermont if using a 2-source or 3-source LEA for
17729 non-destructive destination purposes, or due to wanting
17730 ability to use SCALE, the use of LEA is justified. */
17731 if (ix86_tune
== PROCESSOR_SLM
)
17735 if (split_cost
< 1)
17737 if (regno0
== regno1
|| regno0
== regno2
)
17742 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17743 dist_use
= distance_agu_use (regno0
, insn
);
17745 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17747 /* If there is no non AGU operand definition, no AGU
17748 operand usage and split cost is 0 then both lea
17749 and non lea variants have same priority. Currently
17750 we prefer lea for 64 bit code and non lea on 32 bit
17752 if (dist_use
< 0 && split_cost
== 0)
17753 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17758 /* With longer definitions distance lea is more preferable.
17759 Here we change it to take into account splitting cost and
17761 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17763 /* If there is no use in memory addess then we just check
17764 that split cost exceeds AGU stall. */
17766 return dist_define
> LEA_MAX_STALL
;
17768 /* If this insn has both backward non-agu dependence and forward
17769 agu dependence, the one with short distance takes effect. */
17770 return dist_define
>= dist_use
;
17773 /* Return true if it is legal to clobber flags by INSN and
17774 false otherwise. */
17777 ix86_ok_to_clobber_flags (rtx insn
)
17779 basic_block bb
= BLOCK_FOR_INSN (insn
);
17785 if (NONDEBUG_INSN_P (insn
))
17787 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17788 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17791 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17795 if (insn
== BB_END (bb
))
17798 insn
= NEXT_INSN (insn
);
17801 live
= df_get_live_out(bb
);
17802 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17805 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17806 move and add to avoid AGU stalls. */
17809 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17811 unsigned int regno0
, regno1
, regno2
;
17813 /* Check if we need to optimize. */
17814 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17817 /* Check it is correct to split here. */
17818 if (!ix86_ok_to_clobber_flags(insn
))
17821 regno0
= true_regnum (operands
[0]);
17822 regno1
= true_regnum (operands
[1]);
17823 regno2
= true_regnum (operands
[2]);
17825 /* We need to split only adds with non destructive
17826 destination operand. */
17827 if (regno0
== regno1
|| regno0
== regno2
)
17830 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17833 /* Return true if we should emit lea instruction instead of mov
17837 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17839 unsigned int regno0
, regno1
;
17841 /* Check if we need to optimize. */
17842 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17845 /* Use lea for reg to reg moves only. */
17846 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17849 regno0
= true_regnum (operands
[0]);
17850 regno1
= true_regnum (operands
[1]);
17852 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
17855 /* Return true if we need to split lea into a sequence of
17856 instructions to avoid AGU stalls. */
17859 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17861 unsigned int regno0
, regno1
, regno2
;
17863 struct ix86_address parts
;
17866 /* Check we need to optimize. */
17867 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17870 /* Check it is correct to split here. */
17871 if (!ix86_ok_to_clobber_flags(insn
))
17874 ok
= ix86_decompose_address (operands
[1], &parts
);
17877 /* There should be at least two components in the address. */
17878 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17879 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17882 /* We should not split into add if non legitimate pic
17883 operand is used as displacement. */
17884 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17887 regno0
= true_regnum (operands
[0]) ;
17888 regno1
= INVALID_REGNUM
;
17889 regno2
= INVALID_REGNUM
;
17892 regno1
= true_regnum (parts
.base
);
17894 regno2
= true_regnum (parts
.index
);
17898 /* Compute how many cycles we will add to execution time
17899 if split lea into a sequence of instructions. */
17900 if (parts
.base
|| parts
.index
)
17902 /* Have to use mov instruction if non desctructive
17903 destination form is used. */
17904 if (regno1
!= regno0
&& regno2
!= regno0
)
17907 /* Have to add index to base if both exist. */
17908 if (parts
.base
&& parts
.index
)
17911 /* Have to use shift and adds if scale is 2 or greater. */
17912 if (parts
.scale
> 1)
17914 if (regno0
!= regno1
)
17916 else if (regno2
== regno0
)
17919 split_cost
+= parts
.scale
;
17922 /* Have to use add instruction with immediate if
17923 disp is non zero. */
17924 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17927 /* Subtract the price of lea. */
17931 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
17935 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17936 matches destination. RTX includes clobber of FLAGS_REG. */
17939 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17944 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17945 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17947 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17950 /* Return true if regno1 def is nearest to the insn. */
17953 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17956 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17960 while (prev
&& prev
!= start
)
17962 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17964 prev
= PREV_INSN (prev
);
17967 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17969 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17971 prev
= PREV_INSN (prev
);
17974 /* None of the regs is defined in the bb. */
17978 /* Split lea instructions into a sequence of instructions
17979 which are executed on ALU to avoid AGU stalls.
17980 It is assumed that it is allowed to clobber flags register
17981 at lea position. */
17984 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17986 unsigned int regno0
, regno1
, regno2
;
17987 struct ix86_address parts
;
17991 ok
= ix86_decompose_address (operands
[1], &parts
);
17994 target
= gen_lowpart (mode
, operands
[0]);
17996 regno0
= true_regnum (target
);
17997 regno1
= INVALID_REGNUM
;
17998 regno2
= INVALID_REGNUM
;
18002 parts
.base
= gen_lowpart (mode
, parts
.base
);
18003 regno1
= true_regnum (parts
.base
);
18008 parts
.index
= gen_lowpart (mode
, parts
.index
);
18009 regno2
= true_regnum (parts
.index
);
18013 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
18015 if (parts
.scale
> 1)
18017 /* Case r1 = r1 + ... */
18018 if (regno1
== regno0
)
18020 /* If we have a case r1 = r1 + C * r1 then we
18021 should use multiplication which is very
18022 expensive. Assume cost model is wrong if we
18023 have such case here. */
18024 gcc_assert (regno2
!= regno0
);
18026 for (adds
= parts
.scale
; adds
> 0; adds
--)
18027 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
18031 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18032 if (regno0
!= regno2
)
18033 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18035 /* Use shift for scaling. */
18036 ix86_emit_binop (ASHIFT
, mode
, target
,
18037 GEN_INT (exact_log2 (parts
.scale
)));
18040 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
18042 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18043 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18046 else if (!parts
.base
&& !parts
.index
)
18048 gcc_assert(parts
.disp
);
18049 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
18055 if (regno0
!= regno2
)
18056 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18058 else if (!parts
.index
)
18060 if (regno0
!= regno1
)
18061 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
18065 if (regno0
== regno1
)
18067 else if (regno0
== regno2
)
18073 /* Find better operand for SET instruction, depending
18074 on which definition is farther from the insn. */
18075 if (find_nearest_reg_def (insn
, regno1
, regno2
))
18076 tmp
= parts
.index
, tmp1
= parts
.base
;
18078 tmp
= parts
.base
, tmp1
= parts
.index
;
18080 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18082 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18083 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18085 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
18089 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
18092 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18093 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18097 /* Return true if it is ok to optimize an ADD operation to LEA
18098 operation to avoid flag register consumation. For most processors,
18099 ADD is faster than LEA. For the processors like ATOM, if the
18100 destination register of LEA holds an actual address which will be
18101 used soon, LEA is better and otherwise ADD is better. */
18104 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
18106 unsigned int regno0
= true_regnum (operands
[0]);
18107 unsigned int regno1
= true_regnum (operands
[1]);
18108 unsigned int regno2
= true_regnum (operands
[2]);
18110 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18111 if (regno0
!= regno1
&& regno0
!= regno2
)
18114 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
18117 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
18120 /* Return true if destination reg of SET_BODY is shift count of
18124 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
18130 /* Retrieve destination of SET_BODY. */
18131 switch (GET_CODE (set_body
))
18134 set_dest
= SET_DEST (set_body
);
18135 if (!set_dest
|| !REG_P (set_dest
))
18139 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
18140 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
18148 /* Retrieve shift count of USE_BODY. */
18149 switch (GET_CODE (use_body
))
18152 shift_rtx
= XEXP (use_body
, 1);
18155 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
18156 if (ix86_dep_by_shift_count_body (set_body
,
18157 XVECEXP (use_body
, 0, i
)))
18165 && (GET_CODE (shift_rtx
) == ASHIFT
18166 || GET_CODE (shift_rtx
) == LSHIFTRT
18167 || GET_CODE (shift_rtx
) == ASHIFTRT
18168 || GET_CODE (shift_rtx
) == ROTATE
18169 || GET_CODE (shift_rtx
) == ROTATERT
))
18171 rtx shift_count
= XEXP (shift_rtx
, 1);
18173 /* Return true if shift count is dest of SET_BODY. */
18174 if (REG_P (shift_count
))
18176 /* Add check since it can be invoked before register
18177 allocation in pre-reload schedule. */
18178 if (reload_completed
18179 && true_regnum (set_dest
) == true_regnum (shift_count
))
18181 else if (REGNO(set_dest
) == REGNO(shift_count
))
18189 /* Return true if destination reg of SET_INSN is shift count of
18193 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
18195 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
18196 PATTERN (use_insn
));
18199 /* Return TRUE or FALSE depending on whether the unary operator meets the
18200 appropriate constraints. */
18203 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
18204 enum machine_mode mode ATTRIBUTE_UNUSED
,
18205 rtx operands
[2] ATTRIBUTE_UNUSED
)
18207 /* If one of operands is memory, source and destination must match. */
18208 if ((MEM_P (operands
[0])
18209 || MEM_P (operands
[1]))
18210 && ! rtx_equal_p (operands
[0], operands
[1]))
18215 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18216 are ok, keeping in mind the possible movddup alternative. */
18219 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
18221 if (MEM_P (operands
[0]))
18222 return rtx_equal_p (operands
[0], operands
[1 + high
]);
18223 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
18224 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18228 /* Post-reload splitter for converting an SF or DFmode value in an
18229 SSE register into an unsigned SImode. */
18232 ix86_split_convert_uns_si_sse (rtx operands
[])
18234 enum machine_mode vecmode
;
18235 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18237 large
= operands
[1];
18238 zero_or_two31
= operands
[2];
18239 input
= operands
[3];
18240 two31
= operands
[4];
18241 vecmode
= GET_MODE (large
);
18242 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18244 /* Load up the value into the low element. We must ensure that the other
18245 elements are valid floats -- zero is the easiest such value. */
18248 if (vecmode
== V4SFmode
)
18249 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18251 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18255 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18256 emit_move_insn (value
, CONST0_RTX (vecmode
));
18257 if (vecmode
== V4SFmode
)
18258 emit_insn (gen_sse_movss (value
, value
, input
));
18260 emit_insn (gen_sse2_movsd (value
, value
, input
));
18263 emit_move_insn (large
, two31
);
18264 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18266 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18267 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18269 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18270 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18272 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18273 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18275 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18276 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18278 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18279 if (vecmode
== V4SFmode
)
18280 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18282 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18285 emit_insn (gen_xorv4si3 (value
, value
, large
));
18288 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18289 Expects the 64-bit DImode to be supplied in a pair of integral
18290 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18291 -mfpmath=sse, !optimize_size only. */
18294 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18296 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18297 rtx int_xmm
, fp_xmm
;
18298 rtx biases
, exponents
;
18301 int_xmm
= gen_reg_rtx (V4SImode
);
18302 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18303 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18304 else if (TARGET_SSE_SPLIT_REGS
)
18306 emit_clobber (int_xmm
);
18307 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18311 x
= gen_reg_rtx (V2DImode
);
18312 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18313 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18316 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18317 gen_rtvec (4, GEN_INT (0x43300000UL
),
18318 GEN_INT (0x45300000UL
),
18319 const0_rtx
, const0_rtx
));
18320 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18322 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18323 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18325 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18326 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18327 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18328 (0x1.0p84 + double(fp_value_hi_xmm)).
18329 Note these exponents differ by 32. */
18331 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18333 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18334 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18335 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18336 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18337 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18338 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18339 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18340 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18341 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18343 /* Add the upper and lower DFmode values together. */
18345 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18348 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18349 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18350 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18353 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18356 /* Not used, but eases macroization of patterns. */
18358 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18359 rtx input ATTRIBUTE_UNUSED
)
18361 gcc_unreachable ();
18364 /* Convert an unsigned SImode value into a DFmode. Only currently used
18365 for SSE, but applicable anywhere. */
18368 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18370 REAL_VALUE_TYPE TWO31r
;
18373 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18374 NULL
, 1, OPTAB_DIRECT
);
18376 fp
= gen_reg_rtx (DFmode
);
18377 emit_insn (gen_floatsidf2 (fp
, x
));
18379 real_ldexp (&TWO31r
, &dconst1
, 31);
18380 x
= const_double_from_real_value (TWO31r
, DFmode
);
18382 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18384 emit_move_insn (target
, x
);
18387 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18388 32-bit mode; otherwise we have a direct convert instruction. */
18391 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18393 REAL_VALUE_TYPE TWO32r
;
18394 rtx fp_lo
, fp_hi
, x
;
18396 fp_lo
= gen_reg_rtx (DFmode
);
18397 fp_hi
= gen_reg_rtx (DFmode
);
18399 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18401 real_ldexp (&TWO32r
, &dconst1
, 32);
18402 x
= const_double_from_real_value (TWO32r
, DFmode
);
18403 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18405 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18407 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18410 emit_move_insn (target
, x
);
18413 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18414 For x86_32, -mfpmath=sse, !optimize_size only. */
18416 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18418 REAL_VALUE_TYPE ONE16r
;
18419 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18421 real_ldexp (&ONE16r
, &dconst1
, 16);
18422 x
= const_double_from_real_value (ONE16r
, SFmode
);
18423 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18424 NULL
, 0, OPTAB_DIRECT
);
18425 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18426 NULL
, 0, OPTAB_DIRECT
);
18427 fp_hi
= gen_reg_rtx (SFmode
);
18428 fp_lo
= gen_reg_rtx (SFmode
);
18429 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18430 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18431 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18433 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18435 if (!rtx_equal_p (target
, fp_hi
))
18436 emit_move_insn (target
, fp_hi
);
18439 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18440 a vector of unsigned ints VAL to vector of floats TARGET. */
18443 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18446 REAL_VALUE_TYPE TWO16r
;
18447 enum machine_mode intmode
= GET_MODE (val
);
18448 enum machine_mode fltmode
= GET_MODE (target
);
18449 rtx (*cvt
) (rtx
, rtx
);
18451 if (intmode
== V4SImode
)
18452 cvt
= gen_floatv4siv4sf2
;
18454 cvt
= gen_floatv8siv8sf2
;
18455 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18456 tmp
[0] = force_reg (intmode
, tmp
[0]);
18457 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18459 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18460 NULL_RTX
, 1, OPTAB_DIRECT
);
18461 tmp
[3] = gen_reg_rtx (fltmode
);
18462 emit_insn (cvt (tmp
[3], tmp
[1]));
18463 tmp
[4] = gen_reg_rtx (fltmode
);
18464 emit_insn (cvt (tmp
[4], tmp
[2]));
18465 real_ldexp (&TWO16r
, &dconst1
, 16);
18466 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18467 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18468 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18470 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18472 if (tmp
[7] != target
)
18473 emit_move_insn (target
, tmp
[7]);
18476 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18477 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18478 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18479 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18482 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18484 REAL_VALUE_TYPE TWO31r
;
18485 rtx two31r
, tmp
[4];
18486 enum machine_mode mode
= GET_MODE (val
);
18487 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18488 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18489 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18492 for (i
= 0; i
< 3; i
++)
18493 tmp
[i
] = gen_reg_rtx (mode
);
18494 real_ldexp (&TWO31r
, &dconst1
, 31);
18495 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18496 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18497 two31r
= force_reg (mode
, two31r
);
18500 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18501 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18502 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18503 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18504 default: gcc_unreachable ();
18506 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18507 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18508 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18510 if (intmode
== V4SImode
|| TARGET_AVX2
)
18511 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18512 gen_lowpart (intmode
, tmp
[0]),
18513 GEN_INT (31), NULL_RTX
, 0,
18517 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18518 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18519 *xorp
= expand_simple_binop (intmode
, AND
,
18520 gen_lowpart (intmode
, tmp
[0]),
18521 two31
, NULL_RTX
, 0,
18524 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18528 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18529 then replicate the value for all elements of the vector
18533 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18537 enum machine_mode scalar_mode
;
18554 n_elt
= GET_MODE_NUNITS (mode
);
18555 v
= rtvec_alloc (n_elt
);
18556 scalar_mode
= GET_MODE_INNER (mode
);
18558 RTVEC_ELT (v
, 0) = value
;
18560 for (i
= 1; i
< n_elt
; ++i
)
18561 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18563 return gen_rtx_CONST_VECTOR (mode
, v
);
18566 gcc_unreachable ();
18570 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18571 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18572 for an SSE register. If VECT is true, then replicate the mask for
18573 all elements of the vector register. If INVERT is true, then create
18574 a mask excluding the sign bit. */
18577 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18579 enum machine_mode vec_mode
, imode
;
18580 HOST_WIDE_INT hi
, lo
;
18585 /* Find the sign bit, sign extended to 2*HWI. */
18593 mode
= GET_MODE_INNER (mode
);
18595 lo
= 0x80000000, hi
= lo
< 0;
18603 mode
= GET_MODE_INNER (mode
);
18605 if (HOST_BITS_PER_WIDE_INT
>= 64)
18606 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18608 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18613 vec_mode
= VOIDmode
;
18614 if (HOST_BITS_PER_WIDE_INT
>= 64)
18617 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18624 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18628 lo
= ~lo
, hi
= ~hi
;
18634 mask
= immed_double_const (lo
, hi
, imode
);
18636 vec
= gen_rtvec (2, v
, mask
);
18637 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18638 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18645 gcc_unreachable ();
18649 lo
= ~lo
, hi
= ~hi
;
18651 /* Force this value into the low part of a fp vector constant. */
18652 mask
= immed_double_const (lo
, hi
, imode
);
18653 mask
= gen_lowpart (mode
, mask
);
18655 if (vec_mode
== VOIDmode
)
18656 return force_reg (mode
, mask
);
18658 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18659 return force_reg (vec_mode
, v
);
18662 /* Generate code for floating point ABS or NEG. */
18665 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18668 rtx mask
, set
, dst
, src
;
18669 bool use_sse
= false;
18670 bool vector_mode
= VECTOR_MODE_P (mode
);
18671 enum machine_mode vmode
= mode
;
18675 else if (mode
== TFmode
)
18677 else if (TARGET_SSE_MATH
)
18679 use_sse
= SSE_FLOAT_MODE_P (mode
);
18680 if (mode
== SFmode
)
18682 else if (mode
== DFmode
)
18686 /* NEG and ABS performed with SSE use bitwise mask operations.
18687 Create the appropriate mask now. */
18689 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18696 set
= gen_rtx_fmt_e (code
, mode
, src
);
18697 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18704 use
= gen_rtx_USE (VOIDmode
, mask
);
18706 par
= gen_rtvec (2, set
, use
);
18709 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18710 par
= gen_rtvec (3, set
, use
, clob
);
18712 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18718 /* Expand a copysign operation. Special case operand 0 being a constant. */
18721 ix86_expand_copysign (rtx operands
[])
18723 enum machine_mode mode
, vmode
;
18724 rtx dest
, op0
, op1
, mask
, nmask
;
18726 dest
= operands
[0];
18730 mode
= GET_MODE (dest
);
18732 if (mode
== SFmode
)
18734 else if (mode
== DFmode
)
18739 if (GET_CODE (op0
) == CONST_DOUBLE
)
18741 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18743 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18744 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18746 if (mode
== SFmode
|| mode
== DFmode
)
18748 if (op0
== CONST0_RTX (mode
))
18749 op0
= CONST0_RTX (vmode
);
18752 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18754 op0
= force_reg (vmode
, v
);
18757 else if (op0
!= CONST0_RTX (mode
))
18758 op0
= force_reg (mode
, op0
);
18760 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18762 if (mode
== SFmode
)
18763 copysign_insn
= gen_copysignsf3_const
;
18764 else if (mode
== DFmode
)
18765 copysign_insn
= gen_copysigndf3_const
;
18767 copysign_insn
= gen_copysigntf3_const
;
18769 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18773 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18775 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18776 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18778 if (mode
== SFmode
)
18779 copysign_insn
= gen_copysignsf3_var
;
18780 else if (mode
== DFmode
)
18781 copysign_insn
= gen_copysigndf3_var
;
18783 copysign_insn
= gen_copysigntf3_var
;
18785 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18789 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18790 be a constant, and so has already been expanded into a vector constant. */
18793 ix86_split_copysign_const (rtx operands
[])
18795 enum machine_mode mode
, vmode
;
18796 rtx dest
, op0
, mask
, x
;
18798 dest
= operands
[0];
18800 mask
= operands
[3];
18802 mode
= GET_MODE (dest
);
18803 vmode
= GET_MODE (mask
);
18805 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18806 x
= gen_rtx_AND (vmode
, dest
, mask
);
18807 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18809 if (op0
!= CONST0_RTX (vmode
))
18811 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18812 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18816 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18817 so we have to do two masks. */
18820 ix86_split_copysign_var (rtx operands
[])
18822 enum machine_mode mode
, vmode
;
18823 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18825 dest
= operands
[0];
18826 scratch
= operands
[1];
18829 nmask
= operands
[4];
18830 mask
= operands
[5];
18832 mode
= GET_MODE (dest
);
18833 vmode
= GET_MODE (mask
);
18835 if (rtx_equal_p (op0
, op1
))
18837 /* Shouldn't happen often (it's useless, obviously), but when it does
18838 we'd generate incorrect code if we continue below. */
18839 emit_move_insn (dest
, op0
);
18843 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18845 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18847 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18848 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18851 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18852 x
= gen_rtx_NOT (vmode
, dest
);
18853 x
= gen_rtx_AND (vmode
, x
, op0
);
18854 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18858 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18860 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18862 else /* alternative 2,4 */
18864 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18865 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18866 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18868 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18870 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18872 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18873 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18875 else /* alternative 3,4 */
18877 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18879 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18880 x
= gen_rtx_AND (vmode
, dest
, op0
);
18882 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18885 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18886 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18889 /* Return TRUE or FALSE depending on whether the first SET in INSN
18890 has source and destination with matching CC modes, and that the
18891 CC mode is at least as constrained as REQ_MODE. */
18894 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18897 enum machine_mode set_mode
;
18899 set
= PATTERN (insn
);
18900 if (GET_CODE (set
) == PARALLEL
)
18901 set
= XVECEXP (set
, 0, 0);
18902 gcc_assert (GET_CODE (set
) == SET
);
18903 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18905 set_mode
= GET_MODE (SET_DEST (set
));
18909 if (req_mode
!= CCNOmode
18910 && (req_mode
!= CCmode
18911 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18915 if (req_mode
== CCGCmode
)
18919 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18923 if (req_mode
== CCZmode
)
18933 if (set_mode
!= req_mode
)
18938 gcc_unreachable ();
18941 return GET_MODE (SET_SRC (set
)) == set_mode
;
18944 /* Generate insn patterns to do an integer compare of OPERANDS. */
18947 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18949 enum machine_mode cmpmode
;
18952 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18953 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18955 /* This is very simple, but making the interface the same as in the
18956 FP case makes the rest of the code easier. */
18957 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18958 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18960 /* Return the test that should be put into the flags user, i.e.
18961 the bcc, scc, or cmov instruction. */
18962 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18965 /* Figure out whether to use ordered or unordered fp comparisons.
18966 Return the appropriate mode to use. */
18969 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18971 /* ??? In order to make all comparisons reversible, we do all comparisons
18972 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18973 all forms trapping and nontrapping comparisons, we can make inequality
18974 comparisons trapping again, since it results in better code when using
18975 FCOM based compares. */
18976 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18980 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18982 enum machine_mode mode
= GET_MODE (op0
);
18984 if (SCALAR_FLOAT_MODE_P (mode
))
18986 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18987 return ix86_fp_compare_mode (code
);
18992 /* Only zero flag is needed. */
18993 case EQ
: /* ZF=0 */
18994 case NE
: /* ZF!=0 */
18996 /* Codes needing carry flag. */
18997 case GEU
: /* CF=0 */
18998 case LTU
: /* CF=1 */
18999 /* Detect overflow checks. They need just the carry flag. */
19000 if (GET_CODE (op0
) == PLUS
19001 && rtx_equal_p (op1
, XEXP (op0
, 0)))
19005 case GTU
: /* CF=0 & ZF=0 */
19006 case LEU
: /* CF=1 | ZF=1 */
19007 /* Detect overflow checks. They need just the carry flag. */
19008 if (GET_CODE (op0
) == MINUS
19009 && rtx_equal_p (op1
, XEXP (op0
, 0)))
19013 /* Codes possibly doable only with sign flag when
19014 comparing against zero. */
19015 case GE
: /* SF=OF or SF=0 */
19016 case LT
: /* SF<>OF or SF=1 */
19017 if (op1
== const0_rtx
)
19020 /* For other cases Carry flag is not required. */
19022 /* Codes doable only with sign flag when comparing
19023 against zero, but we miss jump instruction for it
19024 so we need to use relational tests against overflow
19025 that thus needs to be zero. */
19026 case GT
: /* ZF=0 & SF=OF */
19027 case LE
: /* ZF=1 | SF<>OF */
19028 if (op1
== const0_rtx
)
19032 /* strcmp pattern do (use flags) and combine may ask us for proper
19037 gcc_unreachable ();
19041 /* Return the fixed registers used for condition codes. */
19044 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
19051 /* If two condition code modes are compatible, return a condition code
19052 mode which is compatible with both. Otherwise, return
19055 static enum machine_mode
19056 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
19061 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
19064 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
19065 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
19068 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
19070 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
19076 gcc_unreachable ();
19106 /* These are only compatible with themselves, which we already
19113 /* Return a comparison we can do and that it is equivalent to
19114 swap_condition (code) apart possibly from orderedness.
19115 But, never change orderedness if TARGET_IEEE_FP, returning
19116 UNKNOWN in that case if necessary. */
19118 static enum rtx_code
19119 ix86_fp_swap_condition (enum rtx_code code
)
19123 case GT
: /* GTU - CF=0 & ZF=0 */
19124 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
19125 case GE
: /* GEU - CF=0 */
19126 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
19127 case UNLT
: /* LTU - CF=1 */
19128 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
19129 case UNLE
: /* LEU - CF=1 | ZF=1 */
19130 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
19132 return swap_condition (code
);
19136 /* Return cost of comparison CODE using the best strategy for performance.
19137 All following functions do use number of instructions as a cost metrics.
19138 In future this should be tweaked to compute bytes for optimize_size and
19139 take into account performance of various instructions on various CPUs. */
19142 ix86_fp_comparison_cost (enum rtx_code code
)
19146 /* The cost of code using bit-twiddling on %ah. */
19163 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
19167 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
19170 gcc_unreachable ();
19173 switch (ix86_fp_comparison_strategy (code
))
19175 case IX86_FPCMP_COMI
:
19176 return arith_cost
> 4 ? 3 : 2;
19177 case IX86_FPCMP_SAHF
:
19178 return arith_cost
> 4 ? 4 : 3;
19184 /* Return strategy to use for floating-point. We assume that fcomi is always
19185 preferrable where available, since that is also true when looking at size
19186 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19188 enum ix86_fpcmp_strategy
19189 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
19191 /* Do fcomi/sahf based test when profitable. */
19194 return IX86_FPCMP_COMI
;
19196 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
19197 return IX86_FPCMP_SAHF
;
19199 return IX86_FPCMP_ARITH
;
19202 /* Swap, force into registers, or otherwise massage the two operands
19203 to a fp comparison. The operands are updated in place; the new
19204 comparison code is returned. */
19206 static enum rtx_code
19207 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
19209 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
19210 rtx op0
= *pop0
, op1
= *pop1
;
19211 enum machine_mode op_mode
= GET_MODE (op0
);
19212 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
19214 /* All of the unordered compare instructions only work on registers.
19215 The same is true of the fcomi compare instructions. The XFmode
19216 compare instructions require registers except when comparing
19217 against zero or when converting operand 1 from fixed point to
19221 && (fpcmp_mode
== CCFPUmode
19222 || (op_mode
== XFmode
19223 && ! (standard_80387_constant_p (op0
) == 1
19224 || standard_80387_constant_p (op1
) == 1)
19225 && GET_CODE (op1
) != FLOAT
)
19226 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19228 op0
= force_reg (op_mode
, op0
);
19229 op1
= force_reg (op_mode
, op1
);
19233 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19234 things around if they appear profitable, otherwise force op0
19235 into a register. */
19237 if (standard_80387_constant_p (op0
) == 0
19239 && ! (standard_80387_constant_p (op1
) == 0
19242 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19243 if (new_code
!= UNKNOWN
)
19246 tmp
= op0
, op0
= op1
, op1
= tmp
;
19252 op0
= force_reg (op_mode
, op0
);
19254 if (CONSTANT_P (op1
))
19256 int tmp
= standard_80387_constant_p (op1
);
19258 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19262 op1
= force_reg (op_mode
, op1
);
19265 op1
= force_reg (op_mode
, op1
);
19269 /* Try to rearrange the comparison to make it cheaper. */
19270 if (ix86_fp_comparison_cost (code
)
19271 > ix86_fp_comparison_cost (swap_condition (code
))
19272 && (REG_P (op1
) || can_create_pseudo_p ()))
19275 tmp
= op0
, op0
= op1
, op1
= tmp
;
19276 code
= swap_condition (code
);
19278 op0
= force_reg (op_mode
, op0
);
19286 /* Convert comparison codes we use to represent FP comparison to integer
19287 code that will result in proper branch. Return UNKNOWN if no such code
19291 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19320 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19323 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19325 enum machine_mode fpcmp_mode
, intcmp_mode
;
19328 fpcmp_mode
= ix86_fp_compare_mode (code
);
19329 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19331 /* Do fcomi/sahf based test when profitable. */
19332 switch (ix86_fp_comparison_strategy (code
))
19334 case IX86_FPCMP_COMI
:
19335 intcmp_mode
= fpcmp_mode
;
19336 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19337 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19342 case IX86_FPCMP_SAHF
:
19343 intcmp_mode
= fpcmp_mode
;
19344 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19345 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19349 scratch
= gen_reg_rtx (HImode
);
19350 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19351 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19354 case IX86_FPCMP_ARITH
:
19355 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19356 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19357 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19359 scratch
= gen_reg_rtx (HImode
);
19360 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19362 /* In the unordered case, we have to check C2 for NaN's, which
19363 doesn't happen to work out to anything nice combination-wise.
19364 So do some bit twiddling on the value we've got in AH to come
19365 up with an appropriate set of condition codes. */
19367 intcmp_mode
= CCNOmode
;
19372 if (code
== GT
|| !TARGET_IEEE_FP
)
19374 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19379 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19380 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19381 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19382 intcmp_mode
= CCmode
;
19388 if (code
== LT
&& TARGET_IEEE_FP
)
19390 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19391 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19392 intcmp_mode
= CCmode
;
19397 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19403 if (code
== GE
|| !TARGET_IEEE_FP
)
19405 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19410 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19411 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19417 if (code
== LE
&& TARGET_IEEE_FP
)
19419 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19420 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19421 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19422 intcmp_mode
= CCmode
;
19427 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19433 if (code
== EQ
&& TARGET_IEEE_FP
)
19435 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19436 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19437 intcmp_mode
= CCmode
;
19442 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19448 if (code
== NE
&& TARGET_IEEE_FP
)
19450 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19451 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19457 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19463 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19467 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19472 gcc_unreachable ();
19480 /* Return the test that should be put into the flags user, i.e.
19481 the bcc, scc, or cmov instruction. */
19482 return gen_rtx_fmt_ee (code
, VOIDmode
,
19483 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
19488 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19492 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19493 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19495 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19497 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19498 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19501 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19507 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19509 enum machine_mode mode
= GET_MODE (op0
);
19521 tmp
= ix86_expand_compare (code
, op0
, op1
);
19522 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19523 gen_rtx_LABEL_REF (VOIDmode
, label
),
19525 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19532 /* Expand DImode branch into multiple compare+branch. */
19534 rtx lo
[2], hi
[2], label2
;
19535 enum rtx_code code1
, code2
, code3
;
19536 enum machine_mode submode
;
19538 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19540 tmp
= op0
, op0
= op1
, op1
= tmp
;
19541 code
= swap_condition (code
);
19544 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19545 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19547 submode
= mode
== DImode
? SImode
: DImode
;
19549 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19550 avoid two branches. This costs one extra insn, so disable when
19551 optimizing for size. */
19553 if ((code
== EQ
|| code
== NE
)
19554 && (!optimize_insn_for_size_p ()
19555 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19560 if (hi
[1] != const0_rtx
)
19561 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19562 NULL_RTX
, 0, OPTAB_WIDEN
);
19565 if (lo
[1] != const0_rtx
)
19566 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19567 NULL_RTX
, 0, OPTAB_WIDEN
);
19569 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19570 NULL_RTX
, 0, OPTAB_WIDEN
);
19572 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19576 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19577 op1 is a constant and the low word is zero, then we can just
19578 examine the high word. Similarly for low word -1 and
19579 less-or-equal-than or greater-than. */
19581 if (CONST_INT_P (hi
[1]))
19584 case LT
: case LTU
: case GE
: case GEU
:
19585 if (lo
[1] == const0_rtx
)
19587 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19591 case LE
: case LEU
: case GT
: case GTU
:
19592 if (lo
[1] == constm1_rtx
)
19594 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19602 /* Otherwise, we need two or three jumps. */
19604 label2
= gen_label_rtx ();
19607 code2
= swap_condition (code
);
19608 code3
= unsigned_condition (code
);
19612 case LT
: case GT
: case LTU
: case GTU
:
19615 case LE
: code1
= LT
; code2
= GT
; break;
19616 case GE
: code1
= GT
; code2
= LT
; break;
19617 case LEU
: code1
= LTU
; code2
= GTU
; break;
19618 case GEU
: code1
= GTU
; code2
= LTU
; break;
19620 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19621 case NE
: code2
= UNKNOWN
; break;
19624 gcc_unreachable ();
19629 * if (hi(a) < hi(b)) goto true;
19630 * if (hi(a) > hi(b)) goto false;
19631 * if (lo(a) < lo(b)) goto true;
19635 if (code1
!= UNKNOWN
)
19636 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19637 if (code2
!= UNKNOWN
)
19638 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19640 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19642 if (code2
!= UNKNOWN
)
19643 emit_label (label2
);
19648 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19653 /* Split branch based on floating point condition. */
19655 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19656 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19661 if (target2
!= pc_rtx
)
19664 code
= reverse_condition_maybe_unordered (code
);
19669 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19672 /* Remove pushed operand from stack. */
19674 ix86_free_from_memory (GET_MODE (pushed
));
19676 i
= emit_jump_insn (gen_rtx_SET
19678 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19679 condition
, target1
, target2
)));
19680 if (split_branch_probability
>= 0)
19681 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19685 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19689 gcc_assert (GET_MODE (dest
) == QImode
);
19691 ret
= ix86_expand_compare (code
, op0
, op1
);
19692 PUT_MODE (ret
, QImode
);
19693 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19696 /* Expand comparison setting or clearing carry flag. Return true when
19697 successful and set pop for the operation. */
19699 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19701 enum machine_mode mode
=
19702 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19704 /* Do not handle double-mode compares that go through special path. */
19705 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19708 if (SCALAR_FLOAT_MODE_P (mode
))
19710 rtx compare_op
, compare_seq
;
19712 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19714 /* Shortcut: following common codes never translate
19715 into carry flag compares. */
19716 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19717 || code
== ORDERED
|| code
== UNORDERED
)
19720 /* These comparisons require zero flag; swap operands so they won't. */
19721 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19722 && !TARGET_IEEE_FP
)
19727 code
= swap_condition (code
);
19730 /* Try to expand the comparison and verify that we end up with
19731 carry flag based comparison. This fails to be true only when
19732 we decide to expand comparison using arithmetic that is not
19733 too common scenario. */
19735 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19736 compare_seq
= get_insns ();
19739 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19740 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19741 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19743 code
= GET_CODE (compare_op
);
19745 if (code
!= LTU
&& code
!= GEU
)
19748 emit_insn (compare_seq
);
19753 if (!INTEGRAL_MODE_P (mode
))
19762 /* Convert a==0 into (unsigned)a<1. */
19765 if (op1
!= const0_rtx
)
19768 code
= (code
== EQ
? LTU
: GEU
);
19771 /* Convert a>b into b<a or a>=b-1. */
19774 if (CONST_INT_P (op1
))
19776 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19777 /* Bail out on overflow. We still can swap operands but that
19778 would force loading of the constant into register. */
19779 if (op1
== const0_rtx
19780 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19782 code
= (code
== GTU
? GEU
: LTU
);
19789 code
= (code
== GTU
? LTU
: GEU
);
19793 /* Convert a>=0 into (unsigned)a<0x80000000. */
19796 if (mode
== DImode
|| op1
!= const0_rtx
)
19798 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19799 code
= (code
== LT
? GEU
: LTU
);
19803 if (mode
== DImode
|| op1
!= constm1_rtx
)
19805 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19806 code
= (code
== LE
? GEU
: LTU
);
19812 /* Swapping operands may cause constant to appear as first operand. */
19813 if (!nonimmediate_operand (op0
, VOIDmode
))
19815 if (!can_create_pseudo_p ())
19817 op0
= force_reg (mode
, op0
);
19819 *pop
= ix86_expand_compare (code
, op0
, op1
);
19820 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19825 ix86_expand_int_movcc (rtx operands
[])
19827 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19828 rtx compare_seq
, compare_op
;
19829 enum machine_mode mode
= GET_MODE (operands
[0]);
19830 bool sign_bit_compare_p
= false;
19831 rtx op0
= XEXP (operands
[1], 0);
19832 rtx op1
= XEXP (operands
[1], 1);
19834 if (GET_MODE (op0
) == TImode
19835 || (GET_MODE (op0
) == DImode
19840 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19841 compare_seq
= get_insns ();
19844 compare_code
= GET_CODE (compare_op
);
19846 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19847 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19848 sign_bit_compare_p
= true;
19850 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19851 HImode insns, we'd be swallowed in word prefix ops. */
19853 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19854 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19855 && CONST_INT_P (operands
[2])
19856 && CONST_INT_P (operands
[3]))
19858 rtx out
= operands
[0];
19859 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19860 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19861 HOST_WIDE_INT diff
;
19864 /* Sign bit compares are better done using shifts than we do by using
19866 if (sign_bit_compare_p
19867 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19869 /* Detect overlap between destination and compare sources. */
19872 if (!sign_bit_compare_p
)
19875 bool fpcmp
= false;
19877 compare_code
= GET_CODE (compare_op
);
19879 flags
= XEXP (compare_op
, 0);
19881 if (GET_MODE (flags
) == CCFPmode
19882 || GET_MODE (flags
) == CCFPUmode
)
19886 = ix86_fp_compare_code_to_integer (compare_code
);
19889 /* To simplify rest of code, restrict to the GEU case. */
19890 if (compare_code
== LTU
)
19892 HOST_WIDE_INT tmp
= ct
;
19895 compare_code
= reverse_condition (compare_code
);
19896 code
= reverse_condition (code
);
19901 PUT_CODE (compare_op
,
19902 reverse_condition_maybe_unordered
19903 (GET_CODE (compare_op
)));
19905 PUT_CODE (compare_op
,
19906 reverse_condition (GET_CODE (compare_op
)));
19910 if (reg_overlap_mentioned_p (out
, op0
)
19911 || reg_overlap_mentioned_p (out
, op1
))
19912 tmp
= gen_reg_rtx (mode
);
19914 if (mode
== DImode
)
19915 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19917 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19918 flags
, compare_op
));
19922 if (code
== GT
|| code
== GE
)
19923 code
= reverse_condition (code
);
19926 HOST_WIDE_INT tmp
= ct
;
19931 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19944 tmp
= expand_simple_binop (mode
, PLUS
,
19946 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19957 tmp
= expand_simple_binop (mode
, IOR
,
19959 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19961 else if (diff
== -1 && ct
)
19971 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19973 tmp
= expand_simple_binop (mode
, PLUS
,
19974 copy_rtx (tmp
), GEN_INT (cf
),
19975 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19983 * andl cf - ct, dest
19993 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19996 tmp
= expand_simple_binop (mode
, AND
,
19998 gen_int_mode (cf
- ct
, mode
),
19999 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20001 tmp
= expand_simple_binop (mode
, PLUS
,
20002 copy_rtx (tmp
), GEN_INT (ct
),
20003 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20006 if (!rtx_equal_p (tmp
, out
))
20007 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
20014 enum machine_mode cmp_mode
= GET_MODE (op0
);
20017 tmp
= ct
, ct
= cf
, cf
= tmp
;
20020 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20022 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20024 /* We may be reversing unordered compare to normal compare, that
20025 is not valid in general (we may convert non-trapping condition
20026 to trapping one), however on i386 we currently emit all
20027 comparisons unordered. */
20028 compare_code
= reverse_condition_maybe_unordered (compare_code
);
20029 code
= reverse_condition_maybe_unordered (code
);
20033 compare_code
= reverse_condition (compare_code
);
20034 code
= reverse_condition (code
);
20038 compare_code
= UNKNOWN
;
20039 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
20040 && CONST_INT_P (op1
))
20042 if (op1
== const0_rtx
20043 && (code
== LT
|| code
== GE
))
20044 compare_code
= code
;
20045 else if (op1
== constm1_rtx
)
20049 else if (code
== GT
)
20054 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20055 if (compare_code
!= UNKNOWN
20056 && GET_MODE (op0
) == GET_MODE (out
)
20057 && (cf
== -1 || ct
== -1))
20059 /* If lea code below could be used, only optimize
20060 if it results in a 2 insn sequence. */
20062 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20063 || diff
== 3 || diff
== 5 || diff
== 9)
20064 || (compare_code
== LT
&& ct
== -1)
20065 || (compare_code
== GE
&& cf
== -1))
20068 * notl op1 (if necessary)
20076 code
= reverse_condition (code
);
20079 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20081 out
= expand_simple_binop (mode
, IOR
,
20083 out
, 1, OPTAB_DIRECT
);
20084 if (out
!= operands
[0])
20085 emit_move_insn (operands
[0], out
);
20092 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20093 || diff
== 3 || diff
== 5 || diff
== 9)
20094 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
20096 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
20102 * lea cf(dest*(ct-cf)),dest
20106 * This also catches the degenerate setcc-only case.
20112 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20115 /* On x86_64 the lea instruction operates on Pmode, so we need
20116 to get arithmetics done in proper mode to match. */
20118 tmp
= copy_rtx (out
);
20122 out1
= copy_rtx (out
);
20123 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
20127 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
20133 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
20136 if (!rtx_equal_p (tmp
, out
))
20139 out
= force_operand (tmp
, copy_rtx (out
));
20141 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
20143 if (!rtx_equal_p (out
, operands
[0]))
20144 emit_move_insn (operands
[0], copy_rtx (out
));
20150 * General case: Jumpful:
20151 * xorl dest,dest cmpl op1, op2
20152 * cmpl op1, op2 movl ct, dest
20153 * setcc dest jcc 1f
20154 * decl dest movl cf, dest
20155 * andl (cf-ct),dest 1:
20158 * Size 20. Size 14.
20160 * This is reasonably steep, but branch mispredict costs are
20161 * high on modern cpus, so consider failing only if optimizing
20165 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20166 && BRANCH_COST (optimize_insn_for_speed_p (),
20171 enum machine_mode cmp_mode
= GET_MODE (op0
);
20176 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20178 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20180 /* We may be reversing unordered compare to normal compare,
20181 that is not valid in general (we may convert non-trapping
20182 condition to trapping one), however on i386 we currently
20183 emit all comparisons unordered. */
20184 code
= reverse_condition_maybe_unordered (code
);
20188 code
= reverse_condition (code
);
20189 if (compare_code
!= UNKNOWN
)
20190 compare_code
= reverse_condition (compare_code
);
20194 if (compare_code
!= UNKNOWN
)
20196 /* notl op1 (if needed)
20201 For x < 0 (resp. x <= -1) there will be no notl,
20202 so if possible swap the constants to get rid of the
20204 True/false will be -1/0 while code below (store flag
20205 followed by decrement) is 0/-1, so the constants need
20206 to be exchanged once more. */
20208 if (compare_code
== GE
|| !cf
)
20210 code
= reverse_condition (code
);
20215 HOST_WIDE_INT tmp
= cf
;
20220 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20224 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20226 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20228 copy_rtx (out
), 1, OPTAB_DIRECT
);
20231 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20232 gen_int_mode (cf
- ct
, mode
),
20233 copy_rtx (out
), 1, OPTAB_DIRECT
);
20235 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20236 copy_rtx (out
), 1, OPTAB_DIRECT
);
20237 if (!rtx_equal_p (out
, operands
[0]))
20238 emit_move_insn (operands
[0], copy_rtx (out
));
20244 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20246 /* Try a few things more with specific constants and a variable. */
20249 rtx var
, orig_out
, out
, tmp
;
20251 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20254 /* If one of the two operands is an interesting constant, load a
20255 constant with the above and mask it in with a logical operation. */
20257 if (CONST_INT_P (operands
[2]))
20260 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20261 operands
[3] = constm1_rtx
, op
= and_optab
;
20262 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20263 operands
[3] = const0_rtx
, op
= ior_optab
;
20267 else if (CONST_INT_P (operands
[3]))
20270 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20271 operands
[2] = constm1_rtx
, op
= and_optab
;
20272 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20273 operands
[2] = const0_rtx
, op
= ior_optab
;
20280 orig_out
= operands
[0];
20281 tmp
= gen_reg_rtx (mode
);
20284 /* Recurse to get the constant loaded. */
20285 if (ix86_expand_int_movcc (operands
) == 0)
20288 /* Mask in the interesting variable. */
20289 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20291 if (!rtx_equal_p (out
, orig_out
))
20292 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20298 * For comparison with above,
20308 if (! nonimmediate_operand (operands
[2], mode
))
20309 operands
[2] = force_reg (mode
, operands
[2]);
20310 if (! nonimmediate_operand (operands
[3], mode
))
20311 operands
[3] = force_reg (mode
, operands
[3]);
20313 if (! register_operand (operands
[2], VOIDmode
)
20315 || ! register_operand (operands
[3], VOIDmode
)))
20316 operands
[2] = force_reg (mode
, operands
[2]);
20319 && ! register_operand (operands
[3], VOIDmode
))
20320 operands
[3] = force_reg (mode
, operands
[3]);
20322 emit_insn (compare_seq
);
20323 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20324 gen_rtx_IF_THEN_ELSE (mode
,
20325 compare_op
, operands
[2],
20330 /* Swap, force into registers, or otherwise massage the two operands
20331 to an sse comparison with a mask result. Thus we differ a bit from
20332 ix86_prepare_fp_compare_args which expects to produce a flags result.
20334 The DEST operand exists to help determine whether to commute commutative
20335 operators. The POP0/POP1 operands are updated in place. The new
20336 comparison code is returned, or UNKNOWN if not implementable. */
20338 static enum rtx_code
20339 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20340 rtx
*pop0
, rtx
*pop1
)
20348 /* AVX supports all the needed comparisons. */
20351 /* We have no LTGT as an operator. We could implement it with
20352 NE & ORDERED, but this requires an extra temporary. It's
20353 not clear that it's worth it. */
20360 /* These are supported directly. */
20367 /* AVX has 3 operand comparisons, no need to swap anything. */
20370 /* For commutative operators, try to canonicalize the destination
20371 operand to be first in the comparison - this helps reload to
20372 avoid extra moves. */
20373 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20381 /* These are not supported directly before AVX, and furthermore
20382 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20383 comparison operands to transform into something that is
20388 code
= swap_condition (code
);
20392 gcc_unreachable ();
20398 /* Detect conditional moves that exactly match min/max operational
20399 semantics. Note that this is IEEE safe, as long as we don't
20400 interchange the operands.
20402 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20403 and TRUE if the operation is successful and instructions are emitted. */
20406 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20407 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20409 enum machine_mode mode
;
20415 else if (code
== UNGE
)
20418 if_true
= if_false
;
20424 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20426 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20431 mode
= GET_MODE (dest
);
20433 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20434 but MODE may be a vector mode and thus not appropriate. */
20435 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
20437 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20440 if_true
= force_reg (mode
, if_true
);
20441 v
= gen_rtvec (2, if_true
, if_false
);
20442 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
20446 code
= is_min
? SMIN
: SMAX
;
20447 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20450 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
20454 /* Expand an sse vector comparison. Return the register with the result. */
20457 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20458 rtx op_true
, rtx op_false
)
20460 enum machine_mode mode
= GET_MODE (dest
);
20461 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20464 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20465 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20466 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
20469 || reg_overlap_mentioned_p (dest
, op_true
)
20470 || reg_overlap_mentioned_p (dest
, op_false
))
20471 dest
= gen_reg_rtx (mode
);
20473 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20474 if (cmp_mode
!= mode
)
20476 x
= force_reg (cmp_mode
, x
);
20477 convert_move (dest
, x
, false);
20480 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20485 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20486 operations. This is used for both scalar and vector conditional moves. */
20489 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20491 enum machine_mode mode
= GET_MODE (dest
);
20494 if (vector_all_ones_operand (op_true
, mode
)
20495 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20497 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20499 else if (op_false
== CONST0_RTX (mode
))
20501 op_true
= force_reg (mode
, op_true
);
20502 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20503 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20505 else if (op_true
== CONST0_RTX (mode
))
20507 op_false
= force_reg (mode
, op_false
);
20508 x
= gen_rtx_NOT (mode
, cmp
);
20509 x
= gen_rtx_AND (mode
, x
, op_false
);
20510 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20512 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20514 op_false
= force_reg (mode
, op_false
);
20515 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20516 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20518 else if (TARGET_XOP
)
20520 op_true
= force_reg (mode
, op_true
);
20522 if (!nonimmediate_operand (op_false
, mode
))
20523 op_false
= force_reg (mode
, op_false
);
20525 emit_insn (gen_rtx_SET (mode
, dest
,
20526 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20532 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20534 if (!nonimmediate_operand (op_true
, mode
))
20535 op_true
= force_reg (mode
, op_true
);
20537 op_false
= force_reg (mode
, op_false
);
20543 gen
= gen_sse4_1_blendvps
;
20547 gen
= gen_sse4_1_blendvpd
;
20555 gen
= gen_sse4_1_pblendvb
;
20556 dest
= gen_lowpart (V16QImode
, dest
);
20557 op_false
= gen_lowpart (V16QImode
, op_false
);
20558 op_true
= gen_lowpart (V16QImode
, op_true
);
20559 cmp
= gen_lowpart (V16QImode
, cmp
);
20564 gen
= gen_avx_blendvps256
;
20568 gen
= gen_avx_blendvpd256
;
20576 gen
= gen_avx2_pblendvb
;
20577 dest
= gen_lowpart (V32QImode
, dest
);
20578 op_false
= gen_lowpart (V32QImode
, op_false
);
20579 op_true
= gen_lowpart (V32QImode
, op_true
);
20580 cmp
= gen_lowpart (V32QImode
, cmp
);
20588 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20591 op_true
= force_reg (mode
, op_true
);
20593 t2
= gen_reg_rtx (mode
);
20595 t3
= gen_reg_rtx (mode
);
20599 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20600 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20602 x
= gen_rtx_NOT (mode
, cmp
);
20603 x
= gen_rtx_AND (mode
, x
, op_false
);
20604 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20606 x
= gen_rtx_IOR (mode
, t3
, t2
);
20607 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20612 /* Expand a floating-point conditional move. Return true if successful. */
20615 ix86_expand_fp_movcc (rtx operands
[])
20617 enum machine_mode mode
= GET_MODE (operands
[0]);
20618 enum rtx_code code
= GET_CODE (operands
[1]);
20619 rtx tmp
, compare_op
;
20620 rtx op0
= XEXP (operands
[1], 0);
20621 rtx op1
= XEXP (operands
[1], 1);
20623 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20625 enum machine_mode cmode
;
20627 /* Since we've no cmove for sse registers, don't force bad register
20628 allocation just to gain access to it. Deny movcc when the
20629 comparison mode doesn't match the move mode. */
20630 cmode
= GET_MODE (op0
);
20631 if (cmode
== VOIDmode
)
20632 cmode
= GET_MODE (op1
);
20636 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20637 if (code
== UNKNOWN
)
20640 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20641 operands
[2], operands
[3]))
20644 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20645 operands
[2], operands
[3]);
20646 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20650 if (GET_MODE (op0
) == TImode
20651 || (GET_MODE (op0
) == DImode
20655 /* The floating point conditional move instructions don't directly
20656 support conditions resulting from a signed integer comparison. */
20658 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20659 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20661 tmp
= gen_reg_rtx (QImode
);
20662 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20664 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20667 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20668 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20669 operands
[2], operands
[3])));
20674 /* Expand a floating-point vector conditional move; a vcond operation
20675 rather than a movcc operation. */
20678 ix86_expand_fp_vcond (rtx operands
[])
20680 enum rtx_code code
= GET_CODE (operands
[3]);
20683 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20684 &operands
[4], &operands
[5]);
20685 if (code
== UNKNOWN
)
20688 switch (GET_CODE (operands
[3]))
20691 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20692 operands
[5], operands
[0], operands
[0]);
20693 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20694 operands
[5], operands
[1], operands
[2]);
20698 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20699 operands
[5], operands
[0], operands
[0]);
20700 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20701 operands
[5], operands
[1], operands
[2]);
20705 gcc_unreachable ();
20707 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20709 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20713 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20714 operands
[5], operands
[1], operands
[2]))
20717 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20718 operands
[1], operands
[2]);
20719 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20723 /* Expand a signed/unsigned integral vector conditional move. */
20726 ix86_expand_int_vcond (rtx operands
[])
20728 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20729 enum machine_mode mode
= GET_MODE (operands
[4]);
20730 enum rtx_code code
= GET_CODE (operands
[3]);
20731 bool negate
= false;
20734 cop0
= operands
[4];
20735 cop1
= operands
[5];
20737 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20738 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20739 if ((code
== LT
|| code
== GE
)
20740 && data_mode
== mode
20741 && cop1
== CONST0_RTX (mode
)
20742 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20743 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20744 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20745 && (GET_MODE_SIZE (data_mode
) == 16
20746 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20748 rtx negop
= operands
[2 - (code
== LT
)];
20749 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20750 if (negop
== CONST1_RTX (data_mode
))
20752 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20753 operands
[0], 1, OPTAB_DIRECT
);
20754 if (res
!= operands
[0])
20755 emit_move_insn (operands
[0], res
);
20758 else if (GET_MODE_INNER (data_mode
) != DImode
20759 && vector_all_ones_operand (negop
, data_mode
))
20761 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20762 operands
[0], 0, OPTAB_DIRECT
);
20763 if (res
!= operands
[0])
20764 emit_move_insn (operands
[0], res
);
20769 if (!nonimmediate_operand (cop1
, mode
))
20770 cop1
= force_reg (mode
, cop1
);
20771 if (!general_operand (operands
[1], data_mode
))
20772 operands
[1] = force_reg (data_mode
, operands
[1]);
20773 if (!general_operand (operands
[2], data_mode
))
20774 operands
[2] = force_reg (data_mode
, operands
[2]);
20776 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20778 && (mode
== V16QImode
|| mode
== V8HImode
20779 || mode
== V4SImode
|| mode
== V2DImode
))
20783 /* Canonicalize the comparison to EQ, GT, GTU. */
20794 code
= reverse_condition (code
);
20800 code
= reverse_condition (code
);
20806 code
= swap_condition (code
);
20807 x
= cop0
, cop0
= cop1
, cop1
= x
;
20811 gcc_unreachable ();
20814 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20815 if (mode
== V2DImode
)
20820 /* SSE4.1 supports EQ. */
20821 if (!TARGET_SSE4_1
)
20827 /* SSE4.2 supports GT/GTU. */
20828 if (!TARGET_SSE4_2
)
20833 gcc_unreachable ();
20837 /* Unsigned parallel compare is not supported by the hardware.
20838 Play some tricks to turn this into a signed comparison
20842 cop0
= force_reg (mode
, cop0
);
20852 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20856 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20857 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20858 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20859 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20861 gcc_unreachable ();
20863 /* Subtract (-(INT MAX) - 1) from both operands to make
20865 mask
= ix86_build_signbit_mask (mode
, true, false);
20866 t1
= gen_reg_rtx (mode
);
20867 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20869 t2
= gen_reg_rtx (mode
);
20870 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20882 /* Perform a parallel unsigned saturating subtraction. */
20883 x
= gen_reg_rtx (mode
);
20884 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20885 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20888 cop1
= CONST0_RTX (mode
);
20894 gcc_unreachable ();
20899 /* Allow the comparison to be done in one mode, but the movcc to
20900 happen in another mode. */
20901 if (data_mode
== mode
)
20903 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20904 operands
[1+negate
], operands
[2-negate
]);
20908 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20909 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20911 operands
[1+negate
], operands
[2-negate
]);
20912 x
= gen_lowpart (data_mode
, x
);
20915 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20916 operands
[2-negate
]);
20920 /* Expand a variable vector permutation. */
20923 ix86_expand_vec_perm (rtx operands
[])
20925 rtx target
= operands
[0];
20926 rtx op0
= operands
[1];
20927 rtx op1
= operands
[2];
20928 rtx mask
= operands
[3];
20929 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20930 enum machine_mode mode
= GET_MODE (op0
);
20931 enum machine_mode maskmode
= GET_MODE (mask
);
20933 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20935 /* Number of elements in the vector. */
20936 w
= GET_MODE_NUNITS (mode
);
20937 e
= GET_MODE_UNIT_SIZE (mode
);
20938 gcc_assert (w
<= 32);
20942 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20944 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20945 an constant shuffle operand. With a tiny bit of effort we can
20946 use VPERMD instead. A re-interpretation stall for V4DFmode is
20947 unfortunate but there's no avoiding it.
20948 Similarly for V16HImode we don't have instructions for variable
20949 shuffling, while for V32QImode we can use after preparing suitable
20950 masks vpshufb; vpshufb; vpermq; vpor. */
20952 if (mode
== V16HImode
)
20954 maskmode
= mode
= V32QImode
;
20960 maskmode
= mode
= V8SImode
;
20964 t1
= gen_reg_rtx (maskmode
);
20966 /* Replicate the low bits of the V4DImode mask into V8SImode:
20968 t1 = { A A B B C C D D }. */
20969 for (i
= 0; i
< w
/ 2; ++i
)
20970 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20971 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20972 vt
= force_reg (maskmode
, vt
);
20973 mask
= gen_lowpart (maskmode
, mask
);
20974 if (maskmode
== V8SImode
)
20975 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20977 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20979 /* Multiply the shuffle indicies by two. */
20980 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20983 /* Add one to the odd shuffle indicies:
20984 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20985 for (i
= 0; i
< w
/ 2; ++i
)
20987 vec
[i
* 2] = const0_rtx
;
20988 vec
[i
* 2 + 1] = const1_rtx
;
20990 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20991 vt
= validize_mem (force_const_mem (maskmode
, vt
));
20992 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20995 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20996 operands
[3] = mask
= t1
;
20997 target
= gen_lowpart (mode
, target
);
20998 op0
= gen_lowpart (mode
, op0
);
20999 op1
= gen_lowpart (mode
, op1
);
21005 /* The VPERMD and VPERMPS instructions already properly ignore
21006 the high bits of the shuffle elements. No need for us to
21007 perform an AND ourselves. */
21008 if (one_operand_shuffle
)
21009 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
21012 t1
= gen_reg_rtx (V8SImode
);
21013 t2
= gen_reg_rtx (V8SImode
);
21014 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
21015 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
21021 mask
= gen_lowpart (V8SFmode
, mask
);
21022 if (one_operand_shuffle
)
21023 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
21026 t1
= gen_reg_rtx (V8SFmode
);
21027 t2
= gen_reg_rtx (V8SFmode
);
21028 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
21029 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
21035 /* By combining the two 128-bit input vectors into one 256-bit
21036 input vector, we can use VPERMD and VPERMPS for the full
21037 two-operand shuffle. */
21038 t1
= gen_reg_rtx (V8SImode
);
21039 t2
= gen_reg_rtx (V8SImode
);
21040 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
21041 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21042 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
21043 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
21047 t1
= gen_reg_rtx (V8SFmode
);
21048 t2
= gen_reg_rtx (V8SImode
);
21049 mask
= gen_lowpart (V4SImode
, mask
);
21050 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
21051 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21052 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
21053 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
21057 t1
= gen_reg_rtx (V32QImode
);
21058 t2
= gen_reg_rtx (V32QImode
);
21059 t3
= gen_reg_rtx (V32QImode
);
21060 vt2
= GEN_INT (128);
21061 for (i
= 0; i
< 32; i
++)
21063 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21064 vt
= force_reg (V32QImode
, vt
);
21065 for (i
= 0; i
< 32; i
++)
21066 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
21067 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21068 vt2
= force_reg (V32QImode
, vt2
);
21069 /* From mask create two adjusted masks, which contain the same
21070 bits as mask in the low 7 bits of each vector element.
21071 The first mask will have the most significant bit clear
21072 if it requests element from the same 128-bit lane
21073 and MSB set if it requests element from the other 128-bit lane.
21074 The second mask will have the opposite values of the MSB,
21075 and additionally will have its 128-bit lanes swapped.
21076 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21077 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21078 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21079 stands for other 12 bytes. */
21080 /* The bit whether element is from the same lane or the other
21081 lane is bit 4, so shift it up by 3 to the MSB position. */
21082 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
21083 gen_lowpart (V4DImode
, mask
),
21085 /* Clear MSB bits from the mask just in case it had them set. */
21086 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
21087 /* After this t1 will have MSB set for elements from other lane. */
21088 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
21089 /* Clear bits other than MSB. */
21090 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
21091 /* Or in the lower bits from mask into t3. */
21092 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
21093 /* And invert MSB bits in t1, so MSB is set for elements from the same
21095 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
21096 /* Swap 128-bit lanes in t3. */
21097 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
21098 gen_lowpart (V4DImode
, t3
),
21099 const2_rtx
, GEN_INT (3),
21100 const0_rtx
, const1_rtx
));
21101 /* And or in the lower bits from mask into t1. */
21102 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
21103 if (one_operand_shuffle
)
21105 /* Each of these shuffles will put 0s in places where
21106 element from the other 128-bit lane is needed, otherwise
21107 will shuffle in the requested value. */
21108 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
21109 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
21110 /* For t3 the 128-bit lanes are swapped again. */
21111 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
21112 gen_lowpart (V4DImode
, t3
),
21113 const2_rtx
, GEN_INT (3),
21114 const0_rtx
, const1_rtx
));
21115 /* And oring both together leads to the result. */
21116 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
21120 t4
= gen_reg_rtx (V32QImode
);
21121 /* Similarly to the above one_operand_shuffle code,
21122 just for repeated twice for each operand. merge_two:
21123 code will merge the two results together. */
21124 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
21125 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
21126 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
21127 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
21128 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
21129 gen_lowpart (V4DImode
, t4
),
21130 const2_rtx
, GEN_INT (3),
21131 const0_rtx
, const1_rtx
));
21132 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
21133 gen_lowpart (V4DImode
, t3
),
21134 const2_rtx
, GEN_INT (3),
21135 const0_rtx
, const1_rtx
));
21136 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
21137 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
21143 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
21150 /* The XOP VPPERM insn supports three inputs. By ignoring the
21151 one_operand_shuffle special case, we avoid creating another
21152 set of constant vectors in memory. */
21153 one_operand_shuffle
= false;
21155 /* mask = mask & {2*w-1, ...} */
21156 vt
= GEN_INT (2*w
- 1);
21160 /* mask = mask & {w-1, ...} */
21161 vt
= GEN_INT (w
- 1);
21164 for (i
= 0; i
< w
; i
++)
21166 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21167 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21168 NULL_RTX
, 0, OPTAB_DIRECT
);
21170 /* For non-QImode operations, convert the word permutation control
21171 into a byte permutation control. */
21172 if (mode
!= V16QImode
)
21174 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
21175 GEN_INT (exact_log2 (e
)),
21176 NULL_RTX
, 0, OPTAB_DIRECT
);
21178 /* Convert mask to vector of chars. */
21179 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
21181 /* Replicate each of the input bytes into byte positions:
21182 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21183 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21184 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21185 for (i
= 0; i
< 16; ++i
)
21186 vec
[i
] = GEN_INT (i
/e
* e
);
21187 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21188 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21190 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
21192 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
21194 /* Convert it into the byte positions by doing
21195 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21196 for (i
= 0; i
< 16; ++i
)
21197 vec
[i
] = GEN_INT (i
% e
);
21198 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21199 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21200 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
21203 /* The actual shuffle operations all operate on V16QImode. */
21204 op0
= gen_lowpart (V16QImode
, op0
);
21205 op1
= gen_lowpart (V16QImode
, op1
);
21206 target
= gen_lowpart (V16QImode
, target
);
21210 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
21212 else if (one_operand_shuffle
)
21214 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
21221 /* Shuffle the two input vectors independently. */
21222 t1
= gen_reg_rtx (V16QImode
);
21223 t2
= gen_reg_rtx (V16QImode
);
21224 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
21225 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21228 /* Then merge them together. The key is whether any given control
21229 element contained a bit set that indicates the second word. */
21230 mask
= operands
[3];
21232 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21234 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21235 more shuffle to convert the V2DI input mask into a V4SI
21236 input mask. At which point the masking that expand_int_vcond
21237 will work as desired. */
21238 rtx t3
= gen_reg_rtx (V4SImode
);
21239 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21240 const0_rtx
, const0_rtx
,
21241 const2_rtx
, const2_rtx
));
21243 maskmode
= V4SImode
;
21247 for (i
= 0; i
< w
; i
++)
21249 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21250 vt
= force_reg (maskmode
, vt
);
21251 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21252 NULL_RTX
, 0, OPTAB_DIRECT
);
21254 xops
[0] = gen_lowpart (mode
, operands
[0]);
21255 xops
[1] = gen_lowpart (mode
, t2
);
21256 xops
[2] = gen_lowpart (mode
, t1
);
21257 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21260 ok
= ix86_expand_int_vcond (xops
);
21265 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21266 true if we should do zero extension, else sign extension. HIGH_P is
21267 true if we want the N/2 high elements, else the low elements. */
21270 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21272 enum machine_mode imode
= GET_MODE (src
);
21277 rtx (*unpack
)(rtx
, rtx
);
21278 rtx (*extract
)(rtx
, rtx
) = NULL
;
21279 enum machine_mode halfmode
= BLKmode
;
21285 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21287 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21288 halfmode
= V16QImode
;
21290 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21294 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21296 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21297 halfmode
= V8HImode
;
21299 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21303 unpack
= gen_avx2_zero_extendv4siv4di2
;
21305 unpack
= gen_avx2_sign_extendv4siv4di2
;
21306 halfmode
= V4SImode
;
21308 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21312 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21314 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21318 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21320 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21324 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21326 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21329 gcc_unreachable ();
21332 if (GET_MODE_SIZE (imode
) == 32)
21334 tmp
= gen_reg_rtx (halfmode
);
21335 emit_insn (extract (tmp
, src
));
21339 /* Shift higher 8 bytes to lower 8 bytes. */
21340 tmp
= gen_reg_rtx (imode
);
21341 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
21342 gen_lowpart (V1TImode
, src
),
21348 emit_insn (unpack (dest
, tmp
));
21352 rtx (*unpack
)(rtx
, rtx
, rtx
);
21358 unpack
= gen_vec_interleave_highv16qi
;
21360 unpack
= gen_vec_interleave_lowv16qi
;
21364 unpack
= gen_vec_interleave_highv8hi
;
21366 unpack
= gen_vec_interleave_lowv8hi
;
21370 unpack
= gen_vec_interleave_highv4si
;
21372 unpack
= gen_vec_interleave_lowv4si
;
21375 gcc_unreachable ();
21379 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21381 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21382 src
, pc_rtx
, pc_rtx
);
21384 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
21388 /* Expand conditional increment or decrement using adb/sbb instructions.
21389 The default case using setcc followed by the conditional move can be
21390 done by generic code. */
21392 ix86_expand_int_addcc (rtx operands
[])
21394 enum rtx_code code
= GET_CODE (operands
[1]);
21396 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21398 rtx val
= const0_rtx
;
21399 bool fpcmp
= false;
21400 enum machine_mode mode
;
21401 rtx op0
= XEXP (operands
[1], 0);
21402 rtx op1
= XEXP (operands
[1], 1);
21404 if (operands
[3] != const1_rtx
21405 && operands
[3] != constm1_rtx
)
21407 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21409 code
= GET_CODE (compare_op
);
21411 flags
= XEXP (compare_op
, 0);
21413 if (GET_MODE (flags
) == CCFPmode
21414 || GET_MODE (flags
) == CCFPUmode
)
21417 code
= ix86_fp_compare_code_to_integer (code
);
21424 PUT_CODE (compare_op
,
21425 reverse_condition_maybe_unordered
21426 (GET_CODE (compare_op
)));
21428 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21431 mode
= GET_MODE (operands
[0]);
21433 /* Construct either adc or sbb insn. */
21434 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21439 insn
= gen_subqi3_carry
;
21442 insn
= gen_subhi3_carry
;
21445 insn
= gen_subsi3_carry
;
21448 insn
= gen_subdi3_carry
;
21451 gcc_unreachable ();
21459 insn
= gen_addqi3_carry
;
21462 insn
= gen_addhi3_carry
;
21465 insn
= gen_addsi3_carry
;
21468 insn
= gen_adddi3_carry
;
21471 gcc_unreachable ();
21474 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21480 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21481 but works for floating pointer parameters and nonoffsetable memories.
21482 For pushes, it returns just stack offsets; the values will be saved
21483 in the right order. Maximally three parts are generated. */
21486 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21491 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21493 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21495 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21496 gcc_assert (size
>= 2 && size
<= 4);
21498 /* Optimize constant pool reference to immediates. This is used by fp
21499 moves, that force all constants to memory to allow combining. */
21500 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21502 rtx tmp
= maybe_get_pool_constant (operand
);
21507 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21509 /* The only non-offsetable memories we handle are pushes. */
21510 int ok
= push_operand (operand
, VOIDmode
);
21514 operand
= copy_rtx (operand
);
21515 PUT_MODE (operand
, word_mode
);
21516 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21520 if (GET_CODE (operand
) == CONST_VECTOR
)
21522 enum machine_mode imode
= int_mode_for_mode (mode
);
21523 /* Caution: if we looked through a constant pool memory above,
21524 the operand may actually have a different mode now. That's
21525 ok, since we want to pun this all the way back to an integer. */
21526 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21527 gcc_assert (operand
!= NULL
);
21533 if (mode
== DImode
)
21534 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21539 if (REG_P (operand
))
21541 gcc_assert (reload_completed
);
21542 for (i
= 0; i
< size
; i
++)
21543 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21545 else if (offsettable_memref_p (operand
))
21547 operand
= adjust_address (operand
, SImode
, 0);
21548 parts
[0] = operand
;
21549 for (i
= 1; i
< size
; i
++)
21550 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21552 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21557 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21561 real_to_target (l
, &r
, mode
);
21562 parts
[3] = gen_int_mode (l
[3], SImode
);
21563 parts
[2] = gen_int_mode (l
[2], SImode
);
21566 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21567 long double may not be 80-bit. */
21568 real_to_target (l
, &r
, mode
);
21569 parts
[2] = gen_int_mode (l
[2], SImode
);
21572 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21575 gcc_unreachable ();
21577 parts
[1] = gen_int_mode (l
[1], SImode
);
21578 parts
[0] = gen_int_mode (l
[0], SImode
);
21581 gcc_unreachable ();
21586 if (mode
== TImode
)
21587 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21588 if (mode
== XFmode
|| mode
== TFmode
)
21590 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21591 if (REG_P (operand
))
21593 gcc_assert (reload_completed
);
21594 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21595 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21597 else if (offsettable_memref_p (operand
))
21599 operand
= adjust_address (operand
, DImode
, 0);
21600 parts
[0] = operand
;
21601 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21603 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21608 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21609 real_to_target (l
, &r
, mode
);
21611 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21612 if (HOST_BITS_PER_WIDE_INT
>= 64)
21615 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21616 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21619 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21621 if (upper_mode
== SImode
)
21622 parts
[1] = gen_int_mode (l
[2], SImode
);
21623 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21626 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21627 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21630 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21633 gcc_unreachable ();
21640 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21641 Return false when normal moves are needed; true when all required
21642 insns have been emitted. Operands 2-4 contain the input values
21643 int the correct order; operands 5-7 contain the output values. */
21646 ix86_split_long_move (rtx operands
[])
21651 int collisions
= 0;
21652 enum machine_mode mode
= GET_MODE (operands
[0]);
21653 bool collisionparts
[4];
21655 /* The DFmode expanders may ask us to move double.
21656 For 64bit target this is single move. By hiding the fact
21657 here we simplify i386.md splitters. */
21658 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21660 /* Optimize constant pool reference to immediates. This is used by
21661 fp moves, that force all constants to memory to allow combining. */
21663 if (MEM_P (operands
[1])
21664 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21665 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21666 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21667 if (push_operand (operands
[0], VOIDmode
))
21669 operands
[0] = copy_rtx (operands
[0]);
21670 PUT_MODE (operands
[0], word_mode
);
21673 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21674 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21675 emit_move_insn (operands
[0], operands
[1]);
21679 /* The only non-offsettable memory we handle is push. */
21680 if (push_operand (operands
[0], VOIDmode
))
21683 gcc_assert (!MEM_P (operands
[0])
21684 || offsettable_memref_p (operands
[0]));
21686 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21687 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21689 /* When emitting push, take care for source operands on the stack. */
21690 if (push
&& MEM_P (operands
[1])
21691 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21693 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21695 /* Compensate for the stack decrement by 4. */
21696 if (!TARGET_64BIT
&& nparts
== 3
21697 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21698 src_base
= plus_constant (Pmode
, src_base
, 4);
21700 /* src_base refers to the stack pointer and is
21701 automatically decreased by emitted push. */
21702 for (i
= 0; i
< nparts
; i
++)
21703 part
[1][i
] = change_address (part
[1][i
],
21704 GET_MODE (part
[1][i
]), src_base
);
21707 /* We need to do copy in the right order in case an address register
21708 of the source overlaps the destination. */
21709 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21713 for (i
= 0; i
< nparts
; i
++)
21716 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21717 if (collisionparts
[i
])
21721 /* Collision in the middle part can be handled by reordering. */
21722 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21724 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21725 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21727 else if (collisions
== 1
21729 && (collisionparts
[1] || collisionparts
[2]))
21731 if (collisionparts
[1])
21733 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21734 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21738 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21739 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21743 /* If there are more collisions, we can't handle it by reordering.
21744 Do an lea to the last part and use only one colliding move. */
21745 else if (collisions
> 1)
21751 base
= part
[0][nparts
- 1];
21753 /* Handle the case when the last part isn't valid for lea.
21754 Happens in 64-bit mode storing the 12-byte XFmode. */
21755 if (GET_MODE (base
) != Pmode
)
21756 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21758 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21759 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21760 for (i
= 1; i
< nparts
; i
++)
21762 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21763 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21774 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21775 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21776 stack_pointer_rtx
, GEN_INT (-4)));
21777 emit_move_insn (part
[0][2], part
[1][2]);
21779 else if (nparts
== 4)
21781 emit_move_insn (part
[0][3], part
[1][3]);
21782 emit_move_insn (part
[0][2], part
[1][2]);
21787 /* In 64bit mode we don't have 32bit push available. In case this is
21788 register, it is OK - we will just use larger counterpart. We also
21789 retype memory - these comes from attempt to avoid REX prefix on
21790 moving of second half of TFmode value. */
21791 if (GET_MODE (part
[1][1]) == SImode
)
21793 switch (GET_CODE (part
[1][1]))
21796 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21800 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21804 gcc_unreachable ();
21807 if (GET_MODE (part
[1][0]) == SImode
)
21808 part
[1][0] = part
[1][1];
21811 emit_move_insn (part
[0][1], part
[1][1]);
21812 emit_move_insn (part
[0][0], part
[1][0]);
21816 /* Choose correct order to not overwrite the source before it is copied. */
21817 if ((REG_P (part
[0][0])
21818 && REG_P (part
[1][1])
21819 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21821 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21823 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21825 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21827 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21829 operands
[2 + i
] = part
[0][j
];
21830 operands
[6 + i
] = part
[1][j
];
21835 for (i
= 0; i
< nparts
; i
++)
21837 operands
[2 + i
] = part
[0][i
];
21838 operands
[6 + i
] = part
[1][i
];
21842 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21843 if (optimize_insn_for_size_p ())
21845 for (j
= 0; j
< nparts
- 1; j
++)
21846 if (CONST_INT_P (operands
[6 + j
])
21847 && operands
[6 + j
] != const0_rtx
21848 && REG_P (operands
[2 + j
]))
21849 for (i
= j
; i
< nparts
- 1; i
++)
21850 if (CONST_INT_P (operands
[7 + i
])
21851 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21852 operands
[7 + i
] = operands
[2 + j
];
21855 for (i
= 0; i
< nparts
; i
++)
21856 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21861 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21862 left shift by a constant, either using a single shift or
21863 a sequence of add instructions. */
21866 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21868 rtx (*insn
)(rtx
, rtx
, rtx
);
21871 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21872 && !optimize_insn_for_size_p ()))
21874 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21875 while (count
-- > 0)
21876 emit_insn (insn (operand
, operand
, operand
));
21880 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21881 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21886 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21888 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21889 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21890 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21892 rtx low
[2], high
[2];
21895 if (CONST_INT_P (operands
[2]))
21897 split_double_mode (mode
, operands
, 2, low
, high
);
21898 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21900 if (count
>= half_width
)
21902 emit_move_insn (high
[0], low
[1]);
21903 emit_move_insn (low
[0], const0_rtx
);
21905 if (count
> half_width
)
21906 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21910 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21912 if (!rtx_equal_p (operands
[0], operands
[1]))
21913 emit_move_insn (operands
[0], operands
[1]);
21915 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21916 ix86_expand_ashl_const (low
[0], count
, mode
);
21921 split_double_mode (mode
, operands
, 1, low
, high
);
21923 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21925 if (operands
[1] == const1_rtx
)
21927 /* Assuming we've chosen a QImode capable registers, then 1 << N
21928 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21929 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21931 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21933 ix86_expand_clear (low
[0]);
21934 ix86_expand_clear (high
[0]);
21935 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21937 d
= gen_lowpart (QImode
, low
[0]);
21938 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21939 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21940 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21942 d
= gen_lowpart (QImode
, high
[0]);
21943 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21944 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21945 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21948 /* Otherwise, we can get the same results by manually performing
21949 a bit extract operation on bit 5/6, and then performing the two
21950 shifts. The two methods of getting 0/1 into low/high are exactly
21951 the same size. Avoiding the shift in the bit extract case helps
21952 pentium4 a bit; no one else seems to care much either way. */
21955 enum machine_mode half_mode
;
21956 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21957 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21958 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21959 HOST_WIDE_INT bits
;
21962 if (mode
== DImode
)
21964 half_mode
= SImode
;
21965 gen_lshr3
= gen_lshrsi3
;
21966 gen_and3
= gen_andsi3
;
21967 gen_xor3
= gen_xorsi3
;
21972 half_mode
= DImode
;
21973 gen_lshr3
= gen_lshrdi3
;
21974 gen_and3
= gen_anddi3
;
21975 gen_xor3
= gen_xordi3
;
21979 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21980 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21982 x
= gen_lowpart (half_mode
, operands
[2]);
21983 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21985 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21986 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21987 emit_move_insn (low
[0], high
[0]);
21988 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21991 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21992 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21996 if (operands
[1] == constm1_rtx
)
21998 /* For -1 << N, we can avoid the shld instruction, because we
21999 know that we're shifting 0...31/63 ones into a -1. */
22000 emit_move_insn (low
[0], constm1_rtx
);
22001 if (optimize_insn_for_size_p ())
22002 emit_move_insn (high
[0], low
[0]);
22004 emit_move_insn (high
[0], constm1_rtx
);
22008 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22010 if (!rtx_equal_p (operands
[0], operands
[1]))
22011 emit_move_insn (operands
[0], operands
[1]);
22013 split_double_mode (mode
, operands
, 1, low
, high
);
22014 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
22017 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22019 if (TARGET_CMOVE
&& scratch
)
22021 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22022 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22024 ix86_expand_clear (scratch
);
22025 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
22029 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22030 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22032 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
22037 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22039 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
22040 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
22041 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22042 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22044 rtx low
[2], high
[2];
22047 if (CONST_INT_P (operands
[2]))
22049 split_double_mode (mode
, operands
, 2, low
, high
);
22050 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22052 if (count
== GET_MODE_BITSIZE (mode
) - 1)
22054 emit_move_insn (high
[0], high
[1]);
22055 emit_insn (gen_ashr3 (high
[0], high
[0],
22056 GEN_INT (half_width
- 1)));
22057 emit_move_insn (low
[0], high
[0]);
22060 else if (count
>= half_width
)
22062 emit_move_insn (low
[0], high
[1]);
22063 emit_move_insn (high
[0], low
[0]);
22064 emit_insn (gen_ashr3 (high
[0], high
[0],
22065 GEN_INT (half_width
- 1)));
22067 if (count
> half_width
)
22068 emit_insn (gen_ashr3 (low
[0], low
[0],
22069 GEN_INT (count
- half_width
)));
22073 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22075 if (!rtx_equal_p (operands
[0], operands
[1]))
22076 emit_move_insn (operands
[0], operands
[1]);
22078 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22079 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
22084 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22086 if (!rtx_equal_p (operands
[0], operands
[1]))
22087 emit_move_insn (operands
[0], operands
[1]);
22089 split_double_mode (mode
, operands
, 1, low
, high
);
22091 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22092 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
22094 if (TARGET_CMOVE
&& scratch
)
22096 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22097 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22099 emit_move_insn (scratch
, high
[0]);
22100 emit_insn (gen_ashr3 (scratch
, scratch
,
22101 GEN_INT (half_width
- 1)));
22102 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22107 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
22108 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
22110 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
22116 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22118 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
22119 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
22120 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22121 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22123 rtx low
[2], high
[2];
22126 if (CONST_INT_P (operands
[2]))
22128 split_double_mode (mode
, operands
, 2, low
, high
);
22129 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22131 if (count
>= half_width
)
22133 emit_move_insn (low
[0], high
[1]);
22134 ix86_expand_clear (high
[0]);
22136 if (count
> half_width
)
22137 emit_insn (gen_lshr3 (low
[0], low
[0],
22138 GEN_INT (count
- half_width
)));
22142 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22144 if (!rtx_equal_p (operands
[0], operands
[1]))
22145 emit_move_insn (operands
[0], operands
[1]);
22147 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22148 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
22153 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22155 if (!rtx_equal_p (operands
[0], operands
[1]))
22156 emit_move_insn (operands
[0], operands
[1]);
22158 split_double_mode (mode
, operands
, 1, low
, high
);
22160 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22161 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
22163 if (TARGET_CMOVE
&& scratch
)
22165 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22166 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22168 ix86_expand_clear (scratch
);
22169 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22174 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22175 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22177 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
22182 /* Predict just emitted jump instruction to be taken with probability PROB. */
22184 predict_jump (int prob
)
22186 rtx insn
= get_last_insn ();
22187 gcc_assert (JUMP_P (insn
));
22188 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
22191 /* Helper function for the string operations below. Dest VARIABLE whether
22192 it is aligned to VALUE bytes. If true, jump to the label. */
22194 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
22196 rtx label
= gen_label_rtx ();
22197 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
22198 if (GET_MODE (variable
) == DImode
)
22199 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
22201 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
22202 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
22205 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
22207 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22211 /* Adjust COUNTER by the VALUE. */
22213 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
22215 rtx (*gen_add
)(rtx
, rtx
, rtx
)
22216 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
22218 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
22221 /* Zero extend possibly SImode EXP to Pmode register. */
22223 ix86_zero_extend_to_Pmode (rtx exp
)
22225 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22228 /* Divide COUNTREG by SCALE. */
22230 scale_counter (rtx countreg
, int scale
)
22236 if (CONST_INT_P (countreg
))
22237 return GEN_INT (INTVAL (countreg
) / scale
);
22238 gcc_assert (REG_P (countreg
));
22240 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22241 GEN_INT (exact_log2 (scale
)),
22242 NULL
, 1, OPTAB_DIRECT
);
22246 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22247 DImode for constant loop counts. */
22249 static enum machine_mode
22250 counter_mode (rtx count_exp
)
22252 if (GET_MODE (count_exp
) != VOIDmode
)
22253 return GET_MODE (count_exp
);
22254 if (!CONST_INT_P (count_exp
))
22256 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22261 /* When SRCPTR is non-NULL, output simple loop to move memory
22262 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
22263 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
22264 equivalent loop to set memory by VALUE (supposed to be in MODE).
22266 The size is rounded down to whole number of chunk size moved at once.
22267 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22271 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22272 rtx destptr
, rtx srcptr
, rtx value
,
22273 rtx count
, enum machine_mode mode
, int unroll
,
22276 rtx out_label
, top_label
, iter
, tmp
;
22277 enum machine_mode iter_mode
= counter_mode (count
);
22278 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22279 rtx piece_size
= GEN_INT (piece_size_n
);
22280 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22284 top_label
= gen_label_rtx ();
22285 out_label
= gen_label_rtx ();
22286 iter
= gen_reg_rtx (iter_mode
);
22288 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22289 NULL
, 1, OPTAB_DIRECT
);
22290 /* Those two should combine. */
22291 if (piece_size
== const1_rtx
)
22293 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22295 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22297 emit_move_insn (iter
, const0_rtx
);
22299 emit_label (top_label
);
22301 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22303 /* This assert could be relaxed - in this case we'll need to compute
22304 smallest power of two, containing in PIECE_SIZE_N and pass it to
22306 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22307 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22308 destmem
= adjust_address (destmem
, mode
, 0);
22312 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22313 srcmem
= adjust_address (srcmem
, mode
, 0);
22315 /* When unrolling for chips that reorder memory reads and writes,
22316 we can save registers by using single temporary.
22317 Also using 4 temporaries is overkill in 32bit mode. */
22318 if (!TARGET_64BIT
&& 0)
22320 for (i
= 0; i
< unroll
; i
++)
22325 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22327 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22329 emit_move_insn (destmem
, srcmem
);
22335 gcc_assert (unroll
<= 4);
22336 for (i
= 0; i
< unroll
; i
++)
22338 tmpreg
[i
] = gen_reg_rtx (mode
);
22342 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22344 emit_move_insn (tmpreg
[i
], srcmem
);
22346 for (i
= 0; i
< unroll
; i
++)
22351 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22353 emit_move_insn (destmem
, tmpreg
[i
]);
22358 for (i
= 0; i
< unroll
; i
++)
22362 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22363 emit_move_insn (destmem
, value
);
22366 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22367 true, OPTAB_LIB_WIDEN
);
22369 emit_move_insn (iter
, tmp
);
22371 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22373 if (expected_size
!= -1)
22375 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22376 if (expected_size
== 0)
22378 else if (expected_size
> REG_BR_PROB_BASE
)
22379 predict_jump (REG_BR_PROB_BASE
- 1);
22381 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22384 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22385 iter
= ix86_zero_extend_to_Pmode (iter
);
22386 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22387 true, OPTAB_LIB_WIDEN
);
22388 if (tmp
!= destptr
)
22389 emit_move_insn (destptr
, tmp
);
22392 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22393 true, OPTAB_LIB_WIDEN
);
22395 emit_move_insn (srcptr
, tmp
);
22397 emit_label (out_label
);
22400 /* Output "rep; mov" instruction.
22401 Arguments have same meaning as for previous function */
22403 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
22404 rtx destptr
, rtx srcptr
,
22406 enum machine_mode mode
)
22411 HOST_WIDE_INT rounded_count
;
22413 /* If the size is known, it is shorter to use rep movs. */
22414 if (mode
== QImode
&& CONST_INT_P (count
)
22415 && !(INTVAL (count
) & 3))
22418 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22419 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22420 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22421 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22422 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22423 if (mode
!= QImode
)
22425 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22426 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22427 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22428 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22429 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22430 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22434 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22435 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22437 if (CONST_INT_P (count
))
22439 rounded_count
= (INTVAL (count
)
22440 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22441 destmem
= shallow_copy_rtx (destmem
);
22442 srcmem
= shallow_copy_rtx (srcmem
);
22443 set_mem_size (destmem
, rounded_count
);
22444 set_mem_size (srcmem
, rounded_count
);
22448 if (MEM_SIZE_KNOWN_P (destmem
))
22449 clear_mem_size (destmem
);
22450 if (MEM_SIZE_KNOWN_P (srcmem
))
22451 clear_mem_size (srcmem
);
22453 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22457 /* Output "rep; stos" instruction.
22458 Arguments have same meaning as for previous function */
22460 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
22461 rtx count
, enum machine_mode mode
,
22466 HOST_WIDE_INT rounded_count
;
22468 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22469 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22470 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22471 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
22472 if (mode
!= QImode
)
22474 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22475 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22476 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22479 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22480 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
22482 rounded_count
= (INTVAL (count
)
22483 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22484 destmem
= shallow_copy_rtx (destmem
);
22485 set_mem_size (destmem
, rounded_count
);
22487 else if (MEM_SIZE_KNOWN_P (destmem
))
22488 clear_mem_size (destmem
);
22489 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22492 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22494 SRC is passed by pointer to be updated on return.
22495 Return value is updated DST. */
22497 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22498 HOST_WIDE_INT size_to_move
)
22500 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22501 enum insn_code code
;
22502 enum machine_mode move_mode
;
22505 /* Find the widest mode in which we could perform moves.
22506 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22507 it until move of such size is supported. */
22508 piece_size
= 1 << floor_log2 (size_to_move
);
22509 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22510 code
= optab_handler (mov_optab
, move_mode
);
22511 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22514 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22515 code
= optab_handler (mov_optab
, move_mode
);
22518 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22519 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22520 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22522 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22523 move_mode
= mode_for_vector (word_mode
, nunits
);
22524 code
= optab_handler (mov_optab
, move_mode
);
22525 if (code
== CODE_FOR_nothing
)
22527 move_mode
= word_mode
;
22528 piece_size
= GET_MODE_SIZE (move_mode
);
22529 code
= optab_handler (mov_optab
, move_mode
);
22532 gcc_assert (code
!= CODE_FOR_nothing
);
22534 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22535 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22537 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22538 gcc_assert (size_to_move
% piece_size
== 0);
22539 adjust
= GEN_INT (piece_size
);
22540 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22542 /* We move from memory to memory, so we'll need to do it via
22543 a temporary register. */
22544 tempreg
= gen_reg_rtx (move_mode
);
22545 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22546 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22548 emit_move_insn (destptr
,
22549 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22550 emit_move_insn (srcptr
,
22551 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22553 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22555 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22559 /* Update DST and SRC rtx. */
22564 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22566 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22567 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22570 if (CONST_INT_P (count
))
22572 HOST_WIDE_INT countval
= INTVAL (count
);
22573 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22576 /* For now MAX_SIZE should be a power of 2. This assert could be
22577 relaxed, but it'll require a bit more complicated epilogue
22579 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22580 for (i
= max_size
; i
>= 1; i
>>= 1)
22582 if (epilogue_size
& i
)
22583 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22589 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22590 count
, 1, OPTAB_DIRECT
);
22591 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22592 count
, QImode
, 1, 4);
22596 /* When there are stringops, we can cheaply increase dest and src pointers.
22597 Otherwise we save code size by maintaining offset (zero is readily
22598 available from preceding rep operation) and using x86 addressing modes.
22600 if (TARGET_SINGLE_STRINGOP
)
22604 rtx label
= ix86_expand_aligntest (count
, 4, true);
22605 src
= change_address (srcmem
, SImode
, srcptr
);
22606 dest
= change_address (destmem
, SImode
, destptr
);
22607 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22608 emit_label (label
);
22609 LABEL_NUSES (label
) = 1;
22613 rtx label
= ix86_expand_aligntest (count
, 2, true);
22614 src
= change_address (srcmem
, HImode
, srcptr
);
22615 dest
= change_address (destmem
, HImode
, destptr
);
22616 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22617 emit_label (label
);
22618 LABEL_NUSES (label
) = 1;
22622 rtx label
= ix86_expand_aligntest (count
, 1, true);
22623 src
= change_address (srcmem
, QImode
, srcptr
);
22624 dest
= change_address (destmem
, QImode
, destptr
);
22625 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22626 emit_label (label
);
22627 LABEL_NUSES (label
) = 1;
22632 rtx offset
= force_reg (Pmode
, const0_rtx
);
22637 rtx label
= ix86_expand_aligntest (count
, 4, true);
22638 src
= change_address (srcmem
, SImode
, srcptr
);
22639 dest
= change_address (destmem
, SImode
, destptr
);
22640 emit_move_insn (dest
, src
);
22641 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22642 true, OPTAB_LIB_WIDEN
);
22644 emit_move_insn (offset
, tmp
);
22645 emit_label (label
);
22646 LABEL_NUSES (label
) = 1;
22650 rtx label
= ix86_expand_aligntest (count
, 2, true);
22651 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22652 src
= change_address (srcmem
, HImode
, tmp
);
22653 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22654 dest
= change_address (destmem
, HImode
, tmp
);
22655 emit_move_insn (dest
, src
);
22656 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22657 true, OPTAB_LIB_WIDEN
);
22659 emit_move_insn (offset
, tmp
);
22660 emit_label (label
);
22661 LABEL_NUSES (label
) = 1;
22665 rtx label
= ix86_expand_aligntest (count
, 1, true);
22666 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22667 src
= change_address (srcmem
, QImode
, tmp
);
22668 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22669 dest
= change_address (destmem
, QImode
, tmp
);
22670 emit_move_insn (dest
, src
);
22671 emit_label (label
);
22672 LABEL_NUSES (label
) = 1;
22677 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22679 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22680 rtx count
, int max_size
)
22683 expand_simple_binop (counter_mode (count
), AND
, count
,
22684 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22685 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22686 gen_lowpart (QImode
, value
), count
, QImode
,
22690 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22692 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22696 if (CONST_INT_P (count
))
22698 HOST_WIDE_INT countval
= INTVAL (count
);
22701 if ((countval
& 0x10) && max_size
> 16)
22705 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22706 emit_insn (gen_strset (destptr
, dest
, value
));
22707 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22708 emit_insn (gen_strset (destptr
, dest
, value
));
22711 gcc_unreachable ();
22714 if ((countval
& 0x08) && max_size
> 8)
22718 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22719 emit_insn (gen_strset (destptr
, dest
, value
));
22723 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22724 emit_insn (gen_strset (destptr
, dest
, value
));
22725 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22726 emit_insn (gen_strset (destptr
, dest
, value
));
22730 if ((countval
& 0x04) && max_size
> 4)
22732 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22733 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22736 if ((countval
& 0x02) && max_size
> 2)
22738 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22739 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22742 if ((countval
& 0x01) && max_size
> 1)
22744 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22745 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22752 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22757 rtx label
= ix86_expand_aligntest (count
, 16, true);
22760 dest
= change_address (destmem
, DImode
, destptr
);
22761 emit_insn (gen_strset (destptr
, dest
, value
));
22762 emit_insn (gen_strset (destptr
, dest
, value
));
22766 dest
= change_address (destmem
, SImode
, destptr
);
22767 emit_insn (gen_strset (destptr
, dest
, value
));
22768 emit_insn (gen_strset (destptr
, dest
, value
));
22769 emit_insn (gen_strset (destptr
, dest
, value
));
22770 emit_insn (gen_strset (destptr
, dest
, value
));
22772 emit_label (label
);
22773 LABEL_NUSES (label
) = 1;
22777 rtx label
= ix86_expand_aligntest (count
, 8, true);
22780 dest
= change_address (destmem
, DImode
, destptr
);
22781 emit_insn (gen_strset (destptr
, dest
, value
));
22785 dest
= change_address (destmem
, SImode
, destptr
);
22786 emit_insn (gen_strset (destptr
, dest
, value
));
22787 emit_insn (gen_strset (destptr
, dest
, value
));
22789 emit_label (label
);
22790 LABEL_NUSES (label
) = 1;
22794 rtx label
= ix86_expand_aligntest (count
, 4, true);
22795 dest
= change_address (destmem
, SImode
, destptr
);
22796 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22797 emit_label (label
);
22798 LABEL_NUSES (label
) = 1;
22802 rtx label
= ix86_expand_aligntest (count
, 2, true);
22803 dest
= change_address (destmem
, HImode
, destptr
);
22804 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22805 emit_label (label
);
22806 LABEL_NUSES (label
) = 1;
22810 rtx label
= ix86_expand_aligntest (count
, 1, true);
22811 dest
= change_address (destmem
, QImode
, destptr
);
22812 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22813 emit_label (label
);
22814 LABEL_NUSES (label
) = 1;
22818 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22820 Return value is updated DESTMEM. */
22822 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22823 rtx destptr
, rtx srcptr
, rtx count
,
22824 int align
, int desired_alignment
)
22827 for (i
= 1; i
< desired_alignment
; i
<<= 1)
22831 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
22832 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22833 ix86_adjust_counter (count
, i
);
22834 emit_label (label
);
22835 LABEL_NUSES (label
) = 1;
22836 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
22842 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22843 ALIGN_BYTES is how many bytes need to be copied.
22844 The function updates DST and SRC, namely, it sets proper alignment.
22845 DST is returned via return value, SRC is updated via pointer SRCP. */
22847 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22848 int desired_align
, int align_bytes
)
22851 rtx orig_dst
= dst
;
22852 rtx orig_src
= src
;
22853 int piece_size
= 1;
22854 int copied_bytes
= 0;
22855 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22856 if (src_align_bytes
>= 0)
22857 src_align_bytes
= desired_align
- src_align_bytes
;
22859 for (piece_size
= 1;
22860 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
22863 if (align_bytes
& piece_size
)
22865 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
22866 copied_bytes
+= piece_size
;
22870 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22871 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22872 if (src_align_bytes
>= 0)
22874 unsigned int src_align
;
22875 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
22877 if ((src_align_bytes
& (src_align
- 1))
22878 == (align_bytes
& (src_align
- 1)))
22881 if (src_align
> (unsigned int) desired_align
)
22882 src_align
= desired_align
;
22883 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22884 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22886 if (MEM_SIZE_KNOWN_P (orig_dst
))
22887 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22888 if (MEM_SIZE_KNOWN_P (orig_src
))
22889 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22894 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22895 DESIRED_ALIGNMENT. */
22897 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22898 int align
, int desired_alignment
)
22900 if (align
<= 1 && desired_alignment
> 1)
22902 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22903 destmem
= change_address (destmem
, QImode
, destptr
);
22904 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22905 ix86_adjust_counter (count
, 1);
22906 emit_label (label
);
22907 LABEL_NUSES (label
) = 1;
22909 if (align
<= 2 && desired_alignment
> 2)
22911 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22912 destmem
= change_address (destmem
, HImode
, destptr
);
22913 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22914 ix86_adjust_counter (count
, 2);
22915 emit_label (label
);
22916 LABEL_NUSES (label
) = 1;
22918 if (align
<= 4 && desired_alignment
> 4)
22920 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22921 destmem
= change_address (destmem
, SImode
, destptr
);
22922 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22923 ix86_adjust_counter (count
, 4);
22924 emit_label (label
);
22925 LABEL_NUSES (label
) = 1;
22927 gcc_assert (desired_alignment
<= 8);
22930 /* Set enough from DST to align DST known to by aligned by ALIGN to
22931 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22933 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22934 int desired_align
, int align_bytes
)
22937 rtx orig_dst
= dst
;
22938 if (align_bytes
& 1)
22940 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22942 emit_insn (gen_strset (destreg
, dst
,
22943 gen_lowpart (QImode
, value
)));
22945 if (align_bytes
& 2)
22947 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22948 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22949 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22951 emit_insn (gen_strset (destreg
, dst
,
22952 gen_lowpart (HImode
, value
)));
22954 if (align_bytes
& 4)
22956 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22957 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22958 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22960 emit_insn (gen_strset (destreg
, dst
,
22961 gen_lowpart (SImode
, value
)));
22963 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22964 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22965 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22966 if (MEM_SIZE_KNOWN_P (orig_dst
))
22967 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22971 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22972 static enum stringop_alg
22973 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22974 int *dynamic_check
, bool *noalign
)
22976 const struct stringop_algs
* algs
;
22977 bool optimize_for_speed
;
22978 /* Algorithms using the rep prefix want at least edi and ecx;
22979 additionally, memset wants eax and memcpy wants esi. Don't
22980 consider such algorithms if the user has appropriated those
22981 registers for their own purposes. */
22982 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22984 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22987 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22988 || (alg != rep_prefix_1_byte \
22989 && alg != rep_prefix_4_byte \
22990 && alg != rep_prefix_8_byte))
22991 const struct processor_costs
*cost
;
22993 /* Even if the string operation call is cold, we still might spend a lot
22994 of time processing large blocks. */
22995 if (optimize_function_for_size_p (cfun
)
22996 || (optimize_insn_for_size_p ()
22997 && expected_size
!= -1 && expected_size
< 256))
22998 optimize_for_speed
= false;
23000 optimize_for_speed
= true;
23002 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
23004 *dynamic_check
= -1;
23006 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
23008 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
23009 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
23010 return ix86_stringop_alg
;
23011 /* rep; movq or rep; movl is the smallest variant. */
23012 else if (!optimize_for_speed
)
23014 if (!count
|| (count
& 3))
23015 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
23017 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
23019 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
23021 else if (expected_size
!= -1 && expected_size
< 4)
23022 return loop_1_byte
;
23023 else if (expected_size
!= -1)
23026 enum stringop_alg alg
= libcall
;
23027 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23029 /* We get here if the algorithms that were not libcall-based
23030 were rep-prefix based and we are unable to use rep prefixes
23031 based on global register usage. Break out of the loop and
23032 use the heuristic below. */
23033 if (algs
->size
[i
].max
== 0)
23035 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
23037 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23039 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
23041 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23042 last non-libcall inline algorithm. */
23043 if (TARGET_INLINE_ALL_STRINGOPS
)
23045 /* When the current size is best to be copied by a libcall,
23046 but we are still forced to inline, run the heuristic below
23047 that will pick code for medium sized blocks. */
23048 if (alg
!= libcall
)
23052 else if (ALG_USABLE_P (candidate
))
23054 *noalign
= algs
->size
[i
].noalign
;
23059 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
23061 /* When asked to inline the call anyway, try to pick meaningful choice.
23062 We look for maximal size of block that is faster to copy by hand and
23063 take blocks of at most of that size guessing that average size will
23064 be roughly half of the block.
23066 If this turns out to be bad, we might simply specify the preferred
23067 choice in ix86_costs. */
23068 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23069 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
23072 enum stringop_alg alg
;
23074 bool any_alg_usable_p
= true;
23076 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23078 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23079 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
23081 if (candidate
!= libcall
&& candidate
23082 && ALG_USABLE_P (candidate
))
23083 max
= algs
->size
[i
].max
;
23085 /* If there aren't any usable algorithms, then recursing on
23086 smaller sizes isn't going to find anything. Just return the
23087 simple byte-at-a-time copy loop. */
23088 if (!any_alg_usable_p
)
23090 /* Pick something reasonable. */
23091 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23092 *dynamic_check
= 128;
23093 return loop_1_byte
;
23097 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
23098 gcc_assert (*dynamic_check
== -1);
23099 gcc_assert (alg
!= libcall
);
23100 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23101 *dynamic_check
= max
;
23104 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
23105 #undef ALG_USABLE_P
23108 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23109 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23111 decide_alignment (int align
,
23112 enum stringop_alg alg
,
23114 enum machine_mode move_mode
)
23116 int desired_align
= 0;
23118 gcc_assert (alg
!= no_stringop
);
23120 if (alg
== libcall
)
23122 if (move_mode
== VOIDmode
)
23125 desired_align
= GET_MODE_SIZE (move_mode
);
23126 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
23127 copying whole cacheline at once. */
23128 if (TARGET_PENTIUMPRO
23129 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
23134 if (desired_align
< align
)
23135 desired_align
= align
;
23136 if (expected_size
!= -1 && expected_size
< 4)
23137 desired_align
= align
;
23139 return desired_align
;
23142 /* Expand string move (memcpy) operation. Use i386 string operations
23143 when profitable. expand_setmem contains similar code. The code
23144 depends upon architecture, block size and alignment, but always has
23145 the same overall structure:
23147 1) Prologue guard: Conditional that jumps up to epilogues for small
23148 blocks that can be handled by epilogue alone. This is faster
23149 but also needed for correctness, since prologue assume the block
23150 is larger than the desired alignment.
23152 Optional dynamic check for size and libcall for large
23153 blocks is emitted here too, with -minline-stringops-dynamically.
23155 2) Prologue: copy first few bytes in order to get destination
23156 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23157 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23158 copied. We emit either a jump tree on power of two sized
23159 blocks, or a byte loop.
23161 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23162 with specified algorithm.
23164 4) Epilogue: code copying tail of the block that is too small to be
23165 handled by main body (or up to size guarded by prologue guard). */
23168 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
23169 rtx expected_align_exp
, rtx expected_size_exp
)
23175 rtx jump_around_label
= NULL
;
23176 HOST_WIDE_INT align
= 1;
23177 unsigned HOST_WIDE_INT count
= 0;
23178 HOST_WIDE_INT expected_size
= -1;
23179 int size_needed
= 0, epilogue_size_needed
;
23180 int desired_align
= 0, align_bytes
= 0;
23181 enum stringop_alg alg
;
23183 bool need_zero_guard
= false;
23185 enum machine_mode move_mode
= VOIDmode
;
23186 int unroll_factor
= 1;
23188 if (CONST_INT_P (align_exp
))
23189 align
= INTVAL (align_exp
);
23190 /* i386 can do misaligned access on reasonably increased cost. */
23191 if (CONST_INT_P (expected_align_exp
)
23192 && INTVAL (expected_align_exp
) > align
)
23193 align
= INTVAL (expected_align_exp
);
23194 /* ALIGN is the minimum of destination and source alignment, but we care here
23195 just about destination alignment. */
23196 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
23197 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
23199 if (CONST_INT_P (count_exp
))
23200 count
= expected_size
= INTVAL (count_exp
);
23201 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23202 expected_size
= INTVAL (expected_size_exp
);
23204 /* Make sure we don't need to care about overflow later on. */
23205 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23208 /* Step 0: Decide on preferred algorithm, desired alignment and
23209 size of chunks to be copied by main loop. */
23210 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
23211 if (alg
== libcall
)
23213 gcc_assert (alg
!= no_stringop
);
23216 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
23217 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23218 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
23221 move_mode
= word_mode
;
23227 gcc_unreachable ();
23229 need_zero_guard
= true;
23230 move_mode
= QImode
;
23233 need_zero_guard
= true;
23235 case unrolled_loop
:
23236 need_zero_guard
= true;
23237 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23240 need_zero_guard
= true;
23242 /* Find the widest supported mode. */
23243 move_mode
= word_mode
;
23244 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23245 != CODE_FOR_nothing
)
23246 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23248 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23249 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23250 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23252 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23253 move_mode
= mode_for_vector (word_mode
, nunits
);
23254 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23255 move_mode
= word_mode
;
23257 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23259 case rep_prefix_8_byte
:
23260 move_mode
= DImode
;
23262 case rep_prefix_4_byte
:
23263 move_mode
= SImode
;
23265 case rep_prefix_1_byte
:
23266 move_mode
= QImode
;
23269 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23270 epilogue_size_needed
= size_needed
;
23272 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23273 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23274 align
= desired_align
;
23276 /* Step 1: Prologue guard. */
23278 /* Alignment code needs count to be in register. */
23279 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23281 if (INTVAL (count_exp
) > desired_align
23282 && INTVAL (count_exp
) > size_needed
)
23285 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23286 if (align_bytes
<= 0)
23289 align_bytes
= desired_align
- align_bytes
;
23291 if (align_bytes
== 0)
23292 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23294 gcc_assert (desired_align
>= 1 && align
>= 1);
23296 /* Ensure that alignment prologue won't copy past end of block. */
23297 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23299 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23300 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
23301 Make sure it is power of 2. */
23302 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23306 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23308 /* If main algorithm works on QImode, no epilogue is needed.
23309 For small sizes just don't align anything. */
23310 if (size_needed
== 1)
23311 desired_align
= align
;
23318 label
= gen_label_rtx ();
23319 emit_cmp_and_jump_insns (count_exp
,
23320 GEN_INT (epilogue_size_needed
),
23321 LTU
, 0, counter_mode (count_exp
), 1, label
);
23322 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
23323 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23325 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23329 /* Emit code to decide on runtime whether library call or inline should be
23331 if (dynamic_check
!= -1)
23333 if (CONST_INT_P (count_exp
))
23335 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
23337 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23338 count_exp
= const0_rtx
;
23344 rtx hot_label
= gen_label_rtx ();
23345 jump_around_label
= gen_label_rtx ();
23346 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23347 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
23348 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23349 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
23350 emit_jump (jump_around_label
);
23351 emit_label (hot_label
);
23355 /* Step 2: Alignment prologue. */
23357 if (desired_align
> align
)
23359 if (align_bytes
== 0)
23361 /* Except for the first move in epilogue, we no longer know
23362 constant offset in aliasing info. It don't seems to worth
23363 the pain to maintain it for the first move, so throw away
23365 src
= change_address (src
, BLKmode
, srcreg
);
23366 dst
= change_address (dst
, BLKmode
, destreg
);
23367 dst
= expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
23372 /* If we know how many bytes need to be stored before dst is
23373 sufficiently aligned, maintain aliasing info accurately. */
23374 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
23375 desired_align
, align_bytes
);
23376 count_exp
= plus_constant (counter_mode (count_exp
),
23377 count_exp
, -align_bytes
);
23378 count
-= align_bytes
;
23380 if (need_zero_guard
23381 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23382 || (align_bytes
== 0
23383 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23384 + desired_align
- align
))))
23386 /* It is possible that we copied enough so the main loop will not
23388 gcc_assert (size_needed
> 1);
23389 if (label
== NULL_RTX
)
23390 label
= gen_label_rtx ();
23391 emit_cmp_and_jump_insns (count_exp
,
23392 GEN_INT (size_needed
),
23393 LTU
, 0, counter_mode (count_exp
), 1, label
);
23394 if (expected_size
== -1
23395 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23396 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23398 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23401 if (label
&& size_needed
== 1)
23403 emit_label (label
);
23404 LABEL_NUSES (label
) = 1;
23406 epilogue_size_needed
= 1;
23408 else if (label
== NULL_RTX
)
23409 epilogue_size_needed
= size_needed
;
23411 /* Step 3: Main loop. */
23418 gcc_unreachable ();
23421 case unrolled_loop
:
23423 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
23424 count_exp
, move_mode
, unroll_factor
,
23427 case rep_prefix_8_byte
:
23428 case rep_prefix_4_byte
:
23429 case rep_prefix_1_byte
:
23430 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
23434 /* Adjust properly the offset of src and dest memory for aliasing. */
23435 if (CONST_INT_P (count_exp
))
23437 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
23438 (count
/ size_needed
) * size_needed
);
23439 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23440 (count
/ size_needed
) * size_needed
);
23444 src
= change_address (src
, BLKmode
, srcreg
);
23445 dst
= change_address (dst
, BLKmode
, destreg
);
23448 /* Step 4: Epilogue to copy the remaining bytes. */
23452 /* When the main loop is done, COUNT_EXP might hold original count,
23453 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23454 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23455 bytes. Compensate if needed. */
23457 if (size_needed
< epilogue_size_needed
)
23460 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23461 GEN_INT (size_needed
- 1), count_exp
, 1,
23463 if (tmp
!= count_exp
)
23464 emit_move_insn (count_exp
, tmp
);
23466 emit_label (label
);
23467 LABEL_NUSES (label
) = 1;
23470 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23471 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
23473 if (jump_around_label
)
23474 emit_label (jump_around_label
);
23478 /* Helper function for memcpy. For QImode value 0xXY produce
23479 0xXYXYXYXY of wide specified by MODE. This is essentially
23480 a * 0x10101010, but we can do slightly better than
23481 synth_mult by unwinding the sequence by hand on CPUs with
23484 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23486 enum machine_mode valmode
= GET_MODE (val
);
23488 int nops
= mode
== DImode
? 3 : 2;
23490 gcc_assert (mode
== SImode
|| mode
== DImode
);
23491 if (val
== const0_rtx
)
23492 return copy_to_mode_reg (mode
, const0_rtx
);
23493 if (CONST_INT_P (val
))
23495 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23499 if (mode
== DImode
)
23500 v
|= (v
<< 16) << 16;
23501 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23504 if (valmode
== VOIDmode
)
23506 if (valmode
!= QImode
)
23507 val
= gen_lowpart (QImode
, val
);
23508 if (mode
== QImode
)
23510 if (!TARGET_PARTIAL_REG_STALL
)
23512 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23513 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23514 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23515 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23517 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23518 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23519 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23524 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23526 if (!TARGET_PARTIAL_REG_STALL
)
23527 if (mode
== SImode
)
23528 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23530 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23533 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23534 NULL
, 1, OPTAB_DIRECT
);
23536 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23538 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23539 NULL
, 1, OPTAB_DIRECT
);
23540 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23541 if (mode
== SImode
)
23543 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23544 NULL
, 1, OPTAB_DIRECT
);
23545 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23550 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23551 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23552 alignment from ALIGN to DESIRED_ALIGN. */
23554 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23559 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23560 promoted_val
= promote_duplicated_reg (DImode
, val
);
23561 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23562 promoted_val
= promote_duplicated_reg (SImode
, val
);
23563 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23564 promoted_val
= promote_duplicated_reg (HImode
, val
);
23566 promoted_val
= val
;
23568 return promoted_val
;
23571 /* Expand string clear operation (bzero). Use i386 string operations when
23572 profitable. See expand_movmem comment for explanation of individual
23573 steps performed. */
23575 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23576 rtx expected_align_exp
, rtx expected_size_exp
)
23581 rtx jump_around_label
= NULL
;
23582 HOST_WIDE_INT align
= 1;
23583 unsigned HOST_WIDE_INT count
= 0;
23584 HOST_WIDE_INT expected_size
= -1;
23585 int size_needed
= 0, epilogue_size_needed
;
23586 int desired_align
= 0, align_bytes
= 0;
23587 enum stringop_alg alg
;
23588 rtx promoted_val
= NULL
;
23589 bool force_loopy_epilogue
= false;
23591 bool need_zero_guard
= false;
23593 enum machine_mode move_mode
= VOIDmode
;
23596 if (CONST_INT_P (align_exp
))
23597 align
= INTVAL (align_exp
);
23598 /* i386 can do misaligned access on reasonably increased cost. */
23599 if (CONST_INT_P (expected_align_exp
)
23600 && INTVAL (expected_align_exp
) > align
)
23601 align
= INTVAL (expected_align_exp
);
23602 if (CONST_INT_P (count_exp
))
23603 count
= expected_size
= INTVAL (count_exp
);
23604 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23605 expected_size
= INTVAL (expected_size_exp
);
23607 /* Make sure we don't need to care about overflow later on. */
23608 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23611 /* Step 0: Decide on preferred algorithm, desired alignment and
23612 size of chunks to be copied by main loop. */
23614 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23615 if (alg
== libcall
)
23617 gcc_assert (alg
!= no_stringop
);
23620 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23621 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23623 move_mode
= word_mode
;
23630 gcc_unreachable ();
23632 need_zero_guard
= true;
23635 case unrolled_loop
:
23636 need_zero_guard
= true;
23639 case rep_prefix_8_byte
:
23640 move_mode
= DImode
;
23642 case rep_prefix_4_byte
:
23643 move_mode
= SImode
;
23645 case rep_prefix_1_byte
:
23646 move_mode
= QImode
;
23649 need_zero_guard
= true;
23650 move_mode
= QImode
;
23653 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23654 epilogue_size_needed
= size_needed
;
23656 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23657 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23658 align
= desired_align
;
23660 /* Step 1: Prologue guard. */
23662 /* Alignment code needs count to be in register. */
23663 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23665 if (INTVAL (count_exp
) > desired_align
23666 && INTVAL (count_exp
) > size_needed
)
23669 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23670 if (align_bytes
<= 0)
23673 align_bytes
= desired_align
- align_bytes
;
23675 if (align_bytes
== 0)
23677 enum machine_mode mode
= SImode
;
23678 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23680 count_exp
= force_reg (mode
, count_exp
);
23683 /* Do the cheap promotion to allow better CSE across the
23684 main loop and epilogue (ie one load of the big constant in the
23685 front of all code. */
23686 if (CONST_INT_P (val_exp
))
23687 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23688 desired_align
, align
);
23689 /* Ensure that alignment prologue won't copy past end of block. */
23690 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23692 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23693 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23694 Make sure it is power of 2. */
23695 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
23697 /* To improve performance of small blocks, we jump around the VAL
23698 promoting mode. This mean that if the promoted VAL is not constant,
23699 we might not use it in the epilogue and have to use byte
23701 if (epilogue_size_needed
> 2 && !promoted_val
)
23702 force_loopy_epilogue
= true;
23705 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23707 /* If main algorithm works on QImode, no epilogue is needed.
23708 For small sizes just don't align anything. */
23709 if (size_needed
== 1)
23710 desired_align
= align
;
23717 label
= gen_label_rtx ();
23718 emit_cmp_and_jump_insns (count_exp
,
23719 GEN_INT (epilogue_size_needed
),
23720 LTU
, 0, counter_mode (count_exp
), 1, label
);
23721 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23722 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23724 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23727 if (dynamic_check
!= -1)
23729 rtx hot_label
= gen_label_rtx ();
23730 jump_around_label
= gen_label_rtx ();
23731 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23732 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23733 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23734 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23735 emit_jump (jump_around_label
);
23736 emit_label (hot_label
);
23739 /* Step 2: Alignment prologue. */
23741 /* Do the expensive promotion once we branched off the small blocks. */
23743 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23744 desired_align
, align
);
23745 gcc_assert (desired_align
>= 1 && align
>= 1);
23747 if (desired_align
> align
)
23749 if (align_bytes
== 0)
23751 /* Except for the first move in epilogue, we no longer know
23752 constant offset in aliasing info. It don't seems to worth
23753 the pain to maintain it for the first move, so throw away
23755 dst
= change_address (dst
, BLKmode
, destreg
);
23756 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23761 /* If we know how many bytes need to be stored before dst is
23762 sufficiently aligned, maintain aliasing info accurately. */
23763 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23764 desired_align
, align_bytes
);
23765 count_exp
= plus_constant (counter_mode (count_exp
),
23766 count_exp
, -align_bytes
);
23767 count
-= align_bytes
;
23769 if (need_zero_guard
23770 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23771 || (align_bytes
== 0
23772 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23773 + desired_align
- align
))))
23775 /* It is possible that we copied enough so the main loop will not
23777 gcc_assert (size_needed
> 1);
23778 if (label
== NULL_RTX
)
23779 label
= gen_label_rtx ();
23780 emit_cmp_and_jump_insns (count_exp
,
23781 GEN_INT (size_needed
),
23782 LTU
, 0, counter_mode (count_exp
), 1, label
);
23783 if (expected_size
== -1
23784 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23785 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23787 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23790 if (label
&& size_needed
== 1)
23792 emit_label (label
);
23793 LABEL_NUSES (label
) = 1;
23795 promoted_val
= val_exp
;
23796 epilogue_size_needed
= 1;
23798 else if (label
== NULL_RTX
)
23799 epilogue_size_needed
= size_needed
;
23801 /* Step 3: Main loop. */
23808 gcc_unreachable ();
23812 case unrolled_loop
:
23813 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23814 count_exp
, move_mode
, unroll_factor
,
23817 case rep_prefix_8_byte
:
23818 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23821 case rep_prefix_4_byte
:
23822 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23825 case rep_prefix_1_byte
:
23826 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23830 /* Adjust properly the offset of src and dest memory for aliasing. */
23831 if (CONST_INT_P (count_exp
))
23832 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23833 (count
/ size_needed
) * size_needed
);
23835 dst
= change_address (dst
, BLKmode
, destreg
);
23837 /* Step 4: Epilogue to copy the remaining bytes. */
23841 /* When the main loop is done, COUNT_EXP might hold original count,
23842 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23843 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23844 bytes. Compensate if needed. */
23846 if (size_needed
< epilogue_size_needed
)
23849 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23850 GEN_INT (size_needed
- 1), count_exp
, 1,
23852 if (tmp
!= count_exp
)
23853 emit_move_insn (count_exp
, tmp
);
23855 emit_label (label
);
23856 LABEL_NUSES (label
) = 1;
23859 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23861 if (force_loopy_epilogue
)
23862 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23863 epilogue_size_needed
);
23865 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23866 epilogue_size_needed
);
23868 if (jump_around_label
)
23869 emit_label (jump_around_label
);
23873 /* Expand the appropriate insns for doing strlen if not just doing
23876 out = result, initialized with the start address
23877 align_rtx = alignment of the address.
23878 scratch = scratch register, initialized with the startaddress when
23879 not aligned, otherwise undefined
23881 This is just the body. It needs the initializations mentioned above and
23882 some address computing at the end. These things are done in i386.md. */
23885 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23889 rtx align_2_label
= NULL_RTX
;
23890 rtx align_3_label
= NULL_RTX
;
23891 rtx align_4_label
= gen_label_rtx ();
23892 rtx end_0_label
= gen_label_rtx ();
23894 rtx tmpreg
= gen_reg_rtx (SImode
);
23895 rtx scratch
= gen_reg_rtx (SImode
);
23899 if (CONST_INT_P (align_rtx
))
23900 align
= INTVAL (align_rtx
);
23902 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23904 /* Is there a known alignment and is it less than 4? */
23907 rtx scratch1
= gen_reg_rtx (Pmode
);
23908 emit_move_insn (scratch1
, out
);
23909 /* Is there a known alignment and is it not 2? */
23912 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23913 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23915 /* Leave just the 3 lower bits. */
23916 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23917 NULL_RTX
, 0, OPTAB_WIDEN
);
23919 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23920 Pmode
, 1, align_4_label
);
23921 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23922 Pmode
, 1, align_2_label
);
23923 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23924 Pmode
, 1, align_3_label
);
23928 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23929 check if is aligned to 4 - byte. */
23931 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23932 NULL_RTX
, 0, OPTAB_WIDEN
);
23934 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23935 Pmode
, 1, align_4_label
);
23938 mem
= change_address (src
, QImode
, out
);
23940 /* Now compare the bytes. */
23942 /* Compare the first n unaligned byte on a byte per byte basis. */
23943 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23944 QImode
, 1, end_0_label
);
23946 /* Increment the address. */
23947 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23949 /* Not needed with an alignment of 2 */
23952 emit_label (align_2_label
);
23954 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23957 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23959 emit_label (align_3_label
);
23962 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23965 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23968 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23969 align this loop. It gives only huge programs, but does not help to
23971 emit_label (align_4_label
);
23973 mem
= change_address (src
, SImode
, out
);
23974 emit_move_insn (scratch
, mem
);
23975 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23977 /* This formula yields a nonzero result iff one of the bytes is zero.
23978 This saves three branches inside loop and many cycles. */
23980 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23981 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23982 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23983 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23984 gen_int_mode (0x80808080, SImode
)));
23985 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23990 rtx reg
= gen_reg_rtx (SImode
);
23991 rtx reg2
= gen_reg_rtx (Pmode
);
23992 emit_move_insn (reg
, tmpreg
);
23993 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23995 /* If zero is not in the first two bytes, move two bytes forward. */
23996 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23997 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23998 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23999 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
24000 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
24003 /* Emit lea manually to avoid clobbering of flags. */
24004 emit_insn (gen_rtx_SET (SImode
, reg2
,
24005 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
24007 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24008 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24009 emit_insn (gen_rtx_SET (VOIDmode
, out
,
24010 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
24016 rtx end_2_label
= gen_label_rtx ();
24017 /* Is zero in the first two bytes? */
24019 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24020 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24021 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
24022 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24023 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
24025 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24026 JUMP_LABEL (tmp
) = end_2_label
;
24028 /* Not in the first two. Move two bytes forward. */
24029 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
24030 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
24032 emit_label (end_2_label
);
24036 /* Avoid branch in fixing the byte. */
24037 tmpreg
= gen_lowpart (QImode
, tmpreg
);
24038 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
24039 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24040 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
24041 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
24043 emit_label (end_0_label
);
24046 /* Expand strlen. */
24049 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
24051 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
24053 /* The generic case of strlen expander is long. Avoid it's
24054 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
24056 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24057 && !TARGET_INLINE_ALL_STRINGOPS
24058 && !optimize_insn_for_size_p ()
24059 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
24062 addr
= force_reg (Pmode
, XEXP (src
, 0));
24063 scratch1
= gen_reg_rtx (Pmode
);
24065 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24066 && !optimize_insn_for_size_p ())
24068 /* Well it seems that some optimizer does not combine a call like
24069 foo(strlen(bar), strlen(bar));
24070 when the move and the subtraction is done here. It does calculate
24071 the length just once when these instructions are done inside of
24072 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24073 often used and I use one fewer register for the lifetime of
24074 output_strlen_unroll() this is better. */
24076 emit_move_insn (out
, addr
);
24078 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
24080 /* strlensi_unroll_1 returns the address of the zero at the end of
24081 the string, like memchr(), so compute the length by subtracting
24082 the start address. */
24083 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
24089 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24090 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
24093 scratch2
= gen_reg_rtx (Pmode
);
24094 scratch3
= gen_reg_rtx (Pmode
);
24095 scratch4
= force_reg (Pmode
, constm1_rtx
);
24097 emit_move_insn (scratch3
, addr
);
24098 eoschar
= force_reg (QImode
, eoschar
);
24100 src
= replace_equiv_address_nv (src
, scratch3
);
24102 /* If .md starts supporting :P, this can be done in .md. */
24103 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
24104 scratch4
), UNSPEC_SCAS
);
24105 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
24106 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
24107 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
24112 /* For given symbol (function) construct code to compute address of it's PLT
24113 entry in large x86-64 PIC model. */
24115 construct_plt_address (rtx symbol
)
24119 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
24120 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
24121 gcc_assert (Pmode
== DImode
);
24123 tmp
= gen_reg_rtx (Pmode
);
24124 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
24126 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
24127 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
24132 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
24134 rtx pop
, bool sibcall
)
24136 unsigned int const cregs_size
24137 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
24138 rtx vec
[3 + cregs_size
];
24139 rtx use
= NULL
, call
;
24140 unsigned int vec_len
= 0;
24142 if (pop
== const0_rtx
)
24144 gcc_assert (!TARGET_64BIT
|| !pop
);
24146 if (TARGET_MACHO
&& !TARGET_64BIT
)
24149 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
24150 fnaddr
= machopic_indirect_call_target (fnaddr
);
24155 /* Static functions and indirect calls don't need the pic register. */
24158 || (ix86_cmodel
== CM_LARGE_PIC
24159 && DEFAULT_ABI
!= MS_ABI
))
24160 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24161 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
24162 use_reg (&use
, pic_offset_table_rtx
);
24165 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
24167 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
24168 emit_move_insn (al
, callarg2
);
24169 use_reg (&use
, al
);
24172 if (ix86_cmodel
== CM_LARGE_PIC
24175 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24176 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
24177 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
24179 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
24180 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
24182 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
24183 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
24186 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
24188 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
24189 vec
[vec_len
++] = call
;
24193 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
24194 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
24195 vec
[vec_len
++] = pop
;
24198 if (TARGET_64BIT_MS_ABI
24199 && (!callarg2
|| INTVAL (callarg2
) != -2))
24203 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
24204 UNSPEC_MS_TO_SYSV_CALL
);
24206 for (i
= 0; i
< cregs_size
; i
++)
24208 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
24209 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
24212 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
24217 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
24218 call
= emit_call_insn (call
);
24220 CALL_INSN_FUNCTION_USAGE (call
) = use
;
24225 /* Output the assembly for a call instruction. */
24228 ix86_output_call_insn (rtx insn
, rtx call_op
)
24230 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24231 bool seh_nop_p
= false;
24234 if (SIBLING_CALL_P (insn
))
24238 /* SEH epilogue detection requires the indirect branch case
24239 to include REX.W. */
24240 else if (TARGET_SEH
)
24241 xasm
= "rex.W jmp %A0";
24245 output_asm_insn (xasm
, &call_op
);
24249 /* SEH unwinding can require an extra nop to be emitted in several
24250 circumstances. Determine if we have one of those. */
24255 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24257 /* If we get to another real insn, we don't need the nop. */
24261 /* If we get to the epilogue note, prevent a catch region from
24262 being adjacent to the standard epilogue sequence. If non-
24263 call-exceptions, we'll have done this during epilogue emission. */
24264 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24265 && !flag_non_call_exceptions
24266 && !can_throw_internal (insn
))
24273 /* If we didn't find a real insn following the call, prevent the
24274 unwinder from looking into the next function. */
24280 xasm
= "call\t%P0";
24282 xasm
= "call\t%A0";
24284 output_asm_insn (xasm
, &call_op
);
24292 /* Clear stack slot assignments remembered from previous functions.
24293 This is called from INIT_EXPANDERS once before RTL is emitted for each
24296 static struct machine_function
*
24297 ix86_init_machine_status (void)
24299 struct machine_function
*f
;
24301 f
= ggc_alloc_cleared_machine_function ();
24302 f
->use_fast_prologue_epilogue_nregs
= -1;
24303 f
->call_abi
= ix86_abi
;
24308 /* Return a MEM corresponding to a stack slot with mode MODE.
24309 Allocate a new slot if necessary.
24311 The RTL for a function can have several slots available: N is
24312 which slot to use. */
24315 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24317 struct stack_local_entry
*s
;
24319 gcc_assert (n
< MAX_386_STACK_LOCALS
);
24321 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24322 if (s
->mode
== mode
&& s
->n
== n
)
24323 return validize_mem (copy_rtx (s
->rtl
));
24325 s
= ggc_alloc_stack_local_entry ();
24328 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24330 s
->next
= ix86_stack_locals
;
24331 ix86_stack_locals
= s
;
24332 return validize_mem (s
->rtl
);
24336 ix86_instantiate_decls (void)
24338 struct stack_local_entry
*s
;
24340 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24341 if (s
->rtl
!= NULL_RTX
)
24342 instantiate_decl_rtl (s
->rtl
);
24345 /* Calculate the length of the memory address in the instruction encoding.
24346 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24347 or other prefixes. We never generate addr32 prefix for LEA insn. */
24350 memory_address_length (rtx addr
, bool lea
)
24352 struct ix86_address parts
;
24353 rtx base
, index
, disp
;
24357 if (GET_CODE (addr
) == PRE_DEC
24358 || GET_CODE (addr
) == POST_INC
24359 || GET_CODE (addr
) == PRE_MODIFY
24360 || GET_CODE (addr
) == POST_MODIFY
)
24363 ok
= ix86_decompose_address (addr
, &parts
);
24366 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24368 /* If this is not LEA instruction, add the length of addr32 prefix. */
24369 if (TARGET_64BIT
&& !lea
24370 && (SImode_address_operand (addr
, VOIDmode
)
24371 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24372 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24376 index
= parts
.index
;
24379 if (base
&& GET_CODE (base
) == SUBREG
)
24380 base
= SUBREG_REG (base
);
24381 if (index
&& GET_CODE (index
) == SUBREG
)
24382 index
= SUBREG_REG (index
);
24384 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24385 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24388 - esp as the base always wants an index,
24389 - ebp as the base always wants a displacement,
24390 - r12 as the base always wants an index,
24391 - r13 as the base always wants a displacement. */
24393 /* Register Indirect. */
24394 if (base
&& !index
&& !disp
)
24396 /* esp (for its index) and ebp (for its displacement) need
24397 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24399 if (base
== arg_pointer_rtx
24400 || base
== frame_pointer_rtx
24401 || REGNO (base
) == SP_REG
24402 || REGNO (base
) == BP_REG
24403 || REGNO (base
) == R12_REG
24404 || REGNO (base
) == R13_REG
)
24408 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24409 is not disp32, but disp32(%rip), so for disp32
24410 SIB byte is needed, unless print_operand_address
24411 optimizes it into disp32(%rip) or (%rip) is implied
24413 else if (disp
&& !base
&& !index
)
24420 if (GET_CODE (disp
) == CONST
)
24421 symbol
= XEXP (disp
, 0);
24422 if (GET_CODE (symbol
) == PLUS
24423 && CONST_INT_P (XEXP (symbol
, 1)))
24424 symbol
= XEXP (symbol
, 0);
24426 if (GET_CODE (symbol
) != LABEL_REF
24427 && (GET_CODE (symbol
) != SYMBOL_REF
24428 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
24429 && (GET_CODE (symbol
) != UNSPEC
24430 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
24431 && XINT (symbol
, 1) != UNSPEC_PCREL
24432 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
24438 /* Find the length of the displacement constant. */
24441 if (base
&& satisfies_constraint_K (disp
))
24446 /* ebp always wants a displacement. Similarly r13. */
24447 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24450 /* An index requires the two-byte modrm form.... */
24452 /* ...like esp (or r12), which always wants an index. */
24453 || base
== arg_pointer_rtx
24454 || base
== frame_pointer_rtx
24455 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24462 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24463 is set, expect that insn have 8bit immediate alternative. */
24465 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24469 extract_insn_cached (insn
);
24470 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24471 if (CONSTANT_P (recog_data
.operand
[i
]))
24473 enum attr_mode mode
= get_attr_mode (insn
);
24476 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24478 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
24485 ival
= trunc_int_for_mode (ival
, HImode
);
24488 ival
= trunc_int_for_mode (ival
, SImode
);
24493 if (IN_RANGE (ival
, -128, 127))
24510 /* Immediates for DImode instructions are encoded
24511 as 32bit sign extended values. */
24516 fatal_insn ("unknown insn mode", insn
);
24522 /* Compute default value for "length_address" attribute. */
24524 ix86_attr_length_address_default (rtx insn
)
24528 if (get_attr_type (insn
) == TYPE_LEA
)
24530 rtx set
= PATTERN (insn
), addr
;
24532 if (GET_CODE (set
) == PARALLEL
)
24533 set
= XVECEXP (set
, 0, 0);
24535 gcc_assert (GET_CODE (set
) == SET
);
24537 addr
= SET_SRC (set
);
24539 return memory_address_length (addr
, true);
24542 extract_insn_cached (insn
);
24543 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24544 if (MEM_P (recog_data
.operand
[i
]))
24546 constrain_operands_cached (reload_completed
);
24547 if (which_alternative
!= -1)
24549 const char *constraints
= recog_data
.constraints
[i
];
24550 int alt
= which_alternative
;
24552 while (*constraints
== '=' || *constraints
== '+')
24555 while (*constraints
++ != ',')
24557 /* Skip ignored operands. */
24558 if (*constraints
== 'X')
24561 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24566 /* Compute default value for "length_vex" attribute. It includes
24567 2 or 3 byte VEX prefix and 1 opcode byte. */
24570 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24574 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24575 byte VEX prefix. */
24576 if (!has_0f_opcode
|| has_vex_w
)
24579 /* We can always use 2 byte VEX prefix in 32bit. */
24583 extract_insn_cached (insn
);
24585 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24586 if (REG_P (recog_data
.operand
[i
]))
24588 /* REX.W bit uses 3 byte VEX prefix. */
24589 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24590 && GENERAL_REG_P (recog_data
.operand
[i
]))
24595 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24596 if (MEM_P (recog_data
.operand
[i
])
24597 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24604 /* Return the maximum number of instructions a cpu can issue. */
24607 ix86_issue_rate (void)
24611 case PROCESSOR_PENTIUM
:
24612 case PROCESSOR_ATOM
:
24613 case PROCESSOR_SLM
:
24615 case PROCESSOR_BTVER2
:
24618 case PROCESSOR_PENTIUMPRO
:
24619 case PROCESSOR_PENTIUM4
:
24620 case PROCESSOR_CORE2
:
24621 case PROCESSOR_COREI7
:
24622 case PROCESSOR_HASWELL
:
24623 case PROCESSOR_ATHLON
:
24625 case PROCESSOR_AMDFAM10
:
24626 case PROCESSOR_NOCONA
:
24627 case PROCESSOR_GENERIC32
:
24628 case PROCESSOR_GENERIC64
:
24629 case PROCESSOR_BDVER1
:
24630 case PROCESSOR_BDVER2
:
24631 case PROCESSOR_BDVER3
:
24632 case PROCESSOR_BTVER1
:
24640 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24641 by DEP_INSN and nothing set by DEP_INSN. */
24644 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24648 /* Simplify the test for uninteresting insns. */
24649 if (insn_type
!= TYPE_SETCC
24650 && insn_type
!= TYPE_ICMOV
24651 && insn_type
!= TYPE_FCMOV
24652 && insn_type
!= TYPE_IBR
)
24655 if ((set
= single_set (dep_insn
)) != 0)
24657 set
= SET_DEST (set
);
24660 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24661 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24662 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24663 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24665 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24666 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24671 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24674 /* This test is true if the dependent insn reads the flags but
24675 not any other potentially set register. */
24676 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24679 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24685 /* Return true iff USE_INSN has a memory address with operands set by
24689 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24692 extract_insn_cached (use_insn
);
24693 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24694 if (MEM_P (recog_data
.operand
[i
]))
24696 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24697 return modified_in_p (addr
, set_insn
) != 0;
24702 /* Helper function for exact_store_load_dependency.
24703 Return true if addr is found in insn. */
24705 exact_dependency_1 (rtx addr
, rtx insn
)
24707 enum rtx_code code
;
24708 const char *format_ptr
;
24711 code
= GET_CODE (insn
);
24715 if (rtx_equal_p (addr
, insn
))
24730 format_ptr
= GET_RTX_FORMAT (code
);
24731 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
24733 switch (*format_ptr
++)
24736 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
24740 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
24741 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
24749 /* Return true if there exists exact dependency for store & load, i.e.
24750 the same memory address is used in them. */
24752 exact_store_load_dependency (rtx store
, rtx load
)
24756 set1
= single_set (store
);
24759 if (!MEM_P (SET_DEST (set1
)))
24761 set2
= single_set (load
);
24764 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
24770 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24772 enum attr_type insn_type
, dep_insn_type
;
24773 enum attr_memory memory
;
24775 int dep_insn_code_number
;
24777 /* Anti and output dependencies have zero cost on all CPUs. */
24778 if (REG_NOTE_KIND (link
) != 0)
24781 dep_insn_code_number
= recog_memoized (dep_insn
);
24783 /* If we can't recognize the insns, we can't really do anything. */
24784 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24787 insn_type
= get_attr_type (insn
);
24788 dep_insn_type
= get_attr_type (dep_insn
);
24792 case PROCESSOR_PENTIUM
:
24793 /* Address Generation Interlock adds a cycle of latency. */
24794 if (insn_type
== TYPE_LEA
)
24796 rtx addr
= PATTERN (insn
);
24798 if (GET_CODE (addr
) == PARALLEL
)
24799 addr
= XVECEXP (addr
, 0, 0);
24801 gcc_assert (GET_CODE (addr
) == SET
);
24803 addr
= SET_SRC (addr
);
24804 if (modified_in_p (addr
, dep_insn
))
24807 else if (ix86_agi_dependent (dep_insn
, insn
))
24810 /* ??? Compares pair with jump/setcc. */
24811 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24814 /* Floating point stores require value to be ready one cycle earlier. */
24815 if (insn_type
== TYPE_FMOV
24816 && get_attr_memory (insn
) == MEMORY_STORE
24817 && !ix86_agi_dependent (dep_insn
, insn
))
24821 case PROCESSOR_PENTIUMPRO
:
24822 memory
= get_attr_memory (insn
);
24824 /* INT->FP conversion is expensive. */
24825 if (get_attr_fp_int_src (dep_insn
))
24828 /* There is one cycle extra latency between an FP op and a store. */
24829 if (insn_type
== TYPE_FMOV
24830 && (set
= single_set (dep_insn
)) != NULL_RTX
24831 && (set2
= single_set (insn
)) != NULL_RTX
24832 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24833 && MEM_P (SET_DEST (set2
)))
24836 /* Show ability of reorder buffer to hide latency of load by executing
24837 in parallel with previous instruction in case
24838 previous instruction is not needed to compute the address. */
24839 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24840 && !ix86_agi_dependent (dep_insn
, insn
))
24842 /* Claim moves to take one cycle, as core can issue one load
24843 at time and the next load can start cycle later. */
24844 if (dep_insn_type
== TYPE_IMOV
24845 || dep_insn_type
== TYPE_FMOV
)
24853 memory
= get_attr_memory (insn
);
24855 /* The esp dependency is resolved before the instruction is really
24857 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24858 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24861 /* INT->FP conversion is expensive. */
24862 if (get_attr_fp_int_src (dep_insn
))
24865 /* Show ability of reorder buffer to hide latency of load by executing
24866 in parallel with previous instruction in case
24867 previous instruction is not needed to compute the address. */
24868 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24869 && !ix86_agi_dependent (dep_insn
, insn
))
24871 /* Claim moves to take one cycle, as core can issue one load
24872 at time and the next load can start cycle later. */
24873 if (dep_insn_type
== TYPE_IMOV
24874 || dep_insn_type
== TYPE_FMOV
)
24883 case PROCESSOR_ATHLON
:
24885 case PROCESSOR_AMDFAM10
:
24886 case PROCESSOR_BDVER1
:
24887 case PROCESSOR_BDVER2
:
24888 case PROCESSOR_BDVER3
:
24889 case PROCESSOR_BTVER1
:
24890 case PROCESSOR_BTVER2
:
24891 case PROCESSOR_ATOM
:
24892 case PROCESSOR_GENERIC32
:
24893 case PROCESSOR_GENERIC64
:
24894 memory
= get_attr_memory (insn
);
24896 /* Show ability of reorder buffer to hide latency of load by executing
24897 in parallel with previous instruction in case
24898 previous instruction is not needed to compute the address. */
24899 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24900 && !ix86_agi_dependent (dep_insn
, insn
))
24902 enum attr_unit unit
= get_attr_unit (insn
);
24905 /* Because of the difference between the length of integer and
24906 floating unit pipeline preparation stages, the memory operands
24907 for floating point are cheaper.
24909 ??? For Athlon it the difference is most probably 2. */
24910 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24913 loadcost
= TARGET_ATHLON
? 2 : 0;
24915 if (cost
>= loadcost
)
24922 case PROCESSOR_SLM
:
24923 if (!reload_completed
)
24926 /* Increase cost of integer loads. */
24927 memory
= get_attr_memory (dep_insn
);
24928 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24930 enum attr_unit unit
= get_attr_unit (dep_insn
);
24931 if (unit
== UNIT_INTEGER
&& cost
== 1)
24933 if (memory
== MEMORY_LOAD
)
24937 /* Increase cost of ld/st for short int types only
24938 because of store forwarding issue. */
24939 rtx set
= single_set (dep_insn
);
24940 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
24941 || GET_MODE (SET_DEST (set
)) == HImode
))
24943 /* Increase cost of store/load insn if exact
24944 dependence exists and it is load insn. */
24945 enum attr_memory insn_memory
= get_attr_memory (insn
);
24946 if (insn_memory
== MEMORY_LOAD
24947 && exact_store_load_dependency (dep_insn
, insn
))
24961 /* How many alternative schedules to try. This should be as wide as the
24962 scheduling freedom in the DFA, but no wider. Making this value too
24963 large results extra work for the scheduler. */
24966 ia32_multipass_dfa_lookahead (void)
24970 case PROCESSOR_PENTIUM
:
24973 case PROCESSOR_PENTIUMPRO
:
24977 case PROCESSOR_CORE2
:
24978 case PROCESSOR_COREI7
:
24979 case PROCESSOR_HASWELL
:
24980 case PROCESSOR_ATOM
:
24981 case PROCESSOR_SLM
:
24982 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24983 as many instructions can be executed on a cycle, i.e.,
24984 issue_rate. I wonder why tuning for many CPUs does not do this. */
24985 if (reload_completed
)
24986 return ix86_issue_rate ();
24987 /* Don't use lookahead for pre-reload schedule to save compile time. */
24995 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24996 execution. It is applied if
24997 (1) IMUL instruction is on the top of list;
24998 (2) There exists the only producer of independent IMUL instruction in
25000 Return index of IMUL producer if it was found and -1 otherwise. */
25002 do_reorder_for_imul (rtx
*ready
, int n_ready
)
25004 rtx insn
, set
, insn1
, insn2
;
25005 sd_iterator_def sd_it
;
25010 if (ix86_tune
!= PROCESSOR_ATOM
)
25013 /* Check that IMUL instruction is on the top of ready list. */
25014 insn
= ready
[n_ready
- 1];
25015 set
= single_set (insn
);
25018 if (!(GET_CODE (SET_SRC (set
)) == MULT
25019 && GET_MODE (SET_SRC (set
)) == SImode
))
25022 /* Search for producer of independent IMUL instruction. */
25023 for (i
= n_ready
- 2; i
>= 0; i
--)
25026 if (!NONDEBUG_INSN_P (insn
))
25028 /* Skip IMUL instruction. */
25029 insn2
= PATTERN (insn
);
25030 if (GET_CODE (insn2
) == PARALLEL
)
25031 insn2
= XVECEXP (insn2
, 0, 0);
25032 if (GET_CODE (insn2
) == SET
25033 && GET_CODE (SET_SRC (insn2
)) == MULT
25034 && GET_MODE (SET_SRC (insn2
)) == SImode
)
25037 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
25040 con
= DEP_CON (dep
);
25041 if (!NONDEBUG_INSN_P (con
))
25043 insn1
= PATTERN (con
);
25044 if (GET_CODE (insn1
) == PARALLEL
)
25045 insn1
= XVECEXP (insn1
, 0, 0);
25047 if (GET_CODE (insn1
) == SET
25048 && GET_CODE (SET_SRC (insn1
)) == MULT
25049 && GET_MODE (SET_SRC (insn1
)) == SImode
)
25051 sd_iterator_def sd_it1
;
25053 /* Check if there is no other dependee for IMUL. */
25055 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
25058 pro
= DEP_PRO (dep1
);
25059 if (!NONDEBUG_INSN_P (pro
))
25074 /* Try to find the best candidate on the top of ready list if two insns
25075 have the same priority - candidate is best if its dependees were
25076 scheduled earlier. Applied for Silvermont only.
25077 Return true if top 2 insns must be interchanged. */
25079 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
25081 rtx top
= ready
[n_ready
- 1];
25082 rtx next
= ready
[n_ready
- 2];
25084 sd_iterator_def sd_it
;
25088 #define INSN_TICK(INSN) (HID (INSN)->tick)
25090 if (ix86_tune
!= PROCESSOR_SLM
)
25093 if (!NONDEBUG_INSN_P (top
))
25095 if (!NONJUMP_INSN_P (top
))
25097 if (!NONDEBUG_INSN_P (next
))
25099 if (!NONJUMP_INSN_P (next
))
25101 set
= single_set (top
);
25104 set
= single_set (next
);
25108 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
25110 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
25112 /* Determine winner more precise. */
25113 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
25116 pro
= DEP_PRO (dep
);
25117 if (!NONDEBUG_INSN_P (pro
))
25119 if (INSN_TICK (pro
) > clock1
)
25120 clock1
= INSN_TICK (pro
);
25122 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
25125 pro
= DEP_PRO (dep
);
25126 if (!NONDEBUG_INSN_P (pro
))
25128 if (INSN_TICK (pro
) > clock2
)
25129 clock2
= INSN_TICK (pro
);
25132 if (clock1
== clock2
)
25134 /* Determine winner - load must win. */
25135 enum attr_memory memory1
, memory2
;
25136 memory1
= get_attr_memory (top
);
25137 memory2
= get_attr_memory (next
);
25138 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
25141 return (bool) (clock2
< clock1
);
25147 /* Perform possible reordering of ready list for Atom/Silvermont only.
25148    Return issue rate. */
25150 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
25153 int issue_rate
= -1;
25154 int n_ready
= *pn_ready
;
25159 /* Set up issue rate. */
25160 issue_rate
= ix86_issue_rate ();
25162 /* Do reordering for Atom/SLM only. */
25163 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
25166 /* Nothing to do if ready list contains only 1 instruction. */
25170 /* Do reordering for post-reload scheduler only. */
25171 if (!reload_completed
)
25174 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
25176 if (sched_verbose
> 1)
25177 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
25178 INSN_UID (ready
[index
]));
25180 /* Put IMUL producer (ready[index]) at the top of ready list. */
25181 insn
= ready
[index
];
25182 for (i
= index
; i
< n_ready
- 1; i
++)
25183 ready
[i
] = ready
[i
+ 1];
25184 ready
[n_ready
- 1] = insn
;
25187 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
25189 if (sched_verbose
> 1)
25190 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
25191 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
25192 /* Swap 2 top elements of ready list. */
25193 insn
= ready
[n_ready
- 1];
25194 ready
[n_ready
- 1] = ready
[n_ready
- 2];
25195 ready
[n_ready
- 2] = insn
;
25201 ix86_class_likely_spilled_p (reg_class_t
);
25203 /* Returns true if lhs of insn is HW function argument register and set up
25204 is_spilled to true if it is likely spilled HW register. */
25206 insn_is_function_arg (rtx insn
, bool* is_spilled
)
25210 if (!NONDEBUG_INSN_P (insn
))
25212 /* Call instructions are not movable, ignore it. */
25215 insn
= PATTERN (insn
);
25216 if (GET_CODE (insn
) == PARALLEL
)
25217 insn
= XVECEXP (insn
, 0, 0);
25218 if (GET_CODE (insn
) != SET
)
25220 dst
= SET_DEST (insn
);
25221 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
25222 && ix86_function_arg_regno_p (REGNO (dst
)))
25224 /* Is it likely spilled HW register? */
25225 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
25226 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
25227 *is_spilled
= true;
25233 /* Add output dependencies for chain of function adjacent arguments if only
25234 there is a move to likely spilled HW register. Return first argument
25235 if at least one dependence was added or NULL otherwise. */
25237 add_parameter_dependencies (rtx call
, rtx head
)
25241 rtx first_arg
= NULL
;
25242 bool is_spilled
= false;
25244 head
= PREV_INSN (head
);
25246 /* Find nearest to call argument passing instruction. */
25249 last
= PREV_INSN (last
);
25252 if (!NONDEBUG_INSN_P (last
))
25254 if (insn_is_function_arg (last
, &is_spilled
))
25262 insn
= PREV_INSN (last
);
25263 if (!INSN_P (insn
))
25267 if (!NONDEBUG_INSN_P (insn
))
25272 if (insn_is_function_arg (insn
, &is_spilled
))
25274 /* Add output dependence between two function arguments if chain
25275 of output arguments contains likely spilled HW registers. */
25277 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25278 first_arg
= last
= insn
;
25288 /* Add output or anti dependency from insn to first_arg to restrict its code
25291 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25296 set
= single_set (insn
);
25299 tmp
= SET_DEST (set
);
25302 /* Add output dependency to the first function argument. */
25303 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25306 /* Add anti dependency. */
25307 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25310 /* Avoid cross block motion of function argument through adding dependency
25311 from the first non-jump instruction in bb. */
25313 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25315 rtx insn
= BB_END (bb
);
25319 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25321 rtx set
= single_set (insn
);
25324 avoid_func_arg_motion (arg
, insn
);
25328 if (insn
== BB_HEAD (bb
))
25330 insn
= PREV_INSN (insn
);
25334 /* Hook for pre-reload schedule - avoid motion of function arguments
25335 passed in likely spilled HW registers. */
25337 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25340 rtx first_arg
= NULL
;
25341 if (reload_completed
)
25343 while (head
!= tail
&& DEBUG_INSN_P (head
))
25344 head
= NEXT_INSN (head
);
25345 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25346 if (INSN_P (insn
) && CALL_P (insn
))
25348 first_arg
= add_parameter_dependencies (insn
, head
);
25351 /* Add dependee for first argument to predecessors if only
25352 region contains more than one block. */
25353 basic_block bb
= BLOCK_FOR_INSN (insn
);
25354 int rgn
= CONTAINING_RGN (bb
->index
);
25355 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25356 /* Skip trivial regions and region head blocks that can have
25357 predecessors outside of region. */
25358 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25362 /* Assume that region is SCC, i.e. all immediate predecessors
25363 of non-head block are in the same region. */
25364 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25366 /* Avoid creating of loop-carried dependencies through
25367 using topological odering in region. */
25368 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25369 add_dependee_for_func_arg (first_arg
, e
->src
);
25377 else if (first_arg
)
25378 avoid_func_arg_motion (first_arg
, insn
);
25381 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25382 HW registers to maximum, to schedule them at soon as possible. These are
25383 moves from function argument registers at the top of the function entry
25384 and moves from function return value registers after call. */
25386 ix86_adjust_priority (rtx insn
, int priority
)
25390 if (reload_completed
)
25393 if (!NONDEBUG_INSN_P (insn
))
25396 set
= single_set (insn
);
25399 rtx tmp
= SET_SRC (set
);
25401 && HARD_REGISTER_P (tmp
)
25402 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25403 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25404 return current_sched_info
->sched_max_insns_priority
;
25410 /* Model decoder of Core 2/i7.
25411 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25412 track the instruction fetch block boundaries and make sure that long
25413 (9+ bytes) instructions are assigned to D0. */
25415 /* Maximum length of an insn that can be handled by
25416 a secondary decoder unit. '8' for Core 2/i7. */
25417 static int core2i7_secondary_decoder_max_insn_size
;
25419 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25420 '16' for Core 2/i7. */
25421 static int core2i7_ifetch_block_size
;
25423 /* Maximum number of instructions decoder can handle per cycle.
25424 '6' for Core 2/i7. */
25425 static int core2i7_ifetch_block_max_insns
;
25427 typedef struct ix86_first_cycle_multipass_data_
*
25428 ix86_first_cycle_multipass_data_t
;
25429 typedef const struct ix86_first_cycle_multipass_data_
*
25430 const_ix86_first_cycle_multipass_data_t
;
25432 /* A variable to store target state across calls to max_issue within
25434 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25435 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25437 /* Initialize DATA. */
25439 core2i7_first_cycle_multipass_init (void *_data
)
25441 ix86_first_cycle_multipass_data_t data
25442 = (ix86_first_cycle_multipass_data_t
) _data
;
25444 data
->ifetch_block_len
= 0;
25445 data
->ifetch_block_n_insns
= 0;
25446 data
->ready_try_change
= NULL
;
25447 data
->ready_try_change_size
= 0;
25450 /* Advancing the cycle; reset ifetch block counts. */
25452 core2i7_dfa_post_advance_cycle (void)
25454 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
25456 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25458 data
->ifetch_block_len
= 0;
25459 data
->ifetch_block_n_insns
= 0;
25462 static int min_insn_size (rtx
);
25464 /* Filter out insns from ready_try that the core will not be able to issue
25465 on current cycle due to decoder. */
25467 core2i7_first_cycle_multipass_filter_ready_try
25468 (const_ix86_first_cycle_multipass_data_t data
,
25469 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
25476 if (ready_try
[n_ready
])
25479 insn
= get_ready_element (n_ready
);
25480 insn_size
= min_insn_size (insn
);
25482 if (/* If this is a too long an insn for a secondary decoder ... */
25483 (!first_cycle_insn_p
25484 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
25485 /* ... or it would not fit into the ifetch block ... */
25486 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
25487 /* ... or the decoder is full already ... */
25488 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
25489 /* ... mask the insn out. */
25491 ready_try
[n_ready
] = 1;
25493 if (data
->ready_try_change
)
25494 bitmap_set_bit (data
->ready_try_change
, n_ready
);
25499 /* Prepare for a new round of multipass lookahead scheduling. */
25501 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
25502 bool first_cycle_insn_p
)
25504 ix86_first_cycle_multipass_data_t data
25505 = (ix86_first_cycle_multipass_data_t
) _data
;
25506 const_ix86_first_cycle_multipass_data_t prev_data
25507 = ix86_first_cycle_multipass_data
;
25509 /* Restore the state from the end of the previous round. */
25510 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
25511 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
25513 /* Filter instructions that cannot be issued on current cycle due to
25514 decoder restrictions. */
25515 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25516 first_cycle_insn_p
);
25519 /* INSN is being issued in current solution. Account for its impact on
25520 the decoder model. */
25522 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
25523 rtx insn
, const void *_prev_data
)
25525 ix86_first_cycle_multipass_data_t data
25526 = (ix86_first_cycle_multipass_data_t
) _data
;
25527 const_ix86_first_cycle_multipass_data_t prev_data
25528 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
25530 int insn_size
= min_insn_size (insn
);
25532 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
25533 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
25534 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
25535 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
25537 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
25538 if (!data
->ready_try_change
)
25540 data
->ready_try_change
= sbitmap_alloc (n_ready
);
25541 data
->ready_try_change_size
= n_ready
;
25543 else if (data
->ready_try_change_size
< n_ready
)
25545 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
25547 data
->ready_try_change_size
= n_ready
;
25549 bitmap_clear (data
->ready_try_change
);
25551 /* Filter out insns from ready_try that the core will not be able to issue
25552 on current cycle due to decoder. */
25553 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
25557 /* Revert the effect on ready_try. */
25559 core2i7_first_cycle_multipass_backtrack (const void *_data
,
25561 int n_ready ATTRIBUTE_UNUSED
)
25563 const_ix86_first_cycle_multipass_data_t data
25564 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25565 unsigned int i
= 0;
25566 sbitmap_iterator sbi
;
25568 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
25569 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
25575 /* Save the result of multipass lookahead scheduling for the next round. */
25577 core2i7_first_cycle_multipass_end (const void *_data
)
25579 const_ix86_first_cycle_multipass_data_t data
25580 = (const_ix86_first_cycle_multipass_data_t
) _data
;
25581 ix86_first_cycle_multipass_data_t next_data
25582 = ix86_first_cycle_multipass_data
;
25586 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
25587 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
25591 /* Deallocate target data. */
25593 core2i7_first_cycle_multipass_fini (void *_data
)
25595 ix86_first_cycle_multipass_data_t data
25596 = (ix86_first_cycle_multipass_data_t
) _data
;
25598 if (data
->ready_try_change
)
25600 sbitmap_free (data
->ready_try_change
);
25601 data
->ready_try_change
= NULL
;
25602 data
->ready_try_change_size
= 0;
25606 /* Prepare for scheduling pass. */
25608 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
25609 int verbose ATTRIBUTE_UNUSED
,
25610 int max_uid ATTRIBUTE_UNUSED
)
25612 /* Install scheduling hooks for current CPU. Some of these hooks are used
25613 in time-critical parts of the scheduler, so we only set them up when
25614 they are actually used. */
25617 case PROCESSOR_CORE2
:
25618 case PROCESSOR_COREI7
:
25619 case PROCESSOR_HASWELL
:
25620 /* Do not perform multipass scheduling for pre-reload schedule
25621 to save compile time. */
25622 if (reload_completed
)
25624 targetm
.sched
.dfa_post_advance_cycle
25625 = core2i7_dfa_post_advance_cycle
;
25626 targetm
.sched
.first_cycle_multipass_init
25627 = core2i7_first_cycle_multipass_init
;
25628 targetm
.sched
.first_cycle_multipass_begin
25629 = core2i7_first_cycle_multipass_begin
;
25630 targetm
.sched
.first_cycle_multipass_issue
25631 = core2i7_first_cycle_multipass_issue
;
25632 targetm
.sched
.first_cycle_multipass_backtrack
25633 = core2i7_first_cycle_multipass_backtrack
;
25634 targetm
.sched
.first_cycle_multipass_end
25635 = core2i7_first_cycle_multipass_end
;
25636 targetm
.sched
.first_cycle_multipass_fini
25637 = core2i7_first_cycle_multipass_fini
;
25639 /* Set decoder parameters. */
25640 core2i7_secondary_decoder_max_insn_size
= 8;
25641 core2i7_ifetch_block_size
= 16;
25642 core2i7_ifetch_block_max_insns
= 6;
25645 /* ... Fall through ... */
25647 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
25648 targetm
.sched
.first_cycle_multipass_init
= NULL
;
25649 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
25650 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
25651 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
25652 targetm
.sched
.first_cycle_multipass_end
= NULL
;
25653 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
25659 /* Compute the alignment given to a constant that is being placed in memory.
25660 EXP is the constant and ALIGN is the alignment that the object would
25662 The value of this function is used instead of that alignment to align
25666 ix86_constant_alignment (tree exp
, int align
)
25668 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
25669 || TREE_CODE (exp
) == INTEGER_CST
)
25671 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
25673 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
25676 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
25677 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
25678 return BITS_PER_WORD
;
25683 /* Compute the alignment for a static variable.
25684 TYPE is the data type, and ALIGN is the alignment that
25685 the object would ordinarily have. The value of this function is used
25686 instead of that alignment to align the object. */
25689 ix86_data_alignment (tree type
, int align
, bool opt
)
25691 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
25694 && AGGREGATE_TYPE_P (type
)
25695 && TYPE_SIZE (type
)
25696 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25697 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
25698 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
25699 && align
< max_align
)
25702 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25703 to 16byte boundary. */
25706 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
25707 && TYPE_SIZE (type
)
25708 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25709 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
25710 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25717 if (TREE_CODE (type
) == ARRAY_TYPE
)
25719 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25721 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25724 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25727 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25729 if ((TYPE_MODE (type
) == XCmode
25730 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25733 else if ((TREE_CODE (type
) == RECORD_TYPE
25734 || TREE_CODE (type
) == UNION_TYPE
25735 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25736 && TYPE_FIELDS (type
))
25738 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25740 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25743 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25744 || TREE_CODE (type
) == INTEGER_TYPE
)
25746 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25748 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25755 /* Compute the alignment for a local variable or a stack slot. EXP is
25756 the data type or decl itself, MODE is the widest mode available and
25757 ALIGN is the alignment that the object would ordinarily have. The
25758 value of this macro is used instead of that alignment to align the
25762 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25763 unsigned int align
)
25767 if (exp
&& DECL_P (exp
))
25769 type
= TREE_TYPE (exp
);
25778 /* Don't do dynamic stack realignment for long long objects with
25779 -mpreferred-stack-boundary=2. */
25782 && ix86_preferred_stack_boundary
< 64
25783 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25784 && (!type
|| !TYPE_USER_ALIGN (type
))
25785 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25788 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25789 register in MODE. We will return the largest alignment of XF
25793 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25794 align
= GET_MODE_ALIGNMENT (DFmode
);
25798 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25799 to 16byte boundary. Exact wording is:
25801 An array uses the same alignment as its elements, except that a local or
25802 global array variable of length at least 16 bytes or
25803 a C99 variable-length array variable always has alignment of at least 16 bytes.
25805 This was added to allow use of aligned SSE instructions at arrays. This
25806 rule is meant for static storage (where compiler can not do the analysis
25807 by itself). We follow it for automatic variables only when convenient.
25808 We fully control everything in the function compiled and functions from
25809 other unit can not rely on the alignment.
25811 Exclude va_list type. It is the common case of local array where
25812 we can not benefit from the alignment.
25814 TODO: Probably one should optimize for size only when var is not escaping. */
25815 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25818 if (AGGREGATE_TYPE_P (type
)
25819 && (va_list_type_node
== NULL_TREE
25820 || (TYPE_MAIN_VARIANT (type
)
25821 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25822 && TYPE_SIZE (type
)
25823 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25824 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25825 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25828 if (TREE_CODE (type
) == ARRAY_TYPE
)
25830 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25832 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25835 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25837 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25839 if ((TYPE_MODE (type
) == XCmode
25840 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25843 else if ((TREE_CODE (type
) == RECORD_TYPE
25844 || TREE_CODE (type
) == UNION_TYPE
25845 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25846 && TYPE_FIELDS (type
))
25848 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25850 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25853 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25854 || TREE_CODE (type
) == INTEGER_TYPE
)
25857 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25859 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25865 /* Compute the minimum required alignment for dynamic stack realignment
25866 purposes for a local variable, parameter or a stack slot. EXP is
25867 the data type or decl itself, MODE is its mode and ALIGN is the
25868 alignment that the object would ordinarily have. */
25871 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25872 unsigned int align
)
25876 if (exp
&& DECL_P (exp
))
25878 type
= TREE_TYPE (exp
);
25887 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25890 /* Don't do dynamic stack realignment for long long objects with
25891 -mpreferred-stack-boundary=2. */
25892 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25893 && (!type
|| !TYPE_USER_ALIGN (type
))
25894 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25900 /* Find a location for the static chain incoming to a nested function.
25901 This is a register, unless all free registers are used by arguments. */
25904 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25908 if (!DECL_STATIC_CHAIN (fndecl
))
25913 /* We always use R10 in 64-bit mode. */
25921 /* By default in 32-bit mode we use ECX to pass the static chain. */
25924 fntype
= TREE_TYPE (fndecl
);
25925 ccvt
= ix86_get_callcvt (fntype
);
25926 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25928 /* Fastcall functions use ecx/edx for arguments, which leaves
25929 us with EAX for the static chain.
25930 Thiscall functions use ecx for arguments, which also
25931 leaves us with EAX for the static chain. */
25934 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25936 /* Thiscall functions use ecx for arguments, which leaves
25937 us with EAX and EDX for the static chain.
25938 We are using for abi-compatibility EAX. */
25941 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25943 /* For regparm 3, we have no free call-clobbered registers in
25944 which to store the static chain. In order to implement this,
25945 we have the trampoline push the static chain to the stack.
25946 However, we can't push a value below the return address when
25947 we call the nested function directly, so we have to use an
25948 alternate entry point. For this we use ESI, and have the
25949 alternate entry point push ESI, so that things appear the
25950 same once we're executing the nested function. */
25953 if (fndecl
== current_function_decl
)
25954 ix86_static_chain_on_stack
= true;
25955 return gen_frame_mem (SImode
,
25956 plus_constant (Pmode
,
25957 arg_pointer_rtx
, -8));
25963 return gen_rtx_REG (Pmode
, regno
);
/* NOTE(review): this block is a damaged extraction of ix86_trampoline_init
   from GCC's i386 back end.  Original line numbers are fused into the text
   and gaps in that numbering (e.g. 25970-25977, 25989, 26009-26011,
   26015-26024, 26049-26050, 26093-26095) show that declarations, braces,
   `if (TARGET_64BIT)` structure, `case` labels and the closing of the
   function are missing.  Text below is kept verbatim; comments are
   review notes only.  */
25966 /* Emit RTL insns to initialize the variable parts of a trampoline.
25967 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25968 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25969 to be passed to the target function. */
25972 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25978 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25984 /* Load the function address to r11. Try to load address using
25985 the shorter movl instead of movabs. We may want to support
25986 movq for kernel mode, but kernel does not use trampolines at
25987 the moment. FNADDR is a 32bit address and may not be in
25988 DImode when ptr_mode == SImode. Always use movl in this
25990 if (ptr_mode
== SImode
25991 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
/* Fall-back path: FNADDR is not an immediate we can encode, so force it
   into a register first.  (The `else` keyword for this branch appears to
   be among the missing lines.)  */
25993 fnaddr
= copy_addr_to_reg (fnaddr
);
/* 0xbb41 stored little-endian gives bytes 41 BB: REX.B prefix + opcode
   0xB8+3, i.e. `movl $imm32, %r11d` -- matches the comment above about
   using the shorter movl.  */
25995 mem
= adjust_address (m_tramp
, HImode
, offset
);
25996 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25998 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25999 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
/* 0xbb49 -> bytes 49 BB: REX.W+B prefix + 0xB8+3, i.e.
   `movabs $imm64, %r11` -- the long form used when the address does not
   fit a zero-extended 32-bit immediate.  */
26004 mem
= adjust_address (m_tramp
, HImode
, offset
);
26005 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
26007 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
26008 emit_move_insn (mem
, fnaddr
);
26012 /* Load static chain using movabs to r10. Use the shorter movl
26013 instead of movabs when ptr_mode == SImode. */
26014 if (ptr_mode
== SImode
)
/* NOTE(review): the lines choosing `opcode` for the r10 load (original
   26015-26024) are missing from this extraction.  */
26025 mem
= adjust_address (m_tramp
, HImode
, offset
);
26026 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
26028 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
26029 emit_move_insn (mem
, chain_value
);
26032 /* Jump to r11; the last (unused) byte is a nop, only there to
26033 pad the write out to a single 32-bit store. */
/* 0x90e3ff49 little-endian gives bytes 49 FF E3 90:
   `rex.WB jmp *%r11` followed by a `nop`, as the comment above says.  */
26034 mem
= adjust_address (m_tramp
, SImode
, offset
);
26035 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
26042 /* Depending on the static chain location, either load a register
26043 with a constant, or push the constant to the stack. All of the
26044 instructions are the same size. */
26045 chain
= ix86_static_chain (fndecl
, true);
/* The `case` labels of this switch are missing; 0xb8 is `movl $imm32,
   %eax` and 0xb9 is `movl $imm32, %ecx` -- presumably selected by which
   register holds the static chain.  TODO(review): confirm against the
   unmangled source.  */
26048 switch (REGNO (chain
))
26051 opcode
= 0xb8; break;
26053 opcode
= 0xb9; break;
26055 gcc_unreachable ();
26061 mem
= adjust_address (m_tramp
, QImode
, offset
);
26062 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
26064 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26065 emit_move_insn (mem
, chain_value
);
/* 0xe9 is `jmp rel32`; its 32-bit displacement is computed below.  */
26068 mem
= adjust_address (m_tramp
, QImode
, offset
);
26069 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
26071 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26073 /* Compute offset from the end of the jmp to the target function.
26074 In the case in which the trampoline stores the static chain on
26075 the stack, we need to skip the first insn which pushes the
26076 (call-saved) register static chain; this push is 1 byte. */
26078 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
26079 plus_constant (Pmode
, XEXP (m_tramp
, 0),
26080 offset
- (MEM_P (chain
) ? 1 : 0)),
26081 NULL_RTX
, 1, OPTAB_DIRECT
);
26082 emit_move_insn (mem
, disp
);
/* Sanity check: everything written must fit in the trampoline buffer.  */
26085 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
/* On targets whose stack is non-executable by default, call the libgcc
   helper to make the page holding the trampoline executable.  */
26087 #ifdef HAVE_ENABLE_EXECUTE_STACK
26088 #ifdef CHECK_EXECUTE_STACK_ENABLED
26089 if (CHECK_EXECUTE_STACK_ENABLED
)
26091 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
26092 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
26096 /* The following file contains several enumerations and data structures
26097 built from the definitions in i386-builtin-types.def. */
26099 #include "i386-builtin-types.inc"
26101 /* Table for the ix86 builtin non-function types. */
26102 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
/* NOTE(review): damaged extraction of ix86_get_builtin_type.  The return
   type line (`static tree`), braces, and the early-return when the table
   entry already exists (original lines ~26116-26118) are missing; text is
   kept verbatim with review comments only.  */
26104 /* Retrieve an element from the above table, building some of
26105 the types lazily. */
26108 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
26110 unsigned int index
;
/* Reject out-of-range type codes before indexing the table.  */
26113 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
/* Memoization: check whether the type has been built already.  */
26115 type
= ix86_builtin_type_tab
[(int) tcode
];
/* Primitive types (<= IX86_BT_LAST_PRIM) are pre-seeded elsewhere, so a
   lazy build is only expected for vector and pointer codes.  */
26119 gcc_assert (tcode
> IX86_BT_LAST_PRIM
)
;
26120 if (tcode
<= IX86_BT_LAST_VECT
)
/* Vector case: recover the element type and machine mode from the
   companion tables, then build the vector type.  */
26122 enum machine_mode mode
;
26124 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
26125 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
26126 mode
= ix86_builtin_type_vect_mode
[index
];
26128 type
= build_vector_type_for_mode (itype
, mode
);
/* Pointer case: codes up to IX86_BT_LAST_PTR are plain pointers, codes
   beyond that are pointers-to-const.  */
26134 index
= tcode
- IX86_BT_LAST_VECT
- 1;
26135 if (tcode
<= IX86_BT_LAST_PTR
)
26136 quals
= TYPE_UNQUALIFIED
;
26138 quals
= TYPE_QUAL_CONST
;
26140 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
26141 if (quals
!= TYPE_UNQUALIFIED
)
26142 itype
= build_qualified_type (itype
, quals
);
26144 type
= build_pointer_type (itype
);
/* Cache the freshly built type for subsequent lookups.  */
26147 ix86_builtin_type_tab
[(int) tcode
] = type
;
/* NOTE(review): damaged extraction of ix86_get_builtin_func_type and the
   table it memoizes into.  The return type line, braces, the early return
   for an already-built entry, and the `else` introducing the alias branch
   are missing; text is kept verbatim with review comments only.  */
26151 /* Table for the ix86 builtin function types. */
26152 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
26154 /* Retrieve an element from the above table, building some of
26155 the types lazily. */
26158 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
/* Reject out-of-range function-type codes before indexing the table.  */
26162 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
/* Memoization: reuse a previously built function type if present.  */
26164 type
= ix86_builtin_func_type_tab
[(int) tcode
];
26168 if (tcode
<= IX86_BT_LAST_FUNC
)
/* Direct case: ix86_builtin_func_start gives [start, after) bounds of
   this signature's slice of ix86_builtin_func_args; entry `start` is the
   return type, the rest are arguments.  */
26170 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
26171 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
26172 tree rtype
, atype
, args
= void_list_node
;
26175 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
/* Walk the argument codes backwards so the tree_cons list comes out in
   declaration order (terminated by void_list_node).  */
26176 for (i
= after
- 1; i
> start
; --i
)
26178 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
26179 args
= tree_cons (NULL
, atype
, args
);
26182 type
= build_function_type (rtype
, args
);
/* Alias case: codes past IX86_BT_LAST_FUNC map onto another function
   type via ix86_builtin_func_alias_base; build that one recursively.  */
26186 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
26187 enum ix86_builtin_func_type icode
;
26189 icode
= ix86_builtin_func_alias_base
[index
];
26190 type
= ix86_get_builtin_func_type (icode
);
/* Cache the freshly built function type for subsequent lookups.  */
26193 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
26198 /* Codes for all the SSE/MMX builtins. */
26201 IX86_BUILTIN_ADDPS
,
26202 IX86_BUILTIN_ADDSS
,
26203 IX86_BUILTIN_DIVPS
,
26204 IX86_BUILTIN_DIVSS
,
26205 IX86_BUILTIN_MULPS
,
26206 IX86_BUILTIN_MULSS
,
26207 IX86_BUILTIN_SUBPS
,
26208 IX86_BUILTIN_SUBSS
,
26210 IX86_BUILTIN_CMPEQPS
,
26211 IX86_BUILTIN_CMPLTPS
,
26212 IX86_BUILTIN_CMPLEPS
,
26213 IX86_BUILTIN_CMPGTPS
,
26214 IX86_BUILTIN_CMPGEPS
,
26215 IX86_BUILTIN_CMPNEQPS
,
26216 IX86_BUILTIN_CMPNLTPS
,
26217 IX86_BUILTIN_CMPNLEPS
,
26218 IX86_BUILTIN_CMPNGTPS
,
26219 IX86_BUILTIN_CMPNGEPS
,
26220 IX86_BUILTIN_CMPORDPS
,
26221 IX86_BUILTIN_CMPUNORDPS
,
26222 IX86_BUILTIN_CMPEQSS
,
26223 IX86_BUILTIN_CMPLTSS
,
26224 IX86_BUILTIN_CMPLESS
,
26225 IX86_BUILTIN_CMPNEQSS
,
26226 IX86_BUILTIN_CMPNLTSS
,
26227 IX86_BUILTIN_CMPNLESS
,
26228 IX86_BUILTIN_CMPORDSS
,
26229 IX86_BUILTIN_CMPUNORDSS
,
26231 IX86_BUILTIN_COMIEQSS
,
26232 IX86_BUILTIN_COMILTSS
,
26233 IX86_BUILTIN_COMILESS
,
26234 IX86_BUILTIN_COMIGTSS
,
26235 IX86_BUILTIN_COMIGESS
,
26236 IX86_BUILTIN_COMINEQSS
,
26237 IX86_BUILTIN_UCOMIEQSS
,
26238 IX86_BUILTIN_UCOMILTSS
,
26239 IX86_BUILTIN_UCOMILESS
,
26240 IX86_BUILTIN_UCOMIGTSS
,
26241 IX86_BUILTIN_UCOMIGESS
,
26242 IX86_BUILTIN_UCOMINEQSS
,
26244 IX86_BUILTIN_CVTPI2PS
,
26245 IX86_BUILTIN_CVTPS2PI
,
26246 IX86_BUILTIN_CVTSI2SS
,
26247 IX86_BUILTIN_CVTSI642SS
,
26248 IX86_BUILTIN_CVTSS2SI
,
26249 IX86_BUILTIN_CVTSS2SI64
,
26250 IX86_BUILTIN_CVTTPS2PI
,
26251 IX86_BUILTIN_CVTTSS2SI
,
26252 IX86_BUILTIN_CVTTSS2SI64
,
26254 IX86_BUILTIN_MAXPS
,
26255 IX86_BUILTIN_MAXSS
,
26256 IX86_BUILTIN_MINPS
,
26257 IX86_BUILTIN_MINSS
,
26259 IX86_BUILTIN_LOADUPS
,
26260 IX86_BUILTIN_STOREUPS
,
26261 IX86_BUILTIN_MOVSS
,
26263 IX86_BUILTIN_MOVHLPS
,
26264 IX86_BUILTIN_MOVLHPS
,
26265 IX86_BUILTIN_LOADHPS
,
26266 IX86_BUILTIN_LOADLPS
,
26267 IX86_BUILTIN_STOREHPS
,
26268 IX86_BUILTIN_STORELPS
,
26270 IX86_BUILTIN_MASKMOVQ
,
26271 IX86_BUILTIN_MOVMSKPS
,
26272 IX86_BUILTIN_PMOVMSKB
,
26274 IX86_BUILTIN_MOVNTPS
,
26275 IX86_BUILTIN_MOVNTQ
,
26277 IX86_BUILTIN_LOADDQU
,
26278 IX86_BUILTIN_STOREDQU
,
26280 IX86_BUILTIN_PACKSSWB
,
26281 IX86_BUILTIN_PACKSSDW
,
26282 IX86_BUILTIN_PACKUSWB
,
26284 IX86_BUILTIN_PADDB
,
26285 IX86_BUILTIN_PADDW
,
26286 IX86_BUILTIN_PADDD
,
26287 IX86_BUILTIN_PADDQ
,
26288 IX86_BUILTIN_PADDSB
,
26289 IX86_BUILTIN_PADDSW
,
26290 IX86_BUILTIN_PADDUSB
,
26291 IX86_BUILTIN_PADDUSW
,
26292 IX86_BUILTIN_PSUBB
,
26293 IX86_BUILTIN_PSUBW
,
26294 IX86_BUILTIN_PSUBD
,
26295 IX86_BUILTIN_PSUBQ
,
26296 IX86_BUILTIN_PSUBSB
,
26297 IX86_BUILTIN_PSUBSW
,
26298 IX86_BUILTIN_PSUBUSB
,
26299 IX86_BUILTIN_PSUBUSW
,
26302 IX86_BUILTIN_PANDN
,
26306 IX86_BUILTIN_PAVGB
,
26307 IX86_BUILTIN_PAVGW
,
26309 IX86_BUILTIN_PCMPEQB
,
26310 IX86_BUILTIN_PCMPEQW
,
26311 IX86_BUILTIN_PCMPEQD
,
26312 IX86_BUILTIN_PCMPGTB
,
26313 IX86_BUILTIN_PCMPGTW
,
26314 IX86_BUILTIN_PCMPGTD
,
26316 IX86_BUILTIN_PMADDWD
,
26318 IX86_BUILTIN_PMAXSW
,
26319 IX86_BUILTIN_PMAXUB
,
26320 IX86_BUILTIN_PMINSW
,
26321 IX86_BUILTIN_PMINUB
,
26323 IX86_BUILTIN_PMULHUW
,
26324 IX86_BUILTIN_PMULHW
,
26325 IX86_BUILTIN_PMULLW
,
26327 IX86_BUILTIN_PSADBW
,
26328 IX86_BUILTIN_PSHUFW
,
26330 IX86_BUILTIN_PSLLW
,
26331 IX86_BUILTIN_PSLLD
,
26332 IX86_BUILTIN_PSLLQ
,
26333 IX86_BUILTIN_PSRAW
,
26334 IX86_BUILTIN_PSRAD
,
26335 IX86_BUILTIN_PSRLW
,
26336 IX86_BUILTIN_PSRLD
,
26337 IX86_BUILTIN_PSRLQ
,
26338 IX86_BUILTIN_PSLLWI
,
26339 IX86_BUILTIN_PSLLDI
,
26340 IX86_BUILTIN_PSLLQI
,
26341 IX86_BUILTIN_PSRAWI
,
26342 IX86_BUILTIN_PSRADI
,
26343 IX86_BUILTIN_PSRLWI
,
26344 IX86_BUILTIN_PSRLDI
,
26345 IX86_BUILTIN_PSRLQI
,
26347 IX86_BUILTIN_PUNPCKHBW
,
26348 IX86_BUILTIN_PUNPCKHWD
,
26349 IX86_BUILTIN_PUNPCKHDQ
,
26350 IX86_BUILTIN_PUNPCKLBW
,
26351 IX86_BUILTIN_PUNPCKLWD
,
26352 IX86_BUILTIN_PUNPCKLDQ
,
26354 IX86_BUILTIN_SHUFPS
,
26356 IX86_BUILTIN_RCPPS
,
26357 IX86_BUILTIN_RCPSS
,
26358 IX86_BUILTIN_RSQRTPS
,
26359 IX86_BUILTIN_RSQRTPS_NR
,
26360 IX86_BUILTIN_RSQRTSS
,
26361 IX86_BUILTIN_RSQRTF
,
26362 IX86_BUILTIN_SQRTPS
,
26363 IX86_BUILTIN_SQRTPS_NR
,
26364 IX86_BUILTIN_SQRTSS
,
26366 IX86_BUILTIN_UNPCKHPS
,
26367 IX86_BUILTIN_UNPCKLPS
,
26369 IX86_BUILTIN_ANDPS
,
26370 IX86_BUILTIN_ANDNPS
,
26372 IX86_BUILTIN_XORPS
,
26375 IX86_BUILTIN_LDMXCSR
,
26376 IX86_BUILTIN_STMXCSR
,
26377 IX86_BUILTIN_SFENCE
,
26379 IX86_BUILTIN_FXSAVE
,
26380 IX86_BUILTIN_FXRSTOR
,
26381 IX86_BUILTIN_FXSAVE64
,
26382 IX86_BUILTIN_FXRSTOR64
,
26384 IX86_BUILTIN_XSAVE
,
26385 IX86_BUILTIN_XRSTOR
,
26386 IX86_BUILTIN_XSAVE64
,
26387 IX86_BUILTIN_XRSTOR64
,
26389 IX86_BUILTIN_XSAVEOPT
,
26390 IX86_BUILTIN_XSAVEOPT64
,
26392 /* 3DNow! Original */
26393 IX86_BUILTIN_FEMMS
,
26394 IX86_BUILTIN_PAVGUSB
,
26395 IX86_BUILTIN_PF2ID
,
26396 IX86_BUILTIN_PFACC
,
26397 IX86_BUILTIN_PFADD
,
26398 IX86_BUILTIN_PFCMPEQ
,
26399 IX86_BUILTIN_PFCMPGE
,
26400 IX86_BUILTIN_PFCMPGT
,
26401 IX86_BUILTIN_PFMAX
,
26402 IX86_BUILTIN_PFMIN
,
26403 IX86_BUILTIN_PFMUL
,
26404 IX86_BUILTIN_PFRCP
,
26405 IX86_BUILTIN_PFRCPIT1
,
26406 IX86_BUILTIN_PFRCPIT2
,
26407 IX86_BUILTIN_PFRSQIT1
,
26408 IX86_BUILTIN_PFRSQRT
,
26409 IX86_BUILTIN_PFSUB
,
26410 IX86_BUILTIN_PFSUBR
,
26411 IX86_BUILTIN_PI2FD
,
26412 IX86_BUILTIN_PMULHRW
,
26414 /* 3DNow! Athlon Extensions */
26415 IX86_BUILTIN_PF2IW
,
26416 IX86_BUILTIN_PFNACC
,
26417 IX86_BUILTIN_PFPNACC
,
26418 IX86_BUILTIN_PI2FW
,
26419 IX86_BUILTIN_PSWAPDSI
,
26420 IX86_BUILTIN_PSWAPDSF
,
26423 IX86_BUILTIN_ADDPD
,
26424 IX86_BUILTIN_ADDSD
,
26425 IX86_BUILTIN_DIVPD
,
26426 IX86_BUILTIN_DIVSD
,
26427 IX86_BUILTIN_MULPD
,
26428 IX86_BUILTIN_MULSD
,
26429 IX86_BUILTIN_SUBPD
,
26430 IX86_BUILTIN_SUBSD
,
26432 IX86_BUILTIN_CMPEQPD
,
26433 IX86_BUILTIN_CMPLTPD
,
26434 IX86_BUILTIN_CMPLEPD
,
26435 IX86_BUILTIN_CMPGTPD
,
26436 IX86_BUILTIN_CMPGEPD
,
26437 IX86_BUILTIN_CMPNEQPD
,
26438 IX86_BUILTIN_CMPNLTPD
,
26439 IX86_BUILTIN_CMPNLEPD
,
26440 IX86_BUILTIN_CMPNGTPD
,
26441 IX86_BUILTIN_CMPNGEPD
,
26442 IX86_BUILTIN_CMPORDPD
,
26443 IX86_BUILTIN_CMPUNORDPD
,
26444 IX86_BUILTIN_CMPEQSD
,
26445 IX86_BUILTIN_CMPLTSD
,
26446 IX86_BUILTIN_CMPLESD
,
26447 IX86_BUILTIN_CMPNEQSD
,
26448 IX86_BUILTIN_CMPNLTSD
,
26449 IX86_BUILTIN_CMPNLESD
,
26450 IX86_BUILTIN_CMPORDSD
,
26451 IX86_BUILTIN_CMPUNORDSD
,
26453 IX86_BUILTIN_COMIEQSD
,
26454 IX86_BUILTIN_COMILTSD
,
26455 IX86_BUILTIN_COMILESD
,
26456 IX86_BUILTIN_COMIGTSD
,
26457 IX86_BUILTIN_COMIGESD
,
26458 IX86_BUILTIN_COMINEQSD
,
26459 IX86_BUILTIN_UCOMIEQSD
,
26460 IX86_BUILTIN_UCOMILTSD
,
26461 IX86_BUILTIN_UCOMILESD
,
26462 IX86_BUILTIN_UCOMIGTSD
,
26463 IX86_BUILTIN_UCOMIGESD
,
26464 IX86_BUILTIN_UCOMINEQSD
,
26466 IX86_BUILTIN_MAXPD
,
26467 IX86_BUILTIN_MAXSD
,
26468 IX86_BUILTIN_MINPD
,
26469 IX86_BUILTIN_MINSD
,
26471 IX86_BUILTIN_ANDPD
,
26472 IX86_BUILTIN_ANDNPD
,
26474 IX86_BUILTIN_XORPD
,
26476 IX86_BUILTIN_SQRTPD
,
26477 IX86_BUILTIN_SQRTSD
,
26479 IX86_BUILTIN_UNPCKHPD
,
26480 IX86_BUILTIN_UNPCKLPD
,
26482 IX86_BUILTIN_SHUFPD
,
26484 IX86_BUILTIN_LOADUPD
,
26485 IX86_BUILTIN_STOREUPD
,
26486 IX86_BUILTIN_MOVSD
,
26488 IX86_BUILTIN_LOADHPD
,
26489 IX86_BUILTIN_LOADLPD
,
26491 IX86_BUILTIN_CVTDQ2PD
,
26492 IX86_BUILTIN_CVTDQ2PS
,
26494 IX86_BUILTIN_CVTPD2DQ
,
26495 IX86_BUILTIN_CVTPD2PI
,
26496 IX86_BUILTIN_CVTPD2PS
,
26497 IX86_BUILTIN_CVTTPD2DQ
,
26498 IX86_BUILTIN_CVTTPD2PI
,
26500 IX86_BUILTIN_CVTPI2PD
,
26501 IX86_BUILTIN_CVTSI2SD
,
26502 IX86_BUILTIN_CVTSI642SD
,
26504 IX86_BUILTIN_CVTSD2SI
,
26505 IX86_BUILTIN_CVTSD2SI64
,
26506 IX86_BUILTIN_CVTSD2SS
,
26507 IX86_BUILTIN_CVTSS2SD
,
26508 IX86_BUILTIN_CVTTSD2SI
,
26509 IX86_BUILTIN_CVTTSD2SI64
,
26511 IX86_BUILTIN_CVTPS2DQ
,
26512 IX86_BUILTIN_CVTPS2PD
,
26513 IX86_BUILTIN_CVTTPS2DQ
,
26515 IX86_BUILTIN_MOVNTI
,
26516 IX86_BUILTIN_MOVNTI64
,
26517 IX86_BUILTIN_MOVNTPD
,
26518 IX86_BUILTIN_MOVNTDQ
,
26520 IX86_BUILTIN_MOVQ128
,
26523 IX86_BUILTIN_MASKMOVDQU
,
26524 IX86_BUILTIN_MOVMSKPD
,
26525 IX86_BUILTIN_PMOVMSKB128
,
26527 IX86_BUILTIN_PACKSSWB128
,
26528 IX86_BUILTIN_PACKSSDW128
,
26529 IX86_BUILTIN_PACKUSWB128
,
26531 IX86_BUILTIN_PADDB128
,
26532 IX86_BUILTIN_PADDW128
,
26533 IX86_BUILTIN_PADDD128
,
26534 IX86_BUILTIN_PADDQ128
,
26535 IX86_BUILTIN_PADDSB128
,
26536 IX86_BUILTIN_PADDSW128
,
26537 IX86_BUILTIN_PADDUSB128
,
26538 IX86_BUILTIN_PADDUSW128
,
26539 IX86_BUILTIN_PSUBB128
,
26540 IX86_BUILTIN_PSUBW128
,
26541 IX86_BUILTIN_PSUBD128
,
26542 IX86_BUILTIN_PSUBQ128
,
26543 IX86_BUILTIN_PSUBSB128
,
26544 IX86_BUILTIN_PSUBSW128
,
26545 IX86_BUILTIN_PSUBUSB128
,
26546 IX86_BUILTIN_PSUBUSW128
,
26548 IX86_BUILTIN_PAND128
,
26549 IX86_BUILTIN_PANDN128
,
26550 IX86_BUILTIN_POR128
,
26551 IX86_BUILTIN_PXOR128
,
26553 IX86_BUILTIN_PAVGB128
,
26554 IX86_BUILTIN_PAVGW128
,
26556 IX86_BUILTIN_PCMPEQB128
,
26557 IX86_BUILTIN_PCMPEQW128
,
26558 IX86_BUILTIN_PCMPEQD128
,
26559 IX86_BUILTIN_PCMPGTB128
,
26560 IX86_BUILTIN_PCMPGTW128
,
26561 IX86_BUILTIN_PCMPGTD128
,
26563 IX86_BUILTIN_PMADDWD128
,
26565 IX86_BUILTIN_PMAXSW128
,
26566 IX86_BUILTIN_PMAXUB128
,
26567 IX86_BUILTIN_PMINSW128
,
26568 IX86_BUILTIN_PMINUB128
,
26570 IX86_BUILTIN_PMULUDQ
,
26571 IX86_BUILTIN_PMULUDQ128
,
26572 IX86_BUILTIN_PMULHUW128
,
26573 IX86_BUILTIN_PMULHW128
,
26574 IX86_BUILTIN_PMULLW128
,
26576 IX86_BUILTIN_PSADBW128
,
26577 IX86_BUILTIN_PSHUFHW
,
26578 IX86_BUILTIN_PSHUFLW
,
26579 IX86_BUILTIN_PSHUFD
,
26581 IX86_BUILTIN_PSLLDQI128
,
26582 IX86_BUILTIN_PSLLWI128
,
26583 IX86_BUILTIN_PSLLDI128
,
26584 IX86_BUILTIN_PSLLQI128
,
26585 IX86_BUILTIN_PSRAWI128
,
26586 IX86_BUILTIN_PSRADI128
,
26587 IX86_BUILTIN_PSRLDQI128
,
26588 IX86_BUILTIN_PSRLWI128
,
26589 IX86_BUILTIN_PSRLDI128
,
26590 IX86_BUILTIN_PSRLQI128
,
26592 IX86_BUILTIN_PSLLDQ128
,
26593 IX86_BUILTIN_PSLLW128
,
26594 IX86_BUILTIN_PSLLD128
,
26595 IX86_BUILTIN_PSLLQ128
,
26596 IX86_BUILTIN_PSRAW128
,
26597 IX86_BUILTIN_PSRAD128
,
26598 IX86_BUILTIN_PSRLW128
,
26599 IX86_BUILTIN_PSRLD128
,
26600 IX86_BUILTIN_PSRLQ128
,
26602 IX86_BUILTIN_PUNPCKHBW128
,
26603 IX86_BUILTIN_PUNPCKHWD128
,
26604 IX86_BUILTIN_PUNPCKHDQ128
,
26605 IX86_BUILTIN_PUNPCKHQDQ128
,
26606 IX86_BUILTIN_PUNPCKLBW128
,
26607 IX86_BUILTIN_PUNPCKLWD128
,
26608 IX86_BUILTIN_PUNPCKLDQ128
,
26609 IX86_BUILTIN_PUNPCKLQDQ128
,
26611 IX86_BUILTIN_CLFLUSH
,
26612 IX86_BUILTIN_MFENCE
,
26613 IX86_BUILTIN_LFENCE
,
26614 IX86_BUILTIN_PAUSE
,
26616 IX86_BUILTIN_BSRSI
,
26617 IX86_BUILTIN_BSRDI
,
26618 IX86_BUILTIN_RDPMC
,
26619 IX86_BUILTIN_RDTSC
,
26620 IX86_BUILTIN_RDTSCP
,
26621 IX86_BUILTIN_ROLQI
,
26622 IX86_BUILTIN_ROLHI
,
26623 IX86_BUILTIN_RORQI
,
26624 IX86_BUILTIN_RORHI
,
26627 IX86_BUILTIN_ADDSUBPS
,
26628 IX86_BUILTIN_HADDPS
,
26629 IX86_BUILTIN_HSUBPS
,
26630 IX86_BUILTIN_MOVSHDUP
,
26631 IX86_BUILTIN_MOVSLDUP
,
26632 IX86_BUILTIN_ADDSUBPD
,
26633 IX86_BUILTIN_HADDPD
,
26634 IX86_BUILTIN_HSUBPD
,
26635 IX86_BUILTIN_LDDQU
,
26637 IX86_BUILTIN_MONITOR
,
26638 IX86_BUILTIN_MWAIT
,
26641 IX86_BUILTIN_PHADDW
,
26642 IX86_BUILTIN_PHADDD
,
26643 IX86_BUILTIN_PHADDSW
,
26644 IX86_BUILTIN_PHSUBW
,
26645 IX86_BUILTIN_PHSUBD
,
26646 IX86_BUILTIN_PHSUBSW
,
26647 IX86_BUILTIN_PMADDUBSW
,
26648 IX86_BUILTIN_PMULHRSW
,
26649 IX86_BUILTIN_PSHUFB
,
26650 IX86_BUILTIN_PSIGNB
,
26651 IX86_BUILTIN_PSIGNW
,
26652 IX86_BUILTIN_PSIGND
,
26653 IX86_BUILTIN_PALIGNR
,
26654 IX86_BUILTIN_PABSB
,
26655 IX86_BUILTIN_PABSW
,
26656 IX86_BUILTIN_PABSD
,
26658 IX86_BUILTIN_PHADDW128
,
26659 IX86_BUILTIN_PHADDD128
,
26660 IX86_BUILTIN_PHADDSW128
,
26661 IX86_BUILTIN_PHSUBW128
,
26662 IX86_BUILTIN_PHSUBD128
,
26663 IX86_BUILTIN_PHSUBSW128
,
26664 IX86_BUILTIN_PMADDUBSW128
,
26665 IX86_BUILTIN_PMULHRSW128
,
26666 IX86_BUILTIN_PSHUFB128
,
26667 IX86_BUILTIN_PSIGNB128
,
26668 IX86_BUILTIN_PSIGNW128
,
26669 IX86_BUILTIN_PSIGND128
,
26670 IX86_BUILTIN_PALIGNR128
,
26671 IX86_BUILTIN_PABSB128
,
26672 IX86_BUILTIN_PABSW128
,
26673 IX86_BUILTIN_PABSD128
,
26675 /* AMDFAM10 - SSE4A New Instructions. */
26676 IX86_BUILTIN_MOVNTSD
,
26677 IX86_BUILTIN_MOVNTSS
,
26678 IX86_BUILTIN_EXTRQI
,
26679 IX86_BUILTIN_EXTRQ
,
26680 IX86_BUILTIN_INSERTQI
,
26681 IX86_BUILTIN_INSERTQ
,
26684 IX86_BUILTIN_BLENDPD
,
26685 IX86_BUILTIN_BLENDPS
,
26686 IX86_BUILTIN_BLENDVPD
,
26687 IX86_BUILTIN_BLENDVPS
,
26688 IX86_BUILTIN_PBLENDVB128
,
26689 IX86_BUILTIN_PBLENDW128
,
26694 IX86_BUILTIN_INSERTPS128
,
26696 IX86_BUILTIN_MOVNTDQA
,
26697 IX86_BUILTIN_MPSADBW128
,
26698 IX86_BUILTIN_PACKUSDW128
,
26699 IX86_BUILTIN_PCMPEQQ
,
26700 IX86_BUILTIN_PHMINPOSUW128
,
26702 IX86_BUILTIN_PMAXSB128
,
26703 IX86_BUILTIN_PMAXSD128
,
26704 IX86_BUILTIN_PMAXUD128
,
26705 IX86_BUILTIN_PMAXUW128
,
26707 IX86_BUILTIN_PMINSB128
,
26708 IX86_BUILTIN_PMINSD128
,
26709 IX86_BUILTIN_PMINUD128
,
26710 IX86_BUILTIN_PMINUW128
,
26712 IX86_BUILTIN_PMOVSXBW128
,
26713 IX86_BUILTIN_PMOVSXBD128
,
26714 IX86_BUILTIN_PMOVSXBQ128
,
26715 IX86_BUILTIN_PMOVSXWD128
,
26716 IX86_BUILTIN_PMOVSXWQ128
,
26717 IX86_BUILTIN_PMOVSXDQ128
,
26719 IX86_BUILTIN_PMOVZXBW128
,
26720 IX86_BUILTIN_PMOVZXBD128
,
26721 IX86_BUILTIN_PMOVZXBQ128
,
26722 IX86_BUILTIN_PMOVZXWD128
,
26723 IX86_BUILTIN_PMOVZXWQ128
,
26724 IX86_BUILTIN_PMOVZXDQ128
,
26726 IX86_BUILTIN_PMULDQ128
,
26727 IX86_BUILTIN_PMULLD128
,
26729 IX86_BUILTIN_ROUNDSD
,
26730 IX86_BUILTIN_ROUNDSS
,
26732 IX86_BUILTIN_ROUNDPD
,
26733 IX86_BUILTIN_ROUNDPS
,
26735 IX86_BUILTIN_FLOORPD
,
26736 IX86_BUILTIN_CEILPD
,
26737 IX86_BUILTIN_TRUNCPD
,
26738 IX86_BUILTIN_RINTPD
,
26739 IX86_BUILTIN_ROUNDPD_AZ
,
26741 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26742 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26743 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26745 IX86_BUILTIN_FLOORPS
,
26746 IX86_BUILTIN_CEILPS
,
26747 IX86_BUILTIN_TRUNCPS
,
26748 IX86_BUILTIN_RINTPS
,
26749 IX86_BUILTIN_ROUNDPS_AZ
,
26751 IX86_BUILTIN_FLOORPS_SFIX
,
26752 IX86_BUILTIN_CEILPS_SFIX
,
26753 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26755 IX86_BUILTIN_PTESTZ
,
26756 IX86_BUILTIN_PTESTC
,
26757 IX86_BUILTIN_PTESTNZC
,
26759 IX86_BUILTIN_VEC_INIT_V2SI
,
26760 IX86_BUILTIN_VEC_INIT_V4HI
,
26761 IX86_BUILTIN_VEC_INIT_V8QI
,
26762 IX86_BUILTIN_VEC_EXT_V2DF
,
26763 IX86_BUILTIN_VEC_EXT_V2DI
,
26764 IX86_BUILTIN_VEC_EXT_V4SF
,
26765 IX86_BUILTIN_VEC_EXT_V4SI
,
26766 IX86_BUILTIN_VEC_EXT_V8HI
,
26767 IX86_BUILTIN_VEC_EXT_V2SI
,
26768 IX86_BUILTIN_VEC_EXT_V4HI
,
26769 IX86_BUILTIN_VEC_EXT_V16QI
,
26770 IX86_BUILTIN_VEC_SET_V2DI
,
26771 IX86_BUILTIN_VEC_SET_V4SF
,
26772 IX86_BUILTIN_VEC_SET_V4SI
,
26773 IX86_BUILTIN_VEC_SET_V8HI
,
26774 IX86_BUILTIN_VEC_SET_V4HI
,
26775 IX86_BUILTIN_VEC_SET_V16QI
,
26777 IX86_BUILTIN_VEC_PACK_SFIX
,
26778 IX86_BUILTIN_VEC_PACK_SFIX256
,
26781 IX86_BUILTIN_CRC32QI
,
26782 IX86_BUILTIN_CRC32HI
,
26783 IX86_BUILTIN_CRC32SI
,
26784 IX86_BUILTIN_CRC32DI
,
26786 IX86_BUILTIN_PCMPESTRI128
,
26787 IX86_BUILTIN_PCMPESTRM128
,
26788 IX86_BUILTIN_PCMPESTRA128
,
26789 IX86_BUILTIN_PCMPESTRC128
,
26790 IX86_BUILTIN_PCMPESTRO128
,
26791 IX86_BUILTIN_PCMPESTRS128
,
26792 IX86_BUILTIN_PCMPESTRZ128
,
26793 IX86_BUILTIN_PCMPISTRI128
,
26794 IX86_BUILTIN_PCMPISTRM128
,
26795 IX86_BUILTIN_PCMPISTRA128
,
26796 IX86_BUILTIN_PCMPISTRC128
,
26797 IX86_BUILTIN_PCMPISTRO128
,
26798 IX86_BUILTIN_PCMPISTRS128
,
26799 IX86_BUILTIN_PCMPISTRZ128
,
26801 IX86_BUILTIN_PCMPGTQ
,
26803 /* AES instructions */
26804 IX86_BUILTIN_AESENC128
,
26805 IX86_BUILTIN_AESENCLAST128
,
26806 IX86_BUILTIN_AESDEC128
,
26807 IX86_BUILTIN_AESDECLAST128
,
26808 IX86_BUILTIN_AESIMC128
,
26809 IX86_BUILTIN_AESKEYGENASSIST128
,
26811 /* PCLMUL instruction */
26812 IX86_BUILTIN_PCLMULQDQ128
,
26815 IX86_BUILTIN_ADDPD256
,
26816 IX86_BUILTIN_ADDPS256
,
26817 IX86_BUILTIN_ADDSUBPD256
,
26818 IX86_BUILTIN_ADDSUBPS256
,
26819 IX86_BUILTIN_ANDPD256
,
26820 IX86_BUILTIN_ANDPS256
,
26821 IX86_BUILTIN_ANDNPD256
,
26822 IX86_BUILTIN_ANDNPS256
,
26823 IX86_BUILTIN_BLENDPD256
,
26824 IX86_BUILTIN_BLENDPS256
,
26825 IX86_BUILTIN_BLENDVPD256
,
26826 IX86_BUILTIN_BLENDVPS256
,
26827 IX86_BUILTIN_DIVPD256
,
26828 IX86_BUILTIN_DIVPS256
,
26829 IX86_BUILTIN_DPPS256
,
26830 IX86_BUILTIN_HADDPD256
,
26831 IX86_BUILTIN_HADDPS256
,
26832 IX86_BUILTIN_HSUBPD256
,
26833 IX86_BUILTIN_HSUBPS256
,
26834 IX86_BUILTIN_MAXPD256
,
26835 IX86_BUILTIN_MAXPS256
,
26836 IX86_BUILTIN_MINPD256
,
26837 IX86_BUILTIN_MINPS256
,
26838 IX86_BUILTIN_MULPD256
,
26839 IX86_BUILTIN_MULPS256
,
26840 IX86_BUILTIN_ORPD256
,
26841 IX86_BUILTIN_ORPS256
,
26842 IX86_BUILTIN_SHUFPD256
,
26843 IX86_BUILTIN_SHUFPS256
,
26844 IX86_BUILTIN_SUBPD256
,
26845 IX86_BUILTIN_SUBPS256
,
26846 IX86_BUILTIN_XORPD256
,
26847 IX86_BUILTIN_XORPS256
,
26848 IX86_BUILTIN_CMPSD
,
26849 IX86_BUILTIN_CMPSS
,
26850 IX86_BUILTIN_CMPPD
,
26851 IX86_BUILTIN_CMPPS
,
26852 IX86_BUILTIN_CMPPD256
,
26853 IX86_BUILTIN_CMPPS256
,
26854 IX86_BUILTIN_CVTDQ2PD256
,
26855 IX86_BUILTIN_CVTDQ2PS256
,
26856 IX86_BUILTIN_CVTPD2PS256
,
26857 IX86_BUILTIN_CVTPS2DQ256
,
26858 IX86_BUILTIN_CVTPS2PD256
,
26859 IX86_BUILTIN_CVTTPD2DQ256
,
26860 IX86_BUILTIN_CVTPD2DQ256
,
26861 IX86_BUILTIN_CVTTPS2DQ256
,
26862 IX86_BUILTIN_EXTRACTF128PD256
,
26863 IX86_BUILTIN_EXTRACTF128PS256
,
26864 IX86_BUILTIN_EXTRACTF128SI256
,
26865 IX86_BUILTIN_VZEROALL
,
26866 IX86_BUILTIN_VZEROUPPER
,
26867 IX86_BUILTIN_VPERMILVARPD
,
26868 IX86_BUILTIN_VPERMILVARPS
,
26869 IX86_BUILTIN_VPERMILVARPD256
,
26870 IX86_BUILTIN_VPERMILVARPS256
,
26871 IX86_BUILTIN_VPERMILPD
,
26872 IX86_BUILTIN_VPERMILPS
,
26873 IX86_BUILTIN_VPERMILPD256
,
26874 IX86_BUILTIN_VPERMILPS256
,
26875 IX86_BUILTIN_VPERMIL2PD
,
26876 IX86_BUILTIN_VPERMIL2PS
,
26877 IX86_BUILTIN_VPERMIL2PD256
,
26878 IX86_BUILTIN_VPERMIL2PS256
,
26879 IX86_BUILTIN_VPERM2F128PD256
,
26880 IX86_BUILTIN_VPERM2F128PS256
,
26881 IX86_BUILTIN_VPERM2F128SI256
,
26882 IX86_BUILTIN_VBROADCASTSS
,
26883 IX86_BUILTIN_VBROADCASTSD256
,
26884 IX86_BUILTIN_VBROADCASTSS256
,
26885 IX86_BUILTIN_VBROADCASTPD256
,
26886 IX86_BUILTIN_VBROADCASTPS256
,
26887 IX86_BUILTIN_VINSERTF128PD256
,
26888 IX86_BUILTIN_VINSERTF128PS256
,
26889 IX86_BUILTIN_VINSERTF128SI256
,
26890 IX86_BUILTIN_LOADUPD256
,
26891 IX86_BUILTIN_LOADUPS256
,
26892 IX86_BUILTIN_STOREUPD256
,
26893 IX86_BUILTIN_STOREUPS256
,
26894 IX86_BUILTIN_LDDQU256
,
26895 IX86_BUILTIN_MOVNTDQ256
,
26896 IX86_BUILTIN_MOVNTPD256
,
26897 IX86_BUILTIN_MOVNTPS256
,
26898 IX86_BUILTIN_LOADDQU256
,
26899 IX86_BUILTIN_STOREDQU256
,
26900 IX86_BUILTIN_MASKLOADPD
,
26901 IX86_BUILTIN_MASKLOADPS
,
26902 IX86_BUILTIN_MASKSTOREPD
,
26903 IX86_BUILTIN_MASKSTOREPS
,
26904 IX86_BUILTIN_MASKLOADPD256
,
26905 IX86_BUILTIN_MASKLOADPS256
,
26906 IX86_BUILTIN_MASKSTOREPD256
,
26907 IX86_BUILTIN_MASKSTOREPS256
,
26908 IX86_BUILTIN_MOVSHDUP256
,
26909 IX86_BUILTIN_MOVSLDUP256
,
26910 IX86_BUILTIN_MOVDDUP256
,
26912 IX86_BUILTIN_SQRTPD256
,
26913 IX86_BUILTIN_SQRTPS256
,
26914 IX86_BUILTIN_SQRTPS_NR256
,
26915 IX86_BUILTIN_RSQRTPS256
,
26916 IX86_BUILTIN_RSQRTPS_NR256
,
26918 IX86_BUILTIN_RCPPS256
,
26920 IX86_BUILTIN_ROUNDPD256
,
26921 IX86_BUILTIN_ROUNDPS256
,
26923 IX86_BUILTIN_FLOORPD256
,
26924 IX86_BUILTIN_CEILPD256
,
26925 IX86_BUILTIN_TRUNCPD256
,
26926 IX86_BUILTIN_RINTPD256
,
26927 IX86_BUILTIN_ROUNDPD_AZ256
,
26929 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26930 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26931 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26933 IX86_BUILTIN_FLOORPS256
,
26934 IX86_BUILTIN_CEILPS256
,
26935 IX86_BUILTIN_TRUNCPS256
,
26936 IX86_BUILTIN_RINTPS256
,
26937 IX86_BUILTIN_ROUNDPS_AZ256
,
26939 IX86_BUILTIN_FLOORPS_SFIX256
,
26940 IX86_BUILTIN_CEILPS_SFIX256
,
26941 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26943 IX86_BUILTIN_UNPCKHPD256
,
26944 IX86_BUILTIN_UNPCKLPD256
,
26945 IX86_BUILTIN_UNPCKHPS256
,
26946 IX86_BUILTIN_UNPCKLPS256
,
26948 IX86_BUILTIN_SI256_SI
,
26949 IX86_BUILTIN_PS256_PS
,
26950 IX86_BUILTIN_PD256_PD
,
26951 IX86_BUILTIN_SI_SI256
,
26952 IX86_BUILTIN_PS_PS256
,
26953 IX86_BUILTIN_PD_PD256
,
26955 IX86_BUILTIN_VTESTZPD
,
26956 IX86_BUILTIN_VTESTCPD
,
26957 IX86_BUILTIN_VTESTNZCPD
,
26958 IX86_BUILTIN_VTESTZPS
,
26959 IX86_BUILTIN_VTESTCPS
,
26960 IX86_BUILTIN_VTESTNZCPS
,
26961 IX86_BUILTIN_VTESTZPD256
,
26962 IX86_BUILTIN_VTESTCPD256
,
26963 IX86_BUILTIN_VTESTNZCPD256
,
26964 IX86_BUILTIN_VTESTZPS256
,
26965 IX86_BUILTIN_VTESTCPS256
,
26966 IX86_BUILTIN_VTESTNZCPS256
,
26967 IX86_BUILTIN_PTESTZ256
,
26968 IX86_BUILTIN_PTESTC256
,
26969 IX86_BUILTIN_PTESTNZC256
,
26971 IX86_BUILTIN_MOVMSKPD256
,
26972 IX86_BUILTIN_MOVMSKPS256
,
26975 IX86_BUILTIN_MPSADBW256
,
26976 IX86_BUILTIN_PABSB256
,
26977 IX86_BUILTIN_PABSW256
,
26978 IX86_BUILTIN_PABSD256
,
26979 IX86_BUILTIN_PACKSSDW256
,
26980 IX86_BUILTIN_PACKSSWB256
,
26981 IX86_BUILTIN_PACKUSDW256
,
26982 IX86_BUILTIN_PACKUSWB256
,
26983 IX86_BUILTIN_PADDB256
,
26984 IX86_BUILTIN_PADDW256
,
26985 IX86_BUILTIN_PADDD256
,
26986 IX86_BUILTIN_PADDQ256
,
26987 IX86_BUILTIN_PADDSB256
,
26988 IX86_BUILTIN_PADDSW256
,
26989 IX86_BUILTIN_PADDUSB256
,
26990 IX86_BUILTIN_PADDUSW256
,
26991 IX86_BUILTIN_PALIGNR256
,
26992 IX86_BUILTIN_AND256I
,
26993 IX86_BUILTIN_ANDNOT256I
,
26994 IX86_BUILTIN_PAVGB256
,
26995 IX86_BUILTIN_PAVGW256
,
26996 IX86_BUILTIN_PBLENDVB256
,
26997 IX86_BUILTIN_PBLENDVW256
,
26998 IX86_BUILTIN_PCMPEQB256
,
26999 IX86_BUILTIN_PCMPEQW256
,
27000 IX86_BUILTIN_PCMPEQD256
,
27001 IX86_BUILTIN_PCMPEQQ256
,
27002 IX86_BUILTIN_PCMPGTB256
,
27003 IX86_BUILTIN_PCMPGTW256
,
27004 IX86_BUILTIN_PCMPGTD256
,
27005 IX86_BUILTIN_PCMPGTQ256
,
27006 IX86_BUILTIN_PHADDW256
,
27007 IX86_BUILTIN_PHADDD256
,
27008 IX86_BUILTIN_PHADDSW256
,
27009 IX86_BUILTIN_PHSUBW256
,
27010 IX86_BUILTIN_PHSUBD256
,
27011 IX86_BUILTIN_PHSUBSW256
,
27012 IX86_BUILTIN_PMADDUBSW256
,
27013 IX86_BUILTIN_PMADDWD256
,
27014 IX86_BUILTIN_PMAXSB256
,
27015 IX86_BUILTIN_PMAXSW256
,
27016 IX86_BUILTIN_PMAXSD256
,
27017 IX86_BUILTIN_PMAXUB256
,
27018 IX86_BUILTIN_PMAXUW256
,
27019 IX86_BUILTIN_PMAXUD256
,
27020 IX86_BUILTIN_PMINSB256
,
27021 IX86_BUILTIN_PMINSW256
,
27022 IX86_BUILTIN_PMINSD256
,
27023 IX86_BUILTIN_PMINUB256
,
27024 IX86_BUILTIN_PMINUW256
,
27025 IX86_BUILTIN_PMINUD256
,
27026 IX86_BUILTIN_PMOVMSKB256
,
27027 IX86_BUILTIN_PMOVSXBW256
,
27028 IX86_BUILTIN_PMOVSXBD256
,
27029 IX86_BUILTIN_PMOVSXBQ256
,
27030 IX86_BUILTIN_PMOVSXWD256
,
27031 IX86_BUILTIN_PMOVSXWQ256
,
27032 IX86_BUILTIN_PMOVSXDQ256
,
27033 IX86_BUILTIN_PMOVZXBW256
,
27034 IX86_BUILTIN_PMOVZXBD256
,
27035 IX86_BUILTIN_PMOVZXBQ256
,
27036 IX86_BUILTIN_PMOVZXWD256
,
27037 IX86_BUILTIN_PMOVZXWQ256
,
27038 IX86_BUILTIN_PMOVZXDQ256
,
27039 IX86_BUILTIN_PMULDQ256
,
27040 IX86_BUILTIN_PMULHRSW256
,
27041 IX86_BUILTIN_PMULHUW256
,
27042 IX86_BUILTIN_PMULHW256
,
27043 IX86_BUILTIN_PMULLW256
,
27044 IX86_BUILTIN_PMULLD256
,
27045 IX86_BUILTIN_PMULUDQ256
,
27046 IX86_BUILTIN_POR256
,
27047 IX86_BUILTIN_PSADBW256
,
27048 IX86_BUILTIN_PSHUFB256
,
27049 IX86_BUILTIN_PSHUFD256
,
27050 IX86_BUILTIN_PSHUFHW256
,
27051 IX86_BUILTIN_PSHUFLW256
,
27052 IX86_BUILTIN_PSIGNB256
,
27053 IX86_BUILTIN_PSIGNW256
,
27054 IX86_BUILTIN_PSIGND256
,
27055 IX86_BUILTIN_PSLLDQI256
,
27056 IX86_BUILTIN_PSLLWI256
,
27057 IX86_BUILTIN_PSLLW256
,
27058 IX86_BUILTIN_PSLLDI256
,
27059 IX86_BUILTIN_PSLLD256
,
27060 IX86_BUILTIN_PSLLQI256
,
27061 IX86_BUILTIN_PSLLQ256
,
27062 IX86_BUILTIN_PSRAWI256
,
27063 IX86_BUILTIN_PSRAW256
,
27064 IX86_BUILTIN_PSRADI256
,
27065 IX86_BUILTIN_PSRAD256
,
27066 IX86_BUILTIN_PSRLDQI256
,
27067 IX86_BUILTIN_PSRLWI256
,
27068 IX86_BUILTIN_PSRLW256
,
27069 IX86_BUILTIN_PSRLDI256
,
27070 IX86_BUILTIN_PSRLD256
,
27071 IX86_BUILTIN_PSRLQI256
,
27072 IX86_BUILTIN_PSRLQ256
,
27073 IX86_BUILTIN_PSUBB256
,
27074 IX86_BUILTIN_PSUBW256
,
27075 IX86_BUILTIN_PSUBD256
,
27076 IX86_BUILTIN_PSUBQ256
,
27077 IX86_BUILTIN_PSUBSB256
,
27078 IX86_BUILTIN_PSUBSW256
,
27079 IX86_BUILTIN_PSUBUSB256
,
27080 IX86_BUILTIN_PSUBUSW256
,
27081 IX86_BUILTIN_PUNPCKHBW256
,
27082 IX86_BUILTIN_PUNPCKHWD256
,
27083 IX86_BUILTIN_PUNPCKHDQ256
,
27084 IX86_BUILTIN_PUNPCKHQDQ256
,
27085 IX86_BUILTIN_PUNPCKLBW256
,
27086 IX86_BUILTIN_PUNPCKLWD256
,
27087 IX86_BUILTIN_PUNPCKLDQ256
,
27088 IX86_BUILTIN_PUNPCKLQDQ256
,
27089 IX86_BUILTIN_PXOR256
,
27090 IX86_BUILTIN_MOVNTDQA256
,
27091 IX86_BUILTIN_VBROADCASTSS_PS
,
27092 IX86_BUILTIN_VBROADCASTSS_PS256
,
27093 IX86_BUILTIN_VBROADCASTSD_PD256
,
27094 IX86_BUILTIN_VBROADCASTSI256
,
27095 IX86_BUILTIN_PBLENDD256
,
27096 IX86_BUILTIN_PBLENDD128
,
27097 IX86_BUILTIN_PBROADCASTB256
,
27098 IX86_BUILTIN_PBROADCASTW256
,
27099 IX86_BUILTIN_PBROADCASTD256
,
27100 IX86_BUILTIN_PBROADCASTQ256
,
27101 IX86_BUILTIN_PBROADCASTB128
,
27102 IX86_BUILTIN_PBROADCASTW128
,
27103 IX86_BUILTIN_PBROADCASTD128
,
27104 IX86_BUILTIN_PBROADCASTQ128
,
27105 IX86_BUILTIN_VPERMVARSI256
,
27106 IX86_BUILTIN_VPERMDF256
,
27107 IX86_BUILTIN_VPERMVARSF256
,
27108 IX86_BUILTIN_VPERMDI256
,
27109 IX86_BUILTIN_VPERMTI256
,
27110 IX86_BUILTIN_VEXTRACT128I256
,
27111 IX86_BUILTIN_VINSERT128I256
,
27112 IX86_BUILTIN_MASKLOADD
,
27113 IX86_BUILTIN_MASKLOADQ
,
27114 IX86_BUILTIN_MASKLOADD256
,
27115 IX86_BUILTIN_MASKLOADQ256
,
27116 IX86_BUILTIN_MASKSTORED
,
27117 IX86_BUILTIN_MASKSTOREQ
,
27118 IX86_BUILTIN_MASKSTORED256
,
27119 IX86_BUILTIN_MASKSTOREQ256
,
27120 IX86_BUILTIN_PSLLVV4DI
,
27121 IX86_BUILTIN_PSLLVV2DI
,
27122 IX86_BUILTIN_PSLLVV8SI
,
27123 IX86_BUILTIN_PSLLVV4SI
,
27124 IX86_BUILTIN_PSRAVV8SI
,
27125 IX86_BUILTIN_PSRAVV4SI
,
27126 IX86_BUILTIN_PSRLVV4DI
,
27127 IX86_BUILTIN_PSRLVV2DI
,
27128 IX86_BUILTIN_PSRLVV8SI
,
27129 IX86_BUILTIN_PSRLVV4SI
,
27131 IX86_BUILTIN_GATHERSIV2DF
,
27132 IX86_BUILTIN_GATHERSIV4DF
,
27133 IX86_BUILTIN_GATHERDIV2DF
,
27134 IX86_BUILTIN_GATHERDIV4DF
,
27135 IX86_BUILTIN_GATHERSIV4SF
,
27136 IX86_BUILTIN_GATHERSIV8SF
,
27137 IX86_BUILTIN_GATHERDIV4SF
,
27138 IX86_BUILTIN_GATHERDIV8SF
,
27139 IX86_BUILTIN_GATHERSIV2DI
,
27140 IX86_BUILTIN_GATHERSIV4DI
,
27141 IX86_BUILTIN_GATHERDIV2DI
,
27142 IX86_BUILTIN_GATHERDIV4DI
,
27143 IX86_BUILTIN_GATHERSIV4SI
,
27144 IX86_BUILTIN_GATHERSIV8SI
,
27145 IX86_BUILTIN_GATHERDIV4SI
,
27146 IX86_BUILTIN_GATHERDIV8SI
,
27148 /* Alternate 4 element gather for the vectorizer where
27149 all operands are 32-byte wide. */
27150 IX86_BUILTIN_GATHERALTSIV4DF
,
27151 IX86_BUILTIN_GATHERALTDIV8SF
,
27152 IX86_BUILTIN_GATHERALTSIV4DI
,
27153 IX86_BUILTIN_GATHERALTDIV8SI
,
27155 /* TFmode support builtins. */
27157 IX86_BUILTIN_HUGE_VALQ
,
27158 IX86_BUILTIN_FABSQ
,
27159 IX86_BUILTIN_COPYSIGNQ
,
27161 /* Vectorizer support builtins. */
27162 IX86_BUILTIN_CPYSGNPS
,
27163 IX86_BUILTIN_CPYSGNPD
,
27164 IX86_BUILTIN_CPYSGNPS256
,
27165 IX86_BUILTIN_CPYSGNPD256
,
27167 /* FMA4 instructions. */
27168 IX86_BUILTIN_VFMADDSS
,
27169 IX86_BUILTIN_VFMADDSD
,
27170 IX86_BUILTIN_VFMADDPS
,
27171 IX86_BUILTIN_VFMADDPD
,
27172 IX86_BUILTIN_VFMADDPS256
,
27173 IX86_BUILTIN_VFMADDPD256
,
27174 IX86_BUILTIN_VFMADDSUBPS
,
27175 IX86_BUILTIN_VFMADDSUBPD
,
27176 IX86_BUILTIN_VFMADDSUBPS256
,
27177 IX86_BUILTIN_VFMADDSUBPD256
,
27179 /* FMA3 instructions. */
27180 IX86_BUILTIN_VFMADDSS3
,
27181 IX86_BUILTIN_VFMADDSD3
,
27183 /* XOP instructions. */
27184 IX86_BUILTIN_VPCMOV
,
27185 IX86_BUILTIN_VPCMOV_V2DI
,
27186 IX86_BUILTIN_VPCMOV_V4SI
,
27187 IX86_BUILTIN_VPCMOV_V8HI
,
27188 IX86_BUILTIN_VPCMOV_V16QI
,
27189 IX86_BUILTIN_VPCMOV_V4SF
,
27190 IX86_BUILTIN_VPCMOV_V2DF
,
27191 IX86_BUILTIN_VPCMOV256
,
27192 IX86_BUILTIN_VPCMOV_V4DI256
,
27193 IX86_BUILTIN_VPCMOV_V8SI256
,
27194 IX86_BUILTIN_VPCMOV_V16HI256
,
27195 IX86_BUILTIN_VPCMOV_V32QI256
,
27196 IX86_BUILTIN_VPCMOV_V8SF256
,
27197 IX86_BUILTIN_VPCMOV_V4DF256
,
27199 IX86_BUILTIN_VPPERM
,
27201 IX86_BUILTIN_VPMACSSWW
,
27202 IX86_BUILTIN_VPMACSWW
,
27203 IX86_BUILTIN_VPMACSSWD
,
27204 IX86_BUILTIN_VPMACSWD
,
27205 IX86_BUILTIN_VPMACSSDD
,
27206 IX86_BUILTIN_VPMACSDD
,
27207 IX86_BUILTIN_VPMACSSDQL
,
27208 IX86_BUILTIN_VPMACSSDQH
,
27209 IX86_BUILTIN_VPMACSDQL
,
27210 IX86_BUILTIN_VPMACSDQH
,
27211 IX86_BUILTIN_VPMADCSSWD
,
27212 IX86_BUILTIN_VPMADCSWD
,
27214 IX86_BUILTIN_VPHADDBW
,
27215 IX86_BUILTIN_VPHADDBD
,
27216 IX86_BUILTIN_VPHADDBQ
,
27217 IX86_BUILTIN_VPHADDWD
,
27218 IX86_BUILTIN_VPHADDWQ
,
27219 IX86_BUILTIN_VPHADDDQ
,
27220 IX86_BUILTIN_VPHADDUBW
,
27221 IX86_BUILTIN_VPHADDUBD
,
27222 IX86_BUILTIN_VPHADDUBQ
,
27223 IX86_BUILTIN_VPHADDUWD
,
27224 IX86_BUILTIN_VPHADDUWQ
,
27225 IX86_BUILTIN_VPHADDUDQ
,
27226 IX86_BUILTIN_VPHSUBBW
,
27227 IX86_BUILTIN_VPHSUBWD
,
27228 IX86_BUILTIN_VPHSUBDQ
,
27230 IX86_BUILTIN_VPROTB
,
27231 IX86_BUILTIN_VPROTW
,
27232 IX86_BUILTIN_VPROTD
,
27233 IX86_BUILTIN_VPROTQ
,
27234 IX86_BUILTIN_VPROTB_IMM
,
27235 IX86_BUILTIN_VPROTW_IMM
,
27236 IX86_BUILTIN_VPROTD_IMM
,
27237 IX86_BUILTIN_VPROTQ_IMM
,
27239 IX86_BUILTIN_VPSHLB
,
27240 IX86_BUILTIN_VPSHLW
,
27241 IX86_BUILTIN_VPSHLD
,
27242 IX86_BUILTIN_VPSHLQ
,
27243 IX86_BUILTIN_VPSHAB
,
27244 IX86_BUILTIN_VPSHAW
,
27245 IX86_BUILTIN_VPSHAD
,
27246 IX86_BUILTIN_VPSHAQ
,
27248 IX86_BUILTIN_VFRCZSS
,
27249 IX86_BUILTIN_VFRCZSD
,
27250 IX86_BUILTIN_VFRCZPS
,
27251 IX86_BUILTIN_VFRCZPD
,
27252 IX86_BUILTIN_VFRCZPS256
,
27253 IX86_BUILTIN_VFRCZPD256
,
27255 IX86_BUILTIN_VPCOMEQUB
,
27256 IX86_BUILTIN_VPCOMNEUB
,
27257 IX86_BUILTIN_VPCOMLTUB
,
27258 IX86_BUILTIN_VPCOMLEUB
,
27259 IX86_BUILTIN_VPCOMGTUB
,
27260 IX86_BUILTIN_VPCOMGEUB
,
27261 IX86_BUILTIN_VPCOMFALSEUB
,
27262 IX86_BUILTIN_VPCOMTRUEUB
,
27264 IX86_BUILTIN_VPCOMEQUW
,
27265 IX86_BUILTIN_VPCOMNEUW
,
27266 IX86_BUILTIN_VPCOMLTUW
,
27267 IX86_BUILTIN_VPCOMLEUW
,
27268 IX86_BUILTIN_VPCOMGTUW
,
27269 IX86_BUILTIN_VPCOMGEUW
,
27270 IX86_BUILTIN_VPCOMFALSEUW
,
27271 IX86_BUILTIN_VPCOMTRUEUW
,
27273 IX86_BUILTIN_VPCOMEQUD
,
27274 IX86_BUILTIN_VPCOMNEUD
,
27275 IX86_BUILTIN_VPCOMLTUD
,
27276 IX86_BUILTIN_VPCOMLEUD
,
27277 IX86_BUILTIN_VPCOMGTUD
,
27278 IX86_BUILTIN_VPCOMGEUD
,
27279 IX86_BUILTIN_VPCOMFALSEUD
,
27280 IX86_BUILTIN_VPCOMTRUEUD
,
27282 IX86_BUILTIN_VPCOMEQUQ
,
27283 IX86_BUILTIN_VPCOMNEUQ
,
27284 IX86_BUILTIN_VPCOMLTUQ
,
27285 IX86_BUILTIN_VPCOMLEUQ
,
27286 IX86_BUILTIN_VPCOMGTUQ
,
27287 IX86_BUILTIN_VPCOMGEUQ
,
27288 IX86_BUILTIN_VPCOMFALSEUQ
,
27289 IX86_BUILTIN_VPCOMTRUEUQ
,
27291 IX86_BUILTIN_VPCOMEQB
,
27292 IX86_BUILTIN_VPCOMNEB
,
27293 IX86_BUILTIN_VPCOMLTB
,
27294 IX86_BUILTIN_VPCOMLEB
,
27295 IX86_BUILTIN_VPCOMGTB
,
27296 IX86_BUILTIN_VPCOMGEB
,
27297 IX86_BUILTIN_VPCOMFALSEB
,
27298 IX86_BUILTIN_VPCOMTRUEB
,
27300 IX86_BUILTIN_VPCOMEQW
,
27301 IX86_BUILTIN_VPCOMNEW
,
27302 IX86_BUILTIN_VPCOMLTW
,
27303 IX86_BUILTIN_VPCOMLEW
,
27304 IX86_BUILTIN_VPCOMGTW
,
27305 IX86_BUILTIN_VPCOMGEW
,
27306 IX86_BUILTIN_VPCOMFALSEW
,
27307 IX86_BUILTIN_VPCOMTRUEW
,
27309 IX86_BUILTIN_VPCOMEQD
,
27310 IX86_BUILTIN_VPCOMNED
,
27311 IX86_BUILTIN_VPCOMLTD
,
27312 IX86_BUILTIN_VPCOMLED
,
27313 IX86_BUILTIN_VPCOMGTD
,
27314 IX86_BUILTIN_VPCOMGED
,
27315 IX86_BUILTIN_VPCOMFALSED
,
27316 IX86_BUILTIN_VPCOMTRUED
,
27318 IX86_BUILTIN_VPCOMEQQ
,
27319 IX86_BUILTIN_VPCOMNEQ
,
27320 IX86_BUILTIN_VPCOMLTQ
,
27321 IX86_BUILTIN_VPCOMLEQ
,
27322 IX86_BUILTIN_VPCOMGTQ
,
27323 IX86_BUILTIN_VPCOMGEQ
,
27324 IX86_BUILTIN_VPCOMFALSEQ
,
27325 IX86_BUILTIN_VPCOMTRUEQ
,
27327 /* LWP instructions. */
27328 IX86_BUILTIN_LLWPCB
,
27329 IX86_BUILTIN_SLWPCB
,
27330 IX86_BUILTIN_LWPVAL32
,
27331 IX86_BUILTIN_LWPVAL64
,
27332 IX86_BUILTIN_LWPINS32
,
27333 IX86_BUILTIN_LWPINS64
,
27338 IX86_BUILTIN_XBEGIN
,
27340 IX86_BUILTIN_XABORT
,
27341 IX86_BUILTIN_XTEST
,
27343 /* BMI instructions. */
27344 IX86_BUILTIN_BEXTR32
,
27345 IX86_BUILTIN_BEXTR64
,
27348 /* TBM instructions. */
27349 IX86_BUILTIN_BEXTRI32
,
27350 IX86_BUILTIN_BEXTRI64
,
27352 /* BMI2 instructions. */
27353 IX86_BUILTIN_BZHI32
,
27354 IX86_BUILTIN_BZHI64
,
27355 IX86_BUILTIN_PDEP32
,
27356 IX86_BUILTIN_PDEP64
,
27357 IX86_BUILTIN_PEXT32
,
27358 IX86_BUILTIN_PEXT64
,
27360 /* ADX instructions. */
27361 IX86_BUILTIN_ADDCARRYX32
,
27362 IX86_BUILTIN_ADDCARRYX64
,
27364 /* FSGSBASE instructions. */
27365 IX86_BUILTIN_RDFSBASE32
,
27366 IX86_BUILTIN_RDFSBASE64
,
27367 IX86_BUILTIN_RDGSBASE32
,
27368 IX86_BUILTIN_RDGSBASE64
,
27369 IX86_BUILTIN_WRFSBASE32
,
27370 IX86_BUILTIN_WRFSBASE64
,
27371 IX86_BUILTIN_WRGSBASE32
,
27372 IX86_BUILTIN_WRGSBASE64
,
27374 /* RDRND instructions. */
27375 IX86_BUILTIN_RDRAND16_STEP
,
27376 IX86_BUILTIN_RDRAND32_STEP
,
27377 IX86_BUILTIN_RDRAND64_STEP
,
27379 /* RDSEED instructions. */
27380 IX86_BUILTIN_RDSEED16_STEP
,
27381 IX86_BUILTIN_RDSEED32_STEP
,
27382 IX86_BUILTIN_RDSEED64_STEP
,
27384 /* F16C instructions. */
27385 IX86_BUILTIN_CVTPH2PS
,
27386 IX86_BUILTIN_CVTPH2PS256
,
27387 IX86_BUILTIN_CVTPS2PH
,
27388 IX86_BUILTIN_CVTPS2PH256
,
27390 /* CFString built-in for darwin */
27391 IX86_BUILTIN_CFSTRING
,
27393 /* Builtins to get CPU type and supported features. */
27394 IX86_BUILTIN_CPU_INIT
,
27395 IX86_BUILTIN_CPU_IS
,
27396 IX86_BUILTIN_CPU_SUPPORTS
,
27401 /* Table for the ix86 builtin decls. */
27402 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27404 /* Table of all of the builtin functions that are possible with different ISA's
27405 but are waiting to be built until a function is declared to use that
27407 struct builtin_isa
{
27408 const char *name
; /* function name */
27409 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27410 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27411 bool const_p
; /* true if the declaration is constant */
27412 bool set_and_not_built_p
;
27415 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27418 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27419 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27420 function decl in the ix86_builtins array. Returns the function decl or
27421 NULL_TREE, if the builtin was not added.
27423 If the front end has a special hook for builtin functions, delay adding
27424 builtin functions that aren't in the current ISA until the ISA is changed
27425 with function specific optimization. Doing so, can save about 300K for the
27426 default compiler. When the builtin is expanded, check at that time whether
27429 If the front end doesn't have a special hook, record all builtins, even if
27430 it isn't an instruction set in the current ISA in case the user uses
27431 function specific options for a different ISA, so that we don't get scope
27432 errors if a builtin is added in the middle of a function scope. */
27435 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27436 enum ix86_builtin_func_type tcode
,
27437 enum ix86_builtins code
)
27439 tree decl
= NULL_TREE
;
27441 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
27443 ix86_builtins_isa
[(int) code
].isa
= mask
;
27445 mask
&= ~OPTION_MASK_ISA_64BIT
;
27447 || (mask
& ix86_isa_flags
) != 0
27448 || (lang_hooks
.builtin_function
27449 == lang_hooks
.builtin_function_ext_scope
))
27452 tree type
= ix86_get_builtin_func_type (tcode
);
27453 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
27455 ix86_builtins
[(int) code
] = decl
;
27456 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
27460 ix86_builtins
[(int) code
] = NULL_TREE
;
27461 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
27462 ix86_builtins_isa
[(int) code
].name
= name
;
27463 ix86_builtins_isa
[(int) code
].const_p
= false;
27464 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
27471 /* Like def_builtin, but also marks the function decl "const". */
27474 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
27475 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
27477 tree decl
= def_builtin (mask
, name
, tcode
, code
);
27479 TREE_READONLY (decl
) = 1;
27481 ix86_builtins_isa
[(int) code
].const_p
= true;
27486 /* Add any new builtin functions for a given ISA that may not have been
27487 declared. This saves a bit of space compared to adding all of the
27488 declarations to the tree, even if we didn't use them. */
27491 ix86_add_new_builtins (HOST_WIDE_INT isa
)
27495 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
27497 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
27498 && ix86_builtins_isa
[i
].set_and_not_built_p
)
27502 /* Don't define the builtin again. */
27503 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
27505 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
27506 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
27507 type
, i
, BUILT_IN_MD
, NULL
,
27510 ix86_builtins
[i
] = decl
;
27511 if (ix86_builtins_isa
[i
].const_p
)
27512 TREE_READONLY (decl
) = 1;
27517 /* Bits for builtin_description.flag. */
27519 /* Set when we don't support the comparison natively, and should
27520 swap_comparison in order to support it. */
27521 #define BUILTIN_DESC_SWAP_OPERANDS 1
27523 struct builtin_description
27525 const HOST_WIDE_INT mask
;
27526 const enum insn_code icode
;
27527 const char *const name
;
27528 const enum ix86_builtins code
;
27529 const enum rtx_code comparison
;
27533 static const struct builtin_description bdesc_comi
[] =
27535 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
27536 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
27537 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
27538 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
27539 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
27540 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
27541 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
27542 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
27543 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
27544 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
27545 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
27546 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
27547 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
27548 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
27549 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
27550 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
27551 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
27552 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
27553 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
27554 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
27555 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
27556 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
27557 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
27558 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
27561 static const struct builtin_description bdesc_pcmpestr
[] =
27564 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
27565 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
27566 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
27567 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
27568 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
27569 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
27570 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
27573 static const struct builtin_description bdesc_pcmpistr
[] =
27576 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
27577 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
27578 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
27579 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
27580 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
27581 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
27582 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
27585 /* Special builtins with variable number of arguments. */
27586 static const struct builtin_description bdesc_special_args
[] =
27588 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27589 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
27590 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27593 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27596 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27598 /* FXSR, XSAVE and XSAVEOPT */
27599 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27600 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27601 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27602 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27603 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27605 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27606 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27607 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27608 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27609 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
27612 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27613 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27614 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27616 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27617 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
27618 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27619 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
27621 /* SSE or 3DNow!A */
27622 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27623 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
27626 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27627 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27628 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27629 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
27630 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27631 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
27632 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
27633 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
27634 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
27635 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27637 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27638 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
27641 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
27644 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
27647 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
27648 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
27651 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27652 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27654 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
27655 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27656 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27657 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
27658 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
27660 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
27661 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
27662 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27663 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27664 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27665 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
27666 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
27668 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
27669 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
27670 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
27672 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
27673 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
27674 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
27675 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
27676 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
27677 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
27678 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
27679 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
27692 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
27693 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
27694 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
27695 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
27696 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
27697 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
27700 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27701 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27702 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27703 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
27704 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27705 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27706 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
27707 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
27710 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
27711 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
27712 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
27715 /* Builtins with variable number of arguments. */
27716 static const struct builtin_description bdesc_args
[] =
27718 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
27719 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
27720 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
27721 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27722 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27723 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
27724 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
27727 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27728 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27729 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27730 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27731 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27732 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27734 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27735 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27736 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27737 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27738 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27739 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27740 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27741 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27743 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27744 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27746 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27747 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27748 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27749 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27751 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27752 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27753 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27754 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27755 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27756 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27758 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27759 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27760 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27761 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27762 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27763 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27765 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27766 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27767 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27769 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27771 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27772 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27773 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27774 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27775 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27776 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27778 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27779 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27780 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27781 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27782 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27783 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27785 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27786 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27787 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27788 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27791 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27792 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27793 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27794 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27796 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27797 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27798 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27799 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27800 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27801 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27802 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27803 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27804 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27805 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27806 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27807 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27808 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27809 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27810 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27813 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27814 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27815 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27816 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27817 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27818 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27821 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27822 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27823 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27824 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27825 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27826 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27827 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27828 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27829 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27830 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27831 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27832 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27834 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27836 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27837 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27838 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27839 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27840 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27841 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27842 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27843 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27845 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27846 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27847 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27848 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27849 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27850 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27851 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27852 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27853 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27854 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27855 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27856 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27857 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27858 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27859 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27860 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27861 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27862 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27863 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27864 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27866 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27867 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27868 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27869 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27871 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27872 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27873 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27874 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27876 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27878 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27879 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27880 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27881 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27882 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27884 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27885 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27886 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27888 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27890 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27891 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27892 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27894 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27895 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27897 /* SSE MMX or 3Dnow!A */
27898 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27899 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27900 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27902 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27903 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27904 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27905 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27907 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27908 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27910 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27913 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27915 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27916 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27917 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27918 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27919 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27921 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27922 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27923 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27924 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27925 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27927 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27929 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27930 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27931 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27932 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27934 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27935 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27936 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27938 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27939 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27940 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27941 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27942 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27943 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27944 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27945 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27947 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27948 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27949 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27950 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27951 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27952 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27953 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27954 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27955 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27956 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27957 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27958 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27959 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27960 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27961 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27962 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27963 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27964 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27965 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27966 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27968 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27969 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27970 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27971 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27973 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27974 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27975 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27976 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27978 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27980 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27981 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27982 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27984 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27986 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27987 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27988 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27989 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27990 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27991 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27992 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27993 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27995 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27996 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27997 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27998 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27999 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28000 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28001 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28002 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28004 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28005 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
28007 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28008 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28009 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28010 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28012 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28013 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28015 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28018 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28019 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28022 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28023 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28024 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28025 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28027 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28028 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28029 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28030 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28031 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28032 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28033 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28037 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28038 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28040 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
28043 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
28044 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28046 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
28048 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
28049 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
28050 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
28051 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
28053 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28054 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28055 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28056 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28057 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28058 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28059 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28061 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28062 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28063 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28064 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28065 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28066 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28067 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28069 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28070 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28072 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
28075 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
28080 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28083 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28084 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28087 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28088 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28090 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28091 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28092 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28093 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28094 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28095 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28098 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28099 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
28100 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28101 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
28102 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28103 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28105 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28106 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28107 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28108 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28109 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28110 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28111 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28112 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28113 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28114 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28115 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28116 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28117 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
28118 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
28119 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28120 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28121 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28122 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28123 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28124 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28125 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28126 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28127 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28128 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28131 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
28132 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
28135 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28136 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28137 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
28138 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
28139 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28140 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28141 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28142 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
28143 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
28144 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
28146 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28147 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28148 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28149 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28150 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28151 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28152 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28153 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28154 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28155 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28156 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28157 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28158 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28160 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28161 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28162 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28163 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28164 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28165 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28166 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28167 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28168 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28169 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28170 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28171 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28174 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28175 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28176 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28177 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28179 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28180 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
28181 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
28182 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28184 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28185 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28187 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28188 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28190 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28191 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
28192 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
28193 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28195 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
28196 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
28198 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28199 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28201 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28202 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28203 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28206 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28207 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
28208 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
28209 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28210 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28213 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
28214 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
28215 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
28216 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
28220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28231 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28232 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28233 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28234 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28235 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28236 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28237 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28238 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28239 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28240 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28241 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28242 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28243 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28244 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28245 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28246 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28247 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28248 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28249 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28250 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28251 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28252 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28253 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28254 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28255 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28256 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28258 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28259 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28260 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28261 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28263 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28264 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28265 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28266 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28267 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28268 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28269 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28270 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28271 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28272 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28273 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28274 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28275 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28276 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28277 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28278 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28279 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28280 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28281 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28282 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28283 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28284 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28285 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28286 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28287 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28288 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28289 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28290 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28291 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28292 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28293 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28294 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28295 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28296 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28298 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28299 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28300 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28302 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28303 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28304 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28305 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28306 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28308 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28310 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28311 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28313 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28314 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28315 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28316 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28318 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28319 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28321 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28322 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28324 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28325 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28326 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28327 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28329 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28330 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28332 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28333 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28335 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28336 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28337 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28338 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28340 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28341 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28342 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28343 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28344 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28345 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28347 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28348 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28349 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28350 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28351 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28352 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28353 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28354 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28355 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28356 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28357 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28358 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28359 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28360 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28361 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28363 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28364 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28366 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28367 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28369 { OPTION_MASK_ISA_AVX
 /* Fix: builtin name previously had a trailing space
    ("__builtin_ia32_vec_pack_sfix256 "), which made the builtin
    unmatchable by its documented name; every other entry in this
    table is space-free.  */
 , CODE_FOR_vec_pack_sfix_v4df
 , "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256
 , UNKNOWN
 , (int) V8SI_FTYPE_V4DF_V4DF
 },
28372 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28373 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28374 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28375 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28376 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28377 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28378 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28379 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28380 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28381 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28382 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28383 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28384 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28385 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28386 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28387 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28388 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28389 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28390 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28391 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28392 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28393 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28394 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28395 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28396 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28397 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28398 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28399 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28400 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28401 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28402 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28403 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28404 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28405 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28406 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28407 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28408 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28409 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28410 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28411 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28412 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28413 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28414 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28415 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28416 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28417 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28418 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28419 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28420 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28421 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28422 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28423 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28424 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28425 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28426 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28427 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28428 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28429 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28430 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28431 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28432 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28433 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28434 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28435 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28436 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28437 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28438 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28439 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28440 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28441 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28442 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
28443 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28444 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28445 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28446 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
28447 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28448 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
28449 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28450 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28451 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28452 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28453 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28454 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28455 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28456 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28457 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28458 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28459 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28460 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28461 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28462 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28463 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
28464 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
28465 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
28466 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
28467 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
28468 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
28469 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
28470 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28471 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28472 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28473 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28474 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28475 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28476 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28477 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28478 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28479 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28480 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28481 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28482 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28483 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28484 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28485 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28486 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28487 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28488 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28489 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28490 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28491 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
28492 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28493 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
28494 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
28495 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28496 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
28497 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28498 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28499 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28500 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28501 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28502 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28503 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28504 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
28505 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
28506 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
28507 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
28508 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28509 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28510 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28511 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28512 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28513 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28514 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28515 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28516 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28517 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28519 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28522 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28523 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28524 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
28527 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28528 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28531 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
28532 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
28533 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
28534 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
28537 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28538 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28539 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28540 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28541 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28542 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Function-type aliases for the multi-argument (FMA4/XOP) builtins
   below.  The name encodes the argument count and element kind
   (e.g. MULTI_ARG_3_SF = three V4SF operands); each expands to one of
   the ix86 function-type enumerators.  _CMP variants carry an extra
   comparison code, _TF variants a trueness/falseness code, and _IMM
   variants an immediate operand.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
28599 static const struct builtin_description bdesc_multi_arg
[] =
28601 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
28602 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
28603 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28604 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
28605 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
28606 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28608 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
28609 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
28610 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28611 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
28612 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
28613 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28615 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
28616 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
28617 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28618 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
28619 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
28620 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28621 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
28622 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
28623 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28624 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
28625 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
28626 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28628 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
28629 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
28630 UNKNOWN
, (int)MULTI_ARG_3_SF
},
28631 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
28632 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
28633 UNKNOWN
, (int)MULTI_ARG_3_DF
},
28634 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
28635 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
28636 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28637 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
28638 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
28639 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28641 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28642 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
28643 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28644 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28645 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
28646 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
28647 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
28649 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28650 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
28651 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
28652 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
28653 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
28654 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
28655 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
28657 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
28659 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28660 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
28661 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28662 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28663 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28664 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
28665 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28666 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28667 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28668 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
28669 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28670 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
28672 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28673 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28674 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28675 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28676 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
28677 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
28678 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
28679 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
28680 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28681 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28682 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28683 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28684 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
28685 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
28686 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
28687 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
28689 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
28690 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
28691 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
28692 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
28693 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
28694 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
28696 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28697 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28698 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28699 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28700 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28701 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28702 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28703 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
28704 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
28705 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28706 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
28707 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28708 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
28709 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
28710 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
28712 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28713 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28714 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28715 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
28716 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
28717 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
28718 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
28720 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28721 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28722 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28723 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
28724 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
28725 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
28726 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
28728 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28729 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28730 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28731 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
28732 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
28733 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
28734 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
28736 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28737 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28738 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28739 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28740 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28741 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28742 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28744 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28745 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28746 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28747 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28748 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28749 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28750 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28752 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28753 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28754 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28755 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28756 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28757 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28758 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28760 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28761 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28762 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28763 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28764 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28765 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28766 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28768 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28769 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28770 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28771 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28772 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28773 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28774 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28776 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28777 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28778 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28779 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28780 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28781 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28782 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28783 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28785 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28786 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28787 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28788 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28789 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28790 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28791 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28792 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28794 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28795 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28796 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28797 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
28805 static const struct builtin_description bdesc_tm
[] =
28807 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28808 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28809 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28810 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28811 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28812 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28813 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28815 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28816 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28817 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28818 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28819 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28820 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28821 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28823 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28824 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28825 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28826 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28827 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28828 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28829 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28831 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28832 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28833 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28836 /* TM callbacks. */
28838 /* Return the builtin decl needed to load a vector of TYPE. */
28841 ix86_builtin_tm_load (tree type
)
28843 if (TREE_CODE (type
) == VECTOR_TYPE
)
28845 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28848 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28850 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28852 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28858 /* Return the builtin decl needed to store a vector of TYPE. */
28861 ix86_builtin_tm_store (tree type
)
28863 if (TREE_CODE (type
) == VECTOR_TYPE
)
28865 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28868 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28870 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28872 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28878 /* Initialize the transactional memory vector load/store builtins. */
28881 ix86_init_tm_builtins (void)
28883 enum ix86_builtin_func_type ftype
;
28884 const struct builtin_description
*d
;
28887 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28888 tree attrs_log
, attrs_type_log
;
28893 /* If there are no builtins defined, we must be compiling in a
28894 language without trans-mem support. */
28895 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28898 /* Use whatever attributes a normal TM load has. */
28899 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28900 attrs_load
= DECL_ATTRIBUTES (decl
);
28901 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28902 /* Use whatever attributes a normal TM store has. */
28903 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28904 attrs_store
= DECL_ATTRIBUTES (decl
);
28905 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28906 /* Use whatever attributes a normal TM log has. */
28907 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28908 attrs_log
= DECL_ATTRIBUTES (decl
);
28909 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28911 for (i
= 0, d
= bdesc_tm
;
28912 i
< ARRAY_SIZE (bdesc_tm
);
28915 if ((d
->mask
& ix86_isa_flags
) != 0
28916 || (lang_hooks
.builtin_function
28917 == lang_hooks
.builtin_function_ext_scope
))
28919 tree type
, attrs
, attrs_type
;
28920 enum built_in_function code
= (enum built_in_function
) d
->code
;
28922 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28923 type
= ix86_get_builtin_func_type (ftype
);
28925 if (BUILTIN_TM_LOAD_P (code
))
28927 attrs
= attrs_load
;
28928 attrs_type
= attrs_type_load
;
28930 else if (BUILTIN_TM_STORE_P (code
))
28932 attrs
= attrs_store
;
28933 attrs_type
= attrs_type_store
;
28938 attrs_type
= attrs_type_log
;
28940 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28941 /* The builtin without the prefix for
28942 calling it directly. */
28943 d
->name
+ strlen ("__builtin_"),
28945 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28946 set the TYPE_ATTRIBUTES. */
28947 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28949 set_builtin_decl (code
, decl
, false);
28954 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28955 in the current target ISA to allow the user to compile particular modules
28956 with different target specific options that differ from the command line
28959 ix86_init_mmx_sse_builtins (void)
28961 const struct builtin_description
* d
;
28962 enum ix86_builtin_func_type ftype
;
28965 /* Add all special builtins with variable number of operands. */
28966 for (i
= 0, d
= bdesc_special_args
;
28967 i
< ARRAY_SIZE (bdesc_special_args
);
28973 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28974 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28977 /* Add all builtins with variable number of operands. */
28978 for (i
= 0, d
= bdesc_args
;
28979 i
< ARRAY_SIZE (bdesc_args
);
28985 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28986 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28989 /* pcmpestr[im] insns. */
28990 for (i
= 0, d
= bdesc_pcmpestr
;
28991 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28994 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28995 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28997 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28998 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29001 /* pcmpistr[im] insns. */
29002 for (i
= 0, d
= bdesc_pcmpistr
;
29003 i
< ARRAY_SIZE (bdesc_pcmpistr
);
29006 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29007 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
29009 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
29010 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29013 /* comi/ucomi insns. */
29014 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
29016 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
29017 ftype
= INT_FTYPE_V2DF_V2DF
;
29019 ftype
= INT_FTYPE_V4SF_V4SF
;
29020 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29024 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
29025 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
29026 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
29027 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
29029 /* SSE or 3DNow!A */
29030 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29031 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
29032 IX86_BUILTIN_MASKMOVQ
);
29035 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
29036 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
29038 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
29039 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
29040 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
29041 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
29044 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
29045 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
29046 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
29047 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
29050 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
29051 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
29052 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
29053 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
29054 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
29055 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
29056 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
29057 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
29058 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
29059 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
29060 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
29061 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
29064 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
29065 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
29068 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
29069 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
29070 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
29071 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
29072 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
29073 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
29074 IX86_BUILTIN_RDRAND64_STEP
);
29077 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
29078 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
29079 IX86_BUILTIN_GATHERSIV2DF
);
29081 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
29082 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
29083 IX86_BUILTIN_GATHERSIV4DF
);
29085 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
29086 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
29087 IX86_BUILTIN_GATHERDIV2DF
);
29089 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
29090 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
29091 IX86_BUILTIN_GATHERDIV4DF
);
29093 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
29094 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
29095 IX86_BUILTIN_GATHERSIV4SF
);
29097 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
29098 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
29099 IX86_BUILTIN_GATHERSIV8SF
);
29101 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
29102 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
29103 IX86_BUILTIN_GATHERDIV4SF
);
29105 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
29106 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
29107 IX86_BUILTIN_GATHERDIV8SF
);
29109 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
29110 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
29111 IX86_BUILTIN_GATHERSIV2DI
);
29113 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
29114 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
29115 IX86_BUILTIN_GATHERSIV4DI
);
29117 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
29118 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
29119 IX86_BUILTIN_GATHERDIV2DI
);
29121 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
29122 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
29123 IX86_BUILTIN_GATHERDIV4DI
);
29125 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
29126 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
29127 IX86_BUILTIN_GATHERSIV4SI
);
29129 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
29130 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
29131 IX86_BUILTIN_GATHERSIV8SI
);
29133 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
29134 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
29135 IX86_BUILTIN_GATHERDIV4SI
);
29137 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
29138 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
29139 IX86_BUILTIN_GATHERDIV8SI
);
29141 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
29142 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
29143 IX86_BUILTIN_GATHERALTSIV4DF
);
29145 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
29146 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
29147 IX86_BUILTIN_GATHERALTDIV8SF
);
29149 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
29150 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
29151 IX86_BUILTIN_GATHERALTSIV4DI
);
29153 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
29154 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
29155 IX86_BUILTIN_GATHERALTDIV8SI
);
29158 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
29159 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
29161 /* MMX access to the vec_init patterns. */
29162 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
29163 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
29165 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
29166 V4HI_FTYPE_HI_HI_HI_HI
,
29167 IX86_BUILTIN_VEC_INIT_V4HI
);
29169 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
29170 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
29171 IX86_BUILTIN_VEC_INIT_V8QI
);
29173 /* Access to the vec_extract patterns. */
29174 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
29175 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
29176 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
29177 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
29178 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
29179 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
29180 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
29181 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
29182 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
29183 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
29185 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29186 "__builtin_ia32_vec_ext_v4hi",
29187 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
29189 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
29190 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
29192 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
29193 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
29195 /* Access to the vec_set patterns. */
29196 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
29197 "__builtin_ia32_vec_set_v2di",
29198 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
29200 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
29201 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
29203 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
29204 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
29206 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
29207 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
29209 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29210 "__builtin_ia32_vec_set_v4hi",
29211 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
29213 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
29214 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
29217 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
29218 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
29219 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
29220 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
29221 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
29222 "__builtin_ia32_rdseed_di_step",
29223 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
29226 def_builtin (0, "__builtin_ia32_addcarryx_u32",
29227 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29228 def_builtin (OPTION_MASK_ISA_64BIT
,
29229 "__builtin_ia32_addcarryx_u64",
29230 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29231 IX86_BUILTIN_ADDCARRYX64
);
29233 /* Add FMA4 multi-arg argument instructions */
29234 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29239 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29240 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29244 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
29245 to return a pointer to VERSION_DECL if the outcome of the expression
29246 formed by PREDICATE_CHAIN is true. This function will be called during
29247 version dispatch to decide which function version to execute. It returns
29248 the basic block at the end, to which more conditions can be added. */
29251 add_condition_to_bb (tree function_decl
, tree version_decl
,
29252 tree predicate_chain
, basic_block new_bb
)
29254 gimple return_stmt
;
29255 tree convert_expr
, result_var
;
29256 gimple convert_stmt
;
29257 gimple call_cond_stmt
;
29258 gimple if_else_stmt
;
29260 basic_block bb1
, bb2
, bb3
;
29263 tree cond_var
, and_expr_var
= NULL_TREE
;
29266 tree predicate_decl
, predicate_arg
;
29268 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
29270 gcc_assert (new_bb
!= NULL
);
29271 gseq
= bb_seq (new_bb
);
29274 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
29275 build_fold_addr_expr (version_decl
));
29276 result_var
= create_tmp_var (ptr_type_node
, NULL
);
29277 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
29278 return_stmt
= gimple_build_return (result_var
);
29280 if (predicate_chain
== NULL_TREE
)
29282 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29283 gimple_seq_add_stmt (&gseq
, return_stmt
);
29284 set_bb_seq (new_bb
, gseq
);
29285 gimple_set_bb (convert_stmt
, new_bb
);
29286 gimple_set_bb (return_stmt
, new_bb
);
29291 while (predicate_chain
!= NULL
)
29293 cond_var
= create_tmp_var (integer_type_node
, NULL
);
29294 predicate_decl
= TREE_PURPOSE (predicate_chain
);
29295 predicate_arg
= TREE_VALUE (predicate_chain
);
29296 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
29297 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
29299 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
29300 gimple_set_bb (call_cond_stmt
, new_bb
);
29301 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
29303 predicate_chain
= TREE_CHAIN (predicate_chain
);
29305 if (and_expr_var
== NULL
)
29306 and_expr_var
= cond_var
;
29309 gimple assign_stmt
;
29310 /* Use MIN_EXPR to check if any integer is zero?.
29311 and_expr_var = min_expr <cond_var, and_expr_var> */
29312 assign_stmt
= gimple_build_assign (and_expr_var
,
29313 build2 (MIN_EXPR
, integer_type_node
,
29314 cond_var
, and_expr_var
));
29316 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
29317 gimple_set_bb (assign_stmt
, new_bb
);
29318 gimple_seq_add_stmt (&gseq
, assign_stmt
);
29322 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
29324 NULL_TREE
, NULL_TREE
);
29325 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
29326 gimple_set_bb (if_else_stmt
, new_bb
);
29327 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
29329 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29330 gimple_seq_add_stmt (&gseq
, return_stmt
);
29331 set_bb_seq (new_bb
, gseq
);
29334 e12
= split_block (bb1
, if_else_stmt
);
29336 e12
->flags
&= ~EDGE_FALLTHRU
;
29337 e12
->flags
|= EDGE_TRUE_VALUE
;
29339 e23
= split_block (bb2
, return_stmt
);
29341 gimple_set_bb (convert_stmt
, bb2
);
29342 gimple_set_bb (return_stmt
, bb2
);
29345 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
29348 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
29355 /* This parses the attribute arguments to target in DECL and determines
29356 the right builtin to use to match the platform specification.
29357 It returns the priority value for this version decl. If PREDICATE_LIST
29358 is not NULL, it stores the list of cpu features that need to be checked
29359 before dispatching this function. */
29361 static unsigned int
29362 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
29365 struct cl_target_option cur_target
;
29367 struct cl_target_option
*new_target
;
29368 const char *arg_str
= NULL
;
29369 const char *attrs_str
= NULL
;
29370 char *tok_str
= NULL
;
29373 /* Priority of i386 features, greater value is higher priority. This is
29374 used to decide the order in which function dispatch must happen. For
29375 instance, a version specialized for SSE4.2 should be checked for dispatch
29376 before a version for SSE3, as SSE4.2 implies SSE3. */
29377 enum feature_priority
29398 enum feature_priority priority
= P_ZERO
;
29400 /* These are the target attribute strings for which a dispatcher is
29401 available, from fold_builtin_cpu. */
29403 static struct _feature_list
29405 const char *const name
;
29406 const enum feature_priority priority
;
29408 const feature_list
[] =
29414 {"ssse3", P_SSSE3
},
29415 {"sse4.1", P_SSE4_1
},
29416 {"sse4.2", P_SSE4_2
},
29417 {"popcnt", P_POPCNT
},
29423 static unsigned int NUM_FEATURES
29424 = sizeof (feature_list
) / sizeof (struct _feature_list
);
29428 tree predicate_chain
= NULL_TREE
;
29429 tree predicate_decl
, predicate_arg
;
29431 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29432 gcc_assert (attrs
!= NULL
);
29434 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
29436 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
29437 attrs_str
= TREE_STRING_POINTER (attrs
);
29439 /* Return priority zero for default function. */
29440 if (strcmp (attrs_str
, "default") == 0)
29443 /* Handle arch= if specified. For priority, set it to be 1 more than
29444 the best instruction set the processor can handle. For instance, if
29445 there is a version for atom and a version for ssse3 (the highest ISA
29446 priority for atom), the atom version must be checked for dispatch
29447 before the ssse3 version. */
29448 if (strstr (attrs_str
, "arch=") != NULL
)
29450 cl_target_option_save (&cur_target
, &global_options
);
29451 target_node
= ix86_valid_target_attribute_tree (attrs
);
29453 gcc_assert (target_node
);
29454 new_target
= TREE_TARGET_OPTION (target_node
);
29455 gcc_assert (new_target
);
29457 if (new_target
->arch_specified
&& new_target
->arch
> 0)
29459 switch (new_target
->arch
)
29461 case PROCESSOR_CORE2
:
29463 priority
= P_PROC_SSSE3
;
29465 case PROCESSOR_COREI7
:
29466 arg_str
= "corei7";
29467 priority
= P_PROC_SSE4_2
;
29469 case PROCESSOR_ATOM
:
29471 priority
= P_PROC_SSSE3
;
29473 case PROCESSOR_AMDFAM10
:
29474 arg_str
= "amdfam10h";
29475 priority
= P_PROC_SSE4_a
;
29477 case PROCESSOR_BDVER1
:
29478 arg_str
= "bdver1";
29479 priority
= P_PROC_FMA
;
29481 case PROCESSOR_BDVER2
:
29482 arg_str
= "bdver2";
29483 priority
= P_PROC_FMA
;
29488 cl_target_option_restore (&global_options
, &cur_target
);
29490 if (predicate_list
&& arg_str
== NULL
)
29492 error_at (DECL_SOURCE_LOCATION (decl
),
29493 "No dispatcher found for the versioning attributes");
29497 if (predicate_list
)
29499 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
29500 /* For a C string literal the length includes the trailing NULL. */
29501 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
29502 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
29507 /* Process feature name. */
29508 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
29509 strcpy (tok_str
, attrs_str
);
29510 token
= strtok (tok_str
, ",");
29511 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
29513 while (token
!= NULL
)
29515 /* Do not process "arch=" */
29516 if (strncmp (token
, "arch=", 5) == 0)
29518 token
= strtok (NULL
, ",");
29521 for (i
= 0; i
< NUM_FEATURES
; ++i
)
29523 if (strcmp (token
, feature_list
[i
].name
) == 0)
29525 if (predicate_list
)
29527 predicate_arg
= build_string_literal (
29528 strlen (feature_list
[i
].name
) + 1,
29529 feature_list
[i
].name
);
29530 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
29533 /* Find the maximum priority feature. */
29534 if (feature_list
[i
].priority
> priority
)
29535 priority
= feature_list
[i
].priority
;
29540 if (predicate_list
&& i
== NUM_FEATURES
)
29542 error_at (DECL_SOURCE_LOCATION (decl
),
29543 "No dispatcher found for %s", token
);
29546 token
= strtok (NULL
, ",");
29550 if (predicate_list
&& predicate_chain
== NULL_TREE
)
29552 error_at (DECL_SOURCE_LOCATION (decl
),
29553 "No dispatcher found for the versioning attributes : %s",
29557 else if (predicate_list
)
29559 predicate_chain
= nreverse (predicate_chain
);
29560 *predicate_list
= predicate_chain
;
29566 /* This compares the priority of target features in function DECL1
29567 and DECL2. It returns positive value if DECL1 is higher priority,
29568 negative value if DECL2 is higher priority and 0 if they are the
29572 ix86_compare_version_priority (tree decl1
, tree decl2
)
29574 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
29575 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
29577 return (int)priority1
- (int)priority2
;
29580 /* V1 and V2 point to function versions with different priorities
29581 based on the target ISA. This function compares their priorities. */
29584 feature_compare (const void *v1
, const void *v2
)
29586 typedef struct _function_version_info
29589 tree predicate_chain
;
29590 unsigned int dispatch_priority
;
29591 } function_version_info
;
29593 const function_version_info c1
= *(const function_version_info
*)v1
;
29594 const function_version_info c2
= *(const function_version_info
*)v2
;
29595 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
29598 /* This function generates the dispatch function for
29599 multi-versioned functions. DISPATCH_DECL is the function which will
29600 contain the dispatch logic. FNDECLS are the function choices for
29601 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
29602 in DISPATCH_DECL in which the dispatch code is generated. */
29605 dispatch_function_versions (tree dispatch_decl
,
29607 basic_block
*empty_bb
)
29610 gimple ifunc_cpu_init_stmt
;
29614 vec
<tree
> *fndecls
;
29615 unsigned int num_versions
= 0;
29616 unsigned int actual_versions
= 0;
29619 struct _function_version_info
29622 tree predicate_chain
;
29623 unsigned int dispatch_priority
;
29624 }*function_version_info
;
29626 gcc_assert (dispatch_decl
!= NULL
29627 && fndecls_p
!= NULL
29628 && empty_bb
!= NULL
);
29630 /*fndecls_p is actually a vector. */
29631 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
29633 /* At least one more version other than the default. */
29634 num_versions
= fndecls
->length ();
29635 gcc_assert (num_versions
>= 2);
29637 function_version_info
= (struct _function_version_info
*)
29638 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
29640 /* The first version in the vector is the default decl. */
29641 default_decl
= (*fndecls
)[0];
29643 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
29645 gseq
= bb_seq (*empty_bb
);
29646 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
29647 constructors, so explicity call __builtin_cpu_init here. */
29648 ifunc_cpu_init_stmt
= gimple_build_call_vec (
29649 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
29650 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
29651 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
29652 set_bb_seq (*empty_bb
, gseq
);
29657 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
29659 tree version_decl
= ele
;
29660 tree predicate_chain
= NULL_TREE
;
29661 unsigned int priority
;
29662 /* Get attribute string, parse it and find the right predicate decl.
29663 The predicate function could be a lengthy combination of many
29664 features, like arch-type and various isa-variants. */
29665 priority
= get_builtin_code_for_version (version_decl
,
29668 if (predicate_chain
== NULL_TREE
)
29671 function_version_info
[actual_versions
].version_decl
= version_decl
;
29672 function_version_info
[actual_versions
].predicate_chain
29674 function_version_info
[actual_versions
].dispatch_priority
= priority
;
29678 /* Sort the versions according to descending order of dispatch priority. The
29679 priority is based on the ISA. This is not a perfect solution. There
29680 could still be ambiguity. If more than one function version is suitable
29681 to execute, which one should be dispatched? In future, allow the user
29682 to specify a dispatch priority next to the version. */
29683 qsort (function_version_info
, actual_versions
,
29684 sizeof (struct _function_version_info
), feature_compare
);
29686 for (i
= 0; i
< actual_versions
; ++i
)
29687 *empty_bb
= add_condition_to_bb (dispatch_decl
,
29688 function_version_info
[i
].version_decl
,
29689 function_version_info
[i
].predicate_chain
,
29692 /* dispatch default version at the end. */
29693 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
29696 free (function_version_info
);
29700 /* Comparator function to be used in qsort routine to sort attribute
29701 specification strings to "target". */
29704 attr_strcmp (const void *v1
, const void *v2
)
29706 const char *c1
= *(char *const*)v1
;
29707 const char *c2
= *(char *const*)v2
;
29708 return strcmp (c1
, c2
);
29711 /* ARGLIST is the argument to target attribute. This function tokenizes
29712 the comma separated arguments, sorts them and returns a string which
29713 is a unique identifier for the comma separated arguments. It also
29714 replaces non-identifier characters "=,-" with "_". */
29717 sorted_attr_string (tree arglist
)
29720 size_t str_len_sum
= 0;
29721 char **args
= NULL
;
29722 char *attr_str
, *ret_str
;
29724 unsigned int argnum
= 1;
29727 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29729 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29730 size_t len
= strlen (str
);
29731 str_len_sum
+= len
+ 1;
29732 if (arg
!= arglist
)
29734 for (i
= 0; i
< strlen (str
); i
++)
29739 attr_str
= XNEWVEC (char, str_len_sum
);
29741 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29743 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29744 size_t len
= strlen (str
);
29745 memcpy (attr_str
+ str_len_sum
, str
, len
);
29746 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
29747 str_len_sum
+= len
+ 1;
29750 /* Replace "=,-" with "_". */
29751 for (i
= 0; i
< strlen (attr_str
); i
++)
29752 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
29758 args
= XNEWVEC (char *, argnum
);
29761 attr
= strtok (attr_str
, ",");
29762 while (attr
!= NULL
)
29766 attr
= strtok (NULL
, ",");
29769 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
29771 ret_str
= XNEWVEC (char, str_len_sum
);
29773 for (i
= 0; i
< argnum
; i
++)
29775 size_t len
= strlen (args
[i
]);
29776 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
29777 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
29778 str_len_sum
+= len
+ 1;
29782 XDELETEVEC (attr_str
);
29786 /* This function changes the assembler name for functions that are
29787 versions. If DECL is a function version and has a "target"
29788 attribute, it appends the attribute string to its assembler name. */
29791 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29794 const char *orig_name
, *version_string
;
29795 char *attr_str
, *assembler_name
;
29797 if (DECL_DECLARED_INLINE_P (decl
)
29798 && lookup_attribute ("gnu_inline",
29799 DECL_ATTRIBUTES (decl
)))
29800 error_at (DECL_SOURCE_LOCATION (decl
),
29801 "Function versions cannot be marked as gnu_inline,"
29802 " bodies have to be generated");
29804 if (DECL_VIRTUAL_P (decl
)
29805 || DECL_VINDEX (decl
))
29806 sorry ("Virtual function multiversioning not supported");
29808 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29810 /* target attribute string cannot be NULL. */
29811 gcc_assert (version_attr
!= NULL_TREE
);
29813 orig_name
= IDENTIFIER_POINTER (id
);
29815 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29817 if (strcmp (version_string
, "default") == 0)
29820 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
29821 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
29823 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29825 /* Allow assembler name to be modified if already set. */
29826 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29827 SET_DECL_RTL (decl
, NULL
);
29829 tree ret
= get_identifier (assembler_name
);
29830 XDELETEVEC (attr_str
);
29831 XDELETEVEC (assembler_name
);
29835 /* This function returns true if FN1 and FN2 are versions of the same function,
29836 that is, the target strings of the function decls are different. This assumes
29837 that FN1 and FN2 have the same signature. */
29840 ix86_function_versions (tree fn1
, tree fn2
)
29843 char *target1
, *target2
;
29846 if (TREE_CODE (fn1
) != FUNCTION_DECL
29847 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29850 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29851 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29853 /* At least one function decl should have the target attribute specified. */
29854 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29857 /* Diagnose missing target attribute if one of the decls is already
29858 multi-versioned. */
29859 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
29861 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
29863 if (attr2
!= NULL_TREE
)
29870 error_at (DECL_SOURCE_LOCATION (fn2
),
29871 "missing %<target%> attribute for multi-versioned %D",
29873 error_at (DECL_SOURCE_LOCATION (fn1
),
29874 "previous declaration of %D", fn1
);
29875 /* Prevent diagnosing of the same error multiple times. */
29876 DECL_ATTRIBUTES (fn2
)
29877 = tree_cons (get_identifier ("target"),
29878 copy_node (TREE_VALUE (attr1
)),
29879 DECL_ATTRIBUTES (fn2
));
29884 target1
= sorted_attr_string (TREE_VALUE (attr1
));
29885 target2
= sorted_attr_string (TREE_VALUE (attr2
));
29887 /* The sorted target strings must be different for fn1 and fn2
29889 if (strcmp (target1
, target2
) == 0)
29894 XDELETEVEC (target1
);
29895 XDELETEVEC (target2
);
29901 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29903 /* For function version, add the target suffix to the assembler name. */
29904 if (TREE_CODE (decl
) == FUNCTION_DECL
29905 && DECL_FUNCTION_VERSIONED (decl
))
29906 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29907 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29908 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29914 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29915 is true, append the full path name of the source file. */
29918 make_name (tree decl
, const char *suffix
, bool make_unique
)
29920 char *global_var_name
;
29923 const char *unique_name
= NULL
;
29925 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29927 /* Get a unique name that can be used globally without any chances
29928 of collision at link time. */
29930 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
29932 name_len
= strlen (name
) + strlen (suffix
) + 2;
29935 name_len
+= strlen (unique_name
) + 1;
29936 global_var_name
= XNEWVEC (char, name_len
);
29938 /* Use '.' to concatenate names as it is demangler friendly. */
29940 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
29943 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29945 return global_var_name
;
29948 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29950 /* Make a dispatcher declaration for the multi-versioned function DECL.
29951 Calls to DECL function will be replaced with calls to the dispatcher
29952 by the front-end. Return the decl created. */
29955 make_dispatcher_decl (const tree decl
)
29959 tree fn_type
, func_type
;
29960 bool is_uniq
= false;
29962 if (TREE_PUBLIC (decl
) == 0)
29965 func_name
= make_name (decl
, "ifunc", is_uniq
);
29967 fn_type
= TREE_TYPE (decl
);
29968 func_type
= build_function_type (TREE_TYPE (fn_type
),
29969 TYPE_ARG_TYPES (fn_type
));
29971 func_decl
= build_fn_decl (func_name
, func_type
);
29972 XDELETEVEC (func_name
);
29973 TREE_USED (func_decl
) = 1;
29974 DECL_CONTEXT (func_decl
) = NULL_TREE
;
29975 DECL_INITIAL (func_decl
) = error_mark_node
;
29976 DECL_ARTIFICIAL (func_decl
) = 1;
29977 /* Mark this func as external, the resolver will flip it again if
29978 it gets generated. */
29979 DECL_EXTERNAL (func_decl
) = 1;
29980 /* This will be of type IFUNCs have to be externally visible. */
29981 TREE_PUBLIC (func_decl
) = 1;
29988 /* Returns true if decl is multi-versioned and DECL is the default function,
29989 that is it is not tagged with target specific optimization. */
29992 is_function_default_version (const tree decl
)
29994 if (TREE_CODE (decl
) != FUNCTION_DECL
29995 || !DECL_FUNCTION_VERSIONED (decl
))
29997 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29999 attr
= TREE_VALUE (TREE_VALUE (attr
));
30000 return (TREE_CODE (attr
) == STRING_CST
30001 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
30004 /* Make a dispatcher declaration for the multi-versioned function DECL.
30005 Calls to DECL function will be replaced with calls to the dispatcher
30006 by the front-end. Returns the decl of the dispatcher function. */
30009 ix86_get_function_versions_dispatcher (void *decl
)
30011 tree fn
= (tree
) decl
;
30012 struct cgraph_node
*node
= NULL
;
30013 struct cgraph_node
*default_node
= NULL
;
30014 struct cgraph_function_version_info
*node_v
= NULL
;
30015 struct cgraph_function_version_info
*first_v
= NULL
;
30017 tree dispatch_decl
= NULL
;
30019 struct cgraph_function_version_info
*default_version_info
= NULL
;
30021 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
30023 node
= cgraph_get_node (fn
);
30024 gcc_assert (node
!= NULL
);
30026 node_v
= get_cgraph_node_version (node
);
30027 gcc_assert (node_v
!= NULL
);
30029 if (node_v
->dispatcher_resolver
!= NULL
)
30030 return node_v
->dispatcher_resolver
;
30032 /* Find the default version and make it the first node. */
30034 /* Go to the beginning of the chain. */
30035 while (first_v
->prev
!= NULL
)
30036 first_v
= first_v
->prev
;
30037 default_version_info
= first_v
;
30038 while (default_version_info
!= NULL
)
30040 if (is_function_default_version
30041 (default_version_info
->this_node
->symbol
.decl
))
30043 default_version_info
= default_version_info
->next
;
30046 /* If there is no default node, just return NULL. */
30047 if (default_version_info
== NULL
)
30050 /* Make default info the first node. */
30051 if (first_v
!= default_version_info
)
30053 default_version_info
->prev
->next
= default_version_info
->next
;
30054 if (default_version_info
->next
)
30055 default_version_info
->next
->prev
= default_version_info
->prev
;
30056 first_v
->prev
= default_version_info
;
30057 default_version_info
->next
= first_v
;
30058 default_version_info
->prev
= NULL
;
30061 default_node
= default_version_info
->this_node
;
30063 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30064 if (targetm
.has_ifunc_p ())
30066 struct cgraph_function_version_info
*it_v
= NULL
;
30067 struct cgraph_node
*dispatcher_node
= NULL
;
30068 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
30070 /* Right now, the dispatching is done via ifunc. */
30071 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
30073 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
30074 gcc_assert (dispatcher_node
!= NULL
);
30075 dispatcher_node
->dispatcher_function
= 1;
30076 dispatcher_version_info
30077 = insert_new_cgraph_node_version (dispatcher_node
);
30078 dispatcher_version_info
->next
= default_version_info
;
30079 dispatcher_node
->symbol
.definition
= 1;
30081 /* Set the dispatcher for all the versions. */
30082 it_v
= default_version_info
;
30083 while (it_v
!= NULL
)
30085 it_v
->dispatcher_resolver
= dispatch_decl
;
30092 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
30093 "multiversioning needs ifunc which is not supported "
30097 return dispatch_decl
;
30100 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
30104 make_attribute (const char *name
, const char *arg_name
, tree chain
)
30107 tree attr_arg_name
;
30111 attr_name
= get_identifier (name
);
30112 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
30113 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
30114 attr
= tree_cons (attr_name
, attr_args
, chain
);
30118 /* Make the resolver function decl to dispatch the versions of
30119 a multi-versioned function, DEFAULT_DECL. Create an
30120 empty basic block in the resolver and store the pointer in
30121 EMPTY_BB. Return the decl of the resolver function. */
30124 make_resolver_func (const tree default_decl
,
30125 const tree dispatch_decl
,
30126 basic_block
*empty_bb
)
30128 char *resolver_name
;
30129 tree decl
, type
, decl_name
, t
;
30130 bool is_uniq
= false;
30132 /* IFUNC's have to be globally visible. So, if the default_decl is
30133 not, then the name of the IFUNC should be made unique. */
30134 if (TREE_PUBLIC (default_decl
) == 0)
30137 /* Append the filename to the resolver function if the versions are
30138 not externally visible. This is because the resolver function has
30139 to be externally visible for the loader to find it. So, appending
30140 the filename will prevent conflicts with a resolver function from
30141 another module which is based on the same version name. */
30142 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
30144 /* The resolver function should return a (void *). */
30145 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
30147 decl
= build_fn_decl (resolver_name
, type
);
30148 decl_name
= get_identifier (resolver_name
);
30149 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
30151 DECL_NAME (decl
) = decl_name
;
30152 TREE_USED (decl
) = 1;
30153 DECL_ARTIFICIAL (decl
) = 1;
30154 DECL_IGNORED_P (decl
) = 0;
30155 /* IFUNC resolvers have to be externally visible. */
30156 TREE_PUBLIC (decl
) = 1;
30157 DECL_UNINLINABLE (decl
) = 0;
30159 /* Resolver is not external, body is generated. */
30160 DECL_EXTERNAL (decl
) = 0;
30161 DECL_EXTERNAL (dispatch_decl
) = 0;
30163 DECL_CONTEXT (decl
) = NULL_TREE
;
30164 DECL_INITIAL (decl
) = make_node (BLOCK
);
30165 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
30167 if (DECL_COMDAT_GROUP (default_decl
)
30168 || TREE_PUBLIC (default_decl
))
30170 /* In this case, each translation unit with a call to this
30171 versioned function will put out a resolver. Ensure it
30172 is comdat to keep just one copy. */
30173 DECL_COMDAT (decl
) = 1;
30174 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
30176 /* Build result decl and add to function_decl. */
30177 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
30178 DECL_ARTIFICIAL (t
) = 1;
30179 DECL_IGNORED_P (t
) = 1;
30180 DECL_RESULT (decl
) = t
;
30182 gimplify_function_tree (decl
);
30183 push_cfun (DECL_STRUCT_FUNCTION (decl
));
30184 *empty_bb
= init_lowered_empty_function (decl
, false);
30186 cgraph_add_new_function (decl
, true);
30187 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
30191 gcc_assert (dispatch_decl
!= NULL
);
30192 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
30193 DECL_ATTRIBUTES (dispatch_decl
)
30194 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
30196 /* Create the alias for dispatch to resolver here. */
30197 /*cgraph_create_function_alias (dispatch_decl, decl);*/
30198 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
30199 XDELETEVEC (resolver_name
);
30203 /* Generate the dispatching code body to dispatch multi-versioned function
30204 DECL. The target hook is called to process the "target" attributes and
30205 provide the code to dispatch the right function at run-time. NODE points
30206 to the dispatcher decl whose body will be created. */
30209 ix86_generate_version_dispatcher_body (void *node_p
)
30211 tree resolver_decl
;
30212 basic_block empty_bb
;
30213 vec
<tree
> fn_ver_vec
= vNULL
;
30214 tree default_ver_decl
;
30215 struct cgraph_node
*versn
;
30216 struct cgraph_node
*node
;
30218 struct cgraph_function_version_info
*node_version_info
= NULL
;
30219 struct cgraph_function_version_info
*versn_info
= NULL
;
30221 node
= (cgraph_node
*)node_p
;
30223 node_version_info
= get_cgraph_node_version (node
);
30224 gcc_assert (node
->dispatcher_function
30225 && node_version_info
!= NULL
);
30227 if (node_version_info
->dispatcher_resolver
)
30228 return node_version_info
->dispatcher_resolver
;
30230 /* The first version in the chain corresponds to the default version. */
30231 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
30233 /* node is going to be an alias, so remove the finalized bit. */
30234 node
->symbol
.definition
= false;
30236 resolver_decl
= make_resolver_func (default_ver_decl
,
30237 node
->symbol
.decl
, &empty_bb
);
30239 node_version_info
->dispatcher_resolver
= resolver_decl
;
30241 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30243 fn_ver_vec
.create (2);
30245 for (versn_info
= node_version_info
->next
; versn_info
;
30246 versn_info
= versn_info
->next
)
30248 versn
= versn_info
->this_node
;
30249 /* Check for virtual functions here again, as by this time it should
30250 have been determined if this function needs a vtable index or
30251 not. This happens for methods in derived classes that override
30252 virtual methods in base classes but are not explicitly marked as
30254 if (DECL_VINDEX (versn
->symbol
.decl
))
30255 sorry ("Virtual function multiversioning not supported");
30257 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
30260 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30261 fn_ver_vec
.release ();
30262 rebuild_cgraph_edges ();
30264 return resolver_decl
;
30266 /* This builds the processor_model struct type defined in
30267 libgcc/config/i386/cpuinfo.c */
30270 build_processor_model_struct (void)
30272 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30274 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30276 tree type
= make_node (RECORD_TYPE
);
30278 /* The first 3 fields are unsigned int. */
30279 for (i
= 0; i
< 3; ++i
)
30281 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30282 get_identifier (field_name
[i
]), unsigned_type_node
);
30283 if (field_chain
!= NULL_TREE
)
30284 DECL_CHAIN (field
) = field_chain
;
30285 field_chain
= field
;
30288 /* The last field is an array of unsigned integers of size one. */
30289 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30290 get_identifier (field_name
[3]),
30291 build_array_type (unsigned_type_node
,
30292 build_index_type (size_one_node
)));
30293 if (field_chain
!= NULL_TREE
)
30294 DECL_CHAIN (field
) = field_chain
;
30295 field_chain
= field
;
30297 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
30301 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30304 make_var_decl (tree type
, const char *name
)
30308 new_decl
= build_decl (UNKNOWN_LOCATION
,
30310 get_identifier(name
),
30313 DECL_EXTERNAL (new_decl
) = 1;
30314 TREE_STATIC (new_decl
) = 1;
30315 TREE_PUBLIC (new_decl
) = 1;
30316 DECL_INITIAL (new_decl
) = 0;
30317 DECL_ARTIFICIAL (new_decl
) = 0;
30318 DECL_PRESERVE_P (new_decl
) = 1;
30320 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30321 assemble_variable (new_decl
, 0, 0, 0);
30326 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30327 into an integer defined in libgcc/config/i386/cpuinfo.c */
30330 fold_builtin_cpu (tree fndecl
, tree
*args
)
30333 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30334 DECL_FUNCTION_CODE (fndecl
);
30335 tree param_string_cst
= NULL
;
30337 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30338 enum processor_features
30354 /* These are the values for vendor types and cpu types and subtypes
30355 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30356 the corresponding start value. */
30357 enum processor_model
30368 M_CPU_SUBTYPE_START
,
30369 M_INTEL_COREI7_NEHALEM
,
30370 M_INTEL_COREI7_WESTMERE
,
30371 M_INTEL_COREI7_SANDYBRIDGE
,
30372 M_AMDFAM10H_BARCELONA
,
30373 M_AMDFAM10H_SHANGHAI
,
30374 M_AMDFAM10H_ISTANBUL
,
30375 M_AMDFAM15H_BDVER1
,
30376 M_AMDFAM15H_BDVER2
,
30380 static struct _arch_names_table
30382 const char *const name
;
30383 const enum processor_model model
;
30385 const arch_names_table
[] =
30388 {"intel", M_INTEL
},
30389 {"atom", M_INTEL_ATOM
},
30390 {"slm", M_INTEL_SLM
},
30391 {"core2", M_INTEL_CORE2
},
30392 {"corei7", M_INTEL_COREI7
},
30393 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30394 {"westmere", M_INTEL_COREI7_WESTMERE
},
30395 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30396 {"amdfam10h", M_AMDFAM10H
},
30397 {"barcelona", M_AMDFAM10H_BARCELONA
},
30398 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30399 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30400 {"amdfam15h", M_AMDFAM15H
},
30401 {"bdver1", M_AMDFAM15H_BDVER1
},
30402 {"bdver2", M_AMDFAM15H_BDVER2
},
30403 {"bdver3", M_AMDFAM15H_BDVER3
},
30406 static struct _isa_names_table
30408 const char *const name
;
30409 const enum processor_features feature
;
30411 const isa_names_table
[] =
30415 {"popcnt", F_POPCNT
},
30419 {"ssse3", F_SSSE3
},
30420 {"sse4.1", F_SSE4_1
},
30421 {"sse4.2", F_SSE4_2
},
30426 tree __processor_model_type
= build_processor_model_struct ();
30427 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
30431 varpool_add_new_variable (__cpu_model_var
);
30433 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
30435 param_string_cst
= *args
;
30436 while (param_string_cst
30437 && TREE_CODE (param_string_cst
) != STRING_CST
)
30439 /* *args must be a expr that can contain other EXPRS leading to a
30441 if (!EXPR_P (param_string_cst
))
30443 error ("Parameter to builtin must be a string constant or literal");
30444 return integer_zero_node
;
30446 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
30449 gcc_assert (param_string_cst
);
30451 if (fn_code
== IX86_BUILTIN_CPU_IS
)
30457 unsigned int field_val
= 0;
30458 unsigned int NUM_ARCH_NAMES
30459 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
30461 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
30462 if (strcmp (arch_names_table
[i
].name
,
30463 TREE_STRING_POINTER (param_string_cst
)) == 0)
30466 if (i
== NUM_ARCH_NAMES
)
30468 error ("Parameter to builtin not valid: %s",
30469 TREE_STRING_POINTER (param_string_cst
));
30470 return integer_zero_node
;
30473 field
= TYPE_FIELDS (__processor_model_type
);
30474 field_val
= arch_names_table
[i
].model
;
30476 /* CPU types are stored in the next field. */
30477 if (field_val
> M_CPU_TYPE_START
30478 && field_val
< M_CPU_SUBTYPE_START
)
30480 field
= DECL_CHAIN (field
);
30481 field_val
-= M_CPU_TYPE_START
;
30484 /* CPU subtypes are stored in the next field. */
30485 if (field_val
> M_CPU_SUBTYPE_START
)
30487 field
= DECL_CHAIN ( DECL_CHAIN (field
));
30488 field_val
-= M_CPU_SUBTYPE_START
;
30491 /* Get the appropriate field in __cpu_model. */
30492 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30495 /* Check the value. */
30496 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
30497 build_int_cstu (unsigned_type_node
, field_val
));
30498 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30500 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30507 unsigned int field_val
= 0;
30508 unsigned int NUM_ISA_NAMES
30509 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
30511 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
30512 if (strcmp (isa_names_table
[i
].name
,
30513 TREE_STRING_POINTER (param_string_cst
)) == 0)
30516 if (i
== NUM_ISA_NAMES
)
30518 error ("Parameter to builtin not valid: %s",
30519 TREE_STRING_POINTER (param_string_cst
));
30520 return integer_zero_node
;
30523 field
= TYPE_FIELDS (__processor_model_type
);
30524 /* Get the last field, which is __cpu_features. */
30525 while (DECL_CHAIN (field
))
30526 field
= DECL_CHAIN (field
);
30528 /* Get the appropriate field: __cpu_model.__cpu_features */
30529 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
30532 /* Access the 0th element of __cpu_features array. */
30533 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
30534 integer_zero_node
, NULL_TREE
, NULL_TREE
);
30536 field_val
= (1 << isa_names_table
[i
].feature
);
30537 /* Return __cpu_model.__cpu_features[0] & field_val */
30538 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
30539 build_int_cstu (unsigned_type_node
, field_val
));
30540 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
30542 gcc_unreachable ();
30546 ix86_fold_builtin (tree fndecl
, int n_args
,
30547 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
30549 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
30551 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30552 DECL_FUNCTION_CODE (fndecl
);
30553 if (fn_code
== IX86_BUILTIN_CPU_IS
30554 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
30556 gcc_assert (n_args
== 1);
30557 return fold_builtin_cpu (fndecl
, args
);
30561 #ifdef SUBTARGET_FOLD_BUILTIN
30562 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
30568 /* Make builtins to detect cpu type and features supported. NAME is
30569 the builtin name, CODE is the builtin code, and FTYPE is the function
30570 type of the builtin. */
30573 make_cpu_type_builtin (const char* name
, int code
,
30574 enum ix86_builtin_func_type ftype
, bool is_const
)
30579 type
= ix86_get_builtin_func_type (ftype
);
30580 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
30582 gcc_assert (decl
!= NULL_TREE
);
30583 ix86_builtins
[(int) code
] = decl
;
30584 TREE_READONLY (decl
) = is_const
;
30587 /* Make builtins to get CPU type and features supported. The created
30590 __builtin_cpu_init (), to detect cpu type and features,
30591 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
30592 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
30596 ix86_init_platform_type_builtins (void)
30598 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
30599 INT_FTYPE_VOID
, false);
30600 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
30601 INT_FTYPE_PCCHAR
, true);
30602 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
30603 INT_FTYPE_PCCHAR
, true);
30606 /* Internal method for ix86_init_builtins. */
30609 ix86_init_builtins_va_builtins_abi (void)
30611 tree ms_va_ref
, sysv_va_ref
;
30612 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
30613 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
30614 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
30615 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
30619 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
30620 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
30621 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
30623 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
30626 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30627 fnvoid_va_start_ms
=
30628 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
30629 fnvoid_va_end_sysv
=
30630 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
30631 fnvoid_va_start_sysv
=
30632 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
30634 fnvoid_va_copy_ms
=
30635 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
30637 fnvoid_va_copy_sysv
=
30638 build_function_type_list (void_type_node
, sysv_va_ref
,
30639 sysv_va_ref
, NULL_TREE
);
30641 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
30642 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30643 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
30644 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30645 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
30646 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
30647 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
30648 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30649 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
30650 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30651 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
30652 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
30656 ix86_init_builtin_types (void)
30658 tree float128_type_node
, float80_type_node
;
30660 /* The __float80 type. */
30661 float80_type_node
= long_double_type_node
;
30662 if (TYPE_MODE (float80_type_node
) != XFmode
)
30664 /* The __float80 type. */
30665 float80_type_node
= make_node (REAL_TYPE
);
30667 TYPE_PRECISION (float80_type_node
) = 80;
30668 layout_type (float80_type_node
);
30670 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
30672 /* The __float128 type. */
30673 float128_type_node
= make_node (REAL_TYPE
);
30674 TYPE_PRECISION (float128_type_node
) = 128;
30675 layout_type (float128_type_node
);
30676 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
30678 /* This macro is built by i386-builtin-types.awk. */
30679 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
30683 ix86_init_builtins (void)
30687 ix86_init_builtin_types ();
30689 /* Builtins to get CPU type and features. */
30690 ix86_init_platform_type_builtins ();
30692 /* TFmode support builtins. */
30693 def_builtin_const (0, "__builtin_infq",
30694 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
30695 def_builtin_const (0, "__builtin_huge_valq",
30696 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
30698 /* We will expand them to normal call if SSE isn't available since
30699 they are used by libgcc. */
30700 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
30701 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
30702 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
30703 TREE_READONLY (t
) = 1;
30704 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
30706 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
30707 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
30708 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
30709 TREE_READONLY (t
) = 1;
30710 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
30712 ix86_init_tm_builtins ();
30713 ix86_init_mmx_sse_builtins ();
30716 ix86_init_builtins_va_builtins_abi ();
30718 #ifdef SUBTARGET_INIT_BUILTINS
30719 SUBTARGET_INIT_BUILTINS
;
30723 /* Return the ix86 builtin for CODE. */
30726 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
30728 if (code
>= IX86_BUILTIN_MAX
)
30729 return error_mark_node
;
30731 return ix86_builtins
[code
];
30734 /* Errors in the source file can cause expand_expr to return const0_rtx
30735 where we expect a vector. To avoid crashing, use one of the vector
30736 clear instructions. */
30738 safe_vector_operand (rtx x
, enum machine_mode mode
)
30740 if (x
== const0_rtx
)
30741 x
= CONST0_RTX (mode
);
30745 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30748 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30751 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30752 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30753 rtx op0
= expand_normal (arg0
);
30754 rtx op1
= expand_normal (arg1
);
30755 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30756 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30757 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30759 if (VECTOR_MODE_P (mode0
))
30760 op0
= safe_vector_operand (op0
, mode0
);
30761 if (VECTOR_MODE_P (mode1
))
30762 op1
= safe_vector_operand (op1
, mode1
);
30764 if (optimize
|| !target
30765 || GET_MODE (target
) != tmode
30766 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30767 target
= gen_reg_rtx (tmode
);
30769 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
30771 rtx x
= gen_reg_rtx (V4SImode
);
30772 emit_insn (gen_sse2_loadd (x
, op1
));
30773 op1
= gen_lowpart (TImode
, x
);
30776 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30777 op0
= copy_to_mode_reg (mode0
, op0
);
30778 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30779 op1
= copy_to_mode_reg (mode1
, op1
);
30781 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30790 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30793 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30794 enum ix86_builtin_func_type m_type
,
30795 enum rtx_code sub_code
)
30800 bool comparison_p
= false;
30802 bool last_arg_constant
= false;
30803 int num_memory
= 0;
30806 enum machine_mode mode
;
30809 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30813 case MULTI_ARG_4_DF2_DI_I
:
30814 case MULTI_ARG_4_DF2_DI_I1
:
30815 case MULTI_ARG_4_SF2_SI_I
:
30816 case MULTI_ARG_4_SF2_SI_I1
:
30818 last_arg_constant
= true;
30821 case MULTI_ARG_3_SF
:
30822 case MULTI_ARG_3_DF
:
30823 case MULTI_ARG_3_SF2
:
30824 case MULTI_ARG_3_DF2
:
30825 case MULTI_ARG_3_DI
:
30826 case MULTI_ARG_3_SI
:
30827 case MULTI_ARG_3_SI_DI
:
30828 case MULTI_ARG_3_HI
:
30829 case MULTI_ARG_3_HI_SI
:
30830 case MULTI_ARG_3_QI
:
30831 case MULTI_ARG_3_DI2
:
30832 case MULTI_ARG_3_SI2
:
30833 case MULTI_ARG_3_HI2
:
30834 case MULTI_ARG_3_QI2
:
30838 case MULTI_ARG_2_SF
:
30839 case MULTI_ARG_2_DF
:
30840 case MULTI_ARG_2_DI
:
30841 case MULTI_ARG_2_SI
:
30842 case MULTI_ARG_2_HI
:
30843 case MULTI_ARG_2_QI
:
30847 case MULTI_ARG_2_DI_IMM
:
30848 case MULTI_ARG_2_SI_IMM
:
30849 case MULTI_ARG_2_HI_IMM
:
30850 case MULTI_ARG_2_QI_IMM
:
30852 last_arg_constant
= true;
30855 case MULTI_ARG_1_SF
:
30856 case MULTI_ARG_1_DF
:
30857 case MULTI_ARG_1_SF2
:
30858 case MULTI_ARG_1_DF2
:
30859 case MULTI_ARG_1_DI
:
30860 case MULTI_ARG_1_SI
:
30861 case MULTI_ARG_1_HI
:
30862 case MULTI_ARG_1_QI
:
30863 case MULTI_ARG_1_SI_DI
:
30864 case MULTI_ARG_1_HI_DI
:
30865 case MULTI_ARG_1_HI_SI
:
30866 case MULTI_ARG_1_QI_DI
:
30867 case MULTI_ARG_1_QI_SI
:
30868 case MULTI_ARG_1_QI_HI
:
30872 case MULTI_ARG_2_DI_CMP
:
30873 case MULTI_ARG_2_SI_CMP
:
30874 case MULTI_ARG_2_HI_CMP
:
30875 case MULTI_ARG_2_QI_CMP
:
30877 comparison_p
= true;
30880 case MULTI_ARG_2_SF_TF
:
30881 case MULTI_ARG_2_DF_TF
:
30882 case MULTI_ARG_2_DI_TF
:
30883 case MULTI_ARG_2_SI_TF
:
30884 case MULTI_ARG_2_HI_TF
:
30885 case MULTI_ARG_2_QI_TF
:
30891 gcc_unreachable ();
30894 if (optimize
|| !target
30895 || GET_MODE (target
) != tmode
30896 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30897 target
= gen_reg_rtx (tmode
);
30899 gcc_assert (nargs
<= 4);
30901 for (i
= 0; i
< nargs
; i
++)
30903 tree arg
= CALL_EXPR_ARG (exp
, i
);
30904 rtx op
= expand_normal (arg
);
30905 int adjust
= (comparison_p
) ? 1 : 0;
30906 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30908 if (last_arg_constant
&& i
== nargs
- 1)
30910 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30912 enum insn_code new_icode
= icode
;
30915 case CODE_FOR_xop_vpermil2v2df3
:
30916 case CODE_FOR_xop_vpermil2v4sf3
:
30917 case CODE_FOR_xop_vpermil2v4df3
:
30918 case CODE_FOR_xop_vpermil2v8sf3
:
30919 error ("the last argument must be a 2-bit immediate");
30920 return gen_reg_rtx (tmode
);
30921 case CODE_FOR_xop_rotlv2di3
:
30922 new_icode
= CODE_FOR_rotlv2di3
;
30924 case CODE_FOR_xop_rotlv4si3
:
30925 new_icode
= CODE_FOR_rotlv4si3
;
30927 case CODE_FOR_xop_rotlv8hi3
:
30928 new_icode
= CODE_FOR_rotlv8hi3
;
30930 case CODE_FOR_xop_rotlv16qi3
:
30931 new_icode
= CODE_FOR_rotlv16qi3
;
30933 if (CONST_INT_P (op
))
30935 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30936 op
= GEN_INT (INTVAL (op
) & mask
);
30937 gcc_checking_assert
30938 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30942 gcc_checking_assert
30944 && insn_data
[new_icode
].operand
[0].mode
== tmode
30945 && insn_data
[new_icode
].operand
[1].mode
== tmode
30946 && insn_data
[new_icode
].operand
[2].mode
== mode
30947 && insn_data
[new_icode
].operand
[0].predicate
30948 == insn_data
[icode
].operand
[0].predicate
30949 && insn_data
[new_icode
].operand
[1].predicate
30950 == insn_data
[icode
].operand
[1].predicate
);
30956 gcc_unreachable ();
30963 if (VECTOR_MODE_P (mode
))
30964 op
= safe_vector_operand (op
, mode
);
30966 /* If we aren't optimizing, only allow one memory operand to be
30968 if (memory_operand (op
, mode
))
30971 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30974 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30976 op
= force_reg (mode
, op
);
30980 args
[i
].mode
= mode
;
30986 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30991 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30992 GEN_INT ((int)sub_code
));
30993 else if (! comparison_p
)
30994 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30997 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
31001 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
31006 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31010 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
31014 gcc_unreachable ();
31024 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
31025 insns with vec_merge. */
31028 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
31032 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31033 rtx op1
, op0
= expand_normal (arg0
);
31034 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31035 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31037 if (optimize
|| !target
31038 || GET_MODE (target
) != tmode
31039 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31040 target
= gen_reg_rtx (tmode
);
31042 if (VECTOR_MODE_P (mode0
))
31043 op0
= safe_vector_operand (op0
, mode0
);
31045 if ((optimize
&& !register_operand (op0
, mode0
))
31046 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31047 op0
= copy_to_mode_reg (mode0
, op0
);
31050 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
31051 op1
= copy_to_mode_reg (mode0
, op1
);
31053 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31060 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
31063 ix86_expand_sse_compare (const struct builtin_description
*d
,
31064 tree exp
, rtx target
, bool swap
)
31067 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31068 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31069 rtx op0
= expand_normal (arg0
);
31070 rtx op1
= expand_normal (arg1
);
31072 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31073 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31074 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31075 enum rtx_code comparison
= d
->comparison
;
31077 if (VECTOR_MODE_P (mode0
))
31078 op0
= safe_vector_operand (op0
, mode0
);
31079 if (VECTOR_MODE_P (mode1
))
31080 op1
= safe_vector_operand (op1
, mode1
);
31082 /* Swap operands if we have a comparison that isn't available in
31086 rtx tmp
= gen_reg_rtx (mode1
);
31087 emit_move_insn (tmp
, op1
);
31092 if (optimize
|| !target
31093 || GET_MODE (target
) != tmode
31094 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31095 target
= gen_reg_rtx (tmode
);
31097 if ((optimize
&& !register_operand (op0
, mode0
))
31098 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
31099 op0
= copy_to_mode_reg (mode0
, op0
);
31100 if ((optimize
&& !register_operand (op1
, mode1
))
31101 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
31102 op1
= copy_to_mode_reg (mode1
, op1
);
31104 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
31105 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31112 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
31115 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
31119 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31120 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31121 rtx op0
= expand_normal (arg0
);
31122 rtx op1
= expand_normal (arg1
);
31123 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31124 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31125 enum rtx_code comparison
= d
->comparison
;
31127 if (VECTOR_MODE_P (mode0
))
31128 op0
= safe_vector_operand (op0
, mode0
);
31129 if (VECTOR_MODE_P (mode1
))
31130 op1
= safe_vector_operand (op1
, mode1
);
31132 /* Swap operands if we have a comparison that isn't available in
31134 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
31141 target
= gen_reg_rtx (SImode
);
31142 emit_move_insn (target
, const0_rtx
);
31143 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31145 if ((optimize
&& !register_operand (op0
, mode0
))
31146 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31147 op0
= copy_to_mode_reg (mode0
, op0
);
31148 if ((optimize
&& !register_operand (op1
, mode1
))
31149 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31150 op1
= copy_to_mode_reg (mode1
, op1
);
31152 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31156 emit_insn (gen_rtx_SET (VOIDmode
,
31157 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31158 gen_rtx_fmt_ee (comparison
, QImode
,
31162 return SUBREG_REG (target
);
31165 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
31168 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
31172 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31173 rtx op1
, op0
= expand_normal (arg0
);
31174 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31175 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31177 if (optimize
|| target
== 0
31178 || GET_MODE (target
) != tmode
31179 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31180 target
= gen_reg_rtx (tmode
);
31182 if (VECTOR_MODE_P (mode0
))
31183 op0
= safe_vector_operand (op0
, mode0
);
31185 if ((optimize
&& !register_operand (op0
, mode0
))
31186 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31187 op0
= copy_to_mode_reg (mode0
, op0
);
31189 op1
= GEN_INT (d
->comparison
);
31191 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
31199 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
31200 tree exp
, rtx target
)
31203 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31204 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31205 rtx op0
= expand_normal (arg0
);
31206 rtx op1
= expand_normal (arg1
);
31208 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31209 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31210 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31212 if (optimize
|| target
== 0
31213 || GET_MODE (target
) != tmode
31214 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31215 target
= gen_reg_rtx (tmode
);
31217 op0
= safe_vector_operand (op0
, mode0
);
31218 op1
= safe_vector_operand (op1
, mode1
);
31220 if ((optimize
&& !register_operand (op0
, mode0
))
31221 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31222 op0
= copy_to_mode_reg (mode0
, op0
);
31223 if ((optimize
&& !register_operand (op1
, mode1
))
31224 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31225 op1
= copy_to_mode_reg (mode1
, op1
);
31227 op2
= GEN_INT (d
->comparison
);
31229 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31236 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31239 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31243 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31244 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31245 rtx op0
= expand_normal (arg0
);
31246 rtx op1
= expand_normal (arg1
);
31247 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31248 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31249 enum rtx_code comparison
= d
->comparison
;
31251 if (VECTOR_MODE_P (mode0
))
31252 op0
= safe_vector_operand (op0
, mode0
);
31253 if (VECTOR_MODE_P (mode1
))
31254 op1
= safe_vector_operand (op1
, mode1
);
31256 target
= gen_reg_rtx (SImode
);
31257 emit_move_insn (target
, const0_rtx
);
31258 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31260 if ((optimize
&& !register_operand (op0
, mode0
))
31261 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31262 op0
= copy_to_mode_reg (mode0
, op0
);
31263 if ((optimize
&& !register_operand (op1
, mode1
))
31264 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31265 op1
= copy_to_mode_reg (mode1
, op1
);
31267 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31271 emit_insn (gen_rtx_SET (VOIDmode
,
31272 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31273 gen_rtx_fmt_ee (comparison
, QImode
,
31277 return SUBREG_REG (target
);
31280 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31283 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31284 tree exp
, rtx target
)
31287 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31288 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31289 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31290 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31291 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31292 rtx scratch0
, scratch1
;
31293 rtx op0
= expand_normal (arg0
);
31294 rtx op1
= expand_normal (arg1
);
31295 rtx op2
= expand_normal (arg2
);
31296 rtx op3
= expand_normal (arg3
);
31297 rtx op4
= expand_normal (arg4
);
31298 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31300 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31301 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31302 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31303 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31304 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31305 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31306 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31308 if (VECTOR_MODE_P (modev2
))
31309 op0
= safe_vector_operand (op0
, modev2
);
31310 if (VECTOR_MODE_P (modev4
))
31311 op2
= safe_vector_operand (op2
, modev4
);
31313 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31314 op0
= copy_to_mode_reg (modev2
, op0
);
31315 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31316 op1
= copy_to_mode_reg (modei3
, op1
);
31317 if ((optimize
&& !register_operand (op2
, modev4
))
31318 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31319 op2
= copy_to_mode_reg (modev4
, op2
);
31320 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31321 op3
= copy_to_mode_reg (modei5
, op3
);
31323 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31325 error ("the fifth argument must be an 8-bit immediate");
31329 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31331 if (optimize
|| !target
31332 || GET_MODE (target
) != tmode0
31333 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31334 target
= gen_reg_rtx (tmode0
);
31336 scratch1
= gen_reg_rtx (tmode1
);
31338 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31340 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31342 if (optimize
|| !target
31343 || GET_MODE (target
) != tmode1
31344 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31345 target
= gen_reg_rtx (tmode1
);
31347 scratch0
= gen_reg_rtx (tmode0
);
31349 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31353 gcc_assert (d
->flag
);
31355 scratch0
= gen_reg_rtx (tmode0
);
31356 scratch1
= gen_reg_rtx (tmode1
);
31358 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31368 target
= gen_reg_rtx (SImode
);
31369 emit_move_insn (target
, const0_rtx
);
31370 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31373 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31374 gen_rtx_fmt_ee (EQ
, QImode
,
31375 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31378 return SUBREG_REG (target
);
31385 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31388 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31389 tree exp
, rtx target
)
31392 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31393 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31394 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31395 rtx scratch0
, scratch1
;
31396 rtx op0
= expand_normal (arg0
);
31397 rtx op1
= expand_normal (arg1
);
31398 rtx op2
= expand_normal (arg2
);
31399 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31401 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31402 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31403 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31404 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31405 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31407 if (VECTOR_MODE_P (modev2
))
31408 op0
= safe_vector_operand (op0
, modev2
);
31409 if (VECTOR_MODE_P (modev3
))
31410 op1
= safe_vector_operand (op1
, modev3
);
31412 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31413 op0
= copy_to_mode_reg (modev2
, op0
);
31414 if ((optimize
&& !register_operand (op1
, modev3
))
31415 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31416 op1
= copy_to_mode_reg (modev3
, op1
);
31418 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
31420 error ("the third argument must be an 8-bit immediate");
31424 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
31426 if (optimize
|| !target
31427 || GET_MODE (target
) != tmode0
31428 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31429 target
= gen_reg_rtx (tmode0
);
31431 scratch1
= gen_reg_rtx (tmode1
);
31433 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
31435 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
31437 if (optimize
|| !target
31438 || GET_MODE (target
) != tmode1
31439 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31440 target
= gen_reg_rtx (tmode1
);
31442 scratch0
= gen_reg_rtx (tmode0
);
31444 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
31448 gcc_assert (d
->flag
);
31450 scratch0
= gen_reg_rtx (tmode0
);
31451 scratch1
= gen_reg_rtx (tmode1
);
31453 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
31463 target
= gen_reg_rtx (SImode
);
31464 emit_move_insn (target
, const0_rtx
);
31465 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31468 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31469 gen_rtx_fmt_ee (EQ
, QImode
,
31470 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31473 return SUBREG_REG (target
);
31479 /* Subroutine of ix86_expand_builtin to take care of insns with
31480 variable number of operands. */
31483 ix86_expand_args_builtin (const struct builtin_description
*d
,
31484 tree exp
, rtx target
)
31486 rtx pat
, real_target
;
31487 unsigned int i
, nargs
;
31488 unsigned int nargs_constant
= 0;
31489 int num_memory
= 0;
31493 enum machine_mode mode
;
31495 bool last_arg_count
= false;
31496 enum insn_code icode
= d
->icode
;
31497 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31498 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31499 enum machine_mode rmode
= VOIDmode
;
31501 enum rtx_code comparison
= d
->comparison
;
31503 switch ((enum ix86_builtin_func_type
) d
->flag
)
31505 case V2DF_FTYPE_V2DF_ROUND
:
31506 case V4DF_FTYPE_V4DF_ROUND
:
31507 case V4SF_FTYPE_V4SF_ROUND
:
31508 case V8SF_FTYPE_V8SF_ROUND
:
31509 case V4SI_FTYPE_V4SF_ROUND
:
31510 case V8SI_FTYPE_V8SF_ROUND
:
31511 return ix86_expand_sse_round (d
, exp
, target
);
31512 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
31513 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
31514 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
31515 case INT_FTYPE_V8SF_V8SF_PTEST
:
31516 case INT_FTYPE_V4DI_V4DI_PTEST
:
31517 case INT_FTYPE_V4DF_V4DF_PTEST
:
31518 case INT_FTYPE_V4SF_V4SF_PTEST
:
31519 case INT_FTYPE_V2DI_V2DI_PTEST
:
31520 case INT_FTYPE_V2DF_V2DF_PTEST
:
31521 return ix86_expand_sse_ptest (d
, exp
, target
);
31522 case FLOAT128_FTYPE_FLOAT128
:
31523 case FLOAT_FTYPE_FLOAT
:
31524 case INT_FTYPE_INT
:
31525 case UINT64_FTYPE_INT
:
31526 case UINT16_FTYPE_UINT16
:
31527 case INT64_FTYPE_INT64
:
31528 case INT64_FTYPE_V4SF
:
31529 case INT64_FTYPE_V2DF
:
31530 case INT_FTYPE_V16QI
:
31531 case INT_FTYPE_V8QI
:
31532 case INT_FTYPE_V8SF
:
31533 case INT_FTYPE_V4DF
:
31534 case INT_FTYPE_V4SF
:
31535 case INT_FTYPE_V2DF
:
31536 case INT_FTYPE_V32QI
:
31537 case V16QI_FTYPE_V16QI
:
31538 case V8SI_FTYPE_V8SF
:
31539 case V8SI_FTYPE_V4SI
:
31540 case V8HI_FTYPE_V8HI
:
31541 case V8HI_FTYPE_V16QI
:
31542 case V8QI_FTYPE_V8QI
:
31543 case V8SF_FTYPE_V8SF
:
31544 case V8SF_FTYPE_V8SI
:
31545 case V8SF_FTYPE_V4SF
:
31546 case V8SF_FTYPE_V8HI
:
31547 case V4SI_FTYPE_V4SI
:
31548 case V4SI_FTYPE_V16QI
:
31549 case V4SI_FTYPE_V4SF
:
31550 case V4SI_FTYPE_V8SI
:
31551 case V4SI_FTYPE_V8HI
:
31552 case V4SI_FTYPE_V4DF
:
31553 case V4SI_FTYPE_V2DF
:
31554 case V4HI_FTYPE_V4HI
:
31555 case V4DF_FTYPE_V4DF
:
31556 case V4DF_FTYPE_V4SI
:
31557 case V4DF_FTYPE_V4SF
:
31558 case V4DF_FTYPE_V2DF
:
31559 case V4SF_FTYPE_V4SF
:
31560 case V4SF_FTYPE_V4SI
:
31561 case V4SF_FTYPE_V8SF
:
31562 case V4SF_FTYPE_V4DF
:
31563 case V4SF_FTYPE_V8HI
:
31564 case V4SF_FTYPE_V2DF
:
31565 case V2DI_FTYPE_V2DI
:
31566 case V2DI_FTYPE_V16QI
:
31567 case V2DI_FTYPE_V8HI
:
31568 case V2DI_FTYPE_V4SI
:
31569 case V2DF_FTYPE_V2DF
:
31570 case V2DF_FTYPE_V4SI
:
31571 case V2DF_FTYPE_V4DF
:
31572 case V2DF_FTYPE_V4SF
:
31573 case V2DF_FTYPE_V2SI
:
31574 case V2SI_FTYPE_V2SI
:
31575 case V2SI_FTYPE_V4SF
:
31576 case V2SI_FTYPE_V2SF
:
31577 case V2SI_FTYPE_V2DF
:
31578 case V2SF_FTYPE_V2SF
:
31579 case V2SF_FTYPE_V2SI
:
31580 case V32QI_FTYPE_V32QI
:
31581 case V32QI_FTYPE_V16QI
:
31582 case V16HI_FTYPE_V16HI
:
31583 case V16HI_FTYPE_V8HI
:
31584 case V8SI_FTYPE_V8SI
:
31585 case V16HI_FTYPE_V16QI
:
31586 case V8SI_FTYPE_V16QI
:
31587 case V4DI_FTYPE_V16QI
:
31588 case V8SI_FTYPE_V8HI
:
31589 case V4DI_FTYPE_V8HI
:
31590 case V4DI_FTYPE_V4SI
:
31591 case V4DI_FTYPE_V2DI
:
31594 case V4SF_FTYPE_V4SF_VEC_MERGE
:
31595 case V2DF_FTYPE_V2DF_VEC_MERGE
:
31596 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
31597 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
31598 case V16QI_FTYPE_V16QI_V16QI
:
31599 case V16QI_FTYPE_V8HI_V8HI
:
31600 case V8QI_FTYPE_V8QI_V8QI
:
31601 case V8QI_FTYPE_V4HI_V4HI
:
31602 case V8HI_FTYPE_V8HI_V8HI
:
31603 case V8HI_FTYPE_V16QI_V16QI
:
31604 case V8HI_FTYPE_V4SI_V4SI
:
31605 case V8SF_FTYPE_V8SF_V8SF
:
31606 case V8SF_FTYPE_V8SF_V8SI
:
31607 case V4SI_FTYPE_V4SI_V4SI
:
31608 case V4SI_FTYPE_V8HI_V8HI
:
31609 case V4SI_FTYPE_V4SF_V4SF
:
31610 case V4SI_FTYPE_V2DF_V2DF
:
31611 case V4HI_FTYPE_V4HI_V4HI
:
31612 case V4HI_FTYPE_V8QI_V8QI
:
31613 case V4HI_FTYPE_V2SI_V2SI
:
31614 case V4DF_FTYPE_V4DF_V4DF
:
31615 case V4DF_FTYPE_V4DF_V4DI
:
31616 case V4SF_FTYPE_V4SF_V4SF
:
31617 case V4SF_FTYPE_V4SF_V4SI
:
31618 case V4SF_FTYPE_V4SF_V2SI
:
31619 case V4SF_FTYPE_V4SF_V2DF
:
31620 case V4SF_FTYPE_V4SF_DI
:
31621 case V4SF_FTYPE_V4SF_SI
:
31622 case V2DI_FTYPE_V2DI_V2DI
:
31623 case V2DI_FTYPE_V16QI_V16QI
:
31624 case V2DI_FTYPE_V4SI_V4SI
:
31625 case V2UDI_FTYPE_V4USI_V4USI
:
31626 case V2DI_FTYPE_V2DI_V16QI
:
31627 case V2DI_FTYPE_V2DF_V2DF
:
31628 case V2SI_FTYPE_V2SI_V2SI
:
31629 case V2SI_FTYPE_V4HI_V4HI
:
31630 case V2SI_FTYPE_V2SF_V2SF
:
31631 case V2DF_FTYPE_V2DF_V2DF
:
31632 case V2DF_FTYPE_V2DF_V4SF
:
31633 case V2DF_FTYPE_V2DF_V2DI
:
31634 case V2DF_FTYPE_V2DF_DI
:
31635 case V2DF_FTYPE_V2DF_SI
:
31636 case V2SF_FTYPE_V2SF_V2SF
:
31637 case V1DI_FTYPE_V1DI_V1DI
:
31638 case V1DI_FTYPE_V8QI_V8QI
:
31639 case V1DI_FTYPE_V2SI_V2SI
:
31640 case V32QI_FTYPE_V16HI_V16HI
:
31641 case V16HI_FTYPE_V8SI_V8SI
:
31642 case V32QI_FTYPE_V32QI_V32QI
:
31643 case V16HI_FTYPE_V32QI_V32QI
:
31644 case V16HI_FTYPE_V16HI_V16HI
:
31645 case V8SI_FTYPE_V4DF_V4DF
:
31646 case V8SI_FTYPE_V8SI_V8SI
:
31647 case V8SI_FTYPE_V16HI_V16HI
:
31648 case V4DI_FTYPE_V4DI_V4DI
:
31649 case V4DI_FTYPE_V8SI_V8SI
:
31650 case V4UDI_FTYPE_V8USI_V8USI
:
31651 if (comparison
== UNKNOWN
)
31652 return ix86_expand_binop_builtin (icode
, exp
, target
);
31655 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
31656 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
31657 gcc_assert (comparison
!= UNKNOWN
);
31661 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
31662 case V16HI_FTYPE_V16HI_SI_COUNT
:
31663 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
31664 case V8SI_FTYPE_V8SI_SI_COUNT
:
31665 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
31666 case V4DI_FTYPE_V4DI_INT_COUNT
:
31667 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
31668 case V8HI_FTYPE_V8HI_SI_COUNT
:
31669 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
31670 case V4SI_FTYPE_V4SI_SI_COUNT
:
31671 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
31672 case V4HI_FTYPE_V4HI_SI_COUNT
:
31673 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
31674 case V2DI_FTYPE_V2DI_SI_COUNT
:
31675 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
31676 case V2SI_FTYPE_V2SI_SI_COUNT
:
31677 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
31678 case V1DI_FTYPE_V1DI_SI_COUNT
:
31680 last_arg_count
= true;
31682 case UINT64_FTYPE_UINT64_UINT64
:
31683 case UINT_FTYPE_UINT_UINT
:
31684 case UINT_FTYPE_UINT_USHORT
:
31685 case UINT_FTYPE_UINT_UCHAR
:
31686 case UINT16_FTYPE_UINT16_INT
:
31687 case UINT8_FTYPE_UINT8_INT
:
31690 case V2DI_FTYPE_V2DI_INT_CONVERT
:
31693 nargs_constant
= 1;
31695 case V4DI_FTYPE_V4DI_INT_CONVERT
:
31698 nargs_constant
= 1;
31700 case V8HI_FTYPE_V8HI_INT
:
31701 case V8HI_FTYPE_V8SF_INT
:
31702 case V8HI_FTYPE_V4SF_INT
:
31703 case V8SF_FTYPE_V8SF_INT
:
31704 case V4SI_FTYPE_V4SI_INT
:
31705 case V4SI_FTYPE_V8SI_INT
:
31706 case V4HI_FTYPE_V4HI_INT
:
31707 case V4DF_FTYPE_V4DF_INT
:
31708 case V4SF_FTYPE_V4SF_INT
:
31709 case V4SF_FTYPE_V8SF_INT
:
31710 case V2DI_FTYPE_V2DI_INT
:
31711 case V2DF_FTYPE_V2DF_INT
:
31712 case V2DF_FTYPE_V4DF_INT
:
31713 case V16HI_FTYPE_V16HI_INT
:
31714 case V8SI_FTYPE_V8SI_INT
:
31715 case V4DI_FTYPE_V4DI_INT
:
31716 case V2DI_FTYPE_V4DI_INT
:
31718 nargs_constant
= 1;
31720 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
31721 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
31722 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
31723 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
31724 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
31725 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
31728 case V32QI_FTYPE_V32QI_V32QI_INT
:
31729 case V16HI_FTYPE_V16HI_V16HI_INT
:
31730 case V16QI_FTYPE_V16QI_V16QI_INT
:
31731 case V4DI_FTYPE_V4DI_V4DI_INT
:
31732 case V8HI_FTYPE_V8HI_V8HI_INT
:
31733 case V8SI_FTYPE_V8SI_V8SI_INT
:
31734 case V8SI_FTYPE_V8SI_V4SI_INT
:
31735 case V8SF_FTYPE_V8SF_V8SF_INT
:
31736 case V8SF_FTYPE_V8SF_V4SF_INT
:
31737 case V4SI_FTYPE_V4SI_V4SI_INT
:
31738 case V4DF_FTYPE_V4DF_V4DF_INT
:
31739 case V4DF_FTYPE_V4DF_V2DF_INT
:
31740 case V4SF_FTYPE_V4SF_V4SF_INT
:
31741 case V2DI_FTYPE_V2DI_V2DI_INT
:
31742 case V4DI_FTYPE_V4DI_V2DI_INT
:
31743 case V2DF_FTYPE_V2DF_V2DF_INT
:
31745 nargs_constant
= 1;
31747 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31750 nargs_constant
= 1;
31752 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31755 nargs_constant
= 1;
31757 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31760 nargs_constant
= 1;
31762 case V2DI_FTYPE_V2DI_UINT_UINT
:
31764 nargs_constant
= 2;
31766 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31767 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31768 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31769 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31771 nargs_constant
= 1;
31773 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31775 nargs_constant
= 2;
31777 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31778 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31782 gcc_unreachable ();
31785 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31787 if (comparison
!= UNKNOWN
)
31789 gcc_assert (nargs
== 2);
31790 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31793 if (rmode
== VOIDmode
|| rmode
== tmode
)
31797 || GET_MODE (target
) != tmode
31798 || !insn_p
->operand
[0].predicate (target
, tmode
))
31799 target
= gen_reg_rtx (tmode
);
31800 real_target
= target
;
31804 target
= gen_reg_rtx (rmode
);
31805 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31808 for (i
= 0; i
< nargs
; i
++)
31810 tree arg
= CALL_EXPR_ARG (exp
, i
);
31811 rtx op
= expand_normal (arg
);
31812 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31813 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31815 if (last_arg_count
&& (i
+ 1) == nargs
)
31817 /* SIMD shift insns take either an 8-bit immediate or
31818 register as count. But builtin functions take int as
31819 count. If count doesn't match, we put it in register. */
31822 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31823 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31824 op
= copy_to_reg (op
);
31827 else if ((nargs
- i
) <= nargs_constant
)
31832 case CODE_FOR_avx2_inserti128
:
31833 case CODE_FOR_avx2_extracti128
:
31834 error ("the last argument must be an 1-bit immediate");
31837 case CODE_FOR_sse4_1_roundsd
:
31838 case CODE_FOR_sse4_1_roundss
:
31840 case CODE_FOR_sse4_1_roundpd
:
31841 case CODE_FOR_sse4_1_roundps
:
31842 case CODE_FOR_avx_roundpd256
:
31843 case CODE_FOR_avx_roundps256
:
31845 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31846 case CODE_FOR_sse4_1_roundps_sfix
:
31847 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31848 case CODE_FOR_avx_roundps_sfix256
:
31850 case CODE_FOR_sse4_1_blendps
:
31851 case CODE_FOR_avx_blendpd256
:
31852 case CODE_FOR_avx_vpermilv4df
:
31853 error ("the last argument must be a 4-bit immediate");
31856 case CODE_FOR_sse4_1_blendpd
:
31857 case CODE_FOR_avx_vpermilv2df
:
31858 case CODE_FOR_xop_vpermil2v2df3
:
31859 case CODE_FOR_xop_vpermil2v4sf3
:
31860 case CODE_FOR_xop_vpermil2v4df3
:
31861 case CODE_FOR_xop_vpermil2v8sf3
:
31862 error ("the last argument must be a 2-bit immediate");
31865 case CODE_FOR_avx_vextractf128v4df
:
31866 case CODE_FOR_avx_vextractf128v8sf
:
31867 case CODE_FOR_avx_vextractf128v8si
:
31868 case CODE_FOR_avx_vinsertf128v4df
:
31869 case CODE_FOR_avx_vinsertf128v8sf
:
31870 case CODE_FOR_avx_vinsertf128v8si
:
31871 error ("the last argument must be a 1-bit immediate");
31874 case CODE_FOR_avx_vmcmpv2df3
:
31875 case CODE_FOR_avx_vmcmpv4sf3
:
31876 case CODE_FOR_avx_cmpv2df3
:
31877 case CODE_FOR_avx_cmpv4sf3
:
31878 case CODE_FOR_avx_cmpv4df3
:
31879 case CODE_FOR_avx_cmpv8sf3
:
31880 error ("the last argument must be a 5-bit immediate");
31884 switch (nargs_constant
)
31887 if ((nargs
- i
) == nargs_constant
)
31889 error ("the next to last argument must be an 8-bit immediate");
31893 error ("the last argument must be an 8-bit immediate");
31896 gcc_unreachable ();
31903 if (VECTOR_MODE_P (mode
))
31904 op
= safe_vector_operand (op
, mode
);
31906 /* If we aren't optimizing, only allow one memory operand to
31908 if (memory_operand (op
, mode
))
31911 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31913 if (optimize
|| !match
|| num_memory
> 1)
31914 op
= copy_to_mode_reg (mode
, op
);
31918 op
= copy_to_reg (op
);
31919 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31924 args
[i
].mode
= mode
;
31930 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31933 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31936 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31940 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31941 args
[2].op
, args
[3].op
);
31944 gcc_unreachable ();
31954 /* Subroutine of ix86_expand_builtin to take care of special insns
31955 with variable number of operands. */
31958 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31959 tree exp
, rtx target
)
31963 unsigned int i
, nargs
, arg_adjust
, memory
;
31967 enum machine_mode mode
;
31969 enum insn_code icode
= d
->icode
;
31970 bool last_arg_constant
= false;
31971 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31972 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31973 enum { load
, store
} klass
;
31975 switch ((enum ix86_builtin_func_type
) d
->flag
)
31977 case VOID_FTYPE_VOID
:
31978 emit_insn (GEN_FCN (icode
) (target
));
31980 case VOID_FTYPE_UINT64
:
31981 case VOID_FTYPE_UNSIGNED
:
31987 case INT_FTYPE_VOID
:
31988 case UINT64_FTYPE_VOID
:
31989 case UNSIGNED_FTYPE_VOID
:
31994 case UINT64_FTYPE_PUNSIGNED
:
31995 case V2DI_FTYPE_PV2DI
:
31996 case V4DI_FTYPE_PV4DI
:
31997 case V32QI_FTYPE_PCCHAR
:
31998 case V16QI_FTYPE_PCCHAR
:
31999 case V8SF_FTYPE_PCV4SF
:
32000 case V8SF_FTYPE_PCFLOAT
:
32001 case V4SF_FTYPE_PCFLOAT
:
32002 case V4DF_FTYPE_PCV2DF
:
32003 case V4DF_FTYPE_PCDOUBLE
:
32004 case V2DF_FTYPE_PCDOUBLE
:
32005 case VOID_FTYPE_PVOID
:
32010 case VOID_FTYPE_PV2SF_V4SF
:
32011 case VOID_FTYPE_PV4DI_V4DI
:
32012 case VOID_FTYPE_PV2DI_V2DI
:
32013 case VOID_FTYPE_PCHAR_V32QI
:
32014 case VOID_FTYPE_PCHAR_V16QI
:
32015 case VOID_FTYPE_PFLOAT_V8SF
:
32016 case VOID_FTYPE_PFLOAT_V4SF
:
32017 case VOID_FTYPE_PDOUBLE_V4DF
:
32018 case VOID_FTYPE_PDOUBLE_V2DF
:
32019 case VOID_FTYPE_PLONGLONG_LONGLONG
:
32020 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
32021 case VOID_FTYPE_PINT_INT
:
32024 /* Reserve memory operand for target. */
32025 memory
= ARRAY_SIZE (args
);
32027 case V4SF_FTYPE_V4SF_PCV2SF
:
32028 case V2DF_FTYPE_V2DF_PCDOUBLE
:
32033 case V8SF_FTYPE_PCV8SF_V8SI
:
32034 case V4DF_FTYPE_PCV4DF_V4DI
:
32035 case V4SF_FTYPE_PCV4SF_V4SI
:
32036 case V2DF_FTYPE_PCV2DF_V2DI
:
32037 case V8SI_FTYPE_PCV8SI_V8SI
:
32038 case V4DI_FTYPE_PCV4DI_V4DI
:
32039 case V4SI_FTYPE_PCV4SI_V4SI
:
32040 case V2DI_FTYPE_PCV2DI_V2DI
:
32045 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
32046 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
32047 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
32048 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
32049 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
32050 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
32051 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
32052 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
32055 /* Reserve memory operand for target. */
32056 memory
= ARRAY_SIZE (args
);
32058 case VOID_FTYPE_UINT_UINT_UINT
:
32059 case VOID_FTYPE_UINT64_UINT_UINT
:
32060 case UCHAR_FTYPE_UINT_UINT_UINT
:
32061 case UCHAR_FTYPE_UINT64_UINT_UINT
:
32064 memory
= ARRAY_SIZE (args
);
32065 last_arg_constant
= true;
32068 gcc_unreachable ();
32071 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32073 if (klass
== store
)
32075 arg
= CALL_EXPR_ARG (exp
, 0);
32076 op
= expand_normal (arg
);
32077 gcc_assert (target
== 0);
32080 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32081 target
= gen_rtx_MEM (tmode
, op
);
32084 target
= force_reg (tmode
, op
);
32092 || !register_operand (target
, tmode
)
32093 || GET_MODE (target
) != tmode
)
32094 target
= gen_reg_rtx (tmode
);
32097 for (i
= 0; i
< nargs
; i
++)
32099 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32102 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
32103 op
= expand_normal (arg
);
32104 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32106 if (last_arg_constant
&& (i
+ 1) == nargs
)
32110 if (icode
== CODE_FOR_lwp_lwpvalsi3
32111 || icode
== CODE_FOR_lwp_lwpinssi3
32112 || icode
== CODE_FOR_lwp_lwpvaldi3
32113 || icode
== CODE_FOR_lwp_lwpinsdi3
)
32114 error ("the last argument must be a 32-bit immediate");
32116 error ("the last argument must be an 8-bit immediate");
32124 /* This must be the memory operand. */
32125 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
32126 op
= gen_rtx_MEM (mode
, op
);
32127 gcc_assert (GET_MODE (op
) == mode
32128 || GET_MODE (op
) == VOIDmode
);
32132 /* This must be register. */
32133 if (VECTOR_MODE_P (mode
))
32134 op
= safe_vector_operand (op
, mode
);
32136 gcc_assert (GET_MODE (op
) == mode
32137 || GET_MODE (op
) == VOIDmode
);
32138 op
= copy_to_mode_reg (mode
, op
);
32143 args
[i
].mode
= mode
;
32149 pat
= GEN_FCN (icode
) (target
);
32152 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
32155 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
32158 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
32161 gcc_unreachable ();
32167 return klass
== store
? 0 : target
;
32170 /* Return the integer constant in ARG. Constrain it to be in the range
32171 of the subparts of VEC_TYPE; issue an error if not. */
32174 get_element_number (tree vec_type
, tree arg
)
32176 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
32178 if (!host_integerp (arg
, 1)
32179 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
32181 error ("selector must be an integer constant in the range 0..%wi", max
);
32188 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32189 ix86_expand_vector_init. We DO have language-level syntax for this, in
32190 the form of (type){ init-list }. Except that since we can't place emms
32191 instructions from inside the compiler, we can't allow the use of MMX
32192 registers unless the user explicitly asks for it. So we do *not* define
32193 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
32194 we have builtins invoked by mmintrin.h that gives us license to emit
32195 these sorts of instructions. */
32198 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
32200 enum machine_mode tmode
= TYPE_MODE (type
);
32201 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
32202 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
32203 rtvec v
= rtvec_alloc (n_elt
);
32205 gcc_assert (VECTOR_MODE_P (tmode
));
32206 gcc_assert (call_expr_nargs (exp
) == n_elt
);
32208 for (i
= 0; i
< n_elt
; ++i
)
32210 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
32211 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
32214 if (!target
|| !register_operand (target
, tmode
))
32215 target
= gen_reg_rtx (tmode
);
32217 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
32221 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32222 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
32223 had a language-level syntax for referencing vector elements. */
32226 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
32228 enum machine_mode tmode
, mode0
;
32233 arg0
= CALL_EXPR_ARG (exp
, 0);
32234 arg1
= CALL_EXPR_ARG (exp
, 1);
32236 op0
= expand_normal (arg0
);
32237 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
32239 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32240 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32241 gcc_assert (VECTOR_MODE_P (mode0
));
32243 op0
= force_reg (mode0
, op0
);
32245 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32246 target
= gen_reg_rtx (tmode
);
32248 ix86_expand_vector_extract (true, target
, op0
, elt
);
32253 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32254 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32255 a language-level syntax for referencing vector elements. */
32258 ix86_expand_vec_set_builtin (tree exp
)
32260 enum machine_mode tmode
, mode1
;
32261 tree arg0
, arg1
, arg2
;
32263 rtx op0
, op1
, target
;
32265 arg0
= CALL_EXPR_ARG (exp
, 0);
32266 arg1
= CALL_EXPR_ARG (exp
, 1);
32267 arg2
= CALL_EXPR_ARG (exp
, 2);
32269 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32270 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32271 gcc_assert (VECTOR_MODE_P (tmode
));
32273 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32274 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
32275 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
32277 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32278 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
32280 op0
= force_reg (tmode
, op0
);
32281 op1
= force_reg (mode1
, op1
);
32283 /* OP0 is the source of these builtin functions and shouldn't be
32284 modified. Create a copy, use it and return it as target. */
32285 target
= gen_reg_rtx (tmode
);
32286 emit_move_insn (target
, op0
);
32287 ix86_expand_vector_set (true, target
, op1
, elt
);
32292 /* Expand an expression EXP that calls a built-in function,
32293 with result going to TARGET if that's convenient
32294 (and in mode MODE if that's convenient).
32295 SUBTARGET may be used as the target for computing one of EXP's operands.
32296 IGNORE is nonzero if the value is to be ignored. */
32299 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
32300 enum machine_mode mode ATTRIBUTE_UNUSED
,
32301 int ignore ATTRIBUTE_UNUSED
)
32303 const struct builtin_description
*d
;
32305 enum insn_code icode
;
32306 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32307 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32308 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32309 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32310 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32312 /* For CPU builtins that can be folded, fold first and expand the fold. */
32315 case IX86_BUILTIN_CPU_INIT
:
32317 /* Make it call __cpu_indicator_init in libgcc. */
32318 tree call_expr
, fndecl
, type
;
32319 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32320 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32321 call_expr
= build_call_expr (fndecl
, 0);
32322 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32324 case IX86_BUILTIN_CPU_IS
:
32325 case IX86_BUILTIN_CPU_SUPPORTS
:
32327 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32328 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32329 gcc_assert (fold_expr
!= NULL_TREE
);
32330 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32334 /* Determine whether the builtin function is available under the current ISA.
32335 Originally the builtin was not created if it wasn't applicable to the
32336 current ISA based on the command line switches. With function specific
32337 options, we need to check in the context of the function making the call
32338 whether it is supported. */
32339 if (ix86_builtins_isa
[fcode
].isa
32340 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32342 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32343 NULL
, (enum fpmath_unit
) 0, false);
32346 error ("%qE needs unknown isa option", fndecl
);
32349 gcc_assert (opts
!= NULL
);
32350 error ("%qE needs isa option %s", fndecl
, opts
);
32358 case IX86_BUILTIN_MASKMOVQ
:
32359 case IX86_BUILTIN_MASKMOVDQU
:
32360 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32361 ? CODE_FOR_mmx_maskmovq
32362 : CODE_FOR_sse2_maskmovdqu
);
32363 /* Note the arg order is different from the operand order. */
32364 arg1
= CALL_EXPR_ARG (exp
, 0);
32365 arg2
= CALL_EXPR_ARG (exp
, 1);
32366 arg0
= CALL_EXPR_ARG (exp
, 2);
32367 op0
= expand_normal (arg0
);
32368 op1
= expand_normal (arg1
);
32369 op2
= expand_normal (arg2
);
32370 mode0
= insn_data
[icode
].operand
[0].mode
;
32371 mode1
= insn_data
[icode
].operand
[1].mode
;
32372 mode2
= insn_data
[icode
].operand
[2].mode
;
32374 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32375 op0
= gen_rtx_MEM (mode1
, op0
);
32377 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32378 op0
= copy_to_mode_reg (mode0
, op0
);
32379 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
32380 op1
= copy_to_mode_reg (mode1
, op1
);
32381 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
32382 op2
= copy_to_mode_reg (mode2
, op2
);
32383 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32389 case IX86_BUILTIN_LDMXCSR
:
32390 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
32391 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32392 emit_move_insn (target
, op0
);
32393 emit_insn (gen_sse_ldmxcsr (target
));
32396 case IX86_BUILTIN_STMXCSR
:
32397 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
32398 emit_insn (gen_sse_stmxcsr (target
));
32399 return copy_to_mode_reg (SImode
, target
);
32401 case IX86_BUILTIN_CLFLUSH
:
32402 arg0
= CALL_EXPR_ARG (exp
, 0);
32403 op0
= expand_normal (arg0
);
32404 icode
= CODE_FOR_sse2_clflush
;
32405 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32406 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32408 emit_insn (gen_sse2_clflush (op0
));
32411 case IX86_BUILTIN_MONITOR
:
32412 arg0
= CALL_EXPR_ARG (exp
, 0);
32413 arg1
= CALL_EXPR_ARG (exp
, 1);
32414 arg2
= CALL_EXPR_ARG (exp
, 2);
32415 op0
= expand_normal (arg0
);
32416 op1
= expand_normal (arg1
);
32417 op2
= expand_normal (arg2
);
32419 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32421 op1
= copy_to_mode_reg (SImode
, op1
);
32423 op2
= copy_to_mode_reg (SImode
, op2
);
32424 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
32427 case IX86_BUILTIN_MWAIT
:
32428 arg0
= CALL_EXPR_ARG (exp
, 0);
32429 arg1
= CALL_EXPR_ARG (exp
, 1);
32430 op0
= expand_normal (arg0
);
32431 op1
= expand_normal (arg1
);
32433 op0
= copy_to_mode_reg (SImode
, op0
);
32435 op1
= copy_to_mode_reg (SImode
, op1
);
32436 emit_insn (gen_sse3_mwait (op0
, op1
));
32439 case IX86_BUILTIN_VEC_INIT_V2SI
:
32440 case IX86_BUILTIN_VEC_INIT_V4HI
:
32441 case IX86_BUILTIN_VEC_INIT_V8QI
:
32442 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
32444 case IX86_BUILTIN_VEC_EXT_V2DF
:
32445 case IX86_BUILTIN_VEC_EXT_V2DI
:
32446 case IX86_BUILTIN_VEC_EXT_V4SF
:
32447 case IX86_BUILTIN_VEC_EXT_V4SI
:
32448 case IX86_BUILTIN_VEC_EXT_V8HI
:
32449 case IX86_BUILTIN_VEC_EXT_V2SI
:
32450 case IX86_BUILTIN_VEC_EXT_V4HI
:
32451 case IX86_BUILTIN_VEC_EXT_V16QI
:
32452 return ix86_expand_vec_ext_builtin (exp
, target
);
32454 case IX86_BUILTIN_VEC_SET_V2DI
:
32455 case IX86_BUILTIN_VEC_SET_V4SF
:
32456 case IX86_BUILTIN_VEC_SET_V4SI
:
32457 case IX86_BUILTIN_VEC_SET_V8HI
:
32458 case IX86_BUILTIN_VEC_SET_V4HI
:
32459 case IX86_BUILTIN_VEC_SET_V16QI
:
32460 return ix86_expand_vec_set_builtin (exp
);
32462 case IX86_BUILTIN_INFQ
:
32463 case IX86_BUILTIN_HUGE_VALQ
:
32465 REAL_VALUE_TYPE inf
;
32469 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
32471 tmp
= validize_mem (force_const_mem (mode
, tmp
));
32474 target
= gen_reg_rtx (mode
);
32476 emit_move_insn (target
, tmp
);
32480 case IX86_BUILTIN_RDPMC
:
32481 case IX86_BUILTIN_RDTSC
:
32482 case IX86_BUILTIN_RDTSCP
:
32484 op0
= gen_reg_rtx (DImode
);
32485 op1
= gen_reg_rtx (DImode
);
32487 if (fcode
== IX86_BUILTIN_RDPMC
)
32489 arg0
= CALL_EXPR_ARG (exp
, 0);
32490 op2
= expand_normal (arg0
);
32491 if (!register_operand (op2
, SImode
))
32492 op2
= copy_to_mode_reg (SImode
, op2
);
32494 insn
= (TARGET_64BIT
32495 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
32496 : gen_rdpmc (op0
, op2
));
32499 else if (fcode
== IX86_BUILTIN_RDTSC
)
32501 insn
= (TARGET_64BIT
32502 ? gen_rdtsc_rex64 (op0
, op1
)
32503 : gen_rdtsc (op0
));
32508 op2
= gen_reg_rtx (SImode
);
32510 insn
= (TARGET_64BIT
32511 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
32512 : gen_rdtscp (op0
, op2
));
32515 arg0
= CALL_EXPR_ARG (exp
, 0);
32516 op4
= expand_normal (arg0
);
32517 if (!address_operand (op4
, VOIDmode
))
32519 op4
= convert_memory_address (Pmode
, op4
);
32520 op4
= copy_addr_to_reg (op4
);
32522 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
32527 /* mode is VOIDmode if __builtin_rd* has been called
32529 if (mode
== VOIDmode
)
32531 target
= gen_reg_rtx (mode
);
32536 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
32537 op1
, 1, OPTAB_DIRECT
);
32538 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
32539 op0
, 1, OPTAB_DIRECT
);
32542 emit_move_insn (target
, op0
);
32545 case IX86_BUILTIN_FXSAVE
:
32546 case IX86_BUILTIN_FXRSTOR
:
32547 case IX86_BUILTIN_FXSAVE64
:
32548 case IX86_BUILTIN_FXRSTOR64
:
32551 case IX86_BUILTIN_FXSAVE
:
32552 icode
= CODE_FOR_fxsave
;
32554 case IX86_BUILTIN_FXRSTOR
:
32555 icode
= CODE_FOR_fxrstor
;
32557 case IX86_BUILTIN_FXSAVE64
:
32558 icode
= CODE_FOR_fxsave64
;
32560 case IX86_BUILTIN_FXRSTOR64
:
32561 icode
= CODE_FOR_fxrstor64
;
32564 gcc_unreachable ();
32567 arg0
= CALL_EXPR_ARG (exp
, 0);
32568 op0
= expand_normal (arg0
);
32570 if (!address_operand (op0
, VOIDmode
))
32572 op0
= convert_memory_address (Pmode
, op0
);
32573 op0
= copy_addr_to_reg (op0
);
32575 op0
= gen_rtx_MEM (BLKmode
, op0
);
32577 pat
= GEN_FCN (icode
) (op0
);
32582 case IX86_BUILTIN_XSAVE
:
32583 case IX86_BUILTIN_XRSTOR
:
32584 case IX86_BUILTIN_XSAVE64
:
32585 case IX86_BUILTIN_XRSTOR64
:
32586 case IX86_BUILTIN_XSAVEOPT
:
32587 case IX86_BUILTIN_XSAVEOPT64
:
32588 arg0
= CALL_EXPR_ARG (exp
, 0);
32589 arg1
= CALL_EXPR_ARG (exp
, 1);
32590 op0
= expand_normal (arg0
);
32591 op1
= expand_normal (arg1
);
32593 if (!address_operand (op0
, VOIDmode
))
32595 op0
= convert_memory_address (Pmode
, op0
);
32596 op0
= copy_addr_to_reg (op0
);
32598 op0
= gen_rtx_MEM (BLKmode
, op0
);
32600 op1
= force_reg (DImode
, op1
);
32604 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
32605 NULL
, 1, OPTAB_DIRECT
);
32608 case IX86_BUILTIN_XSAVE
:
32609 icode
= CODE_FOR_xsave_rex64
;
32611 case IX86_BUILTIN_XRSTOR
:
32612 icode
= CODE_FOR_xrstor_rex64
;
32614 case IX86_BUILTIN_XSAVE64
:
32615 icode
= CODE_FOR_xsave64
;
32617 case IX86_BUILTIN_XRSTOR64
:
32618 icode
= CODE_FOR_xrstor64
;
32620 case IX86_BUILTIN_XSAVEOPT
:
32621 icode
= CODE_FOR_xsaveopt_rex64
;
32623 case IX86_BUILTIN_XSAVEOPT64
:
32624 icode
= CODE_FOR_xsaveopt64
;
32627 gcc_unreachable ();
32630 op2
= gen_lowpart (SImode
, op2
);
32631 op1
= gen_lowpart (SImode
, op1
);
32632 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
32638 case IX86_BUILTIN_XSAVE
:
32639 icode
= CODE_FOR_xsave
;
32641 case IX86_BUILTIN_XRSTOR
:
32642 icode
= CODE_FOR_xrstor
;
32644 case IX86_BUILTIN_XSAVEOPT
:
32645 icode
= CODE_FOR_xsaveopt
;
32648 gcc_unreachable ();
32650 pat
= GEN_FCN (icode
) (op0
, op1
);
32657 case IX86_BUILTIN_LLWPCB
:
32658 arg0
= CALL_EXPR_ARG (exp
, 0);
32659 op0
= expand_normal (arg0
);
32660 icode
= CODE_FOR_lwp_llwpcb
;
32661 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
32662 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
32663 emit_insn (gen_lwp_llwpcb (op0
));
32666 case IX86_BUILTIN_SLWPCB
:
32667 icode
= CODE_FOR_lwp_slwpcb
;
32669 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
32670 target
= gen_reg_rtx (Pmode
);
32671 emit_insn (gen_lwp_slwpcb (target
));
32674 case IX86_BUILTIN_BEXTRI32
:
32675 case IX86_BUILTIN_BEXTRI64
:
32676 arg0
= CALL_EXPR_ARG (exp
, 0);
32677 arg1
= CALL_EXPR_ARG (exp
, 1);
32678 op0
= expand_normal (arg0
);
32679 op1
= expand_normal (arg1
);
32680 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
32681 ? CODE_FOR_tbm_bextri_si
32682 : CODE_FOR_tbm_bextri_di
);
32683 if (!CONST_INT_P (op1
))
32685 error ("last argument must be an immediate");
32690 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
32691 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
32692 op1
= GEN_INT (length
);
32693 op2
= GEN_INT (lsb_index
);
32694 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
32700 case IX86_BUILTIN_RDRAND16_STEP
:
32701 icode
= CODE_FOR_rdrandhi_1
;
32705 case IX86_BUILTIN_RDRAND32_STEP
:
32706 icode
= CODE_FOR_rdrandsi_1
;
32710 case IX86_BUILTIN_RDRAND64_STEP
:
32711 icode
= CODE_FOR_rdranddi_1
;
32715 op0
= gen_reg_rtx (mode0
);
32716 emit_insn (GEN_FCN (icode
) (op0
));
32718 arg0
= CALL_EXPR_ARG (exp
, 0);
32719 op1
= expand_normal (arg0
);
32720 if (!address_operand (op1
, VOIDmode
))
32722 op1
= convert_memory_address (Pmode
, op1
);
32723 op1
= copy_addr_to_reg (op1
);
32725 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32727 op1
= gen_reg_rtx (SImode
);
32728 emit_move_insn (op1
, CONST1_RTX (SImode
));
32730 /* Emit SImode conditional move. */
32731 if (mode0
== HImode
)
32733 op2
= gen_reg_rtx (SImode
);
32734 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
32736 else if (mode0
== SImode
)
32739 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
32742 target
= gen_reg_rtx (SImode
);
32744 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32746 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32747 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
32750 case IX86_BUILTIN_RDSEED16_STEP
:
32751 icode
= CODE_FOR_rdseedhi_1
;
32755 case IX86_BUILTIN_RDSEED32_STEP
:
32756 icode
= CODE_FOR_rdseedsi_1
;
32760 case IX86_BUILTIN_RDSEED64_STEP
:
32761 icode
= CODE_FOR_rdseeddi_1
;
32765 op0
= gen_reg_rtx (mode0
);
32766 emit_insn (GEN_FCN (icode
) (op0
));
32768 arg0
= CALL_EXPR_ARG (exp
, 0);
32769 op1
= expand_normal (arg0
);
32770 if (!address_operand (op1
, VOIDmode
))
32772 op1
= convert_memory_address (Pmode
, op1
);
32773 op1
= copy_addr_to_reg (op1
);
32775 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32777 op2
= gen_reg_rtx (QImode
);
32779 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32781 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32784 target
= gen_reg_rtx (SImode
);
32786 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32789 case IX86_BUILTIN_ADDCARRYX32
:
32790 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32794 case IX86_BUILTIN_ADDCARRYX64
:
32795 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32799 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32800 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32801 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32802 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32804 op0
= gen_reg_rtx (QImode
);
32806 /* Generate CF from input operand. */
32807 op1
= expand_normal (arg0
);
32808 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32809 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32811 /* Gen ADCX instruction to compute X+Y+CF. */
32812 op2
= expand_normal (arg1
);
32813 op3
= expand_normal (arg2
);
32816 op2
= copy_to_mode_reg (mode0
, op2
);
32818 op3
= copy_to_mode_reg (mode0
, op3
);
32820 op0
= gen_reg_rtx (mode0
);
32822 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32823 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32824 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32826 /* Store the result. */
32827 op4
= expand_normal (arg3
);
32828 if (!address_operand (op4
, VOIDmode
))
32830 op4
= convert_memory_address (Pmode
, op4
);
32831 op4
= copy_addr_to_reg (op4
);
32833 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32835 /* Return current CF value. */
32837 target
= gen_reg_rtx (QImode
);
32839 PUT_MODE (pat
, QImode
);
32840 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32843 case IX86_BUILTIN_GATHERSIV2DF
:
32844 icode
= CODE_FOR_avx2_gathersiv2df
;
32846 case IX86_BUILTIN_GATHERSIV4DF
:
32847 icode
= CODE_FOR_avx2_gathersiv4df
;
32849 case IX86_BUILTIN_GATHERDIV2DF
:
32850 icode
= CODE_FOR_avx2_gatherdiv2df
;
32852 case IX86_BUILTIN_GATHERDIV4DF
:
32853 icode
= CODE_FOR_avx2_gatherdiv4df
;
32855 case IX86_BUILTIN_GATHERSIV4SF
:
32856 icode
= CODE_FOR_avx2_gathersiv4sf
;
32858 case IX86_BUILTIN_GATHERSIV8SF
:
32859 icode
= CODE_FOR_avx2_gathersiv8sf
;
32861 case IX86_BUILTIN_GATHERDIV4SF
:
32862 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32864 case IX86_BUILTIN_GATHERDIV8SF
:
32865 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32867 case IX86_BUILTIN_GATHERSIV2DI
:
32868 icode
= CODE_FOR_avx2_gathersiv2di
;
32870 case IX86_BUILTIN_GATHERSIV4DI
:
32871 icode
= CODE_FOR_avx2_gathersiv4di
;
32873 case IX86_BUILTIN_GATHERDIV2DI
:
32874 icode
= CODE_FOR_avx2_gatherdiv2di
;
32876 case IX86_BUILTIN_GATHERDIV4DI
:
32877 icode
= CODE_FOR_avx2_gatherdiv4di
;
32879 case IX86_BUILTIN_GATHERSIV4SI
:
32880 icode
= CODE_FOR_avx2_gathersiv4si
;
32882 case IX86_BUILTIN_GATHERSIV8SI
:
32883 icode
= CODE_FOR_avx2_gathersiv8si
;
32885 case IX86_BUILTIN_GATHERDIV4SI
:
32886 icode
= CODE_FOR_avx2_gatherdiv4si
;
32888 case IX86_BUILTIN_GATHERDIV8SI
:
32889 icode
= CODE_FOR_avx2_gatherdiv8si
;
32891 case IX86_BUILTIN_GATHERALTSIV4DF
:
32892 icode
= CODE_FOR_avx2_gathersiv4df
;
32894 case IX86_BUILTIN_GATHERALTDIV8SF
:
32895 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32897 case IX86_BUILTIN_GATHERALTSIV4DI
:
32898 icode
= CODE_FOR_avx2_gathersiv4di
;
32900 case IX86_BUILTIN_GATHERALTDIV8SI
:
32901 icode
= CODE_FOR_avx2_gatherdiv8si
;
32905 arg0
= CALL_EXPR_ARG (exp
, 0);
32906 arg1
= CALL_EXPR_ARG (exp
, 1);
32907 arg2
= CALL_EXPR_ARG (exp
, 2);
32908 arg3
= CALL_EXPR_ARG (exp
, 3);
32909 arg4
= CALL_EXPR_ARG (exp
, 4);
32910 op0
= expand_normal (arg0
);
32911 op1
= expand_normal (arg1
);
32912 op2
= expand_normal (arg2
);
32913 op3
= expand_normal (arg3
);
32914 op4
= expand_normal (arg4
);
32915 /* Note the arg order is different from the operand order. */
32916 mode0
= insn_data
[icode
].operand
[1].mode
;
32917 mode2
= insn_data
[icode
].operand
[3].mode
;
32918 mode3
= insn_data
[icode
].operand
[4].mode
;
32919 mode4
= insn_data
[icode
].operand
[5].mode
;
32921 if (target
== NULL_RTX
32922 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32923 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32925 subtarget
= target
;
32927 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32928 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32930 rtx half
= gen_reg_rtx (V4SImode
);
32931 if (!nonimmediate_operand (op2
, V8SImode
))
32932 op2
= copy_to_mode_reg (V8SImode
, op2
);
32933 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32936 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32937 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32939 rtx (*gen
) (rtx
, rtx
);
32940 rtx half
= gen_reg_rtx (mode0
);
32941 if (mode0
== V4SFmode
)
32942 gen
= gen_vec_extract_lo_v8sf
;
32944 gen
= gen_vec_extract_lo_v8si
;
32945 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32946 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32947 emit_insn (gen (half
, op0
));
32949 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32950 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32951 emit_insn (gen (half
, op3
));
32955 /* Force memory operand only with base register here. But we
32956 don't want to do it on memory operand for other builtin
32958 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32960 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32961 op0
= copy_to_mode_reg (mode0
, op0
);
32962 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32963 op1
= copy_to_mode_reg (Pmode
, op1
);
32964 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32965 op2
= copy_to_mode_reg (mode2
, op2
);
32966 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32967 op3
= copy_to_mode_reg (mode3
, op3
);
32968 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32970 error ("last argument must be scale 1, 2, 4, 8");
32974 /* Optimize. If mask is known to have all high bits set,
32975 replace op0 with pc_rtx to signal that the instruction
32976 overwrites the whole destination and doesn't use its
32977 previous contents. */
32980 if (TREE_CODE (arg3
) == VECTOR_CST
)
32982 unsigned int negative
= 0;
32983 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32985 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32986 if (TREE_CODE (cst
) == INTEGER_CST
32987 && tree_int_cst_sign_bit (cst
))
32989 else if (TREE_CODE (cst
) == REAL_CST
32990 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32993 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32996 else if (TREE_CODE (arg3
) == SSA_NAME
)
32998 /* Recognize also when mask is like:
32999 __v2df src = _mm_setzero_pd ();
33000 __v2df mask = _mm_cmpeq_pd (src, src);
33002 __v8sf src = _mm256_setzero_ps ();
33003 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
33004 as that is a cheaper way to load all ones into
33005 a register than having to load a constant from
33007 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
33008 if (is_gimple_call (def_stmt
))
33010 tree fndecl
= gimple_call_fndecl (def_stmt
);
33012 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
33013 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
33015 case IX86_BUILTIN_CMPPD
:
33016 case IX86_BUILTIN_CMPPS
:
33017 case IX86_BUILTIN_CMPPD256
:
33018 case IX86_BUILTIN_CMPPS256
:
33019 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
33022 case IX86_BUILTIN_CMPEQPD
:
33023 case IX86_BUILTIN_CMPEQPS
:
33024 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
33025 && initializer_zerop (gimple_call_arg (def_stmt
,
33036 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
33041 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
33042 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
33044 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
33045 ? V4SFmode
: V4SImode
;
33046 if (target
== NULL_RTX
)
33047 target
= gen_reg_rtx (tmode
);
33048 if (tmode
== V4SFmode
)
33049 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
33051 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
33054 target
= subtarget
;
33058 case IX86_BUILTIN_XABORT
:
33059 icode
= CODE_FOR_xabort
;
33060 arg0
= CALL_EXPR_ARG (exp
, 0);
33061 op0
= expand_normal (arg0
);
33062 mode0
= insn_data
[icode
].operand
[0].mode
;
33063 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
33065 error ("the xabort's argument must be an 8-bit immediate");
33068 emit_insn (gen_xabort (op0
));
33075 for (i
= 0, d
= bdesc_special_args
;
33076 i
< ARRAY_SIZE (bdesc_special_args
);
33078 if (d
->code
== fcode
)
33079 return ix86_expand_special_args_builtin (d
, exp
, target
);
33081 for (i
= 0, d
= bdesc_args
;
33082 i
< ARRAY_SIZE (bdesc_args
);
33084 if (d
->code
== fcode
)
33087 case IX86_BUILTIN_FABSQ
:
33088 case IX86_BUILTIN_COPYSIGNQ
:
33090 /* Emit a normal call if SSE isn't available. */
33091 return expand_call (exp
, target
, ignore
);
33093 return ix86_expand_args_builtin (d
, exp
, target
);
33096 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
33097 if (d
->code
== fcode
)
33098 return ix86_expand_sse_comi (d
, exp
, target
);
33100 for (i
= 0, d
= bdesc_pcmpestr
;
33101 i
< ARRAY_SIZE (bdesc_pcmpestr
);
33103 if (d
->code
== fcode
)
33104 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
33106 for (i
= 0, d
= bdesc_pcmpistr
;
33107 i
< ARRAY_SIZE (bdesc_pcmpistr
);
33109 if (d
->code
== fcode
)
33110 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
33112 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
33113 if (d
->code
== fcode
)
33114 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
33115 (enum ix86_builtin_func_type
)
33116 d
->flag
, d
->comparison
);
33118 gcc_unreachable ();
33121 /* Returns a function decl for a vectorized version of the builtin function
33122 with builtin function code FN and the result vector type TYPE, or NULL_TREE
33123 if it is not available. */
33126 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
33129 enum machine_mode in_mode
, out_mode
;
33131 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
33133 if (TREE_CODE (type_out
) != VECTOR_TYPE
33134 || TREE_CODE (type_in
) != VECTOR_TYPE
33135 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
33138 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33139 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
33140 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33141 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33145 case BUILT_IN_SQRT
:
33146 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33148 if (out_n
== 2 && in_n
== 2)
33149 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
33150 else if (out_n
== 4 && in_n
== 4)
33151 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
33155 case BUILT_IN_SQRTF
:
33156 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33158 if (out_n
== 4 && in_n
== 4)
33159 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
33160 else if (out_n
== 8 && in_n
== 8)
33161 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
33165 case BUILT_IN_IFLOOR
:
33166 case BUILT_IN_LFLOOR
:
33167 case BUILT_IN_LLFLOOR
:
33168 /* The round insn does not trap on denormals. */
33169 if (flag_trapping_math
|| !TARGET_ROUND
)
33172 if (out_mode
== SImode
&& in_mode
== DFmode
)
33174 if (out_n
== 4 && in_n
== 2)
33175 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
33176 else if (out_n
== 8 && in_n
== 4)
33177 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
33181 case BUILT_IN_IFLOORF
:
33182 case BUILT_IN_LFLOORF
:
33183 case BUILT_IN_LLFLOORF
:
33184 /* The round insn does not trap on denormals. */
33185 if (flag_trapping_math
|| !TARGET_ROUND
)
33188 if (out_mode
== SImode
&& in_mode
== SFmode
)
33190 if (out_n
== 4 && in_n
== 4)
33191 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
33192 else if (out_n
== 8 && in_n
== 8)
33193 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
33197 case BUILT_IN_ICEIL
:
33198 case BUILT_IN_LCEIL
:
33199 case BUILT_IN_LLCEIL
:
33200 /* The round insn does not trap on denormals. */
33201 if (flag_trapping_math
|| !TARGET_ROUND
)
33204 if (out_mode
== SImode
&& in_mode
== DFmode
)
33206 if (out_n
== 4 && in_n
== 2)
33207 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
33208 else if (out_n
== 8 && in_n
== 4)
33209 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
33213 case BUILT_IN_ICEILF
:
33214 case BUILT_IN_LCEILF
:
33215 case BUILT_IN_LLCEILF
:
33216 /* The round insn does not trap on denormals. */
33217 if (flag_trapping_math
|| !TARGET_ROUND
)
33220 if (out_mode
== SImode
&& in_mode
== SFmode
)
33222 if (out_n
== 4 && in_n
== 4)
33223 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
33224 else if (out_n
== 8 && in_n
== 8)
33225 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
33229 case BUILT_IN_IRINT
:
33230 case BUILT_IN_LRINT
:
33231 case BUILT_IN_LLRINT
:
33232 if (out_mode
== SImode
&& in_mode
== DFmode
)
33234 if (out_n
== 4 && in_n
== 2)
33235 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
33236 else if (out_n
== 8 && in_n
== 4)
33237 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
33241 case BUILT_IN_IRINTF
:
33242 case BUILT_IN_LRINTF
:
33243 case BUILT_IN_LLRINTF
:
33244 if (out_mode
== SImode
&& in_mode
== SFmode
)
33246 if (out_n
== 4 && in_n
== 4)
33247 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
33248 else if (out_n
== 8 && in_n
== 8)
33249 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
33253 case BUILT_IN_IROUND
:
33254 case BUILT_IN_LROUND
:
33255 case BUILT_IN_LLROUND
:
33256 /* The round insn does not trap on denormals. */
33257 if (flag_trapping_math
|| !TARGET_ROUND
)
33260 if (out_mode
== SImode
&& in_mode
== DFmode
)
33262 if (out_n
== 4 && in_n
== 2)
33263 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
33264 else if (out_n
== 8 && in_n
== 4)
33265 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
33269 case BUILT_IN_IROUNDF
:
33270 case BUILT_IN_LROUNDF
:
33271 case BUILT_IN_LLROUNDF
:
33272 /* The round insn does not trap on denormals. */
33273 if (flag_trapping_math
|| !TARGET_ROUND
)
33276 if (out_mode
== SImode
&& in_mode
== SFmode
)
33278 if (out_n
== 4 && in_n
== 4)
33279 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
33280 else if (out_n
== 8 && in_n
== 8)
33281 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
33285 case BUILT_IN_COPYSIGN
:
33286 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33288 if (out_n
== 2 && in_n
== 2)
33289 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
33290 else if (out_n
== 4 && in_n
== 4)
33291 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
33295 case BUILT_IN_COPYSIGNF
:
33296 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33298 if (out_n
== 4 && in_n
== 4)
33299 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
33300 else if (out_n
== 8 && in_n
== 8)
33301 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
33305 case BUILT_IN_FLOOR
:
33306 /* The round insn does not trap on denormals. */
33307 if (flag_trapping_math
|| !TARGET_ROUND
)
33310 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33312 if (out_n
== 2 && in_n
== 2)
33313 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
33314 else if (out_n
== 4 && in_n
== 4)
33315 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
33319 case BUILT_IN_FLOORF
:
33320 /* The round insn does not trap on denormals. */
33321 if (flag_trapping_math
|| !TARGET_ROUND
)
33324 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33326 if (out_n
== 4 && in_n
== 4)
33327 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
33328 else if (out_n
== 8 && in_n
== 8)
33329 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
33333 case BUILT_IN_CEIL
:
33334 /* The round insn does not trap on denormals. */
33335 if (flag_trapping_math
|| !TARGET_ROUND
)
33338 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33340 if (out_n
== 2 && in_n
== 2)
33341 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
33342 else if (out_n
== 4 && in_n
== 4)
33343 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
33347 case BUILT_IN_CEILF
:
33348 /* The round insn does not trap on denormals. */
33349 if (flag_trapping_math
|| !TARGET_ROUND
)
33352 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33354 if (out_n
== 4 && in_n
== 4)
33355 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
33356 else if (out_n
== 8 && in_n
== 8)
33357 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
33361 case BUILT_IN_TRUNC
:
33362 /* The round insn does not trap on denormals. */
33363 if (flag_trapping_math
|| !TARGET_ROUND
)
33366 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33368 if (out_n
== 2 && in_n
== 2)
33369 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
33370 else if (out_n
== 4 && in_n
== 4)
33371 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
33375 case BUILT_IN_TRUNCF
:
33376 /* The round insn does not trap on denormals. */
33377 if (flag_trapping_math
|| !TARGET_ROUND
)
33380 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33382 if (out_n
== 4 && in_n
== 4)
33383 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
33384 else if (out_n
== 8 && in_n
== 8)
33385 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
33389 case BUILT_IN_RINT
:
33390 /* The round insn does not trap on denormals. */
33391 if (flag_trapping_math
|| !TARGET_ROUND
)
33394 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33396 if (out_n
== 2 && in_n
== 2)
33397 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
33398 else if (out_n
== 4 && in_n
== 4)
33399 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
33403 case BUILT_IN_RINTF
:
33404 /* The round insn does not trap on denormals. */
33405 if (flag_trapping_math
|| !TARGET_ROUND
)
33408 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33410 if (out_n
== 4 && in_n
== 4)
33411 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
33412 else if (out_n
== 8 && in_n
== 8)
33413 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
33417 case BUILT_IN_ROUND
:
33418 /* The round insn does not trap on denormals. */
33419 if (flag_trapping_math
|| !TARGET_ROUND
)
33422 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33424 if (out_n
== 2 && in_n
== 2)
33425 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
33426 else if (out_n
== 4 && in_n
== 4)
33427 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
33431 case BUILT_IN_ROUNDF
:
33432 /* The round insn does not trap on denormals. */
33433 if (flag_trapping_math
|| !TARGET_ROUND
)
33436 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33438 if (out_n
== 4 && in_n
== 4)
33439 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
33440 else if (out_n
== 8 && in_n
== 8)
33441 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
33446 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33448 if (out_n
== 2 && in_n
== 2)
33449 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
33450 if (out_n
== 4 && in_n
== 4)
33451 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
33455 case BUILT_IN_FMAF
:
33456 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33458 if (out_n
== 4 && in_n
== 4)
33459 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
33460 if (out_n
== 8 && in_n
== 8)
33461 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
33469 /* Dispatch to a handler for a vectorization library. */
33470 if (ix86_veclib_handler
)
33471 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
33477 /* Handler for an SVML-style interface to
33478 a library with vectorized intrinsics. */
33481 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
33484 tree fntype
, new_fndecl
, args
;
33487 enum machine_mode el_mode
, in_mode
;
33490 /* The SVML is suitable for unsafe math only. */
33491 if (!flag_unsafe_math_optimizations
)
33494 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33495 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33496 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33497 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33498 if (el_mode
!= in_mode
33506 case BUILT_IN_LOG10
:
33508 case BUILT_IN_TANH
:
33510 case BUILT_IN_ATAN
:
33511 case BUILT_IN_ATAN2
:
33512 case BUILT_IN_ATANH
:
33513 case BUILT_IN_CBRT
:
33514 case BUILT_IN_SINH
:
33516 case BUILT_IN_ASINH
:
33517 case BUILT_IN_ASIN
:
33518 case BUILT_IN_COSH
:
33520 case BUILT_IN_ACOSH
:
33521 case BUILT_IN_ACOS
:
33522 if (el_mode
!= DFmode
|| n
!= 2)
33526 case BUILT_IN_EXPF
:
33527 case BUILT_IN_LOGF
:
33528 case BUILT_IN_LOG10F
:
33529 case BUILT_IN_POWF
:
33530 case BUILT_IN_TANHF
:
33531 case BUILT_IN_TANF
:
33532 case BUILT_IN_ATANF
:
33533 case BUILT_IN_ATAN2F
:
33534 case BUILT_IN_ATANHF
:
33535 case BUILT_IN_CBRTF
:
33536 case BUILT_IN_SINHF
:
33537 case BUILT_IN_SINF
:
33538 case BUILT_IN_ASINHF
:
33539 case BUILT_IN_ASINF
:
33540 case BUILT_IN_COSHF
:
33541 case BUILT_IN_COSF
:
33542 case BUILT_IN_ACOSHF
:
33543 case BUILT_IN_ACOSF
:
33544 if (el_mode
!= SFmode
|| n
!= 4)
33552 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33554 if (fn
== BUILT_IN_LOGF
)
33555 strcpy (name
, "vmlsLn4");
33556 else if (fn
== BUILT_IN_LOG
)
33557 strcpy (name
, "vmldLn2");
33560 sprintf (name
, "vmls%s", bname
+10);
33561 name
[strlen (name
)-1] = '4';
33564 sprintf (name
, "vmld%s2", bname
+10);
33566 /* Convert to uppercase. */
33570 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33572 args
= TREE_CHAIN (args
))
33576 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33578 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33580 /* Build a function declaration for the vectorized function. */
33581 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33582 FUNCTION_DECL
, get_identifier (name
), fntype
);
33583 TREE_PUBLIC (new_fndecl
) = 1;
33584 DECL_EXTERNAL (new_fndecl
) = 1;
33585 DECL_IS_NOVOPS (new_fndecl
) = 1;
33586 TREE_READONLY (new_fndecl
) = 1;
33591 /* Handler for an ACML-style interface to
33592 a library with vectorized intrinsics. */
33595 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
33597 char name
[20] = "__vr.._";
33598 tree fntype
, new_fndecl
, args
;
33601 enum machine_mode el_mode
, in_mode
;
33604 /* The ACML is 64bits only and suitable for unsafe math only as
33605 it does not correctly support parts of IEEE with the required
33606 precision such as denormals. */
33608 || !flag_unsafe_math_optimizations
)
33611 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33612 n
= TYPE_VECTOR_SUBPARTS (type_out
);
33613 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33614 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33615 if (el_mode
!= in_mode
33625 case BUILT_IN_LOG2
:
33626 case BUILT_IN_LOG10
:
33629 if (el_mode
!= DFmode
33634 case BUILT_IN_SINF
:
33635 case BUILT_IN_COSF
:
33636 case BUILT_IN_EXPF
:
33637 case BUILT_IN_POWF
:
33638 case BUILT_IN_LOGF
:
33639 case BUILT_IN_LOG2F
:
33640 case BUILT_IN_LOG10F
:
33643 if (el_mode
!= SFmode
33652 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
33653 sprintf (name
+ 7, "%s", bname
+10);
33656 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
33658 args
= TREE_CHAIN (args
))
33662 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
33664 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
33666 /* Build a function declaration for the vectorized function. */
33667 new_fndecl
= build_decl (BUILTINS_LOCATION
,
33668 FUNCTION_DECL
, get_identifier (name
), fntype
);
33669 TREE_PUBLIC (new_fndecl
) = 1;
33670 DECL_EXTERNAL (new_fndecl
) = 1;
33671 DECL_IS_NOVOPS (new_fndecl
) = 1;
33672 TREE_READONLY (new_fndecl
) = 1;
33677 /* Returns a decl of a function that implements gather load with
33678 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
33679 Return NULL_TREE if it is not available. */
33682 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
33683 const_tree index_type
, int scale
)
33686 enum ix86_builtins code
;
33691 if ((TREE_CODE (index_type
) != INTEGER_TYPE
33692 && !POINTER_TYPE_P (index_type
))
33693 || (TYPE_MODE (index_type
) != SImode
33694 && TYPE_MODE (index_type
) != DImode
))
33697 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
33700 /* v*gather* insn sign extends index to pointer mode. */
33701 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
33702 && TYPE_UNSIGNED (index_type
))
33707 || (scale
& (scale
- 1)) != 0)
33710 si
= TYPE_MODE (index_type
) == SImode
;
33711 switch (TYPE_MODE (mem_vectype
))
33714 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
33717 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
33720 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
33723 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
33726 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
33729 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
33732 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
33735 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
33741 return ix86_builtins
[code
];
33744 /* Returns a code for a target-specific builtin that implements
33745 reciprocal of the function, or NULL_TREE if not available. */
33748 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
33749 bool sqrt ATTRIBUTE_UNUSED
)
33751 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
33752 && flag_finite_math_only
&& !flag_trapping_math
33753 && flag_unsafe_math_optimizations
))
33757 /* Machine dependent builtins. */
33760 /* Vectorized version of sqrt to rsqrt conversion. */
33761 case IX86_BUILTIN_SQRTPS_NR
:
33762 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33764 case IX86_BUILTIN_SQRTPS_NR256
:
33765 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33771 /* Normal builtins. */
33774 /* Sqrt to rsqrt conversion. */
33775 case BUILT_IN_SQRTF
:
33776 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33783 /* Helper for avx_vpermilps256_operand et al. This is also used by
33784 the expansion functions to turn the parallel back into a mask.
33785 The return value is 0 for no match and the imm8+1 for a match. */
33788 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33790 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33792 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33794 if (XVECLEN (par
, 0) != (int) nelt
)
33797 /* Validate that all of the elements are constants, and not totally
33798 out of range. Copy the data into an integral array to make the
33799 subsequent checks easier. */
33800 for (i
= 0; i
< nelt
; ++i
)
33802 rtx er
= XVECEXP (par
, 0, i
);
33803 unsigned HOST_WIDE_INT ei
;
33805 if (!CONST_INT_P (er
))
33816 /* In the 256-bit DFmode case, we can only move elements within
33818 for (i
= 0; i
< 2; ++i
)
33822 mask
|= ipar
[i
] << i
;
33824 for (i
= 2; i
< 4; ++i
)
33828 mask
|= (ipar
[i
] - 2) << i
;
33833 /* In the 256-bit SFmode case, we have full freedom of movement
33834 within the low 128-bit lane, but the high 128-bit lane must
33835 mirror the exact same pattern. */
33836 for (i
= 0; i
< 4; ++i
)
33837 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33844 /* In the 128-bit case, we've full freedom in the placement of
33845 the elements from the source operand. */
33846 for (i
= 0; i
< nelt
; ++i
)
33847 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33851 gcc_unreachable ();
33854 /* Make sure success has a non-zero value by adding one. */
33858 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33859 the expansion functions to turn the parallel back into a mask.
33860 The return value is 0 for no match and the imm8+1 for a match. */
33863 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33865 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33867 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33869 if (XVECLEN (par
, 0) != (int) nelt
)
33872 /* Validate that all of the elements are constants, and not totally
33873 out of range. Copy the data into an integral array to make the
33874 subsequent checks easier. */
33875 for (i
= 0; i
< nelt
; ++i
)
33877 rtx er
= XVECEXP (par
, 0, i
);
33878 unsigned HOST_WIDE_INT ei
;
33880 if (!CONST_INT_P (er
))
33883 if (ei
>= 2 * nelt
)
33888 /* Validate that the halves of the permute are halves. */
33889 for (i
= 0; i
< nelt2
- 1; ++i
)
33890 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33892 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33893 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33896 /* Reconstruct the mask. */
33897 for (i
= 0; i
< 2; ++i
)
33899 unsigned e
= ipar
[i
* nelt2
];
33903 mask
|= e
<< (i
* 4);
33906 /* Make sure success has a non-zero value by adding one. */
33910 /* Store OPERAND to the memory after reload is completed. This means
33911 that we can't easily use assign_stack_local. */
33913 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33917 gcc_assert (reload_completed
);
33918 if (ix86_using_red_zone ())
33920 result
= gen_rtx_MEM (mode
,
33921 gen_rtx_PLUS (Pmode
,
33923 GEN_INT (-RED_ZONE_SIZE
)));
33924 emit_move_insn (result
, operand
);
33926 else if (TARGET_64BIT
)
33932 operand
= gen_lowpart (DImode
, operand
);
33936 gen_rtx_SET (VOIDmode
,
33937 gen_rtx_MEM (DImode
,
33938 gen_rtx_PRE_DEC (DImode
,
33939 stack_pointer_rtx
)),
33943 gcc_unreachable ();
33945 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33954 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33956 gen_rtx_SET (VOIDmode
,
33957 gen_rtx_MEM (SImode
,
33958 gen_rtx_PRE_DEC (Pmode
,
33959 stack_pointer_rtx
)),
33962 gen_rtx_SET (VOIDmode
,
33963 gen_rtx_MEM (SImode
,
33964 gen_rtx_PRE_DEC (Pmode
,
33965 stack_pointer_rtx
)),
33970 /* Store HImodes as SImodes. */
33971 operand
= gen_lowpart (SImode
, operand
);
33975 gen_rtx_SET (VOIDmode
,
33976 gen_rtx_MEM (GET_MODE (operand
),
33977 gen_rtx_PRE_DEC (SImode
,
33978 stack_pointer_rtx
)),
33982 gcc_unreachable ();
33984 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33989 /* Free operand from the memory. */
33991 ix86_free_from_memory (enum machine_mode mode
)
33993 if (!ix86_using_red_zone ())
33997 if (mode
== DImode
|| TARGET_64BIT
)
34001 /* Use LEA to deallocate stack space. In peephole2 it will be converted
34002 to pop or add instruction if registers are available. */
34003 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
34004 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
34009 /* Return a register priority for hard reg REGNO. */
34011 ix86_register_priority (int hard_regno
)
34013 /* ebp and r13 as the base always wants a displacement, r12 as the
34014 base always wants an index. So discourage their usage in an
34016 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
34018 if (hard_regno
== BP_REG
)
34020 /* New x86-64 int registers result in bigger code size. Discourage
34022 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
34024 /* New x86-64 SSE registers result in bigger code size. Discourage
34026 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
34028 /* Usage of AX register results in smaller code. Prefer it. */
34029 if (hard_regno
== 0)
34034 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
34036 Put float CONST_DOUBLE in the constant pool instead of fp regs.
34037 QImode must go into class Q_REGS.
34038 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
34039 movdf to do mem-to-mem moves through integer regs. */
34042 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
34044 enum machine_mode mode
= GET_MODE (x
);
34046 /* We're only allowed to return a subclass of CLASS. Many of the
34047 following checks fail for NO_REGS, so eliminate that early. */
34048 if (regclass
== NO_REGS
)
34051 /* All classes can load zeros. */
34052 if (x
== CONST0_RTX (mode
))
34055 /* Force constants into memory if we are loading a (nonzero) constant into
34056 an MMX or SSE register. This is because there are no MMX/SSE instructions
34057 to load from a constant. */
34059 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
34062 /* Prefer SSE regs only, if we can use them for math. */
34063 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
34064 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34066 /* Floating-point constants need more complex checks. */
34067 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
34069 /* General regs can load everything. */
34070 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
34073 /* Floats can load 0 and 1 plus some others. Note that we eliminated
34074 zero above. We only want to wind up preferring 80387 registers if
34075 we plan on doing computation with them. */
34077 && standard_80387_constant_p (x
) > 0)
34079 /* Limit class to non-sse. */
34080 if (regclass
== FLOAT_SSE_REGS
)
34082 if (regclass
== FP_TOP_SSE_REGS
)
34084 if (regclass
== FP_SECOND_SSE_REGS
)
34085 return FP_SECOND_REG
;
34086 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
34093 /* Generally when we see PLUS here, it's the function invariant
34094 (plus soft-fp const_int). Which can only be computed into general
34096 if (GET_CODE (x
) == PLUS
)
34097 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
34099 /* QImode constants are easy to load, but non-constant QImode data
34100 must go into Q_REGS. */
34101 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
34103 if (reg_class_subset_p (regclass
, Q_REGS
))
34105 if (reg_class_subset_p (Q_REGS
, regclass
))
34113 /* Discourage putting floating-point values in SSE registers unless
34114 SSE math is being used, and likewise for the 387 registers. */
34116 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
34118 enum machine_mode mode
= GET_MODE (x
);
34120 /* Restrict the output reload class to the register bank that we are doing
34121 math on. If we would like not to return a subset of CLASS, reject this
34122 alternative: if reload cannot do this, it will still use its choice. */
34123 mode
= GET_MODE (x
);
34124 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
34125 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
34127 if (X87_FLOAT_MODE_P (mode
))
34129 if (regclass
== FP_TOP_SSE_REGS
)
34131 else if (regclass
== FP_SECOND_SSE_REGS
)
34132 return FP_SECOND_REG
;
34134 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34141 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
34142 enum machine_mode mode
, secondary_reload_info
*sri
)
34144 /* Double-word spills from general registers to non-offsettable memory
34145 references (zero-extended addresses) require special handling. */
34148 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
34149 && INTEGER_CLASS_P (rclass
)
34150 && !offsettable_memref_p (x
))
34153 ? CODE_FOR_reload_noff_load
34154 : CODE_FOR_reload_noff_store
);
34155 /* Add the cost of moving address to a temporary. */
34156 sri
->extra_cost
= 1;
34161 /* QImode spills from non-QI registers require
34162 intermediate register on 32bit targets. */
34164 && !in_p
&& mode
== QImode
34165 && INTEGER_CLASS_P (rclass
)
34166 && MAYBE_NON_Q_CLASS_P (rclass
))
34175 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
34176 regno
= true_regnum (x
);
34178 /* Return Q_REGS if the operand is in memory. */
34183 /* This condition handles corner case where an expression involving
34184 pointers gets vectorized. We're trying to use the address of a
34185 stack slot as a vector initializer.
34187 (set (reg:V2DI 74 [ vect_cst_.2 ])
34188 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
34190 Eventually frame gets turned into sp+offset like this:
34192 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34193 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34194 (const_int 392 [0x188]))))
34196 That later gets turned into:
34198 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34199 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34200 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
34202 We'll have the following reload recorded:
34204 Reload 0: reload_in (DI) =
34205 (plus:DI (reg/f:DI 7 sp)
34206 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
34207 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34208 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
34209 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
34210 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34211 reload_reg_rtx: (reg:V2DI 22 xmm1)
34213 Which isn't going to work since SSE instructions can't handle scalar
34214 additions. Returning GENERAL_REGS forces the addition into integer
34215 register and reload can handle subsequent reloads without problems. */
34217 if (in_p
&& GET_CODE (x
) == PLUS
34218 && SSE_CLASS_P (rclass
)
34219 && SCALAR_INT_MODE_P (mode
))
34220 return GENERAL_REGS
;
34225 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
34228 ix86_class_likely_spilled_p (reg_class_t rclass
)
34239 case SSE_FIRST_REG
:
34241 case FP_SECOND_REG
:
34251 /* If we are copying between general and FP registers, we need a memory
34252 location. The same is true for SSE and MMX registers.
34254 To optimize register_move_cost performance, allow inline variant.
34256 The macro can't work reliably when one of the CLASSES is class containing
34257 registers from multiple units (SSE, MMX, integer). We avoid this by never
34258 combining those units in single alternative in the machine description.
34259 Ensure that this constraint holds to avoid unexpected surprises.
34261 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34262 enforce these sanity checks. */
34265 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34266 enum machine_mode mode
, int strict
)
34268 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
34270 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34271 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34272 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34273 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34274 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34275 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34277 gcc_assert (!strict
|| lra_in_progress
);
34281 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34284 /* ??? This is a lie. We do have moves between mmx/general, and for
34285 mmx/sse2. But by saying we need secondary memory we discourage the
34286 register allocator from using the mmx registers unless needed. */
34287 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34290 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34292 /* SSE1 doesn't have any direct moves from other classes. */
34296 /* If the target says that inter-unit moves are more expensive
34297 than moving through memory, then don't generate them. */
34298 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34299 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34302 /* Between SSE and general, we have moves no larger than word size. */
34303 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34311 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34312 enum machine_mode mode
, int strict
)
34314 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
34317 /* Implement the TARGET_CLASS_MAX_NREGS hook.
34319 On the 80386, this is the size of MODE in words,
34320 except in the FP regs, where a single reg is always enough. */
34322 static unsigned char
34323 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
34325 if (MAYBE_INTEGER_CLASS_P (rclass
))
34327 if (mode
== XFmode
)
34328 return (TARGET_64BIT
? 2 : 3);
34329 else if (mode
== XCmode
)
34330 return (TARGET_64BIT
? 4 : 6);
34332 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
34336 if (COMPLEX_MODE_P (mode
))
34343 /* Return true if the registers in CLASS cannot represent the change from
34344 modes FROM to TO. */
34347 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
34348 enum reg_class regclass
)
34353 /* x87 registers can't do subreg at all, as all values are reformatted
34354 to extended precision. */
34355 if (MAYBE_FLOAT_CLASS_P (regclass
))
34358 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
34360 /* Vector registers do not support QI or HImode loads. If we don't
34361 disallow a change to these modes, reload will assume it's ok to
34362 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
34363 the vec_dupv4hi pattern. */
34364 if (GET_MODE_SIZE (from
) < 4)
34367 /* Vector registers do not support subreg with nonzero offsets, which
34368 are otherwise valid for integer registers. Since we can't see
34369 whether we have a nonzero offset from here, prohibit all
34370 nonparadoxical subregs changing size. */
34371 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
34378 /* Return the cost of moving data of mode M between a
34379 register and memory. A value of 2 is the default; this cost is
34380 relative to those in `REGISTER_MOVE_COST'.
34382 This function is used extensively by register_move_cost that is used to
34383 build tables at startup. Make it inline in this case.
34384 When IN is 2, return maximum of in and out move cost.
34386 If moving between registers and memory is more expensive than
34387 between two registers, you should define this macro to express the
34390 Model also increased moving costs of QImode registers in non
34394 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
34398 if (FLOAT_CLASS_P (regclass
))
34416 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
34417 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
34419 if (SSE_CLASS_P (regclass
))
34422 switch (GET_MODE_SIZE (mode
))
34437 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
34438 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
34440 if (MMX_CLASS_P (regclass
))
34443 switch (GET_MODE_SIZE (mode
))
34455 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
34456 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
34458 switch (GET_MODE_SIZE (mode
))
34461 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
34464 return ix86_cost
->int_store
[0];
34465 if (TARGET_PARTIAL_REG_DEPENDENCY
34466 && optimize_function_for_speed_p (cfun
))
34467 cost
= ix86_cost
->movzbl_load
;
34469 cost
= ix86_cost
->int_load
[0];
34471 return MAX (cost
, ix86_cost
->int_store
[0]);
34477 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
34479 return ix86_cost
->movzbl_load
;
34481 return ix86_cost
->int_store
[0] + 4;
34486 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
34487 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
34489 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
34490 if (mode
== TFmode
)
34493 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
34495 cost
= ix86_cost
->int_load
[2];
34497 cost
= ix86_cost
->int_store
[2];
34498 return (cost
* (((int) GET_MODE_SIZE (mode
)
34499 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
34504 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
34507 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
34511 /* Return the cost of moving data from a register in class CLASS1 to
34512 one in class CLASS2.
34514 It is not required that the cost always equal 2 when FROM is the same as TO;
34515 on some machines it is expensive to move between registers if they are not
34516 general registers. */
34519 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
34520 reg_class_t class2_i
)
34522 enum reg_class class1
= (enum reg_class
) class1_i
;
34523 enum reg_class class2
= (enum reg_class
) class2_i
;
34525 /* In case we require secondary memory, compute cost of the store followed
34526 by load. In order to avoid bad register allocation choices, we need
34527 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
34529 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
34533 cost
+= inline_memory_move_cost (mode
, class1
, 2);
34534 cost
+= inline_memory_move_cost (mode
, class2
, 2);
34536 /* In case of copying from general_purpose_register we may emit multiple
34537 stores followed by single load causing memory size mismatch stall.
34538 Count this as arbitrarily high cost of 20. */
34539 if (targetm
.class_max_nregs (class1
, mode
)
34540 > targetm
.class_max_nregs (class2
, mode
))
34543 /* In the case of FP/MMX moves, the registers actually overlap, and we
34544 have to switch modes in order to treat them differently. */
34545 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
34546 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
34552 /* Moves between SSE/MMX and integer unit are expensive. */
34553 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
34554 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34556 /* ??? By keeping returned value relatively high, we limit the number
34557 of moves between integer and MMX/SSE registers for all targets.
34558 Additionally, high value prevents problem with x86_modes_tieable_p(),
34559 where integer modes in MMX/SSE registers are not tieable
34560 because of missing QImode and HImode moves to, from or between
34561 MMX/SSE registers. */
34562 return MAX (8, ix86_cost
->mmxsse_to_integer
);
34564 if (MAYBE_FLOAT_CLASS_P (class1
))
34565 return ix86_cost
->fp_move
;
34566 if (MAYBE_SSE_CLASS_P (class1
))
34567 return ix86_cost
->sse_move
;
34568 if (MAYBE_MMX_CLASS_P (class1
))
34569 return ix86_cost
->mmx_move
;
34573 /* Return TRUE if hard register REGNO can hold a value of machine-mode
34577 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
34579 /* Flags and only flags can only hold CCmode values. */
34580 if (CC_REGNO_P (regno
))
34581 return GET_MODE_CLASS (mode
) == MODE_CC
;
34582 if (GET_MODE_CLASS (mode
) == MODE_CC
34583 || GET_MODE_CLASS (mode
) == MODE_RANDOM
34584 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
34586 if (STACK_REGNO_P (regno
))
34587 return VALID_FP_MODE_P (mode
);
34588 if (SSE_REGNO_P (regno
))
34590 /* We implement the move patterns for all vector modes into and
34591 out of SSE registers, even when no operation instructions
34592 are available. OImode move is available only when AVX is
34594 return ((TARGET_AVX
&& mode
== OImode
)
34595 || VALID_AVX256_REG_MODE (mode
)
34596 || VALID_SSE_REG_MODE (mode
)
34597 || VALID_SSE2_REG_MODE (mode
)
34598 || VALID_MMX_REG_MODE (mode
)
34599 || VALID_MMX_REG_MODE_3DNOW (mode
));
34601 if (MMX_REGNO_P (regno
))
34603 /* We implement the move patterns for 3DNOW modes even in MMX mode,
34604 so if the register is available at all, then we can move data of
34605 the given mode into or out of it. */
34606 return (VALID_MMX_REG_MODE (mode
)
34607 || VALID_MMX_REG_MODE_3DNOW (mode
));
34610 if (mode
== QImode
)
34612 /* Take care for QImode values - they can be in non-QI regs,
34613 but then they do cause partial register stalls. */
34614 if (ANY_QI_REGNO_P (regno
))
34616 if (!TARGET_PARTIAL_REG_STALL
)
34618 /* LRA checks if the hard register is OK for the given mode.
34619 QImode values can live in non-QI regs, so we allow all
34621 if (lra_in_progress
)
34623 return !can_create_pseudo_p ();
34625 /* We handle both integer and floats in the general purpose registers. */
34626 else if (VALID_INT_MODE_P (mode
))
34628 else if (VALID_FP_MODE_P (mode
))
34630 else if (VALID_DFP_MODE_P (mode
))
34632 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
34633 on to use that value in smaller contexts, this can easily force a
34634 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
34635 supporting DImode, allow it. */
34636 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
34642 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
34643 tieable integer mode. */
34646 ix86_tieable_integer_mode_p (enum machine_mode mode
)
34655 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
34658 return TARGET_64BIT
;
34665 /* Return true if MODE1 is accessible in a register that can hold MODE2
34666 without copying. That is, all register classes that can hold MODE2
34667 can also hold MODE1. */
34670 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
34672 if (mode1
== mode2
)
34675 if (ix86_tieable_integer_mode_p (mode1
)
34676 && ix86_tieable_integer_mode_p (mode2
))
34679 /* MODE2 being XFmode implies fp stack or general regs, which means we
34680 can tie any smaller floating point modes to it. Note that we do not
34681 tie this with TFmode. */
34682 if (mode2
== XFmode
)
34683 return mode1
== SFmode
|| mode1
== DFmode
;
34685 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
34686 that we can tie it with SFmode. */
34687 if (mode2
== DFmode
)
34688 return mode1
== SFmode
;
34690 /* If MODE2 is only appropriate for an SSE register, then tie with
34691 any other mode acceptable to SSE registers. */
34692 if (GET_MODE_SIZE (mode2
) == 32
34693 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34694 return (GET_MODE_SIZE (mode1
) == 32
34695 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34696 if (GET_MODE_SIZE (mode2
) == 16
34697 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
34698 return (GET_MODE_SIZE (mode1
) == 16
34699 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
34701 /* If MODE2 is appropriate for an MMX register, then tie
34702 with any other mode acceptable to MMX registers. */
34703 if (GET_MODE_SIZE (mode2
) == 8
34704 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
34705 return (GET_MODE_SIZE (mode1
) == 8
34706 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
34711 /* Return the cost of moving between two registers of mode MODE. */
34714 ix86_set_reg_reg_cost (enum machine_mode mode
)
34716 unsigned int units
= UNITS_PER_WORD
;
34718 switch (GET_MODE_CLASS (mode
))
34724 units
= GET_MODE_SIZE (CCmode
);
34728 if ((TARGET_SSE
&& mode
== TFmode
)
34729 || (TARGET_80387
&& mode
== XFmode
)
34730 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
34731 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
34732 units
= GET_MODE_SIZE (mode
);
34735 case MODE_COMPLEX_FLOAT
:
34736 if ((TARGET_SSE
&& mode
== TCmode
)
34737 || (TARGET_80387
&& mode
== XCmode
)
34738 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
34739 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
34740 units
= GET_MODE_SIZE (mode
);
34743 case MODE_VECTOR_INT
:
34744 case MODE_VECTOR_FLOAT
:
34745 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34746 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34747 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34748 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
34749 units
= GET_MODE_SIZE (mode
);
34752 /* Return the cost of moving between two registers of mode MODE,
34753 assuming that the move will be in pieces of at most UNITS bytes. */
34754 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
34757 /* Compute a (partial) cost for rtx X. Return true if the complete
34758 cost has been computed, and false if subexpressions should be
34759 scanned. In either case, *TOTAL contains the cost result. */
34762 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
34765 enum rtx_code code
= (enum rtx_code
) code_i
;
34766 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34767 enum machine_mode mode
= GET_MODE (x
);
34768 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
34773 if (register_operand (SET_DEST (x
), VOIDmode
)
34774 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34776 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
34785 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34787 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34789 else if (flag_pic
&& SYMBOLIC_CONST (x
)
34791 || (!GET_CODE (x
) != LABEL_REF
34792 && (GET_CODE (x
) != SYMBOL_REF
34793 || !SYMBOL_REF_LOCAL_P (x
)))))
34800 if (mode
== VOIDmode
)
34805 switch (standard_80387_constant_p (x
))
34810 default: /* Other constants */
34817 if (SSE_FLOAT_MODE_P (mode
))
34820 switch (standard_sse_constant_p (x
))
34824 case 1: /* 0: xor eliminates false dependency */
34827 default: /* -1: cmp contains false dependency */
34832 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34833 it'll probably end up. Add a penalty for size. */
34834 *total
= (COSTS_N_INSNS (1)
34835 + (flag_pic
!= 0 && !TARGET_64BIT
)
34836 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34840 /* The zero extensions is often completely free on x86_64, so make
34841 it as cheap as possible. */
34842 if (TARGET_64BIT
&& mode
== DImode
34843 && GET_MODE (XEXP (x
, 0)) == SImode
)
34845 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34846 *total
= cost
->add
;
34848 *total
= cost
->movzx
;
34852 *total
= cost
->movsx
;
34856 if (SCALAR_INT_MODE_P (mode
)
34857 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34858 && CONST_INT_P (XEXP (x
, 1)))
34860 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34863 *total
= cost
->add
;
34866 if ((value
== 2 || value
== 3)
34867 && cost
->lea
<= cost
->shift_const
)
34869 *total
= cost
->lea
;
34879 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34881 /* ??? Should be SSE vector operation cost. */
34882 /* At least for published AMD latencies, this really is the same
34883 as the latency for a simple fpu operation like fabs. */
34884 /* V*QImode is emulated with 1-11 insns. */
34885 if (mode
== V16QImode
|| mode
== V32QImode
)
34888 if (TARGET_XOP
&& mode
== V16QImode
)
34890 /* For XOP we use vpshab, which requires a broadcast of the
34891 value to the variable shift insn. For constants this
34892 means a V16Q const in mem; even when we can perform the
34893 shift with one insn set the cost to prefer paddb. */
34894 if (CONSTANT_P (XEXP (x
, 1)))
34896 *total
= (cost
->fabs
34897 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34898 + (speed
? 2 : COSTS_N_BYTES (16)));
34903 else if (TARGET_SSSE3
)
34905 *total
= cost
->fabs
* count
;
34908 *total
= cost
->fabs
;
34910 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34912 if (CONST_INT_P (XEXP (x
, 1)))
34914 if (INTVAL (XEXP (x
, 1)) > 32)
34915 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34917 *total
= cost
->shift_const
* 2;
34921 if (GET_CODE (XEXP (x
, 1)) == AND
)
34922 *total
= cost
->shift_var
* 2;
34924 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34929 if (CONST_INT_P (XEXP (x
, 1)))
34930 *total
= cost
->shift_const
;
34931 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
34932 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
34934 /* Return the cost after shift-and truncation. */
34935 *total
= cost
->shift_var
;
34939 *total
= cost
->shift_var
;
34947 gcc_assert (FLOAT_MODE_P (mode
));
34948 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34950 /* ??? SSE scalar/vector cost should be used here. */
34951 /* ??? Bald assumption that fma has the same cost as fmul. */
34952 *total
= cost
->fmul
;
34953 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34955 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34957 if (GET_CODE (sub
) == NEG
)
34958 sub
= XEXP (sub
, 0);
34959 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34962 if (GET_CODE (sub
) == NEG
)
34963 sub
= XEXP (sub
, 0);
34964 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34969 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34971 /* ??? SSE scalar cost should be used here. */
34972 *total
= cost
->fmul
;
34975 else if (X87_FLOAT_MODE_P (mode
))
34977 *total
= cost
->fmul
;
34980 else if (FLOAT_MODE_P (mode
))
34982 /* ??? SSE vector cost should be used here. */
34983 *total
= cost
->fmul
;
34986 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34988 /* V*QImode is emulated with 7-13 insns. */
34989 if (mode
== V16QImode
|| mode
== V32QImode
)
34992 if (TARGET_XOP
&& mode
== V16QImode
)
34994 else if (TARGET_SSSE3
)
34996 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34998 /* V*DImode is emulated with 5-8 insns. */
34999 else if (mode
== V2DImode
|| mode
== V4DImode
)
35001 if (TARGET_XOP
&& mode
== V2DImode
)
35002 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
35004 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
35006 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
35007 insns, including two PMULUDQ. */
35008 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
35009 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
35011 *total
= cost
->fmul
;
35016 rtx op0
= XEXP (x
, 0);
35017 rtx op1
= XEXP (x
, 1);
35019 if (CONST_INT_P (XEXP (x
, 1)))
35021 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35022 for (nbits
= 0; value
!= 0; value
&= value
- 1)
35026 /* This is arbitrary. */
35029 /* Compute costs correctly for widening multiplication. */
35030 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
35031 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
35032 == GET_MODE_SIZE (mode
))
35034 int is_mulwiden
= 0;
35035 enum machine_mode inner_mode
= GET_MODE (op0
);
35037 if (GET_CODE (op0
) == GET_CODE (op1
))
35038 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
35039 else if (CONST_INT_P (op1
))
35041 if (GET_CODE (op0
) == SIGN_EXTEND
)
35042 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
35045 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
35049 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
35052 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
35053 + nbits
* cost
->mult_bit
35054 + rtx_cost (op0
, outer_code
, opno
, speed
)
35055 + rtx_cost (op1
, outer_code
, opno
, speed
));
35064 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35065 /* ??? SSE cost should be used here. */
35066 *total
= cost
->fdiv
;
35067 else if (X87_FLOAT_MODE_P (mode
))
35068 *total
= cost
->fdiv
;
35069 else if (FLOAT_MODE_P (mode
))
35070 /* ??? SSE vector cost should be used here. */
35071 *total
= cost
->fdiv
;
35073 *total
= cost
->divide
[MODE_INDEX (mode
)];
35077 if (GET_MODE_CLASS (mode
) == MODE_INT
35078 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
35080 if (GET_CODE (XEXP (x
, 0)) == PLUS
35081 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
35082 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
35083 && CONSTANT_P (XEXP (x
, 1)))
35085 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
35086 if (val
== 2 || val
== 4 || val
== 8)
35088 *total
= cost
->lea
;
35089 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35090 outer_code
, opno
, speed
);
35091 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
35092 outer_code
, opno
, speed
);
35093 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35097 else if (GET_CODE (XEXP (x
, 0)) == MULT
35098 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
35100 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
35101 if (val
== 2 || val
== 4 || val
== 8)
35103 *total
= cost
->lea
;
35104 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35105 outer_code
, opno
, speed
);
35106 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35110 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
35112 *total
= cost
->lea
;
35113 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35114 outer_code
, opno
, speed
);
35115 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35116 outer_code
, opno
, speed
);
35117 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35124 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35126 /* ??? SSE cost should be used here. */
35127 *total
= cost
->fadd
;
35130 else if (X87_FLOAT_MODE_P (mode
))
35132 *total
= cost
->fadd
;
35135 else if (FLOAT_MODE_P (mode
))
35137 /* ??? SSE vector cost should be used here. */
35138 *total
= cost
->fadd
;
35146 if (GET_MODE_CLASS (mode
) == MODE_INT
35147 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35149 *total
= (cost
->add
* 2
35150 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
35151 << (GET_MODE (XEXP (x
, 0)) != DImode
))
35152 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
35153 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
35159 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35161 /* ??? SSE cost should be used here. */
35162 *total
= cost
->fchs
;
35165 else if (X87_FLOAT_MODE_P (mode
))
35167 *total
= cost
->fchs
;
35170 else if (FLOAT_MODE_P (mode
))
35172 /* ??? SSE vector cost should be used here. */
35173 *total
= cost
->fchs
;
35179 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35181 /* ??? Should be SSE vector operation cost. */
35182 /* At least for published AMD latencies, this really is the same
35183 as the latency for a simple fpu operation like fabs. */
35184 *total
= cost
->fabs
;
35186 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35187 *total
= cost
->add
* 2;
35189 *total
= cost
->add
;
35193 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
35194 && XEXP (XEXP (x
, 0), 1) == const1_rtx
35195 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
35196 && XEXP (x
, 1) == const0_rtx
)
35198 /* This kind of construct is implemented using test[bwl].
35199 Treat it as if we had an AND. */
35200 *total
= (cost
->add
35201 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
35202 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
35208 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35213 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35214 /* ??? SSE cost should be used here. */
35215 *total
= cost
->fabs
;
35216 else if (X87_FLOAT_MODE_P (mode
))
35217 *total
= cost
->fabs
;
35218 else if (FLOAT_MODE_P (mode
))
35219 /* ??? SSE vector cost should be used here. */
35220 *total
= cost
->fabs
;
35224 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35225 /* ??? SSE cost should be used here. */
35226 *total
= cost
->fsqrt
;
35227 else if (X87_FLOAT_MODE_P (mode
))
35228 *total
= cost
->fsqrt
;
35229 else if (FLOAT_MODE_P (mode
))
35230 /* ??? SSE vector cost should be used here. */
35231 *total
= cost
->fsqrt
;
35235 if (XINT (x
, 1) == UNSPEC_TP
)
35242 case VEC_DUPLICATE
:
35243 /* ??? Assume all of these vector manipulation patterns are
35244 recognizable. In which case they all pretty much have the
35246 *total
= cost
->fabs
;
35256 static int current_machopic_label_num
;
35258 /* Given a symbol name and its associated stub, write out the
35259 definition of the stub. */
35262 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35264 unsigned int length
;
35265 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35266 int label
= ++current_machopic_label_num
;
35268 /* For 64-bit we shouldn't get here. */
35269 gcc_assert (!TARGET_64BIT
);
35271 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35272 symb
= targetm
.strip_name_encoding (symb
);
35274 length
= strlen (stub
);
35275 binder_name
= XALLOCAVEC (char, length
+ 32);
35276 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35278 length
= strlen (symb
);
35279 symbol_name
= XALLOCAVEC (char, length
+ 32);
35280 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35282 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35284 if (MACHOPIC_ATT_STUB
)
35285 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35286 else if (MACHOPIC_PURE
)
35287 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
35289 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
35291 fprintf (file
, "%s:\n", stub
);
35292 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35294 if (MACHOPIC_ATT_STUB
)
35296 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
35298 else if (MACHOPIC_PURE
)
35301 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35302 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
35303 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
35304 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
35305 label
, lazy_ptr_name
, label
);
35306 fprintf (file
, "\tjmp\t*%%ecx\n");
35309 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
35311 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
35312 it needs no stub-binding-helper. */
35313 if (MACHOPIC_ATT_STUB
)
35316 fprintf (file
, "%s:\n", binder_name
);
35320 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
35321 fprintf (file
, "\tpushl\t%%ecx\n");
35324 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
35326 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
35328 /* N.B. Keep the correspondence of these
35329 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
35330 old-pic/new-pic/non-pic stubs; altering this will break
35331 compatibility with existing dylibs. */
35334 /* 25-byte PIC stub using "CALL get_pc_thunk". */
35335 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
35338 /* 16-byte -mdynamic-no-pic stub. */
35339 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
35341 fprintf (file
, "%s:\n", lazy_ptr_name
);
35342 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
35343 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
35345 #endif /* TARGET_MACHO */
35347 /* Order the registers for register allocator. */
35350 x86_order_regs_for_local_alloc (void)
35355 /* First allocate the local general purpose registers. */
35356 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35357 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
35358 reg_alloc_order
[pos
++] = i
;
35360 /* Global general purpose registers. */
35361 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
35362 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
35363 reg_alloc_order
[pos
++] = i
;
35365 /* x87 registers come first in case we are doing FP math
35367 if (!TARGET_SSE_MATH
)
35368 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35369 reg_alloc_order
[pos
++] = i
;
35371 /* SSE registers. */
35372 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
35373 reg_alloc_order
[pos
++] = i
;
35374 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
35375 reg_alloc_order
[pos
++] = i
;
35377 /* x87 registers. */
35378 if (TARGET_SSE_MATH
)
35379 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
35380 reg_alloc_order
[pos
++] = i
;
35382 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
35383 reg_alloc_order
[pos
++] = i
;
35385 /* Initialize the rest of array as we do not allocate some registers
35387 while (pos
< FIRST_PSEUDO_REGISTER
)
35388 reg_alloc_order
[pos
++] = 0;
35391 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
35392 in struct attribute_spec handler. */
35394 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
35396 int flags ATTRIBUTE_UNUSED
,
35397 bool *no_add_attrs
)
35399 if (TREE_CODE (*node
) != FUNCTION_TYPE
35400 && TREE_CODE (*node
) != METHOD_TYPE
35401 && TREE_CODE (*node
) != FIELD_DECL
35402 && TREE_CODE (*node
) != TYPE_DECL
)
35404 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35406 *no_add_attrs
= true;
35411 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
35413 *no_add_attrs
= true;
35416 if (is_attribute_p ("callee_pop_aggregate_return", name
))
35420 cst
= TREE_VALUE (args
);
35421 if (TREE_CODE (cst
) != INTEGER_CST
)
35423 warning (OPT_Wattributes
,
35424 "%qE attribute requires an integer constant argument",
35426 *no_add_attrs
= true;
35428 else if (compare_tree_int (cst
, 0) != 0
35429 && compare_tree_int (cst
, 1) != 0)
35431 warning (OPT_Wattributes
,
35432 "argument to %qE attribute is neither zero, nor one",
35434 *no_add_attrs
= true;
35443 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
35444 struct attribute_spec.handler. */
35446 ix86_handle_abi_attribute (tree
*node
, tree name
,
35447 tree args ATTRIBUTE_UNUSED
,
35448 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35450 if (TREE_CODE (*node
) != FUNCTION_TYPE
35451 && TREE_CODE (*node
) != METHOD_TYPE
35452 && TREE_CODE (*node
) != FIELD_DECL
35453 && TREE_CODE (*node
) != TYPE_DECL
)
35455 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35457 *no_add_attrs
= true;
35461 /* Can combine regparm with all attributes but fastcall. */
35462 if (is_attribute_p ("ms_abi", name
))
35464 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
35466 error ("ms_abi and sysv_abi attributes are not compatible");
35471 else if (is_attribute_p ("sysv_abi", name
))
35473 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
35475 error ("ms_abi and sysv_abi attributes are not compatible");
35484 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
35485 struct attribute_spec.handler. */
35487 ix86_handle_struct_attribute (tree
*node
, tree name
,
35488 tree args ATTRIBUTE_UNUSED
,
35489 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35492 if (DECL_P (*node
))
35494 if (TREE_CODE (*node
) == TYPE_DECL
)
35495 type
= &TREE_TYPE (*node
);
35500 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
35502 warning (OPT_Wattributes
, "%qE attribute ignored",
35504 *no_add_attrs
= true;
35507 else if ((is_attribute_p ("ms_struct", name
)
35508 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
35509 || ((is_attribute_p ("gcc_struct", name
)
35510 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
35512 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
35514 *no_add_attrs
= true;
35521 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
35522 tree args ATTRIBUTE_UNUSED
,
35523 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
35525 if (TREE_CODE (*node
) != FUNCTION_DECL
)
35527 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
35529 *no_add_attrs
= true;
35535 ix86_ms_bitfield_layout_p (const_tree record_type
)
35537 return ((TARGET_MS_BITFIELD_LAYOUT
35538 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
35539 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
35542 /* Returns an expression indicating where the this parameter is
35543 located on entry to the FUNCTION. */
35546 x86_this_parameter (tree function
)
35548 tree type
= TREE_TYPE (function
);
35549 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
35554 const int *parm_regs
;
35556 if (ix86_function_type_abi (type
) == MS_ABI
)
35557 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
35559 parm_regs
= x86_64_int_parameter_registers
;
35560 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
35563 nregs
= ix86_function_regparm (type
, function
);
35565 if (nregs
> 0 && !stdarg_p (type
))
35568 unsigned int ccvt
= ix86_get_callcvt (type
);
35570 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35571 regno
= aggr
? DX_REG
: CX_REG
;
35572 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35576 return gen_rtx_MEM (SImode
,
35577 plus_constant (Pmode
, stack_pointer_rtx
, 4));
35586 return gen_rtx_MEM (SImode
,
35587 plus_constant (Pmode
,
35588 stack_pointer_rtx
, 4));
35591 return gen_rtx_REG (SImode
, regno
);
35594 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
35598 /* Determine whether x86_output_mi_thunk can succeed. */
35601 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
35602 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
35603 HOST_WIDE_INT vcall_offset
, const_tree function
)
35605 /* 64-bit can handle anything. */
35609 /* For 32-bit, everything's fine if we have one free register. */
35610 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
35613 /* Need a free register for vcall_offset. */
35617 /* Need a free register for GOT references. */
35618 if (flag_pic
&& !targetm
.binds_local_p (function
))
35621 /* Otherwise ok. */
35625 /* Output the assembler code for a thunk function. THUNK_DECL is the
35626 declaration for the thunk function itself, FUNCTION is the decl for
35627 the target function. DELTA is an immediate constant offset to be
35628 added to THIS. If VCALL_OFFSET is nonzero, the word at
35629 *(*this + vcall_offset) should be added to THIS. */
35632 x86_output_mi_thunk (FILE *file
,
35633 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
35634 HOST_WIDE_INT vcall_offset
, tree function
)
35636 rtx this_param
= x86_this_parameter (function
);
35637 rtx this_reg
, tmp
, fnaddr
;
35638 unsigned int tmp_regno
;
35641 tmp_regno
= R10_REG
;
35644 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
35645 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
35646 tmp_regno
= AX_REG
;
35647 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
35648 tmp_regno
= DX_REG
;
35650 tmp_regno
= CX_REG
;
35653 emit_note (NOTE_INSN_PROLOGUE_END
);
35655 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
35656 pull it in now and let DELTA benefit. */
35657 if (REG_P (this_param
))
35658 this_reg
= this_param
;
35659 else if (vcall_offset
)
35661 /* Put the this parameter into %eax. */
35662 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
35663 emit_move_insn (this_reg
, this_param
);
35666 this_reg
= NULL_RTX
;
35668 /* Adjust the this parameter by a fixed constant. */
35671 rtx delta_rtx
= GEN_INT (delta
);
35672 rtx delta_dst
= this_reg
? this_reg
: this_param
;
35676 if (!x86_64_general_operand (delta_rtx
, Pmode
))
35678 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35679 emit_move_insn (tmp
, delta_rtx
);
35684 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
35687 /* Adjust the this parameter by a value stored in the vtable. */
35690 rtx vcall_addr
, vcall_mem
, this_mem
;
35692 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
35694 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
35695 if (Pmode
!= ptr_mode
)
35696 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
35697 emit_move_insn (tmp
, this_mem
);
35699 /* Adjust the this parameter. */
35700 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
35702 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
35704 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
35705 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
35706 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
35709 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
35710 if (Pmode
!= ptr_mode
)
35711 emit_insn (gen_addsi_1_zext (this_reg
,
35712 gen_rtx_REG (ptr_mode
,
35716 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
35719 /* If necessary, drop THIS back to its stack slot. */
35720 if (this_reg
&& this_reg
!= this_param
)
35721 emit_move_insn (this_param
, this_reg
);
35723 fnaddr
= XEXP (DECL_RTL (function
), 0);
35726 if (!flag_pic
|| targetm
.binds_local_p (function
)
35731 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
35732 tmp
= gen_rtx_CONST (Pmode
, tmp
);
35733 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
35738 if (!flag_pic
|| targetm
.binds_local_p (function
))
35741 else if (TARGET_MACHO
)
35743 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
35744 fnaddr
= XEXP (fnaddr
, 0);
35746 #endif /* TARGET_MACHO */
35749 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
35750 output_set_got (tmp
, NULL_RTX
);
35752 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
35753 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
35754 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
35758 /* Our sibling call patterns do not allow memories, because we have no
35759 predicate that can distinguish between frame and non-frame memory.
35760 For our purposes here, we can get away with (ab)using a jump pattern,
35761 because we're going to do no optimization. */
35762 if (MEM_P (fnaddr
))
35763 emit_jump_insn (gen_indirect_jump (fnaddr
));
35766 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35767 fnaddr
= legitimize_pic_address (fnaddr
,
35768 gen_rtx_REG (Pmode
, tmp_regno
));
35770 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35772 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35773 if (GET_MODE (fnaddr
) != word_mode
)
35774 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35775 emit_move_insn (tmp
, fnaddr
);
35779 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35780 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35781 tmp
= emit_call_insn (tmp
);
35782 SIBLING_CALL_P (tmp
) = 1;
35786 /* Emit just enough of rest_of_compilation to get the insns emitted.
35787 Note that use_thunk calls assemble_start_function et al. */
35788 tmp
= get_insns ();
35789 shorten_branches (tmp
);
35790 final_start_function (tmp
, file
, 1);
35791 final (tmp
, file
, 1);
35792 final_end_function ();
35796 x86_file_start (void)
35798 default_file_start ();
35800 darwin_file_start ();
35802 if (X86_FILE_START_VERSION_DIRECTIVE
)
35803 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35804 if (X86_FILE_START_FLTUSED
)
35805 fputs ("\t.global\t__fltused\n", asm_out_file
);
35806 if (ix86_asm_dialect
== ASM_INTEL
)
35807 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
35811 x86_field_alignment (tree field
, int computed
)
35813 enum machine_mode mode
;
35814 tree type
= TREE_TYPE (field
);
35816 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
35818 mode
= TYPE_MODE (strip_array_types (type
));
35819 if (mode
== DFmode
|| mode
== DCmode
35820 || GET_MODE_CLASS (mode
) == MODE_INT
35821 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35822 return MIN (32, computed
);
35826 /* Output assembler code to FILE to increment profiler label # LABELNO
35827 for profiling a function entry. */
35829 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35831 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35836 #ifndef NO_PROFILE_COUNTERS
35837 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35840 if (!TARGET_PECOFF
&& flag_pic
)
35841 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35843 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35847 #ifndef NO_PROFILE_COUNTERS
35848 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35851 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35855 #ifndef NO_PROFILE_COUNTERS
35856 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35859 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35863 /* We don't have exact information about the insn sizes, but we may assume
35864 quite safely that we are informed about all 1 byte insns and memory
35865 address sizes. This is enough to eliminate unnecessary padding in
35869 min_insn_size (rtx insn
)
35873 if (!INSN_P (insn
) || !active_insn_p (insn
))
35876 /* Discard alignments we've emit and jump instructions. */
35877 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35878 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35881 /* Important case - calls are always 5 bytes.
35882 It is common to have many calls in the row. */
35884 && symbolic_reference_mentioned_p (PATTERN (insn
))
35885 && !SIBLING_CALL_P (insn
))
35887 len
= get_attr_length (insn
);
35891 /* For normal instructions we rely on get_attr_length being exact,
35892 with a few exceptions. */
35893 if (!JUMP_P (insn
))
35895 enum attr_type type
= get_attr_type (insn
);
35900 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35901 || asm_noperands (PATTERN (insn
)) >= 0)
35908 /* Otherwise trust get_attr_length. */
35912 l
= get_attr_length_address (insn
);
35913 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
35922 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35924 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
35928 ix86_avoid_jump_mispredicts (void)
35930 rtx insn
, start
= get_insns ();
35931 int nbytes
= 0, njumps
= 0;
35934 /* Look for all minimal intervals of instructions containing 4 jumps.
35935 The intervals are bounded by START and INSN. NBYTES is the total
35936 size of instructions in the interval including INSN and not including
35937 START. When the NBYTES is smaller than 16 bytes, it is possible
35938 that the end of START and INSN ends up in the same 16byte page.
35940 The smallest offset in the page INSN can start is the case where START
35941 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35942 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35944 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35948 if (LABEL_P (insn
))
35950 int align
= label_to_alignment (insn
);
35951 int max_skip
= label_to_max_skip (insn
);
35955 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35956 already in the current 16 byte page, because otherwise
35957 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35958 bytes to reach 16 byte boundary. */
35960 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35963 fprintf (dump_file
, "Label %i with max_skip %i\n",
35964 INSN_UID (insn
), max_skip
);
35967 while (nbytes
+ max_skip
>= 16)
35969 start
= NEXT_INSN (start
);
35970 if (JUMP_P (start
) || CALL_P (start
))
35971 njumps
--, isjump
= 1;
35974 nbytes
-= min_insn_size (start
);
35980 min_size
= min_insn_size (insn
);
35981 nbytes
+= min_size
;
35983 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35984 INSN_UID (insn
), min_size
);
35985 if (JUMP_P (insn
) || CALL_P (insn
))
35992 start
= NEXT_INSN (start
);
35993 if (JUMP_P (start
) || CALL_P (start
))
35994 njumps
--, isjump
= 1;
35997 nbytes
-= min_insn_size (start
);
35999 gcc_assert (njumps
>= 0);
36001 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
36002 INSN_UID (start
), INSN_UID (insn
), nbytes
);
36004 if (njumps
== 3 && isjump
&& nbytes
< 16)
36006 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
36009 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
36010 INSN_UID (insn
), padsize
);
36011 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
36017 /* AMD Athlon works faster
36018 when RET is not destination of conditional jump or directly preceded
36019 by other jump instruction. We avoid the penalty by inserting NOP just
36020 before the RET instructions in such cases. */
36022 ix86_pad_returns (void)
36027 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36029 basic_block bb
= e
->src
;
36030 rtx ret
= BB_END (bb
);
36032 bool replace
= false;
36034 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
36035 || optimize_bb_for_size_p (bb
))
36037 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
36038 if (active_insn_p (prev
) || LABEL_P (prev
))
36040 if (prev
&& LABEL_P (prev
))
36045 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36046 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
36047 && !(e
->flags
& EDGE_FALLTHRU
))
36055 prev
= prev_active_insn (ret
);
36057 && ((JUMP_P (prev
) && any_condjump_p (prev
))
36060 /* Empty functions get branch mispredict even when
36061 the jump destination is not visible to us. */
36062 if (!prev
&& !optimize_function_for_size_p (cfun
))
36067 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
36073 /* Count the minimum number of instructions in BB. Return 4 if the
36074 number of instructions >= 4. */
36077 ix86_count_insn_bb (basic_block bb
)
36080 int insn_count
= 0;
36082 /* Count number of instructions in this block. Return 4 if the number
36083 of instructions >= 4. */
36084 FOR_BB_INSNS (bb
, insn
)
36086 /* Only happen in exit blocks. */
36088 && ANY_RETURN_P (PATTERN (insn
)))
36091 if (NONDEBUG_INSN_P (insn
)
36092 && GET_CODE (PATTERN (insn
)) != USE
36093 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
36096 if (insn_count
>= 4)
36105 /* Count the minimum number of instructions in code path in BB.
36106 Return 4 if the number of instructions >= 4. */
36109 ix86_count_insn (basic_block bb
)
36113 int min_prev_count
;
36115 /* Only bother counting instructions along paths with no
36116 more than 2 basic blocks between entry and exit. Given
36117 that BB has an edge to exit, determine if a predecessor
36118 of BB has an edge from entry. If so, compute the number
36119 of instructions in the predecessor block. If there
36120 happen to be multiple such blocks, compute the minimum. */
36121 min_prev_count
= 4;
36122 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36125 edge_iterator prev_ei
;
36127 if (e
->src
== ENTRY_BLOCK_PTR
)
36129 min_prev_count
= 0;
36132 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
36134 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
36136 int count
= ix86_count_insn_bb (e
->src
);
36137 if (count
< min_prev_count
)
36138 min_prev_count
= count
;
36144 if (min_prev_count
< 4)
36145 min_prev_count
+= ix86_count_insn_bb (bb
);
36147 return min_prev_count
;
36150 /* Pad short function to 4 instructions. */
36153 ix86_pad_short_function (void)
36158 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36160 rtx ret
= BB_END (e
->src
);
36161 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
36163 int insn_count
= ix86_count_insn (e
->src
);
36165 /* Pad short function. */
36166 if (insn_count
< 4)
36170 /* Find epilogue. */
36173 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
36174 insn
= PREV_INSN (insn
);
36179 /* Two NOPs count as one instruction. */
36180 insn_count
= 2 * (4 - insn_count
);
36181 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
36187 /* Fix up a Windows system unwinder issue. If an EH region falls through into
36188 the epilogue, the Windows system unwinder will apply epilogue logic and
36189 produce incorrect offsets. This can be avoided by adding a nop between
36190 the last insn that can throw and the first insn of the epilogue. */
36193 ix86_seh_fixup_eh_fallthru (void)
36198 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
36202 /* Find the beginning of the epilogue. */
36203 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36204 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36209 /* We only care about preceding insns that can throw. */
36210 insn
= prev_active_insn (insn
);
36211 if (insn
== NULL
|| !can_throw_internal (insn
))
36214 /* Do not separate calls from their debug information. */
36215 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36217 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36218 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
36223 emit_insn_after (gen_nops (const1_rtx
), insn
);
36227 /* Implement machine specific optimizations. We implement padding of returns
36228 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
36232 /* We are freeing block_for_insn in the toplev to keep compatibility
36233 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36234 compute_bb_for_insn ();
36236 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36237 ix86_seh_fixup_eh_fallthru ();
36239 if (optimize
&& optimize_function_for_speed_p (cfun
))
36241 if (TARGET_PAD_SHORT_FUNCTION
)
36242 ix86_pad_short_function ();
36243 else if (TARGET_PAD_RETURNS
)
36244 ix86_pad_returns ();
36245 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36246 if (TARGET_FOUR_JUMP_LIMIT
)
36247 ix86_avoid_jump_mispredicts ();
36252 /* Return nonzero when QImode register that must be represented via REX prefix
36255 x86_extended_QIreg_mentioned_p (rtx insn
)
36258 extract_insn_cached (insn
);
36259 for (i
= 0; i
< recog_data
.n_operands
; i
++)
36260 if (GENERAL_REG_P (recog_data
.operand
[i
])
36261 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
36266 /* Return nonzero when P points to register encoded via REX prefix.
36267 Called via for_each_rtx. */
36269 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36271 unsigned int regno
;
36274 regno
= REGNO (*p
);
36275 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
36278 /* Return true when INSN mentions register that must be encoded using REX
36281 x86_extended_reg_mentioned_p (rtx insn
)
36283 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
36284 extended_reg_mentioned_1
, NULL
);
36287 /* If profitable, negate (without causing overflow) integer constant
36288 of mode MODE at location LOC. Return true in this case. */
36290 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
36294 if (!CONST_INT_P (*loc
))
36300 /* DImode x86_64 constants must fit in 32 bits. */
36301 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
36312 gcc_unreachable ();
36315 /* Avoid overflows. */
36316 if (mode_signbit_p (mode
, *loc
))
36319 val
= INTVAL (*loc
);
36321 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
36322 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
36323 if ((val
< 0 && val
!= -128)
36326 *loc
= GEN_INT (-val
);
36333 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
36334 optabs would emit if we didn't have TFmode patterns. */
36337 x86_emit_floatuns (rtx operands
[2])
36339 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
36340 enum machine_mode mode
, inmode
;
36342 inmode
= GET_MODE (operands
[1]);
36343 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
36346 in
= force_reg (inmode
, operands
[1]);
36347 mode
= GET_MODE (out
);
36348 neglab
= gen_label_rtx ();
36349 donelab
= gen_label_rtx ();
36350 f0
= gen_reg_rtx (mode
);
36352 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
36354 expand_float (out
, in
, 0);
36356 emit_jump_insn (gen_jump (donelab
));
36359 emit_label (neglab
);
36361 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
36363 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
36365 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
36367 expand_float (f0
, i0
, 0);
36369 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
36371 emit_label (donelab
);
36374 /* AVX2 does support 32-byte integer vector operations,
36375 thus the longest vector we are faced with is V32QImode. */
36376 #define MAX_VECT_LEN 32
36378 struct expand_vec_perm_d
36380 rtx target
, op0
, op1
;
36381 unsigned char perm
[MAX_VECT_LEN
];
36382 enum machine_mode vmode
;
36383 unsigned char nelt
;
36384 bool one_operand_p
;
36388 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
36389 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
36390 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
36392 /* Get a vector mode of the same size as the original but with elements
36393 twice as wide. This is only guaranteed to apply to integral vectors. */
36395 static inline enum machine_mode
36396 get_mode_wider_vector (enum machine_mode o
)
36398 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
36399 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
36400 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
36401 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
36405 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36406 with all elements equal to VAR. Return true if successful. */
36409 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
36410 rtx target
, rtx val
)
36433 /* First attempt to recognize VAL as-is. */
36434 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36435 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
36436 if (recog_memoized (insn
) < 0)
36439 /* If that fails, force VAL into a register. */
36442 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
36443 seq
= get_insns ();
36446 emit_insn_before (seq
, insn
);
36448 ok
= recog_memoized (insn
) >= 0;
36457 if (TARGET_SSE
|| TARGET_3DNOW_A
)
36461 val
= gen_lowpart (SImode
, val
);
36462 x
= gen_rtx_TRUNCATE (HImode
, val
);
36463 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
36464 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36477 struct expand_vec_perm_d dperm
;
36481 memset (&dperm
, 0, sizeof (dperm
));
36482 dperm
.target
= target
;
36483 dperm
.vmode
= mode
;
36484 dperm
.nelt
= GET_MODE_NUNITS (mode
);
36485 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
36486 dperm
.one_operand_p
= true;
36488 /* Extend to SImode using a paradoxical SUBREG. */
36489 tmp1
= gen_reg_rtx (SImode
);
36490 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
36492 /* Insert the SImode value as low element of a V4SImode vector. */
36493 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
36494 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
36496 ok
= (expand_vec_perm_1 (&dperm
)
36497 || expand_vec_perm_broadcast_1 (&dperm
));
36509 /* Replicate the value once into the next wider mode and recurse. */
36511 enum machine_mode smode
, wsmode
, wvmode
;
36514 smode
= GET_MODE_INNER (mode
);
36515 wvmode
= get_mode_wider_vector (mode
);
36516 wsmode
= GET_MODE_INNER (wvmode
);
36518 val
= convert_modes (wsmode
, smode
, val
, true);
36519 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
36520 GEN_INT (GET_MODE_BITSIZE (smode
)),
36521 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36522 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
36524 x
= gen_lowpart (wvmode
, target
);
36525 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
36533 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
36534 rtx x
= gen_reg_rtx (hvmode
);
36536 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
36539 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
36540 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36549 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36550 whose ONE_VAR element is VAR, and other elements are zero. Return true
36554 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
36555 rtx target
, rtx var
, int one_var
)
36557 enum machine_mode vsimode
;
36560 bool use_vector_set
= false;
36565 /* For SSE4.1, we normally use vector set. But if the second
36566 element is zero and inter-unit moves are OK, we use movq
36568 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
36569 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
36575 use_vector_set
= TARGET_SSE4_1
;
36578 use_vector_set
= TARGET_SSE2
;
36581 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
36588 use_vector_set
= TARGET_AVX
;
36591 /* Use ix86_expand_vector_set in 64bit mode only. */
36592 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
36598 if (use_vector_set
)
36600 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
36601 var
= force_reg (GET_MODE_INNER (mode
), var
);
36602 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36618 var
= force_reg (GET_MODE_INNER (mode
), var
);
36619 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
36620 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
36625 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
36626 new_target
= gen_reg_rtx (mode
);
36628 new_target
= target
;
36629 var
= force_reg (GET_MODE_INNER (mode
), var
);
36630 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
36631 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
36632 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
36635 /* We need to shuffle the value to the correct position, so
36636 create a new pseudo to store the intermediate result. */
36638 /* With SSE2, we can use the integer shuffle insns. */
36639 if (mode
!= V4SFmode
&& TARGET_SSE2
)
36641 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
36643 GEN_INT (one_var
== 1 ? 0 : 1),
36644 GEN_INT (one_var
== 2 ? 0 : 1),
36645 GEN_INT (one_var
== 3 ? 0 : 1)));
36646 if (target
!= new_target
)
36647 emit_move_insn (target
, new_target
);
36651 /* Otherwise convert the intermediate result to V4SFmode and
36652 use the SSE1 shuffle instructions. */
36653 if (mode
!= V4SFmode
)
36655 tmp
= gen_reg_rtx (V4SFmode
);
36656 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
36661 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
36663 GEN_INT (one_var
== 1 ? 0 : 1),
36664 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
36665 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
36667 if (mode
!= V4SFmode
)
36668 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
36669 else if (tmp
!= target
)
36670 emit_move_insn (target
, tmp
);
36672 else if (target
!= new_target
)
36673 emit_move_insn (target
, new_target
);
36678 vsimode
= V4SImode
;
36684 vsimode
= V2SImode
;
36690 /* Zero extend the variable element to SImode and recurse. */
36691 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
36693 x
= gen_reg_rtx (vsimode
);
36694 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
36696 gcc_unreachable ();
36698 emit_move_insn (target
, gen_lowpart (mode
, x
));
36706 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36707 consisting of the values in VALS. It is known that all elements
36708 except ONE_VAR are constants. Return true if successful. */
36711 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
36712 rtx target
, rtx vals
, int one_var
)
36714 rtx var
= XVECEXP (vals
, 0, one_var
);
36715 enum machine_mode wmode
;
36718 const_vec
= copy_rtx (vals
);
36719 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
36720 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
36728 /* For the two element vectors, it's just as easy to use
36729 the general case. */
36733 /* Use ix86_expand_vector_set in 64bit mode only. */
36756 /* There's no way to set one QImode entry easily. Combine
36757 the variable value with its adjacent constant value, and
36758 promote to an HImode set. */
36759 x
= XVECEXP (vals
, 0, one_var
^ 1);
36762 var
= convert_modes (HImode
, QImode
, var
, true);
36763 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
36764 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
36765 x
= GEN_INT (INTVAL (x
) & 0xff);
36769 var
= convert_modes (HImode
, QImode
, var
, true);
36770 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
36772 if (x
!= const0_rtx
)
36773 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
36774 1, OPTAB_LIB_WIDEN
);
36776 x
= gen_reg_rtx (wmode
);
36777 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
36778 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
36780 emit_move_insn (target
, gen_lowpart (mode
, x
));
36787 emit_move_insn (target
, const_vec
);
36788 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
36792 /* A subroutine of ix86_expand_vector_init_general. Use vector
36793 concatenate to handle the most general case: all values variable,
36794 and none identical. */
36797 ix86_expand_vector_init_concat (enum machine_mode mode
,
36798 rtx target
, rtx
*ops
, int n
)
36800 enum machine_mode cmode
, hmode
= VOIDmode
;
36801 rtx first
[8], second
[4];
36841 gcc_unreachable ();
36844 if (!register_operand (ops
[1], cmode
))
36845 ops
[1] = force_reg (cmode
, ops
[1]);
36846 if (!register_operand (ops
[0], cmode
))
36847 ops
[0] = force_reg (cmode
, ops
[0]);
36848 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36849 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36869 gcc_unreachable ();
36885 gcc_unreachable ();
36890 /* FIXME: We process inputs backward to help RA. PR 36222. */
36893 for (; i
> 0; i
-= 2, j
--)
36895 first
[j
] = gen_reg_rtx (cmode
);
36896 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36897 ix86_expand_vector_init (false, first
[j
],
36898 gen_rtx_PARALLEL (cmode
, v
));
36904 gcc_assert (hmode
!= VOIDmode
);
36905 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36907 second
[j
] = gen_reg_rtx (hmode
);
36908 ix86_expand_vector_init_concat (hmode
, second
[j
],
36912 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36915 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36919 gcc_unreachable ();
36923 /* A subroutine of ix86_expand_vector_init_general. Use vector
36924 interleave to handle the most general case: all values variable,
36925 and none identical. */
36928 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36929 rtx target
, rtx
*ops
, int n
)
36931 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36934 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36935 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36936 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36941 gen_load_even
= gen_vec_setv8hi
;
36942 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36943 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36944 inner_mode
= HImode
;
36945 first_imode
= V4SImode
;
36946 second_imode
= V2DImode
;
36947 third_imode
= VOIDmode
;
36950 gen_load_even
= gen_vec_setv16qi
;
36951 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36952 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36953 inner_mode
= QImode
;
36954 first_imode
= V8HImode
;
36955 second_imode
= V4SImode
;
36956 third_imode
= V2DImode
;
36959 gcc_unreachable ();
36962 for (i
= 0; i
< n
; i
++)
36964 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36965 op0
= gen_reg_rtx (SImode
);
36966 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36968 /* Insert the SImode value as low element of V4SImode vector. */
36969 op1
= gen_reg_rtx (V4SImode
);
36970 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36971 gen_rtx_VEC_DUPLICATE (V4SImode
,
36973 CONST0_RTX (V4SImode
),
36975 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36977 /* Cast the V4SImode vector back to a vector in orignal mode. */
36978 op0
= gen_reg_rtx (mode
);
36979 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36981 /* Load even elements into the second position. */
36982 emit_insn (gen_load_even (op0
,
36983 force_reg (inner_mode
,
36987 /* Cast vector to FIRST_IMODE vector. */
36988 ops
[i
] = gen_reg_rtx (first_imode
);
36989 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36992 /* Interleave low FIRST_IMODE vectors. */
36993 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36995 op0
= gen_reg_rtx (first_imode
);
36996 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36998 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36999 ops
[j
] = gen_reg_rtx (second_imode
);
37000 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
37003 /* Interleave low SECOND_IMODE vectors. */
37004 switch (second_imode
)
37007 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
37009 op0
= gen_reg_rtx (second_imode
);
37010 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
37013 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
37015 ops
[j
] = gen_reg_rtx (third_imode
);
37016 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
37018 second_imode
= V2DImode
;
37019 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37023 op0
= gen_reg_rtx (second_imode
);
37024 emit_insn (gen_interleave_second_low (op0
, ops
[0],
37027 /* Cast the SECOND_IMODE vector back to a vector on original
37029 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37030 gen_lowpart (mode
, op0
)));
37034 gcc_unreachable ();
37038 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
37039 all values variable, and none identical. */
37042 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
37043 rtx target
, rtx vals
)
37045 rtx ops
[32], op0
, op1
;
37046 enum machine_mode half_mode
= VOIDmode
;
37053 if (!mmx_ok
&& !TARGET_SSE
)
37065 n
= GET_MODE_NUNITS (mode
);
37066 for (i
= 0; i
< n
; i
++)
37067 ops
[i
] = XVECEXP (vals
, 0, i
);
37068 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
37072 half_mode
= V16QImode
;
37076 half_mode
= V8HImode
;
37080 n
= GET_MODE_NUNITS (mode
);
37081 for (i
= 0; i
< n
; i
++)
37082 ops
[i
] = XVECEXP (vals
, 0, i
);
37083 op0
= gen_reg_rtx (half_mode
);
37084 op1
= gen_reg_rtx (half_mode
);
37085 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
37087 ix86_expand_vector_init_interleave (half_mode
, op1
,
37088 &ops
[n
>> 1], n
>> 2);
37089 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37090 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
37094 if (!TARGET_SSE4_1
)
37102 /* Don't use ix86_expand_vector_init_interleave if we can't
37103 move from GPR to SSE register directly. */
37104 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
37107 n
= GET_MODE_NUNITS (mode
);
37108 for (i
= 0; i
< n
; i
++)
37109 ops
[i
] = XVECEXP (vals
, 0, i
);
37110 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
37118 gcc_unreachable ();
37122 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
37123 enum machine_mode inner_mode
;
37124 rtx words
[4], shift
;
37126 inner_mode
= GET_MODE_INNER (mode
);
37127 n_elts
= GET_MODE_NUNITS (mode
);
37128 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
37129 n_elt_per_word
= n_elts
/ n_words
;
37130 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
37132 for (i
= 0; i
< n_words
; ++i
)
37134 rtx word
= NULL_RTX
;
37136 for (j
= 0; j
< n_elt_per_word
; ++j
)
37138 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
37139 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
37145 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
37146 word
, 1, OPTAB_LIB_WIDEN
);
37147 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
37148 word
, 1, OPTAB_LIB_WIDEN
);
37156 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
37157 else if (n_words
== 2)
37159 rtx tmp
= gen_reg_rtx (mode
);
37160 emit_clobber (tmp
);
37161 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
37162 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
37163 emit_move_insn (target
, tmp
);
37165 else if (n_words
== 4)
37167 rtx tmp
= gen_reg_rtx (V4SImode
);
37168 gcc_assert (word_mode
== SImode
);
37169 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
37170 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
37171 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
37174 gcc_unreachable ();
37178 /* Initialize vector TARGET via VALS. Suppress the use of MMX
37179 instructions unless MMX_OK is true. */
37182 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
37184 enum machine_mode mode
= GET_MODE (target
);
37185 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37186 int n_elts
= GET_MODE_NUNITS (mode
);
37187 int n_var
= 0, one_var
= -1;
37188 bool all_same
= true, all_const_zero
= true;
37192 for (i
= 0; i
< n_elts
; ++i
)
37194 x
= XVECEXP (vals
, 0, i
);
37195 if (!(CONST_INT_P (x
)
37196 || GET_CODE (x
) == CONST_DOUBLE
37197 || GET_CODE (x
) == CONST_FIXED
))
37198 n_var
++, one_var
= i
;
37199 else if (x
!= CONST0_RTX (inner_mode
))
37200 all_const_zero
= false;
37201 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37205 /* Constants are best loaded from the constant pool. */
37208 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37212 /* If all values are identical, broadcast the value. */
37214 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37215 XVECEXP (vals
, 0, 0)))
37218 /* Values where only one field is non-constant are best loaded from
37219 the pool and overwritten via move later. */
37223 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37224 XVECEXP (vals
, 0, one_var
),
37228 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37232 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37236 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37238 enum machine_mode mode
= GET_MODE (target
);
37239 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37240 enum machine_mode half_mode
;
37241 bool use_vec_merge
= false;
37243 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37245 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37246 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37247 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37248 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37249 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37250 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37252 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37254 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37255 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37256 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37257 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37258 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37259 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37269 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37270 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37272 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37274 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37275 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37281 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
37285 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37286 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
37288 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37290 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37291 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37298 /* For the two element vectors, we implement a VEC_CONCAT with
37299 the extraction of the other element. */
37301 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
37302 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
37305 op0
= val
, op1
= tmp
;
37307 op0
= tmp
, op1
= val
;
37309 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
37310 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37315 use_vec_merge
= TARGET_SSE4_1
;
37322 use_vec_merge
= true;
37326 /* tmp = target = A B C D */
37327 tmp
= copy_to_reg (target
);
37328 /* target = A A B B */
37329 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
37330 /* target = X A B B */
37331 ix86_expand_vector_set (false, target
, val
, 0);
37332 /* target = A X C D */
37333 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37334 const1_rtx
, const0_rtx
,
37335 GEN_INT (2+4), GEN_INT (3+4)));
37339 /* tmp = target = A B C D */
37340 tmp
= copy_to_reg (target
);
37341 /* tmp = X B C D */
37342 ix86_expand_vector_set (false, tmp
, val
, 0);
37343 /* target = A B X D */
37344 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37345 const0_rtx
, const1_rtx
,
37346 GEN_INT (0+4), GEN_INT (3+4)));
37350 /* tmp = target = A B C D */
37351 tmp
= copy_to_reg (target
);
37352 /* tmp = X B C D */
37353 ix86_expand_vector_set (false, tmp
, val
, 0);
37354 /* target = A B X D */
37355 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
37356 const0_rtx
, const1_rtx
,
37357 GEN_INT (2+4), GEN_INT (0+4)));
37361 gcc_unreachable ();
37366 use_vec_merge
= TARGET_SSE4_1
;
37370 /* Element 0 handled by vec_merge below. */
37373 use_vec_merge
= true;
37379 /* With SSE2, use integer shuffles to swap element 0 and ELT,
37380 store into element 0, then shuffle them back. */
37384 order
[0] = GEN_INT (elt
);
37385 order
[1] = const1_rtx
;
37386 order
[2] = const2_rtx
;
37387 order
[3] = GEN_INT (3);
37388 order
[elt
] = const0_rtx
;
37390 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37391 order
[1], order
[2], order
[3]));
37393 ix86_expand_vector_set (false, target
, val
, 0);
37395 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
37396 order
[1], order
[2], order
[3]));
37400 /* For SSE1, we have to reuse the V4SF code. */
37401 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
37402 gen_lowpart (SFmode
, val
), elt
);
37407 use_vec_merge
= TARGET_SSE2
;
37410 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37414 use_vec_merge
= TARGET_SSE4_1
;
37421 half_mode
= V16QImode
;
37427 half_mode
= V8HImode
;
37433 half_mode
= V4SImode
;
37439 half_mode
= V2DImode
;
37445 half_mode
= V4SFmode
;
37451 half_mode
= V2DFmode
;
37457 /* Compute offset. */
37461 gcc_assert (i
<= 1);
37463 /* Extract the half. */
37464 tmp
= gen_reg_rtx (half_mode
);
37465 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
37467 /* Put val in tmp at elt. */
37468 ix86_expand_vector_set (false, tmp
, val
, elt
);
37471 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
37480 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
37481 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
37482 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37486 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37488 emit_move_insn (mem
, target
);
37490 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37491 emit_move_insn (tmp
, val
);
37493 emit_move_insn (target
, mem
);
37498 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
37500 enum machine_mode mode
= GET_MODE (vec
);
37501 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37502 bool use_vec_extr
= false;
37515 use_vec_extr
= true;
37519 use_vec_extr
= TARGET_SSE4_1
;
37531 tmp
= gen_reg_rtx (mode
);
37532 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
37533 GEN_INT (elt
), GEN_INT (elt
),
37534 GEN_INT (elt
+4), GEN_INT (elt
+4)));
37538 tmp
= gen_reg_rtx (mode
);
37539 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
37543 gcc_unreachable ();
37546 use_vec_extr
= true;
37551 use_vec_extr
= TARGET_SSE4_1
;
37565 tmp
= gen_reg_rtx (mode
);
37566 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
37567 GEN_INT (elt
), GEN_INT (elt
),
37568 GEN_INT (elt
), GEN_INT (elt
)));
37572 tmp
= gen_reg_rtx (mode
);
37573 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
37577 gcc_unreachable ();
37580 use_vec_extr
= true;
37585 /* For SSE1, we have to reuse the V4SF code. */
37586 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
37587 gen_lowpart (V4SFmode
, vec
), elt
);
37593 use_vec_extr
= TARGET_SSE2
;
37596 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
37600 use_vec_extr
= TARGET_SSE4_1
;
37606 tmp
= gen_reg_rtx (V4SFmode
);
37608 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
37610 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
37611 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37619 tmp
= gen_reg_rtx (V2DFmode
);
37621 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
37623 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
37624 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37632 tmp
= gen_reg_rtx (V16QImode
);
37634 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
37636 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
37637 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
37645 tmp
= gen_reg_rtx (V8HImode
);
37647 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
37649 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
37650 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
37658 tmp
= gen_reg_rtx (V4SImode
);
37660 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
37662 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
37663 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
37671 tmp
= gen_reg_rtx (V2DImode
);
37673 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
37675 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
37676 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
37682 /* ??? Could extract the appropriate HImode element and shift. */
37689 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
37690 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
37692 /* Let the rtl optimizers know about the zero extension performed. */
37693 if (inner_mode
== QImode
|| inner_mode
== HImode
)
37695 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
37696 target
= gen_lowpart (SImode
, target
);
37699 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
37703 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
37705 emit_move_insn (mem
, vec
);
37707 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
37708 emit_move_insn (target
, tmp
);
37712 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
37713 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
37714 The upper bits of DEST are undefined, though they shouldn't cause
37715 exceptions (some bits from src or all zeros are ok). */
37718 emit_reduc_half (rtx dest
, rtx src
, int i
)
37721 switch (GET_MODE (src
))
37725 tem
= gen_sse_movhlps (dest
, src
, src
);
37727 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
37728 GEN_INT (1 + 4), GEN_INT (1 + 4));
37731 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
37737 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
37738 gen_lowpart (V1TImode
, src
),
37743 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
37745 tem
= gen_avx_shufps256 (dest
, src
, src
,
37746 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
37750 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
37752 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
37759 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
37760 gen_lowpart (V4DImode
, src
),
37761 gen_lowpart (V4DImode
, src
),
37764 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
37765 gen_lowpart (V2TImode
, src
),
37769 gcc_unreachable ();
37774 /* Expand a vector reduction. FN is the binary pattern to reduce;
37775 DEST is the destination; IN is the input vector. */
37778 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
37780 rtx half
, dst
, vec
= in
;
37781 enum machine_mode mode
= GET_MODE (in
);
37784 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37786 && mode
== V8HImode
37787 && fn
== gen_uminv8hi3
)
37789 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
37793 for (i
= GET_MODE_BITSIZE (mode
);
37794 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
37797 half
= gen_reg_rtx (mode
);
37798 emit_reduc_half (half
, vec
, i
);
37799 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37802 dst
= gen_reg_rtx (mode
);
37803 emit_insn (fn (dst
, half
, vec
));
37808 /* Target hook for scalar_mode_supported_p. */
37810 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37812 if (DECIMAL_FLOAT_MODE_P (mode
))
37813 return default_decimal_float_supported_p ();
37814 else if (mode
== TFmode
)
37817 return default_scalar_mode_supported_p (mode
);
37820 /* Implements target hook vector_mode_supported_p. */
37822 ix86_vector_mode_supported_p (enum machine_mode mode
)
37824 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37826 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37828 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37830 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37832 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37837 /* Target hook for c_mode_for_suffix. */
37838 static enum machine_mode
37839 ix86_c_mode_for_suffix (char suffix
)
37849 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37851 We do this in the new i386 backend to maintain source compatibility
37852 with the old cc0-based compiler. */
37855 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37856 tree inputs ATTRIBUTE_UNUSED
,
37859 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37861 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37866 /* Implements target vector targetm.asm.encode_section_info. */
37868 static void ATTRIBUTE_UNUSED
37869 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37871 default_encode_section_info (decl
, rtl
, first
);
37873 if (TREE_CODE (decl
) == VAR_DECL
37874 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37875 && ix86_in_large_data_p (decl
))
37876 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37879 /* Worker function for REVERSE_CONDITION. */
37882 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37884 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37885 ? reverse_condition (code
)
37886 : reverse_condition_maybe_unordered (code
));
37889 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37893 output_387_reg_move (rtx insn
, rtx
*operands
)
37895 if (REG_P (operands
[0]))
37897 if (REG_P (operands
[1])
37898 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37900 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37901 return output_387_ffreep (operands
, 0);
37902 return "fstp\t%y0";
37904 if (STACK_TOP_P (operands
[0]))
37905 return "fld%Z1\t%y1";
37908 else if (MEM_P (operands
[0]))
37910 gcc_assert (REG_P (operands
[1]));
37911 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37912 return "fstp%Z0\t%y0";
37915 /* There is no non-popping store to memory for XFmode.
37916 So if we need one, follow the store with a load. */
37917 if (GET_MODE (operands
[0]) == XFmode
)
37918 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37920 return "fst%Z0\t%y0";
37927 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37928 FP status register is set. */
37931 ix86_emit_fp_unordered_jump (rtx label
)
37933 rtx reg
= gen_reg_rtx (HImode
);
37936 emit_insn (gen_x86_fnstsw_1 (reg
));
37938 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37940 emit_insn (gen_x86_sahf_1 (reg
));
37942 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37943 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37947 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37949 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37950 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37953 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37954 gen_rtx_LABEL_REF (VOIDmode
, label
),
37956 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37958 emit_jump_insn (temp
);
37959 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37962 /* Output code to perform a log1p XFmode calculation. */
37964 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37966 rtx label1
= gen_label_rtx ();
37967 rtx label2
= gen_label_rtx ();
37969 rtx tmp
= gen_reg_rtx (XFmode
);
37970 rtx tmp2
= gen_reg_rtx (XFmode
);
37973 emit_insn (gen_absxf2 (tmp
, op1
));
37974 test
= gen_rtx_GE (VOIDmode
, tmp
,
37975 CONST_DOUBLE_FROM_REAL_VALUE (
37976 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37978 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37980 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37981 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37982 emit_jump (label2
);
37984 emit_label (label1
);
37985 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37986 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37987 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37988 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37990 emit_label (label2
);
37993 /* Emit code for round calculation. */
37994 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37996 enum machine_mode inmode
= GET_MODE (op1
);
37997 enum machine_mode outmode
= GET_MODE (op0
);
37998 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37999 rtx scratch
= gen_reg_rtx (HImode
);
38000 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38001 rtx jump_label
= gen_label_rtx ();
38003 rtx (*gen_abs
) (rtx
, rtx
);
38004 rtx (*gen_neg
) (rtx
, rtx
);
38009 gen_abs
= gen_abssf2
;
38012 gen_abs
= gen_absdf2
;
38015 gen_abs
= gen_absxf2
;
38018 gcc_unreachable ();
38024 gen_neg
= gen_negsf2
;
38027 gen_neg
= gen_negdf2
;
38030 gen_neg
= gen_negxf2
;
38033 gen_neg
= gen_neghi2
;
38036 gen_neg
= gen_negsi2
;
38039 gen_neg
= gen_negdi2
;
38042 gcc_unreachable ();
38045 e1
= gen_reg_rtx (inmode
);
38046 e2
= gen_reg_rtx (inmode
);
38047 res
= gen_reg_rtx (outmode
);
38049 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
38051 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
38053 /* scratch = fxam(op1) */
38054 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
38055 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
38057 /* e1 = fabs(op1) */
38058 emit_insn (gen_abs (e1
, op1
));
38060 /* e2 = e1 + 0.5 */
38061 half
= force_reg (inmode
, half
);
38062 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38063 gen_rtx_PLUS (inmode
, e1
, half
)));
38065 /* res = floor(e2) */
38066 if (inmode
!= XFmode
)
38068 tmp1
= gen_reg_rtx (XFmode
);
38070 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
38071 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
38081 rtx tmp0
= gen_reg_rtx (XFmode
);
38083 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
38085 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38086 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
38087 UNSPEC_TRUNC_NOOP
)));
38091 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
38094 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
38097 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
38100 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
38103 gcc_unreachable ();
38106 /* flags = signbit(a) */
38107 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
38109 /* if (flags) then res = -res */
38110 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
38111 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
38112 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
38114 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38115 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
38116 JUMP_LABEL (insn
) = jump_label
;
38118 emit_insn (gen_neg (res
, res
));
38120 emit_label (jump_label
);
38121 LABEL_NUSES (jump_label
) = 1;
38123 emit_move_insn (op0
, res
);
38126 /* Output code to perform a Newton-Rhapson approximation of a single precision
38127 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
38129 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
38131 rtx x0
, x1
, e0
, e1
;
38133 x0
= gen_reg_rtx (mode
);
38134 e0
= gen_reg_rtx (mode
);
38135 e1
= gen_reg_rtx (mode
);
38136 x1
= gen_reg_rtx (mode
);
38138 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
38140 b
= force_reg (mode
, b
);
38142 /* x0 = rcp(b) estimate */
38143 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38144 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
38147 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38148 gen_rtx_MULT (mode
, x0
, b
)));
38151 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38152 gen_rtx_MULT (mode
, x0
, e0
)));
38155 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38156 gen_rtx_PLUS (mode
, x0
, x0
)));
38159 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
38160 gen_rtx_MINUS (mode
, e1
, e0
)));
38163 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38164 gen_rtx_MULT (mode
, a
, x1
)));
38167 /* Output code to perform a Newton-Rhapson approximation of a
38168 single precision floating point [reciprocal] square root. */
38170 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
38173 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
38176 x0
= gen_reg_rtx (mode
);
38177 e0
= gen_reg_rtx (mode
);
38178 e1
= gen_reg_rtx (mode
);
38179 e2
= gen_reg_rtx (mode
);
38180 e3
= gen_reg_rtx (mode
);
38182 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
38183 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38185 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
38186 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38188 if (VECTOR_MODE_P (mode
))
38190 mthree
= ix86_build_const_vector (mode
, true, mthree
);
38191 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
38194 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
38195 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
38197 a
= force_reg (mode
, a
);
38199 /* x0 = rsqrt(a) estimate */
38200 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38201 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38204 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38209 zero
= gen_reg_rtx (mode
);
38210 mask
= gen_reg_rtx (mode
);
38212 zero
= force_reg (mode
, CONST0_RTX(mode
));
38213 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38214 gen_rtx_NE (mode
, zero
, a
)));
38216 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38217 gen_rtx_AND (mode
, x0
, mask
)));
38221 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38222 gen_rtx_MULT (mode
, x0
, a
)));
38224 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38225 gen_rtx_MULT (mode
, e0
, x0
)));
38228 mthree
= force_reg (mode
, mthree
);
38229 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38230 gen_rtx_PLUS (mode
, e1
, mthree
)));
38232 mhalf
= force_reg (mode
, mhalf
);
38234 /* e3 = -.5 * x0 */
38235 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38236 gen_rtx_MULT (mode
, x0
, mhalf
)));
38238 /* e3 = -.5 * e0 */
38239 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38240 gen_rtx_MULT (mode
, e0
, mhalf
)));
38241 /* ret = e2 * e3 */
38242 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38243 gen_rtx_MULT (mode
, e2
, e3
)));
38246 #ifdef TARGET_SOLARIS
38247 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
38250 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
38253 /* With Binutils 2.15, the "@unwind" marker must be specified on
38254 every occurrence of the ".eh_frame" section, not just the first
38257 && strcmp (name
, ".eh_frame") == 0)
38259 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
38260 flags
& SECTION_WRITE
? "aw" : "a");
38265 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
38267 solaris_elf_asm_comdat_section (name
, flags
, decl
);
38272 default_elf_asm_named_section (name
, flags
, decl
);
38274 #endif /* TARGET_SOLARIS */
38276 /* Return the mangling of TYPE if it is an extended fundamental type. */
38278 static const char *
38279 ix86_mangle_type (const_tree type
)
38281 type
= TYPE_MAIN_VARIANT (type
);
38283 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
38284 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
38287 switch (TYPE_MODE (type
))
38290 /* __float128 is "g". */
38293 /* "long double" or __float80 is "e". */
38300 /* For 32-bit code we can save PIC register setup by using
38301 __stack_chk_fail_local hidden function instead of calling
38302 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
38303 register, so it is better to call __stack_chk_fail directly. */
38305 static tree ATTRIBUTE_UNUSED
38306 ix86_stack_protect_fail (void)
38308 return TARGET_64BIT
38309 ? default_external_stack_protect_fail ()
38310 : default_hidden_stack_protect_fail ();
38313 /* Select a format to encode pointers in exception handling data. CODE
38314 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
38315 true if the symbol may be affected by dynamic relocations.
38317 ??? All x86 object file formats are capable of representing this.
38318 After all, the relocation needed is the same as for the call insn.
38319 Whether or not a particular assembler allows us to enter such, I
38320 guess we'll have to see. */
38322 asm_preferred_eh_data_format (int code
, int global
)
38326 int type
= DW_EH_PE_sdata8
;
38328 || ix86_cmodel
== CM_SMALL_PIC
38329 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
38330 type
= DW_EH_PE_sdata4
;
38331 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
38333 if (ix86_cmodel
== CM_SMALL
38334 || (ix86_cmodel
== CM_MEDIUM
&& code
))
38335 return DW_EH_PE_udata4
;
38336 return DW_EH_PE_absptr
;
38339 /* Expand copysign from SIGN to the positive value ABS_VALUE
38340 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
38343 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
38345 enum machine_mode mode
= GET_MODE (sign
);
38346 rtx sgn
= gen_reg_rtx (mode
);
38347 if (mask
== NULL_RTX
)
38349 enum machine_mode vmode
;
38351 if (mode
== SFmode
)
38353 else if (mode
== DFmode
)
38358 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
38359 if (!VECTOR_MODE_P (mode
))
38361 /* We need to generate a scalar mode mask in this case. */
38362 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38363 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38364 mask
= gen_reg_rtx (mode
);
38365 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38369 mask
= gen_rtx_NOT (mode
, mask
);
38370 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
38371 gen_rtx_AND (mode
, mask
, sign
)));
38372 emit_insn (gen_rtx_SET (VOIDmode
, result
,
38373 gen_rtx_IOR (mode
, abs_value
, sgn
)));
38376 /* Expand fabs (OP0) and return a new rtx that holds the result. The
38377 mask for masking out the sign-bit is stored in *SMASK, if that is
38380 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
38382 enum machine_mode vmode
, mode
= GET_MODE (op0
);
38385 xa
= gen_reg_rtx (mode
);
38386 if (mode
== SFmode
)
38388 else if (mode
== DFmode
)
38392 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
38393 if (!VECTOR_MODE_P (mode
))
38395 /* We need to generate a scalar mode mask in this case. */
38396 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
38397 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
38398 mask
= gen_reg_rtx (mode
);
38399 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
38401 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
38402 gen_rtx_AND (mode
, op0
, mask
)));
38410 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
38411 swapping the operands if SWAP_OPERANDS is true. The expanded
38412 code is a forward jump to a newly created label in case the
38413 comparison is true. The generated label rtx is returned. */
38415 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
38416 bool swap_operands
)
38427 label
= gen_label_rtx ();
38428 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
38429 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38430 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
38431 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
38432 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
38433 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
38434 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38435 JUMP_LABEL (tmp
) = label
;
38440 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
38441 using comparison code CODE. Operands are swapped for the comparison if
38442 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
38444 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
38445 bool swap_operands
)
38447 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
38448 enum machine_mode mode
= GET_MODE (op0
);
38449 rtx mask
= gen_reg_rtx (mode
);
38458 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
38460 emit_insn (insn (mask
, op0
, op1
,
38461 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
38465 /* Generate and return a rtx of mode MODE for 2**n where n is the number
38466 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
38468 ix86_gen_TWO52 (enum machine_mode mode
)
38470 REAL_VALUE_TYPE TWO52r
;
38473 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
38474 TWO52
= const_double_from_real_value (TWO52r
, mode
);
38475 TWO52
= force_reg (mode
, TWO52
);
38480 /* Expand SSE sequence for computing lround from OP1 storing
38483 ix86_expand_lround (rtx op0
, rtx op1
)
38485 /* C code for the stuff we're doing below:
38486 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
38489 enum machine_mode mode
= GET_MODE (op1
);
38490 const struct real_format
*fmt
;
38491 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38494 /* load nextafter (0.5, 0.0) */
38495 fmt
= REAL_MODE_FORMAT (mode
);
38496 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38497 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38499 /* adj = copysign (0.5, op1) */
38500 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38501 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
38503 /* adj = op1 + adj */
38504 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38506 /* op0 = (imode)adj */
38507 expand_fix (op0
, adj
, 0);
38510 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
38513 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
38515 /* C code for the stuff we're doing below (for do_floor):
38517 xi -= (double)xi > op1 ? 1 : 0;
38520 enum machine_mode fmode
= GET_MODE (op1
);
38521 enum machine_mode imode
= GET_MODE (op0
);
38522 rtx ireg
, freg
, label
, tmp
;
38524 /* reg = (long)op1 */
38525 ireg
= gen_reg_rtx (imode
);
38526 expand_fix (ireg
, op1
, 0);
38528 /* freg = (double)reg */
38529 freg
= gen_reg_rtx (fmode
);
38530 expand_float (freg
, ireg
, 0);
38532 /* ireg = (freg > op1) ? ireg - 1 : ireg */
38533 label
= ix86_expand_sse_compare_and_jump (UNLE
,
38534 freg
, op1
, !do_floor
);
38535 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
38536 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
38537 emit_move_insn (ireg
, tmp
);
38539 emit_label (label
);
38540 LABEL_NUSES (label
) = 1;
38542 emit_move_insn (op0
, ireg
);
38545 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
38546 result in OPERAND0. */
38548 ix86_expand_rint (rtx operand0
, rtx operand1
)
38550 /* C code for the stuff we're doing below:
38551 xa = fabs (operand1);
38552 if (!isless (xa, 2**52))
38554 xa = xa + 2**52 - 2**52;
38555 return copysign (xa, operand1);
38557 enum machine_mode mode
= GET_MODE (operand0
);
38558 rtx res
, xa
, label
, TWO52
, mask
;
38560 res
= gen_reg_rtx (mode
);
38561 emit_move_insn (res
, operand1
);
38563 /* xa = abs (operand1) */
38564 xa
= ix86_expand_sse_fabs (res
, &mask
);
38566 /* if (!isless (xa, TWO52)) goto label; */
38567 TWO52
= ix86_gen_TWO52 (mode
);
38568 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38570 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38571 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38573 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
38575 emit_label (label
);
38576 LABEL_NUSES (label
) = 1;
38578 emit_move_insn (operand0
, res
);
38581 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38584 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
38586 /* C code for the stuff we expand below.
38587 double xa = fabs (x), x2;
38588 if (!isless (xa, TWO52))
38590 xa = xa + TWO52 - TWO52;
38591 x2 = copysign (xa, x);
38600 enum machine_mode mode
= GET_MODE (operand0
);
38601 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
38603 TWO52
= ix86_gen_TWO52 (mode
);
38605 /* Temporary for holding the result, initialized to the input
38606 operand to ease control flow. */
38607 res
= gen_reg_rtx (mode
);
38608 emit_move_insn (res
, operand1
);
38610 /* xa = abs (operand1) */
38611 xa
= ix86_expand_sse_fabs (res
, &mask
);
38613 /* if (!isless (xa, TWO52)) goto label; */
38614 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38616 /* xa = xa + TWO52 - TWO52; */
38617 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38618 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
38620 /* xa = copysign (xa, operand1) */
38621 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
38623 /* generate 1.0 or -1.0 */
38624 one
= force_reg (mode
,
38625 const_double_from_real_value (do_floor
38626 ? dconst1
: dconstm1
, mode
));
38628 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38629 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38630 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38631 gen_rtx_AND (mode
, one
, tmp
)));
38632 /* We always need to subtract here to preserve signed zero. */
38633 tmp
= expand_simple_binop (mode
, MINUS
,
38634 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38635 emit_move_insn (res
, tmp
);
38637 emit_label (label
);
38638 LABEL_NUSES (label
) = 1;
38640 emit_move_insn (operand0
, res
);
38643 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
38646 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
38648 /* C code for the stuff we expand below.
38649 double xa = fabs (x), x2;
38650 if (!isless (xa, TWO52))
38652 x2 = (double)(long)x;
38659 if (HONOR_SIGNED_ZEROS (mode))
38660 return copysign (x2, x);
38663 enum machine_mode mode
= GET_MODE (operand0
);
38664 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
38666 TWO52
= ix86_gen_TWO52 (mode
);
38668 /* Temporary for holding the result, initialized to the input
38669 operand to ease control flow. */
38670 res
= gen_reg_rtx (mode
);
38671 emit_move_insn (res
, operand1
);
38673 /* xa = abs (operand1) */
38674 xa
= ix86_expand_sse_fabs (res
, &mask
);
38676 /* if (!isless (xa, TWO52)) goto label; */
38677 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38679 /* xa = (double)(long)x */
38680 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38681 expand_fix (xi
, res
, 0);
38682 expand_float (xa
, xi
, 0);
38685 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38687 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38688 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
38689 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38690 gen_rtx_AND (mode
, one
, tmp
)));
38691 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
38692 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38693 emit_move_insn (res
, tmp
);
38695 if (HONOR_SIGNED_ZEROS (mode
))
38696 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38698 emit_label (label
);
38699 LABEL_NUSES (label
) = 1;
38701 emit_move_insn (operand0
, res
);
38704 /* Expand SSE sequence for computing round from OPERAND1 storing
38705 into OPERAND0. Sequence that works without relying on DImode truncation
38706 via cvttsd2siq that is only available on 64bit targets. */
38708 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
38710 /* C code for the stuff we expand below.
38711 double xa = fabs (x), xa2, x2;
38712 if (!isless (xa, TWO52))
38714 Using the absolute value and copying back sign makes
38715 -0.0 -> -0.0 correct.
38716 xa2 = xa + TWO52 - TWO52;
38721 else if (dxa > 0.5)
38723 x2 = copysign (xa2, x);
38726 enum machine_mode mode
= GET_MODE (operand0
);
38727 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
38729 TWO52
= ix86_gen_TWO52 (mode
);
38731 /* Temporary for holding the result, initialized to the input
38732 operand to ease control flow. */
38733 res
= gen_reg_rtx (mode
);
38734 emit_move_insn (res
, operand1
);
38736 /* xa = abs (operand1) */
38737 xa
= ix86_expand_sse_fabs (res
, &mask
);
38739 /* if (!isless (xa, TWO52)) goto label; */
38740 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38742 /* xa2 = xa + TWO52 - TWO52; */
38743 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38744 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
38746 /* dxa = xa2 - xa; */
38747 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
38749 /* generate 0.5, 1.0 and -0.5 */
38750 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
38751 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38752 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
38756 tmp
= gen_reg_rtx (mode
);
38757 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38758 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
38759 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38760 gen_rtx_AND (mode
, one
, tmp
)));
38761 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38762 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38763 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
38764 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
38765 gen_rtx_AND (mode
, one
, tmp
)));
38766 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
38768 /* res = copysign (xa2, operand1) */
38769 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
38771 emit_label (label
);
38772 LABEL_NUSES (label
) = 1;
38774 emit_move_insn (operand0
, res
);
38777 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38780 ix86_expand_trunc (rtx operand0
, rtx operand1
)
38782 /* C code for SSE variant we expand below.
38783 double xa = fabs (x), x2;
38784 if (!isless (xa, TWO52))
38786 x2 = (double)(long)x;
38787 if (HONOR_SIGNED_ZEROS (mode))
38788 return copysign (x2, x);
38791 enum machine_mode mode
= GET_MODE (operand0
);
38792 rtx xa
, xi
, TWO52
, label
, res
, mask
;
38794 TWO52
= ix86_gen_TWO52 (mode
);
38796 /* Temporary for holding the result, initialized to the input
38797 operand to ease control flow. */
38798 res
= gen_reg_rtx (mode
);
38799 emit_move_insn (res
, operand1
);
38801 /* xa = abs (operand1) */
38802 xa
= ix86_expand_sse_fabs (res
, &mask
);
38804 /* if (!isless (xa, TWO52)) goto label; */
38805 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38807 /* x = (double)(long)x */
38808 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38809 expand_fix (xi
, res
, 0);
38810 expand_float (res
, xi
, 0);
38812 if (HONOR_SIGNED_ZEROS (mode
))
38813 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38815 emit_label (label
);
38816 LABEL_NUSES (label
) = 1;
38818 emit_move_insn (operand0
, res
);
38821 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38824 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38826 enum machine_mode mode
= GET_MODE (operand0
);
38827 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38829 /* C code for SSE variant we expand below.
38830 double xa = fabs (x), x2;
38831 if (!isless (xa, TWO52))
38833 xa2 = xa + TWO52 - TWO52;
38837 x2 = copysign (xa2, x);
38841 TWO52
= ix86_gen_TWO52 (mode
);
38843 /* Temporary for holding the result, initialized to the input
38844 operand to ease control flow. */
38845 res
= gen_reg_rtx (mode
);
38846 emit_move_insn (res
, operand1
);
38848 /* xa = abs (operand1) */
38849 xa
= ix86_expand_sse_fabs (res
, &smask
);
38851 /* if (!isless (xa, TWO52)) goto label; */
38852 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38854 /* res = xa + TWO52 - TWO52; */
38855 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38856 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38857 emit_move_insn (res
, tmp
);
38860 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38862 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38863 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38864 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38865 gen_rtx_AND (mode
, mask
, one
)));
38866 tmp
= expand_simple_binop (mode
, MINUS
,
38867 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38868 emit_move_insn (res
, tmp
);
38870 /* res = copysign (res, operand1) */
38871 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38873 emit_label (label
);
38874 LABEL_NUSES (label
) = 1;
38876 emit_move_insn (operand0
, res
);
38879 /* Expand SSE sequence for computing round from OPERAND1 storing
38882 ix86_expand_round (rtx operand0
, rtx operand1
)
38884 /* C code for the stuff we're doing below:
38885 double xa = fabs (x);
38886 if (!isless (xa, TWO52))
38888 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38889 return copysign (xa, x);
38891 enum machine_mode mode
= GET_MODE (operand0
);
38892 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38893 const struct real_format
*fmt
;
38894 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38896 /* Temporary for holding the result, initialized to the input
38897 operand to ease control flow. */
38898 res
= gen_reg_rtx (mode
);
38899 emit_move_insn (res
, operand1
);
38901 TWO52
= ix86_gen_TWO52 (mode
);
38902 xa
= ix86_expand_sse_fabs (res
, &mask
);
38903 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38905 /* load nextafter (0.5, 0.0) */
38906 fmt
= REAL_MODE_FORMAT (mode
);
38907 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38908 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38910 /* xa = xa + 0.5 */
38911 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38912 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38914 /* xa = (double)(int64_t)xa */
38915 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38916 expand_fix (xi
, xa
, 0);
38917 expand_float (xa
, xi
, 0);
38919 /* res = copysign (xa, operand1) */
38920 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38922 emit_label (label
);
38923 LABEL_NUSES (label
) = 1;
38925 emit_move_insn (operand0
, res
);
38928 /* Expand SSE sequence for computing round
38929 from OP1 storing into OP0 using sse4 round insn. */
38931 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38933 enum machine_mode mode
= GET_MODE (op0
);
38934 rtx e1
, e2
, res
, half
;
38935 const struct real_format
*fmt
;
38936 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38937 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38938 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38943 gen_copysign
= gen_copysignsf3
;
38944 gen_round
= gen_sse4_1_roundsf2
;
38947 gen_copysign
= gen_copysigndf3
;
38948 gen_round
= gen_sse4_1_rounddf2
;
38951 gcc_unreachable ();
38954 /* round (a) = trunc (a + copysign (0.5, a)) */
38956 /* load nextafter (0.5, 0.0) */
38957 fmt
= REAL_MODE_FORMAT (mode
);
38958 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38959 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38960 half
= const_double_from_real_value (pred_half
, mode
);
38962 /* e1 = copysign (0.5, op1) */
38963 e1
= gen_reg_rtx (mode
);
38964 emit_insn (gen_copysign (e1
, half
, op1
));
38966 /* e2 = op1 + e1 */
38967 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38969 /* res = trunc (e2) */
38970 res
= gen_reg_rtx (mode
);
38971 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38973 emit_move_insn (op0
, res
);
38977 /* Table of valid machine attributes. */
38978 static const struct attribute_spec ix86_attribute_table
[] =
38980 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38981 affects_type_identity } */
38982 /* Stdcall attribute says callee is responsible for popping arguments
38983 if they are not variable. */
38984 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38986 /* Fastcall attribute says callee is responsible for popping arguments
38987 if they are not variable. */
38988 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38990 /* Thiscall attribute says callee is responsible for popping arguments
38991 if they are not variable. */
38992 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38994 /* Cdecl attribute says the callee is a normal C declaration */
38995 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38997 /* Regparm attribute specifies how many integer arguments are to be
38998 passed in registers. */
38999 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
39001 /* Sseregparm attribute says we are using x86_64 calling conventions
39002 for FP arguments. */
39003 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39005 /* The transactional memory builtins are implicitly regparm or fastcall
39006 depending on the ABI. Override the generic do-nothing attribute that
39007 these builtins were declared with. */
39008 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
39010 /* force_align_arg_pointer says this function realigns the stack at entry. */
39011 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
39012 false, true, true, ix86_handle_cconv_attribute
, false },
39013 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39014 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
39015 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
39016 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
39019 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39021 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39023 #ifdef SUBTARGET_ATTRIBUTE_TABLE
39024 SUBTARGET_ATTRIBUTE_TABLE
,
39026 /* ms_abi and sysv_abi calling convention function attributes. */
39027 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39028 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39029 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
39031 { "callee_pop_aggregate_return", 1, 1, false, true, true,
39032 ix86_handle_callee_pop_aggregate_return
, true },
39034 { NULL
, 0, 0, false, false, false, NULL
, false }
39037 /* Implement targetm.vectorize.builtin_vectorization_cost. */
39039 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
39041 int misalign ATTRIBUTE_UNUSED
)
39045 switch (type_of_cost
)
39048 return ix86_cost
->scalar_stmt_cost
;
39051 return ix86_cost
->scalar_load_cost
;
39054 return ix86_cost
->scalar_store_cost
;
39057 return ix86_cost
->vec_stmt_cost
;
39060 return ix86_cost
->vec_align_load_cost
;
39063 return ix86_cost
->vec_store_cost
;
39065 case vec_to_scalar
:
39066 return ix86_cost
->vec_to_scalar_cost
;
39068 case scalar_to_vec
:
39069 return ix86_cost
->scalar_to_vec_cost
;
39071 case unaligned_load
:
39072 case unaligned_store
:
39073 return ix86_cost
->vec_unalign_load_cost
;
39075 case cond_branch_taken
:
39076 return ix86_cost
->cond_taken_branch_cost
;
39078 case cond_branch_not_taken
:
39079 return ix86_cost
->cond_not_taken_branch_cost
;
39082 case vec_promote_demote
:
39083 return ix86_cost
->vec_stmt_cost
;
39085 case vec_construct
:
39086 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
39087 return elements
/ 2 + 1;
39090 gcc_unreachable ();
39094 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
39095 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
39096 insn every time. */
39098 static GTY(()) rtx vselect_insn
;
39100 /* Initialize vselect_insn. */
39103 init_vselect_insn (void)
39108 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
39109 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
39110 XVECEXP (x
, 0, i
) = const0_rtx
;
39111 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
39113 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
39115 vselect_insn
= emit_insn (x
);
39119 /* Construct (set target (vec_select op0 (parallel perm))) and
39120 return true if that's a valid instruction in the active ISA. */
39123 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
39124 unsigned nelt
, bool testing_p
)
39127 rtx x
, save_vconcat
;
39130 if (vselect_insn
== NULL_RTX
)
39131 init_vselect_insn ();
39133 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
39134 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
39135 for (i
= 0; i
< nelt
; ++i
)
39136 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
39137 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39138 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
39139 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
39140 SET_DEST (PATTERN (vselect_insn
)) = target
;
39141 icode
= recog_memoized (vselect_insn
);
39143 if (icode
>= 0 && !testing_p
)
39144 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
39146 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
39147 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
39148 INSN_CODE (vselect_insn
) = -1;
39153 /* Similar, but generate a vec_concat from op0 and op1 as well. */
39156 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
39157 const unsigned char *perm
, unsigned nelt
,
39160 enum machine_mode v2mode
;
39164 if (vselect_insn
== NULL_RTX
)
39165 init_vselect_insn ();
39167 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
39168 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39169 PUT_MODE (x
, v2mode
);
39172 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
39173 XEXP (x
, 0) = const0_rtx
;
39174 XEXP (x
, 1) = const0_rtx
;
39178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39179 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
39182 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
39184 enum machine_mode vmode
= d
->vmode
;
39185 unsigned i
, mask
, nelt
= d
->nelt
;
39186 rtx target
, op0
, op1
, x
;
39187 rtx rperm
[32], vperm
;
39189 if (d
->one_operand_p
)
39191 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
39193 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
39195 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39200 /* This is a blend, not a permute. Elements must stay in their
39201 respective lanes. */
39202 for (i
= 0; i
< nelt
; ++i
)
39204 unsigned e
= d
->perm
[i
];
39205 if (!(e
== i
|| e
== i
+ nelt
))
39212 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39213 decision should be extracted elsewhere, so that we only try that
39214 sequence once all budget==3 options have been tried. */
39215 target
= d
->target
;
39228 for (i
= 0; i
< nelt
; ++i
)
39229 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39233 for (i
= 0; i
< 2; ++i
)
39234 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39239 for (i
= 0; i
< 4; ++i
)
39240 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39245 /* See if bytes move in pairs so we can use pblendw with
39246 an immediate argument, rather than pblendvb with a vector
39248 for (i
= 0; i
< 16; i
+= 2)
39249 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39252 for (i
= 0; i
< nelt
; ++i
)
39253 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39256 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39257 vperm
= force_reg (vmode
, vperm
);
39259 if (GET_MODE_SIZE (vmode
) == 16)
39260 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39262 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39266 for (i
= 0; i
< 8; ++i
)
39267 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39272 target
= gen_lowpart (vmode
, target
);
39273 op0
= gen_lowpart (vmode
, op0
);
39274 op1
= gen_lowpart (vmode
, op1
);
39278 /* See if bytes move in pairs. If not, vpblendvb must be used. */
39279 for (i
= 0; i
< 32; i
+= 2)
39280 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39282 /* See if bytes move in quadruplets. If yes, vpblendd
39283 with immediate can be used. */
39284 for (i
= 0; i
< 32; i
+= 4)
39285 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
39289 /* See if bytes move the same in both lanes. If yes,
39290 vpblendw with immediate can be used. */
39291 for (i
= 0; i
< 16; i
+= 2)
39292 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
39295 /* Use vpblendw. */
39296 for (i
= 0; i
< 16; ++i
)
39297 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
39302 /* Use vpblendd. */
39303 for (i
= 0; i
< 8; ++i
)
39304 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
39309 /* See if words move in pairs. If yes, vpblendd can be used. */
39310 for (i
= 0; i
< 16; i
+= 2)
39311 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39315 /* See if words move the same in both lanes. If not,
39316 vpblendvb must be used. */
39317 for (i
= 0; i
< 8; i
++)
39318 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
39320 /* Use vpblendvb. */
39321 for (i
= 0; i
< 32; ++i
)
39322 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
39326 target
= gen_lowpart (vmode
, target
);
39327 op0
= gen_lowpart (vmode
, op0
);
39328 op1
= gen_lowpart (vmode
, op1
);
39329 goto finish_pblendvb
;
39332 /* Use vpblendw. */
39333 for (i
= 0; i
< 16; ++i
)
39334 mask
|= (d
->perm
[i
] >= 16) << i
;
39338 /* Use vpblendd. */
39339 for (i
= 0; i
< 8; ++i
)
39340 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
39345 /* Use vpblendd. */
39346 for (i
= 0; i
< 4; ++i
)
39347 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39352 gcc_unreachable ();
39355 /* This matches five different patterns with the different modes. */
39356 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
39357 x
= gen_rtx_SET (VOIDmode
, target
, x
);
39363 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39364 in terms of the variable form of vpermilps.
39366 Note that we will have already failed the immediate input vpermilps,
39367 which requires that the high and low part shuffle be identical; the
39368 variable form doesn't require that. */
39371 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
39373 rtx rperm
[8], vperm
;
39376 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
39379 /* We can only permute within the 128-bit lane. */
39380 for (i
= 0; i
< 8; ++i
)
39382 unsigned e
= d
->perm
[i
];
39383 if (i
< 4 ? e
>= 4 : e
< 4)
39390 for (i
= 0; i
< 8; ++i
)
39392 unsigned e
= d
->perm
[i
];
39394 /* Within each 128-bit lane, the elements of op0 are numbered
39395 from 0 and the elements of op1 are numbered from 4. */
39401 rperm
[i
] = GEN_INT (e
);
39404 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
39405 vperm
= force_reg (V8SImode
, vperm
);
39406 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
39411 /* Return true if permutation D can be performed as VMODE permutation
39415 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
39417 unsigned int i
, j
, chunk
;
39419 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
39420 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
39421 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
39424 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
39427 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
39428 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
39429 if (d
->perm
[i
] & (chunk
- 1))
39432 for (j
= 1; j
< chunk
; ++j
)
39433 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
39439 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39440 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
39443 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
39445 unsigned i
, nelt
, eltsz
, mask
;
39446 unsigned char perm
[32];
39447 enum machine_mode vmode
= V16QImode
;
39448 rtx rperm
[32], vperm
, target
, op0
, op1
;
39452 if (!d
->one_operand_p
)
39454 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
39457 && valid_perm_using_mode_p (V2TImode
, d
))
39462 /* Use vperm2i128 insn. The pattern uses
39463 V4DImode instead of V2TImode. */
39464 target
= gen_lowpart (V4DImode
, d
->target
);
39465 op0
= gen_lowpart (V4DImode
, d
->op0
);
39466 op1
= gen_lowpart (V4DImode
, d
->op1
);
39468 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
39469 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
39470 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
39478 if (GET_MODE_SIZE (d
->vmode
) == 16)
39483 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39488 /* V4DImode should be already handled through
39489 expand_vselect by vpermq instruction. */
39490 gcc_assert (d
->vmode
!= V4DImode
);
39493 if (d
->vmode
== V8SImode
39494 || d
->vmode
== V16HImode
39495 || d
->vmode
== V32QImode
)
39497 /* First see if vpermq can be used for
39498 V8SImode/V16HImode/V32QImode. */
39499 if (valid_perm_using_mode_p (V4DImode
, d
))
39501 for (i
= 0; i
< 4; i
++)
39502 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
39505 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
39506 gen_lowpart (V4DImode
, d
->op0
),
39510 /* Next see if vpermd can be used. */
39511 if (valid_perm_using_mode_p (V8SImode
, d
))
39514 /* Or if vpermps can be used. */
39515 else if (d
->vmode
== V8SFmode
)
39518 if (vmode
== V32QImode
)
39520 /* vpshufb only works intra lanes, it is not
39521 possible to shuffle bytes in between the lanes. */
39522 for (i
= 0; i
< nelt
; ++i
)
39523 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
39534 if (vmode
== V8SImode
)
39535 for (i
= 0; i
< 8; ++i
)
39536 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
39539 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39540 if (!d
->one_operand_p
)
39541 mask
= 2 * nelt
- 1;
39542 else if (vmode
== V16QImode
)
39545 mask
= nelt
/ 2 - 1;
39547 for (i
= 0; i
< nelt
; ++i
)
39549 unsigned j
, e
= d
->perm
[i
] & mask
;
39550 for (j
= 0; j
< eltsz
; ++j
)
39551 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
39555 vperm
= gen_rtx_CONST_VECTOR (vmode
,
39556 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
39557 vperm
= force_reg (vmode
, vperm
);
39559 target
= gen_lowpart (vmode
, d
->target
);
39560 op0
= gen_lowpart (vmode
, d
->op0
);
39561 if (d
->one_operand_p
)
39563 if (vmode
== V16QImode
)
39564 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
39565 else if (vmode
== V32QImode
)
39566 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
39567 else if (vmode
== V8SFmode
)
39568 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
39570 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
39574 op1
= gen_lowpart (vmode
, d
->op1
);
39575 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
39581 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
39582 in a single instruction. */
39585 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
39587 unsigned i
, nelt
= d
->nelt
;
39588 unsigned char perm2
[MAX_VECT_LEN
];
39590 /* Check plain VEC_SELECT first, because AVX has instructions that could
39591 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
39592 input where SEL+CONCAT may not. */
39593 if (d
->one_operand_p
)
39595 int mask
= nelt
- 1;
39596 bool identity_perm
= true;
39597 bool broadcast_perm
= true;
39599 for (i
= 0; i
< nelt
; i
++)
39601 perm2
[i
] = d
->perm
[i
] & mask
;
39603 identity_perm
= false;
39605 broadcast_perm
= false;
39611 emit_move_insn (d
->target
, d
->op0
);
39614 else if (broadcast_perm
&& TARGET_AVX2
)
39616 /* Use vpbroadcast{b,w,d}. */
39617 rtx (*gen
) (rtx
, rtx
) = NULL
;
39621 gen
= gen_avx2_pbroadcastv32qi_1
;
39624 gen
= gen_avx2_pbroadcastv16hi_1
;
39627 gen
= gen_avx2_pbroadcastv8si_1
;
39630 gen
= gen_avx2_pbroadcastv16qi
;
39633 gen
= gen_avx2_pbroadcastv8hi
;
39636 gen
= gen_avx2_vec_dupv8sf_1
;
39638 /* For other modes prefer other shuffles this function creates. */
39644 emit_insn (gen (d
->target
, d
->op0
));
39649 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
39652 /* There are plenty of patterns in sse.md that are written for
39653 SEL+CONCAT and are not replicated for a single op. Perhaps
39654 that should be changed, to avoid the nastiness here. */
39656 /* Recognize interleave style patterns, which means incrementing
39657 every other permutation operand. */
39658 for (i
= 0; i
< nelt
; i
+= 2)
39660 perm2
[i
] = d
->perm
[i
] & mask
;
39661 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
39663 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39667 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
39670 for (i
= 0; i
< nelt
; i
+= 4)
39672 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
39673 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
39674 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
39675 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
39678 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
39684 /* Finally, try the fully general two operand permute. */
39685 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
39689 /* Recognize interleave style patterns with reversed operands. */
39690 if (!d
->one_operand_p
)
39692 for (i
= 0; i
< nelt
; ++i
)
39694 unsigned e
= d
->perm
[i
];
39702 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
39707 /* Try the SSE4.1 blend variable merge instructions. */
39708 if (expand_vec_perm_blend (d
))
39711 /* Try one of the AVX vpermil variable permutations. */
39712 if (expand_vec_perm_vpermil (d
))
39715 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
39716 vpshufb, vpermd, vpermps or vpermq variable permutation. */
39717 if (expand_vec_perm_pshufb (d
))
39723 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39724 in terms of a pair of pshuflw + pshufhw instructions. */
39727 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
39729 unsigned char perm2
[MAX_VECT_LEN
];
39733 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
39736 /* The two permutations only operate in 64-bit lanes. */
39737 for (i
= 0; i
< 4; ++i
)
39738 if (d
->perm
[i
] >= 4)
39740 for (i
= 4; i
< 8; ++i
)
39741 if (d
->perm
[i
] < 4)
39747 /* Emit the pshuflw. */
39748 memcpy (perm2
, d
->perm
, 4);
39749 for (i
= 4; i
< 8; ++i
)
39751 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
39754 /* Emit the pshufhw. */
39755 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
39756 for (i
= 0; i
< 4; ++i
)
39758 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
39764 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39765 the permutation using the SSSE3 palignr instruction. This succeeds
39766 when all of the elements in PERM fit within one vector and we merely
39767 need to shift them down so that a single vector permutation has a
39768 chance to succeed. */
39771 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
39773 unsigned i
, nelt
= d
->nelt
;
39778 /* Even with AVX, palignr only operates on 128-bit vectors. */
39779 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39782 min
= nelt
, max
= 0;
39783 for (i
= 0; i
< nelt
; ++i
)
39785 unsigned e
= d
->perm
[i
];
39791 if (min
== 0 || max
- min
>= nelt
)
39794 /* Given that we have SSSE3, we know we'll be able to implement the
39795 single operand permutation after the palignr with pshufb. */
39799 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39800 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39801 gen_lowpart (TImode
, d
->op1
),
39802 gen_lowpart (TImode
, d
->op0
), shift
));
39804 d
->op0
= d
->op1
= d
->target
;
39805 d
->one_operand_p
= true;
39808 for (i
= 0; i
< nelt
; ++i
)
39810 unsigned e
= d
->perm
[i
] - min
;
39816 /* Test for the degenerate case where the alignment by itself
39817 produces the desired permutation. */
39821 ok
= expand_vec_perm_1 (d
);
39827 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39829 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39830 a two vector permutation into a single vector permutation by using
39831 an interleave operation to merge the vectors. */
39834 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39836 struct expand_vec_perm_d dremap
, dfinal
;
39837 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39838 unsigned HOST_WIDE_INT contents
;
39839 unsigned char remap
[2 * MAX_VECT_LEN
];
39841 bool ok
, same_halves
= false;
39843 if (GET_MODE_SIZE (d
->vmode
) == 16)
39845 if (d
->one_operand_p
)
39848 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39852 /* For 32-byte modes allow even d->one_operand_p.
39853 The lack of cross-lane shuffling in some instructions
39854 might prevent a single insn shuffle. */
39856 dfinal
.testing_p
= true;
39857 /* If expand_vec_perm_interleave3 can expand this into
39858 a 3 insn sequence, give up and let it be expanded as
39859 3 insn sequence. While that is one insn longer,
39860 it doesn't need a memory operand and in the common
39861 case that both interleave low and high permutations
39862 with the same operands are adjacent needs 4 insns
39863 for both after CSE. */
39864 if (expand_vec_perm_interleave3 (&dfinal
))
39870 /* Examine from whence the elements come. */
39872 for (i
= 0; i
< nelt
; ++i
)
39873 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39875 memset (remap
, 0xff, sizeof (remap
));
39878 if (GET_MODE_SIZE (d
->vmode
) == 16)
39880 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39882 /* Split the two input vectors into 4 halves. */
39883 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39888 /* If the elements from the low halves use interleave low, and similarly
39889 for interleave high. If the elements are from mis-matched halves, we
39890 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39891 if ((contents
& (h1
| h3
)) == contents
)
39894 for (i
= 0; i
< nelt2
; ++i
)
39897 remap
[i
+ nelt
] = i
* 2 + 1;
39898 dremap
.perm
[i
* 2] = i
;
39899 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39901 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39902 dremap
.vmode
= V4SFmode
;
39904 else if ((contents
& (h2
| h4
)) == contents
)
39907 for (i
= 0; i
< nelt2
; ++i
)
39909 remap
[i
+ nelt2
] = i
* 2;
39910 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39911 dremap
.perm
[i
* 2] = i
+ nelt2
;
39912 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39914 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39915 dremap
.vmode
= V4SFmode
;
39917 else if ((contents
& (h1
| h4
)) == contents
)
39920 for (i
= 0; i
< nelt2
; ++i
)
39923 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39924 dremap
.perm
[i
] = i
;
39925 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39930 dremap
.vmode
= V2DImode
;
39932 dremap
.perm
[0] = 0;
39933 dremap
.perm
[1] = 3;
39936 else if ((contents
& (h2
| h3
)) == contents
)
39939 for (i
= 0; i
< nelt2
; ++i
)
39941 remap
[i
+ nelt2
] = i
;
39942 remap
[i
+ nelt
] = i
+ nelt2
;
39943 dremap
.perm
[i
] = i
+ nelt2
;
39944 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39949 dremap
.vmode
= V2DImode
;
39951 dremap
.perm
[0] = 1;
39952 dremap
.perm
[1] = 2;
39960 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39961 unsigned HOST_WIDE_INT q
[8];
39962 unsigned int nonzero_halves
[4];
39964 /* Split the two input vectors into 8 quarters. */
39965 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39966 for (i
= 1; i
< 8; ++i
)
39967 q
[i
] = q
[0] << (nelt4
* i
);
39968 for (i
= 0; i
< 4; ++i
)
39969 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39971 nonzero_halves
[nzcnt
] = i
;
39977 gcc_assert (d
->one_operand_p
);
39978 nonzero_halves
[1] = nonzero_halves
[0];
39979 same_halves
= true;
39981 else if (d
->one_operand_p
)
39983 gcc_assert (nonzero_halves
[0] == 0);
39984 gcc_assert (nonzero_halves
[1] == 1);
39989 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39991 /* Attempt to increase the likelihood that dfinal
39992 shuffle will be intra-lane. */
39993 char tmph
= nonzero_halves
[0];
39994 nonzero_halves
[0] = nonzero_halves
[1];
39995 nonzero_halves
[1] = tmph
;
39998 /* vperm2f128 or vperm2i128. */
39999 for (i
= 0; i
< nelt2
; ++i
)
40001 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
40002 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
40003 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
40004 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
40007 if (d
->vmode
!= V8SFmode
40008 && d
->vmode
!= V4DFmode
40009 && d
->vmode
!= V8SImode
)
40011 dremap
.vmode
= V8SImode
;
40013 for (i
= 0; i
< 4; ++i
)
40015 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
40016 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
40020 else if (d
->one_operand_p
)
40022 else if (TARGET_AVX2
40023 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
40026 for (i
= 0; i
< nelt4
; ++i
)
40029 remap
[i
+ nelt
] = i
* 2 + 1;
40030 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
40031 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
40032 dremap
.perm
[i
* 2] = i
;
40033 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40034 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
40035 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
40038 else if (TARGET_AVX2
40039 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
40042 for (i
= 0; i
< nelt4
; ++i
)
40044 remap
[i
+ nelt4
] = i
* 2;
40045 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
40046 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
40047 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
40048 dremap
.perm
[i
* 2] = i
+ nelt4
;
40049 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
40050 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
40051 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
40058 /* Use the remapping array set up above to move the elements from their
40059 swizzled locations into their final destinations. */
40061 for (i
= 0; i
< nelt
; ++i
)
40063 unsigned e
= remap
[d
->perm
[i
]];
40064 gcc_assert (e
< nelt
);
40065 /* If same_halves is true, both halves of the remapped vector are the
40066 same. Avoid cross-lane accesses if possible. */
40067 if (same_halves
&& i
>= nelt2
)
40069 gcc_assert (e
< nelt2
);
40070 dfinal
.perm
[i
] = e
+ nelt2
;
40073 dfinal
.perm
[i
] = e
;
40075 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
40076 dfinal
.op1
= dfinal
.op0
;
40077 dfinal
.one_operand_p
= true;
40078 dremap
.target
= dfinal
.op0
;
40080 /* Test if the final remap can be done with a single insn. For V4SFmode or
40081 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
40083 ok
= expand_vec_perm_1 (&dfinal
);
40084 seq
= get_insns ();
40093 if (dremap
.vmode
!= dfinal
.vmode
)
40095 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
40096 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
40097 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
40100 ok
= expand_vec_perm_1 (&dremap
);
40107 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40108 a single vector cross-lane permutation into vpermq followed
40109 by any of the single insn permutations. */
40112 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
40114 struct expand_vec_perm_d dremap
, dfinal
;
40115 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
40116 unsigned contents
[2];
40120 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
40121 && d
->one_operand_p
))
40126 for (i
= 0; i
< nelt2
; ++i
)
40128 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
40129 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
40132 for (i
= 0; i
< 2; ++i
)
40134 unsigned int cnt
= 0;
40135 for (j
= 0; j
< 4; ++j
)
40136 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
40144 dremap
.vmode
= V4DImode
;
40146 dremap
.target
= gen_reg_rtx (V4DImode
);
40147 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
40148 dremap
.op1
= dremap
.op0
;
40149 dremap
.one_operand_p
= true;
40150 for (i
= 0; i
< 2; ++i
)
40152 unsigned int cnt
= 0;
40153 for (j
= 0; j
< 4; ++j
)
40154 if ((contents
[i
] & (1u << j
)) != 0)
40155 dremap
.perm
[2 * i
+ cnt
++] = j
;
40156 for (; cnt
< 2; ++cnt
)
40157 dremap
.perm
[2 * i
+ cnt
] = 0;
40161 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40162 dfinal
.op1
= dfinal
.op0
;
40163 dfinal
.one_operand_p
= true;
40164 for (i
= 0, j
= 0; i
< nelt
; ++i
)
40168 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
40169 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
40171 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
40172 dfinal
.perm
[i
] |= nelt4
;
40174 gcc_unreachable ();
40177 ok
= expand_vec_perm_1 (&dremap
);
40180 ok
= expand_vec_perm_1 (&dfinal
);
40186 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
40187 a vector permutation using two instructions, vperm2f128 resp.
40188 vperm2i128 followed by any single in-lane permutation. */
40191 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
40193 struct expand_vec_perm_d dfirst
, dsecond
;
40194 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
40198 || GET_MODE_SIZE (d
->vmode
) != 32
40199 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40203 dsecond
.one_operand_p
= false;
40204 dsecond
.testing_p
= true;
40206 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40207 immediate. For perm < 16 the second permutation uses
40208 d->op0 as first operand, for perm >= 16 it uses d->op1
40209 as first operand. The second operand is the result of
40211 for (perm
= 0; perm
< 32; perm
++)
40213 /* Ignore permutations which do not move anything cross-lane. */
40216 /* The second shuffle for e.g. V4DFmode has
40217 0123 and ABCD operands.
40218 Ignore AB23, as 23 is already in the second lane
40219 of the first operand. */
40220 if ((perm
& 0xc) == (1 << 2)) continue;
40221 /* And 01CD, as 01 is in the first lane of the first
40223 if ((perm
& 3) == 0) continue;
40224 /* And 4567, as then the vperm2[fi]128 doesn't change
40225 anything on the original 4567 second operand. */
40226 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40230 /* The second shuffle for e.g. V4DFmode has
40231 4567 and ABCD operands.
40232 Ignore AB67, as 67 is already in the second lane
40233 of the first operand. */
40234 if ((perm
& 0xc) == (3 << 2)) continue;
40235 /* And 45CD, as 45 is in the first lane of the first
40237 if ((perm
& 3) == 2) continue;
40238 /* And 0123, as then the vperm2[fi]128 doesn't change
40239 anything on the original 0123 first operand. */
40240 if ((perm
& 0xf) == (1 << 2)) continue;
40243 for (i
= 0; i
< nelt
; i
++)
40245 j
= d
->perm
[i
] / nelt2
;
40246 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
40247 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
40248 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
40249 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
40257 ok
= expand_vec_perm_1 (&dsecond
);
40268 /* Found a usable second shuffle. dfirst will be
40269 vperm2f128 on d->op0 and d->op1. */
40270 dsecond
.testing_p
= false;
40272 dfirst
.target
= gen_reg_rtx (d
->vmode
);
40273 for (i
= 0; i
< nelt
; i
++)
40274 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
40275 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
40277 ok
= expand_vec_perm_1 (&dfirst
);
40280 /* And dsecond is some single insn shuffle, taking
40281 d->op0 and result of vperm2f128 (if perm < 16) or
40282 d->op1 and result of vperm2f128 (otherwise). */
40283 dsecond
.op1
= dfirst
.target
;
40285 dsecond
.op0
= dfirst
.op1
;
40287 ok
= expand_vec_perm_1 (&dsecond
);
40293 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
40294 if (d
->one_operand_p
)
40301 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40302 a two vector permutation using 2 intra-lane interleave insns
40303 and cross-lane shuffle for 32-byte vectors. */
40306 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
40309 rtx (*gen
) (rtx
, rtx
, rtx
);
40311 if (d
->one_operand_p
)
40313 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
40315 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
40321 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
40323 for (i
= 0; i
< nelt
; i
+= 2)
40324 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
40325 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
40335 gen
= gen_vec_interleave_highv32qi
;
40337 gen
= gen_vec_interleave_lowv32qi
;
40341 gen
= gen_vec_interleave_highv16hi
;
40343 gen
= gen_vec_interleave_lowv16hi
;
40347 gen
= gen_vec_interleave_highv8si
;
40349 gen
= gen_vec_interleave_lowv8si
;
40353 gen
= gen_vec_interleave_highv4di
;
40355 gen
= gen_vec_interleave_lowv4di
;
40359 gen
= gen_vec_interleave_highv8sf
;
40361 gen
= gen_vec_interleave_lowv8sf
;
40365 gen
= gen_vec_interleave_highv4df
;
40367 gen
= gen_vec_interleave_lowv4df
;
40370 gcc_unreachable ();
40373 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
40377 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
40378 a single vector permutation using a single intra-lane vector
40379 permutation, vperm2f128 swapping the lanes and vblend* insn blending
40380 the non-swapped and swapped vectors together. */
40383 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
40385 struct expand_vec_perm_d dfirst
, dsecond
;
40386 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40389 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
40393 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
40394 || !d
->one_operand_p
)
40398 for (i
= 0; i
< nelt
; i
++)
40399 dfirst
.perm
[i
] = 0xff;
40400 for (i
= 0, msk
= 0; i
< nelt
; i
++)
40402 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
40403 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
40405 dfirst
.perm
[j
] = d
->perm
[i
];
40409 for (i
= 0; i
< nelt
; i
++)
40410 if (dfirst
.perm
[i
] == 0xff)
40411 dfirst
.perm
[i
] = i
;
40414 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40417 ok
= expand_vec_perm_1 (&dfirst
);
40418 seq
= get_insns ();
40430 dsecond
.op0
= dfirst
.target
;
40431 dsecond
.op1
= dfirst
.target
;
40432 dsecond
.one_operand_p
= true;
40433 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40434 for (i
= 0; i
< nelt
; i
++)
40435 dsecond
.perm
[i
] = i
^ nelt2
;
40437 ok
= expand_vec_perm_1 (&dsecond
);
40440 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
40441 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
40445 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
40446 permutation using two vperm2f128, followed by a vshufpd insn blending
40447 the two vectors together. */
40450 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
40452 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
40455 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
40465 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
40466 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
40467 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
40468 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
40469 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
40470 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
40471 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
40472 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
40473 dthird
.perm
[0] = (d
->perm
[0] % 2);
40474 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
40475 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
40476 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
40478 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
40479 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
40480 dthird
.op0
= dfirst
.target
;
40481 dthird
.op1
= dsecond
.target
;
40482 dthird
.one_operand_p
= false;
40484 canonicalize_perm (&dfirst
);
40485 canonicalize_perm (&dsecond
);
40487 ok
= expand_vec_perm_1 (&dfirst
)
40488 && expand_vec_perm_1 (&dsecond
)
40489 && expand_vec_perm_1 (&dthird
);
40496 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
40497 permutation with two pshufb insns and an ior. We should have already
40498 failed all two instruction sequences. */
40501 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
40503 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
40504 unsigned int i
, nelt
, eltsz
;
40506 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40508 gcc_assert (!d
->one_operand_p
);
40511 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40513 /* Generate two permutation masks. If the required element is within
40514 the given vector it is shuffled into the proper lane. If the required
40515 element is in the other vector, force a zero into the lane by setting
40516 bit 7 in the permutation mask. */
40517 m128
= GEN_INT (-128);
40518 for (i
= 0; i
< nelt
; ++i
)
40520 unsigned j
, e
= d
->perm
[i
];
40521 unsigned which
= (e
>= nelt
);
40525 for (j
= 0; j
< eltsz
; ++j
)
40527 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
40528 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
40532 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
40533 vperm
= force_reg (V16QImode
, vperm
);
40535 l
= gen_reg_rtx (V16QImode
);
40536 op
= gen_lowpart (V16QImode
, d
->op0
);
40537 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
40539 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
40540 vperm
= force_reg (V16QImode
, vperm
);
40542 h
= gen_reg_rtx (V16QImode
);
40543 op
= gen_lowpart (V16QImode
, d
->op1
);
40544 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
40546 op
= gen_lowpart (V16QImode
, d
->target
);
40547 emit_insn (gen_iorv16qi3 (op
, l
, h
));
40552 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
40553 with two vpshufb insns, vpermq and vpor. We should have already failed
40554 all two or three instruction sequences. */
40557 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
40559 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
40560 unsigned int i
, nelt
, eltsz
;
40563 || !d
->one_operand_p
40564 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40571 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40573 /* Generate two permutation masks. If the required element is within
40574 the same lane, it is shuffled in. If the required element from the
40575 other lane, force a zero by setting bit 7 in the permutation mask.
40576 In the other mask the mask has non-negative elements if element
40577 is requested from the other lane, but also moved to the other lane,
40578 so that the result of vpshufb can have the two V2TImode halves
40580 m128
= GEN_INT (-128);
40581 for (i
= 0; i
< nelt
; ++i
)
40583 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40584 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40586 for (j
= 0; j
< eltsz
; ++j
)
40588 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
40589 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
40593 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40594 vperm
= force_reg (V32QImode
, vperm
);
40596 h
= gen_reg_rtx (V32QImode
);
40597 op
= gen_lowpart (V32QImode
, d
->op0
);
40598 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40600 /* Swap the 128-byte lanes of h into hp. */
40601 hp
= gen_reg_rtx (V4DImode
);
40602 op
= gen_lowpart (V4DImode
, h
);
40603 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
40606 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40607 vperm
= force_reg (V32QImode
, vperm
);
40609 l
= gen_reg_rtx (V32QImode
);
40610 op
= gen_lowpart (V32QImode
, d
->op0
);
40611 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40613 op
= gen_lowpart (V32QImode
, d
->target
);
40614 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
40619 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
40620 and extract-odd permutations of two V32QImode and V16QImode operand
40621 with two vpshufb insns, vpor and vpermq. We should have already
40622 failed all two or three instruction sequences. */
40625 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
40627 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
40628 unsigned int i
, nelt
, eltsz
;
40631 || d
->one_operand_p
40632 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40635 for (i
= 0; i
< d
->nelt
; ++i
)
40636 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
40643 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40645 /* Generate two permutation masks. In the first permutation mask
40646 the first quarter will contain indexes for the first half
40647 of the op0, the second quarter will contain bit 7 set, third quarter
40648 will contain indexes for the second half of the op0 and the
40649 last quarter bit 7 set. In the second permutation mask
40650 the first quarter will contain bit 7 set, the second quarter
40651 indexes for the first half of the op1, the third quarter bit 7 set
40652 and last quarter indexes for the second half of the op1.
40653 I.e. the first mask e.g. for V32QImode extract even will be:
40654 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
40655 (all values masked with 0xf except for -128) and second mask
40656 for extract even will be
40657 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
40658 m128
= GEN_INT (-128);
40659 for (i
= 0; i
< nelt
; ++i
)
40661 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40662 unsigned which
= d
->perm
[i
] >= nelt
;
40663 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
40665 for (j
= 0; j
< eltsz
; ++j
)
40667 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
40668 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
40672 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
40673 vperm
= force_reg (V32QImode
, vperm
);
40675 l
= gen_reg_rtx (V32QImode
);
40676 op
= gen_lowpart (V32QImode
, d
->op0
);
40677 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
40679 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
40680 vperm
= force_reg (V32QImode
, vperm
);
40682 h
= gen_reg_rtx (V32QImode
);
40683 op
= gen_lowpart (V32QImode
, d
->op1
);
40684 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
40686 ior
= gen_reg_rtx (V32QImode
);
40687 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
40689 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
40690 op
= gen_lowpart (V4DImode
, d
->target
);
40691 ior
= gen_lowpart (V4DImode
, ior
);
40692 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
40693 const1_rtx
, GEN_INT (3)));
40698 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
40699 and extract-odd permutations. */
40702 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
40709 t1
= gen_reg_rtx (V4DFmode
);
40710 t2
= gen_reg_rtx (V4DFmode
);
40712 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40713 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40714 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40716 /* Now an unpck[lh]pd will produce the result required. */
40718 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
40720 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
40726 int mask
= odd
? 0xdd : 0x88;
40728 t1
= gen_reg_rtx (V8SFmode
);
40729 t2
= gen_reg_rtx (V8SFmode
);
40730 t3
= gen_reg_rtx (V8SFmode
);
40732 /* Shuffle within the 128-bit lanes to produce:
40733 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
40734 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
40737 /* Shuffle the lanes around to produce:
40738 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
40739 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
40742 /* Shuffle within the 128-bit lanes to produce:
40743 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
40744 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
40746 /* Shuffle within the 128-bit lanes to produce:
40747 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
40748 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
40750 /* Shuffle the lanes around to produce:
40751 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
40752 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
40761 /* These are always directly implementable by expand_vec_perm_1. */
40762 gcc_unreachable ();
40766 return expand_vec_perm_pshufb2 (d
);
40769 /* We need 2*log2(N)-1 operations to achieve odd/even
40770 with interleave. */
40771 t1
= gen_reg_rtx (V8HImode
);
40772 t2
= gen_reg_rtx (V8HImode
);
40773 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
40774 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
40775 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
40776 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
40778 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
40780 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
40787 return expand_vec_perm_pshufb2 (d
);
40790 t1
= gen_reg_rtx (V16QImode
);
40791 t2
= gen_reg_rtx (V16QImode
);
40792 t3
= gen_reg_rtx (V16QImode
);
40793 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
40794 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
40795 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
40796 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
40797 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
40798 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40800 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40802 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40809 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40814 struct expand_vec_perm_d d_copy
= *d
;
40815 d_copy
.vmode
= V4DFmode
;
40816 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40817 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40818 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40819 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40822 t1
= gen_reg_rtx (V4DImode
);
40823 t2
= gen_reg_rtx (V4DImode
);
40825 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40826 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40827 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40829 /* Now an vpunpck[lh]qdq will produce the result required. */
40831 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40833 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40840 struct expand_vec_perm_d d_copy
= *d
;
40841 d_copy
.vmode
= V8SFmode
;
40842 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40843 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40844 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40845 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40848 t1
= gen_reg_rtx (V8SImode
);
40849 t2
= gen_reg_rtx (V8SImode
);
40851 /* Shuffle the lanes around into
40852 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40853 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40854 gen_lowpart (V4DImode
, d
->op0
),
40855 gen_lowpart (V4DImode
, d
->op1
),
40857 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40858 gen_lowpart (V4DImode
, d
->op0
),
40859 gen_lowpart (V4DImode
, d
->op1
),
40862 /* Swap the 2nd and 3rd position in each lane into
40863 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40864 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40865 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40866 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40867 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40869 /* Now an vpunpck[lh]qdq will produce
40870 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40872 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40873 gen_lowpart (V4DImode
, t1
),
40874 gen_lowpart (V4DImode
, t2
));
40876 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40877 gen_lowpart (V4DImode
, t1
),
40878 gen_lowpart (V4DImode
, t2
));
40883 gcc_unreachable ();
40889 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40890 extract-even and extract-odd permutations. */
40893 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40895 unsigned i
, odd
, nelt
= d
->nelt
;
40898 if (odd
!= 0 && odd
!= 1)
40901 for (i
= 1; i
< nelt
; ++i
)
40902 if (d
->perm
[i
] != 2 * i
+ odd
)
40905 return expand_vec_perm_even_odd_1 (d
, odd
);
40908 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40909 permutations. We assume that expand_vec_perm_1 has already failed. */
40912 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40914 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40915 enum machine_mode vmode
= d
->vmode
;
40916 unsigned char perm2
[4];
40924 /* These are special-cased in sse.md so that we can optionally
40925 use the vbroadcast instruction. They expand to two insns
40926 if the input happens to be in a register. */
40927 gcc_unreachable ();
40933 /* These are always implementable using standard shuffle patterns. */
40934 gcc_unreachable ();
40938 /* These can be implemented via interleave. We save one insn by
40939 stopping once we have promoted to V4SImode and then use pshufd. */
40943 rtx (*gen
) (rtx
, rtx
, rtx
)
40944 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40945 : gen_vec_interleave_lowv8hi
;
40949 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40950 : gen_vec_interleave_highv8hi
;
40955 dest
= gen_reg_rtx (vmode
);
40956 emit_insn (gen (dest
, op0
, op0
));
40957 vmode
= get_mode_wider_vector (vmode
);
40958 op0
= gen_lowpart (vmode
, dest
);
40960 while (vmode
!= V4SImode
);
40962 memset (perm2
, elt
, 4);
40963 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40972 /* For AVX2 broadcasts of the first element vpbroadcast* or
40973 vpermq should be used by expand_vec_perm_1. */
40974 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40978 gcc_unreachable ();
40982 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40983 broadcast permutations. */
40986 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40988 unsigned i
, elt
, nelt
= d
->nelt
;
40990 if (!d
->one_operand_p
)
40994 for (i
= 1; i
< nelt
; ++i
)
40995 if (d
->perm
[i
] != elt
)
40998 return expand_vec_perm_broadcast_1 (d
);
41001 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
41002 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
41003 all the shorter instruction sequences. */
41006 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
41008 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
41009 unsigned int i
, nelt
, eltsz
;
41013 || d
->one_operand_p
41014 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41021 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41023 /* Generate 4 permutation masks. If the required element is within
41024 the same lane, it is shuffled in. If the required element from the
41025 other lane, force a zero by setting bit 7 in the permutation mask.
41026 In the other mask the mask has non-negative elements if element
41027 is requested from the other lane, but also moved to the other lane,
41028 so that the result of vpshufb can have the two V2TImode halves
41030 m128
= GEN_INT (-128);
41031 for (i
= 0; i
< 32; ++i
)
41033 rperm
[0][i
] = m128
;
41034 rperm
[1][i
] = m128
;
41035 rperm
[2][i
] = m128
;
41036 rperm
[3][i
] = m128
;
41042 for (i
= 0; i
< nelt
; ++i
)
41044 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41045 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41046 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
41048 for (j
= 0; j
< eltsz
; ++j
)
41049 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
41050 used
[which
] = true;
41053 for (i
= 0; i
< 2; ++i
)
41055 if (!used
[2 * i
+ 1])
41060 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
41061 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
41062 vperm
= force_reg (V32QImode
, vperm
);
41063 h
[i
] = gen_reg_rtx (V32QImode
);
41064 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41065 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
41068 /* Swap the 128-byte lanes of h[X]. */
41069 for (i
= 0; i
< 2; ++i
)
41071 if (h
[i
] == NULL_RTX
)
41073 op
= gen_reg_rtx (V4DImode
);
41074 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
41075 const2_rtx
, GEN_INT (3), const0_rtx
,
41077 h
[i
] = gen_lowpart (V32QImode
, op
);
41080 for (i
= 0; i
< 2; ++i
)
41087 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
41088 vperm
= force_reg (V32QImode
, vperm
);
41089 l
[i
] = gen_reg_rtx (V32QImode
);
41090 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41091 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
41094 for (i
= 0; i
< 2; ++i
)
41098 op
= gen_reg_rtx (V32QImode
);
41099 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
41106 gcc_assert (l
[0] && l
[1]);
41107 op
= gen_lowpart (V32QImode
, d
->target
);
41108 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
41112 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
41113 With all of the interface bits taken care of, perform the expansion
41114 in D and return true on success. */
41117 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
41119 /* Try a single instruction expansion. */
41120 if (expand_vec_perm_1 (d
))
41123 /* Try sequences of two instructions. */
41125 if (expand_vec_perm_pshuflw_pshufhw (d
))
41128 if (expand_vec_perm_palignr (d
))
41131 if (expand_vec_perm_interleave2 (d
))
41134 if (expand_vec_perm_broadcast (d
))
41137 if (expand_vec_perm_vpermq_perm_1 (d
))
41140 if (expand_vec_perm_vperm2f128 (d
))
41143 /* Try sequences of three instructions. */
41145 if (expand_vec_perm_2vperm2f128_vshuf (d
))
41148 if (expand_vec_perm_pshufb2 (d
))
41151 if (expand_vec_perm_interleave3 (d
))
41154 if (expand_vec_perm_vperm2f128_vblend (d
))
41157 /* Try sequences of four instructions. */
41159 if (expand_vec_perm_vpshufb2_vpermq (d
))
41162 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
41165 /* ??? Look for narrow permutations whose element orderings would
41166 allow the promotion to a wider mode. */
41168 /* ??? Look for sequences of interleave or a wider permute that place
41169 the data into the correct lanes for a half-vector shuffle like
41170 pshuf[lh]w or vpermilps. */
41172 /* ??? Look for sequences of interleave that produce the desired results.
41173 The combinatorics of punpck[lh] get pretty ugly... */
41175 if (expand_vec_perm_even_odd (d
))
41178 /* Even longer sequences. */
41179 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
41185 /* If a permutation only uses one operand, make it clear. Returns true
41186 if the permutation references both operands. */
41189 canonicalize_perm (struct expand_vec_perm_d
*d
)
41191 int i
, which
, nelt
= d
->nelt
;
41193 for (i
= which
= 0; i
< nelt
; ++i
)
41194 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
41196 d
->one_operand_p
= true;
41203 if (!rtx_equal_p (d
->op0
, d
->op1
))
41205 d
->one_operand_p
= false;
41208 /* The elements of PERM do not suggest that only the first operand
41209 is used, but both operands are identical. Allow easier matching
41210 of the permutation by folding the permutation into the single
41215 for (i
= 0; i
< nelt
; ++i
)
41216 d
->perm
[i
] &= nelt
- 1;
41225 return (which
== 3);
41229 ix86_expand_vec_perm_const (rtx operands
[4])
41231 struct expand_vec_perm_d d
;
41232 unsigned char perm
[MAX_VECT_LEN
];
41237 d
.target
= operands
[0];
41238 d
.op0
= operands
[1];
41239 d
.op1
= operands
[2];
41242 d
.vmode
= GET_MODE (d
.target
);
41243 gcc_assert (VECTOR_MODE_P (d
.vmode
));
41244 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41245 d
.testing_p
= false;
41247 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
41248 gcc_assert (XVECLEN (sel
, 0) == nelt
);
41249 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
41251 for (i
= 0; i
< nelt
; ++i
)
41253 rtx e
= XVECEXP (sel
, 0, i
);
41254 int ei
= INTVAL (e
) & (2 * nelt
- 1);
41259 two_args
= canonicalize_perm (&d
);
41261 if (ix86_expand_vec_perm_const_1 (&d
))
41264 /* If the selector says both arguments are needed, but the operands are the
41265 same, the above tried to expand with one_operand_p and flattened selector.
41266 If that didn't work, retry without one_operand_p; we succeeded with that
41268 if (two_args
&& d
.one_operand_p
)
41270 d
.one_operand_p
= false;
41271 memcpy (d
.perm
, perm
, sizeof (perm
));
41272 return ix86_expand_vec_perm_const_1 (&d
);
41278 /* Implement targetm.vectorize.vec_perm_const_ok. */
41281 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
41282 const unsigned char *sel
)
41284 struct expand_vec_perm_d d
;
41285 unsigned int i
, nelt
, which
;
41289 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41290 d
.testing_p
= true;
41292 /* Given sufficient ISA support we can just return true here
41293 for selected vector modes. */
41294 if (GET_MODE_SIZE (d
.vmode
) == 16)
41296 /* All implementable with a single vpperm insn. */
41299 /* All implementable with 2 pshufb + 1 ior. */
41302 /* All implementable with shufpd or unpck[lh]pd. */
41307 /* Extract the values from the vector CST into the permutation
41309 memcpy (d
.perm
, sel
, nelt
);
41310 for (i
= which
= 0; i
< nelt
; ++i
)
41312 unsigned char e
= d
.perm
[i
];
41313 gcc_assert (e
< 2 * nelt
);
41314 which
|= (e
< nelt
? 1 : 2);
41317 /* For all elements from second vector, fold the elements to first. */
41319 for (i
= 0; i
< nelt
; ++i
)
41322 /* Check whether the mask can be applied to the vector type. */
41323 d
.one_operand_p
= (which
!= 3);
41325 /* Implementable with shufps or pshufd. */
41326 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
41329 /* Otherwise we have to go through the motions and see if we can
41330 figure out how to generate the requested permutation. */
41331 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
41332 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
41333 if (!d
.one_operand_p
)
41334 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
41337 ret
= ix86_expand_vec_perm_const_1 (&d
);
41344 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
41346 struct expand_vec_perm_d d
;
41352 d
.vmode
= GET_MODE (targ
);
41353 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41354 d
.one_operand_p
= false;
41355 d
.testing_p
= false;
41357 for (i
= 0; i
< nelt
; ++i
)
41358 d
.perm
[i
] = i
* 2 + odd
;
41360 /* We'll either be able to implement the permutation directly... */
41361 if (expand_vec_perm_1 (&d
))
41364 /* ... or we use the special-case patterns. */
41365 expand_vec_perm_even_odd_1 (&d
, odd
);
41369 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
41371 struct expand_vec_perm_d d
;
41372 unsigned i
, nelt
, base
;
41378 d
.vmode
= GET_MODE (targ
);
41379 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
41380 d
.one_operand_p
= false;
41381 d
.testing_p
= false;
41383 base
= high_p
? nelt
/ 2 : 0;
41384 for (i
= 0; i
< nelt
/ 2; ++i
)
41386 d
.perm
[i
* 2] = i
+ base
;
41387 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
41390 /* Note that for AVX this isn't one instruction. */
41391 ok
= ix86_expand_vec_perm_const_1 (&d
);
41396 /* Expand a vector operation CODE for a V*QImode in terms of the
41397 same operation on V*HImode. */
41400 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
41402 enum machine_mode qimode
= GET_MODE (dest
);
41403 enum machine_mode himode
;
41404 rtx (*gen_il
) (rtx
, rtx
, rtx
);
41405 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
41406 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
41407 struct expand_vec_perm_d d
;
41408 bool ok
, full_interleave
;
41409 bool uns_p
= false;
41416 gen_il
= gen_vec_interleave_lowv16qi
;
41417 gen_ih
= gen_vec_interleave_highv16qi
;
41420 himode
= V16HImode
;
41421 gen_il
= gen_avx2_interleave_lowv32qi
;
41422 gen_ih
= gen_avx2_interleave_highv32qi
;
41425 gcc_unreachable ();
41428 op2_l
= op2_h
= op2
;
41432 /* Unpack data such that we've got a source byte in each low byte of
41433 each word. We don't care what goes into the high byte of each word.
41434 Rather than trying to get zero in there, most convenient is to let
41435 it be a copy of the low byte. */
41436 op2_l
= gen_reg_rtx (qimode
);
41437 op2_h
= gen_reg_rtx (qimode
);
41438 emit_insn (gen_il (op2_l
, op2
, op2
));
41439 emit_insn (gen_ih (op2_h
, op2
, op2
));
41442 op1_l
= gen_reg_rtx (qimode
);
41443 op1_h
= gen_reg_rtx (qimode
);
41444 emit_insn (gen_il (op1_l
, op1
, op1
));
41445 emit_insn (gen_ih (op1_h
, op1
, op1
));
41446 full_interleave
= qimode
== V16QImode
;
41454 op1_l
= gen_reg_rtx (himode
);
41455 op1_h
= gen_reg_rtx (himode
);
41456 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
41457 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
41458 full_interleave
= true;
41461 gcc_unreachable ();
41464 /* Perform the operation. */
41465 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
41467 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
41469 gcc_assert (res_l
&& res_h
);
41471 /* Merge the data back into the right place. */
41473 d
.op0
= gen_lowpart (qimode
, res_l
);
41474 d
.op1
= gen_lowpart (qimode
, res_h
);
41476 d
.nelt
= GET_MODE_NUNITS (qimode
);
41477 d
.one_operand_p
= false;
41478 d
.testing_p
= false;
41480 if (full_interleave
)
41482 /* For SSE2, we used an full interleave, so the desired
41483 results are in the even elements. */
41484 for (i
= 0; i
< 32; ++i
)
41489 /* For AVX, the interleave used above was not cross-lane. So the
41490 extraction is evens but with the second and third quarter swapped.
41491 Happily, that is even one insn shorter than even extraction. */
41492 for (i
= 0; i
< 32; ++i
)
41493 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
41496 ok
= ix86_expand_vec_perm_const_1 (&d
);
41499 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41500 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
41503 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
41504 if op is CONST_VECTOR with all odd elements equal to their
41505 preceding element. */
41508 const_vector_equal_evenodd_p (rtx op
)
41510 enum machine_mode mode
= GET_MODE (op
);
41511 int i
, nunits
= GET_MODE_NUNITS (mode
);
41512 if (GET_CODE (op
) != CONST_VECTOR
41513 || nunits
!= CONST_VECTOR_NUNITS (op
))
41515 for (i
= 0; i
< nunits
; i
+= 2)
41516 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
41522 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
41523 bool uns_p
, bool odd_p
)
41525 enum machine_mode mode
= GET_MODE (op1
);
41526 enum machine_mode wmode
= GET_MODE (dest
);
41528 rtx orig_op1
= op1
, orig_op2
= op2
;
41530 if (!nonimmediate_operand (op1
, mode
))
41531 op1
= force_reg (mode
, op1
);
41532 if (!nonimmediate_operand (op2
, mode
))
41533 op2
= force_reg (mode
, op2
);
41535 /* We only play even/odd games with vectors of SImode. */
41536 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
41538 /* If we're looking for the odd results, shift those members down to
41539 the even slots. For some cpus this is faster than a PSHUFD. */
41542 /* For XOP use vpmacsdqh, but only for smult, as it is only
41544 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
41546 x
= force_reg (wmode
, CONST0_RTX (wmode
));
41547 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
41551 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
41552 if (!const_vector_equal_evenodd_p (orig_op1
))
41553 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
41554 x
, NULL
, 1, OPTAB_DIRECT
);
41555 if (!const_vector_equal_evenodd_p (orig_op2
))
41556 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
41557 x
, NULL
, 1, OPTAB_DIRECT
);
41558 op1
= gen_lowpart (mode
, op1
);
41559 op2
= gen_lowpart (mode
, op2
);
41562 if (mode
== V8SImode
)
41565 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
41567 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
41570 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
41571 else if (TARGET_SSE4_1
)
41572 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
41575 rtx s1
, s2
, t0
, t1
, t2
;
41577 /* The easiest way to implement this without PMULDQ is to go through
41578 the motions as if we are performing a full 64-bit multiply. With
41579 the exception that we need to do less shuffling of the elements. */
41581 /* Compute the sign-extension, aka highparts, of the two operands. */
41582 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41583 op1
, pc_rtx
, pc_rtx
);
41584 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
41585 op2
, pc_rtx
, pc_rtx
);
41587 /* Multiply LO(A) * HI(B), and vice-versa. */
41588 t1
= gen_reg_rtx (wmode
);
41589 t2
= gen_reg_rtx (wmode
);
41590 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
41591 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
41593 /* Multiply LO(A) * LO(B). */
41594 t0
= gen_reg_rtx (wmode
);
41595 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
41597 /* Combine and shift the highparts into place. */
41598 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
41599 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
41602 /* Combine high and low parts. */
41603 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
41610 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
41611 bool uns_p
, bool high_p
)
41613 enum machine_mode wmode
= GET_MODE (dest
);
41614 enum machine_mode mode
= GET_MODE (op1
);
41615 rtx t1
, t2
, t3
, t4
, mask
;
41620 t1
= gen_reg_rtx (mode
);
41621 t2
= gen_reg_rtx (mode
);
41622 if (TARGET_XOP
&& !uns_p
)
41624 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
41625 shuffle the elements once so that all elements are in the right
41626 place for immediate use: { A C B D }. */
41627 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
41628 const1_rtx
, GEN_INT (3)));
41629 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
41630 const1_rtx
, GEN_INT (3)));
41634 /* Put the elements into place for the multiply. */
41635 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
41636 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
41639 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
41643 /* Shuffle the elements between the lanes. After this we
41644 have { A B E F | C D G H } for each operand. */
41645 t1
= gen_reg_rtx (V4DImode
);
41646 t2
= gen_reg_rtx (V4DImode
);
41647 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
41648 const0_rtx
, const2_rtx
,
41649 const1_rtx
, GEN_INT (3)));
41650 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
41651 const0_rtx
, const2_rtx
,
41652 const1_rtx
, GEN_INT (3)));
41654 /* Shuffle the elements within the lanes. After this we
41655 have { A A B B | C C D D } or { E E F F | G G H H }. */
41656 t3
= gen_reg_rtx (V8SImode
);
41657 t4
= gen_reg_rtx (V8SImode
);
41658 mask
= GEN_INT (high_p
41659 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
41660 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
41661 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
41662 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
41664 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
41669 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
41670 uns_p
, OPTAB_DIRECT
);
41671 t2
= expand_binop (mode
,
41672 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
41673 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
41674 gcc_assert (t1
&& t2
);
41676 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
41681 t1
= gen_reg_rtx (wmode
);
41682 t2
= gen_reg_rtx (wmode
);
41683 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
41684 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
41686 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
41690 gcc_unreachable ();
41695 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
41699 res_1
= gen_reg_rtx (V4SImode
);
41700 res_2
= gen_reg_rtx (V4SImode
);
41701 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
41702 op1
, op2
, true, false);
41703 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
41704 op1
, op2
, true, true);
41706 /* Move the results in element 2 down to element 1; we don't care
41707 what goes in elements 2 and 3. Then we can merge the parts
41708 back together with an interleave.
41710 Note that two other sequences were tried:
41711 (1) Use interleaves at the start instead of psrldq, which allows
41712 us to use a single shufps to merge things back at the end.
41713 (2) Use shufps here to combine the two vectors, then pshufd to
41714 put the elements in the correct order.
41715 In both cases the cost of the reformatting stall was too high
41716 and the overall sequence slower. */
41718 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
41719 const0_rtx
, const0_rtx
));
41720 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
41721 const0_rtx
, const0_rtx
));
41722 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
41724 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
41728 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
41730 enum machine_mode mode
= GET_MODE (op0
);
41731 rtx t1
, t2
, t3
, t4
, t5
, t6
;
41733 if (TARGET_XOP
&& mode
== V2DImode
)
41735 /* op1: A,B,C,D, op2: E,F,G,H */
41736 op1
= gen_lowpart (V4SImode
, op1
);
41737 op2
= gen_lowpart (V4SImode
, op2
);
41739 t1
= gen_reg_rtx (V4SImode
);
41740 t2
= gen_reg_rtx (V4SImode
);
41741 t3
= gen_reg_rtx (V2DImode
);
41742 t4
= gen_reg_rtx (V2DImode
);
41745 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
41751 /* t2: (B*E),(A*F),(D*G),(C*H) */
41752 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
41754 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
41755 emit_insn (gen_xop_phadddq (t3
, t2
));
41757 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
41758 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
41760 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
41761 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
41765 enum machine_mode nmode
;
41766 rtx (*umul
) (rtx
, rtx
, rtx
);
41768 if (mode
== V2DImode
)
41770 umul
= gen_vec_widen_umult_even_v4si
;
41773 else if (mode
== V4DImode
)
41775 umul
= gen_vec_widen_umult_even_v8si
;
41779 gcc_unreachable ();
41782 /* Multiply low parts. */
41783 t1
= gen_reg_rtx (mode
);
41784 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
41786 /* Shift input vectors right 32 bits so we can multiply high parts. */
41788 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
41789 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
41791 /* Multiply high parts by low parts. */
41792 t4
= gen_reg_rtx (mode
);
41793 t5
= gen_reg_rtx (mode
);
41794 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
41795 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
41797 /* Combine and shift the highparts back. */
41798 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
41799 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
41801 /* Combine high and low parts. */
41802 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
41805 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
41806 gen_rtx_MULT (mode
, op1
, op2
));
41809 /* Expand an insert into a vector register through pinsr insn.
41810 Return true if successful. */
41813 ix86_expand_pinsr (rtx
*operands
)
41815 rtx dst
= operands
[0];
41816 rtx src
= operands
[3];
41818 unsigned int size
= INTVAL (operands
[1]);
41819 unsigned int pos
= INTVAL (operands
[2]);
41821 if (GET_CODE (dst
) == SUBREG
)
41823 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
41824 dst
= SUBREG_REG (dst
);
41827 if (GET_CODE (src
) == SUBREG
)
41828 src
= SUBREG_REG (src
);
41830 switch (GET_MODE (dst
))
41837 enum machine_mode srcmode
, dstmode
;
41838 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41840 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41845 if (!TARGET_SSE4_1
)
41847 dstmode
= V16QImode
;
41848 pinsr
= gen_sse4_1_pinsrb
;
41854 dstmode
= V8HImode
;
41855 pinsr
= gen_sse2_pinsrw
;
41859 if (!TARGET_SSE4_1
)
41861 dstmode
= V4SImode
;
41862 pinsr
= gen_sse4_1_pinsrd
;
41866 gcc_assert (TARGET_64BIT
);
41867 if (!TARGET_SSE4_1
)
41869 dstmode
= V2DImode
;
41870 pinsr
= gen_sse4_1_pinsrq
;
41877 dst
= gen_lowpart (dstmode
, dst
);
41878 src
= gen_lowpart (srcmode
, src
);
41882 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41891 /* This function returns the calling abi specific va_list type node.
41892 It returns the FNDECL specific va_list type. */
41895 ix86_fn_abi_va_list (tree fndecl
)
41898 return va_list_type_node
;
41899 gcc_assert (fndecl
!= NULL_TREE
);
41901 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41902 return ms_va_list_type_node
;
41904 return sysv_va_list_type_node
;
41907 /* Returns the canonical va_list type specified by TYPE. If there
41908 is no valid TYPE provided, it return NULL_TREE. */
41911 ix86_canonical_va_list_type (tree type
)
41915 /* Resolve references and pointers to va_list type. */
41916 if (TREE_CODE (type
) == MEM_REF
)
41917 type
= TREE_TYPE (type
);
41918 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41919 type
= TREE_TYPE (type
);
41920 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41921 type
= TREE_TYPE (type
);
41923 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41925 wtype
= va_list_type_node
;
41926 gcc_assert (wtype
!= NULL_TREE
);
41928 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41930 /* If va_list is an array type, the argument may have decayed
41931 to a pointer type, e.g. by being passed to another function.
41932 In that case, unwrap both types so that we can compare the
41933 underlying records. */
41934 if (TREE_CODE (htype
) == ARRAY_TYPE
41935 || POINTER_TYPE_P (htype
))
41937 wtype
= TREE_TYPE (wtype
);
41938 htype
= TREE_TYPE (htype
);
41941 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41942 return va_list_type_node
;
41943 wtype
= sysv_va_list_type_node
;
41944 gcc_assert (wtype
!= NULL_TREE
);
41946 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41948 /* If va_list is an array type, the argument may have decayed
41949 to a pointer type, e.g. by being passed to another function.
41950 In that case, unwrap both types so that we can compare the
41951 underlying records. */
41952 if (TREE_CODE (htype
) == ARRAY_TYPE
41953 || POINTER_TYPE_P (htype
))
41955 wtype
= TREE_TYPE (wtype
);
41956 htype
= TREE_TYPE (htype
);
41959 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41960 return sysv_va_list_type_node
;
41961 wtype
= ms_va_list_type_node
;
41962 gcc_assert (wtype
!= NULL_TREE
);
41964 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41966 /* If va_list is an array type, the argument may have decayed
41967 to a pointer type, e.g. by being passed to another function.
41968 In that case, unwrap both types so that we can compare the
41969 underlying records. */
41970 if (TREE_CODE (htype
) == ARRAY_TYPE
41971 || POINTER_TYPE_P (htype
))
41973 wtype
= TREE_TYPE (wtype
);
41974 htype
= TREE_TYPE (htype
);
41977 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41978 return ms_va_list_type_node
;
41981 return std_canonical_va_list_type (type
);
41984 /* Iterate through the target-specific builtin types for va_list.
41985 IDX denotes the iterator, *PTREE is set to the result type of
41986 the va_list builtin, and *PNAME to its internal type.
41987 Returns zero if there is no element for this index, otherwise
41988 IDX should be increased upon the next call.
41989 Note, do not iterate a base builtin's name like __builtin_va_list.
41990 Used from c_common_nodes_and_builtins. */
41993 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
42003 *ptree
= ms_va_list_type_node
;
42004 *pname
= "__builtin_ms_va_list";
42008 *ptree
= sysv_va_list_type_node
;
42009 *pname
= "__builtin_sysv_va_list";
42017 #undef TARGET_SCHED_DISPATCH
42018 #define TARGET_SCHED_DISPATCH has_dispatch
42019 #undef TARGET_SCHED_DISPATCH_DO
42020 #define TARGET_SCHED_DISPATCH_DO do_dispatch
42021 #undef TARGET_SCHED_REASSOCIATION_WIDTH
42022 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
42023 #undef TARGET_SCHED_REORDER
42024 #define TARGET_SCHED_REORDER ix86_sched_reorder
42025 #undef TARGET_SCHED_ADJUST_PRIORITY
42026 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
42027 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
42028 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
42029 ix86_dependencies_evaluation_hook
42031 /* The size of the dispatch window is the total number of bytes of
42032 object code allowed in a window. */
42033 #define DISPATCH_WINDOW_SIZE 16
42035 /* Number of dispatch windows considered for scheduling. */
42036 #define MAX_DISPATCH_WINDOWS 3
42038 /* Maximum number of instructions in a window. */
42041 /* Maximum number of immediate operands in a window. */
42044 /* Maximum number of immediate bits allowed in a window. */
42045 #define MAX_IMM_SIZE 128
42047 /* Maximum number of 32 bit immediates allowed in a window. */
42048 #define MAX_IMM_32 4
42050 /* Maximum number of 64 bit immediates allowed in a window. */
42051 #define MAX_IMM_64 2
42053 /* Maximum total of loads or prefetches allowed in a window. */
42056 /* Maximum total of stores allowed in a window. */
42057 #define MAX_STORE 1
42063 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
42064 enum dispatch_group
{
42079 /* Number of allowable groups in a dispatch window. It is an array
42080 indexed by dispatch_group enum. 100 is used as a big number,
42081 because the number of these kind of operations does not have any
42082 effect in dispatch window, but we need them for other reasons in
42084 static unsigned int num_allowable_groups
[disp_last
] = {
42085 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
42088 char group_name
[disp_last
+ 1][16] = {
42089 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
42090 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
42091 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
42094 /* Instruction path. */
42097 path_single
, /* Single micro op. */
42098 path_double
, /* Double micro op. */
42099 path_multi
, /* Instructions with more than 2 micro op.. */
42103 /* sched_insn_info defines a window to the instructions scheduled in
42104 the basic block. It contains a pointer to the insn_info table and
42105 the instruction scheduled.
42107 Windows are allocated for each basic block and are linked
42109 typedef struct sched_insn_info_s
{
42111 enum dispatch_group group
;
42112 enum insn_path path
;
42117 /* Linked list of dispatch windows. This is a two way list of
42118 dispatch windows of a basic block. It contains information about
42119 the number of uops in the window and the total number of
42120 instructions and of bytes in the object code for this dispatch
42122 typedef struct dispatch_windows_s
{
42123 int num_insn
; /* Number of insn in the window. */
42124 int num_uops
; /* Number of uops in the window. */
42125 int window_size
; /* Number of bytes in the window. */
42126 int window_num
; /* Window number between 0 or 1. */
42127 int num_imm
; /* Number of immediates in an insn. */
42128 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
42129 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
42130 int imm_size
; /* Total immediates in the window. */
42131 int num_loads
; /* Total memory loads in the window. */
42132 int num_stores
; /* Total memory stores in the window. */
42133 int violation
; /* Violation exists in window. */
42134 sched_insn_info
*window
; /* Pointer to the window. */
42135 struct dispatch_windows_s
*next
;
42136 struct dispatch_windows_s
*prev
;
42137 } dispatch_windows
;
42139 /* Immediate valuse used in an insn. */
42140 typedef struct imm_info_s
42147 static dispatch_windows
*dispatch_window_list
;
42148 static dispatch_windows
*dispatch_window_list1
;
42150 /* Get dispatch group of insn. */
42152 static enum dispatch_group
42153 get_mem_group (rtx insn
)
42155 enum attr_memory memory
;
42157 if (INSN_CODE (insn
) < 0)
42158 return disp_no_group
;
42159 memory
= get_attr_memory (insn
);
42160 if (memory
== MEMORY_STORE
)
42163 if (memory
== MEMORY_LOAD
)
42166 if (memory
== MEMORY_BOTH
)
42167 return disp_load_store
;
42169 return disp_no_group
;
42172 /* Return true if insn is a compare instruction. */
42177 enum attr_type type
;
42179 type
= get_attr_type (insn
);
42180 return (type
== TYPE_TEST
42181 || type
== TYPE_ICMP
42182 || type
== TYPE_FCMP
42183 || GET_CODE (PATTERN (insn
)) == COMPARE
);
42186 /* Return true if a dispatch violation encountered. */
42189 dispatch_violation (void)
42191 if (dispatch_window_list
->next
)
42192 return dispatch_window_list
->next
->violation
;
42193 return dispatch_window_list
->violation
;
42196 /* Return true if insn is a branch instruction. */
42199 is_branch (rtx insn
)
42201 return (CALL_P (insn
) || JUMP_P (insn
));
42204 /* Return true if insn is a prefetch instruction. */
42207 is_prefetch (rtx insn
)
42209 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
42212 /* This function initializes a dispatch window and the list container holding a
42213 pointer to the window. */
42216 init_window (int window_num
)
42219 dispatch_windows
*new_list
;
42221 if (window_num
== 0)
42222 new_list
= dispatch_window_list
;
42224 new_list
= dispatch_window_list1
;
42226 new_list
->num_insn
= 0;
42227 new_list
->num_uops
= 0;
42228 new_list
->window_size
= 0;
42229 new_list
->next
= NULL
;
42230 new_list
->prev
= NULL
;
42231 new_list
->window_num
= window_num
;
42232 new_list
->num_imm
= 0;
42233 new_list
->num_imm_32
= 0;
42234 new_list
->num_imm_64
= 0;
42235 new_list
->imm_size
= 0;
42236 new_list
->num_loads
= 0;
42237 new_list
->num_stores
= 0;
42238 new_list
->violation
= false;
42240 for (i
= 0; i
< MAX_INSN
; i
++)
42242 new_list
->window
[i
].insn
= NULL
;
42243 new_list
->window
[i
].group
= disp_no_group
;
42244 new_list
->window
[i
].path
= no_path
;
42245 new_list
->window
[i
].byte_len
= 0;
42246 new_list
->window
[i
].imm_bytes
= 0;
42251 /* This function allocates and initializes a dispatch window and the
42252 list container holding a pointer to the window. */
42254 static dispatch_windows
*
42255 allocate_window (void)
42257 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
42258 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
42263 /* This routine initializes the dispatch scheduling information. It
42264 initiates building dispatch scheduler tables and constructs the
42265 first dispatch window. */
42268 init_dispatch_sched (void)
42270 /* Allocate a dispatch list and a window. */
42271 dispatch_window_list
= allocate_window ();
42272 dispatch_window_list1
= allocate_window ();
42277 /* This function returns true if a branch is detected. End of a basic block
42278 does not have to be a branch, but here we assume only branches end a
42282 is_end_basic_block (enum dispatch_group group
)
42284 return group
== disp_branch
;
42287 /* This function is called when the end of a window processing is reached. */
42290 process_end_window (void)
42292 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
42293 if (dispatch_window_list
->next
)
42295 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
42296 gcc_assert (dispatch_window_list
->window_size
42297 + dispatch_window_list1
->window_size
<= 48);
42303 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
42304 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
42305 for 48 bytes of instructions. Note that these windows are not dispatch
42306 windows that their sizes are DISPATCH_WINDOW_SIZE. */
42308 static dispatch_windows
*
42309 allocate_next_window (int window_num
)
42311 if (window_num
== 0)
42313 if (dispatch_window_list
->next
)
42316 return dispatch_window_list
;
42319 dispatch_window_list
->next
= dispatch_window_list1
;
42320 dispatch_window_list1
->prev
= dispatch_window_list
;
42322 return dispatch_window_list1
;
42325 /* Increment the number of immediate operands of an instruction. */
42328 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
42333 switch ( GET_CODE (*in_rtx
))
42338 (imm_values
->imm
)++;
42339 if (x86_64_immediate_operand (*in_rtx
, SImode
))
42340 (imm_values
->imm32
)++;
42342 (imm_values
->imm64
)++;
42346 (imm_values
->imm
)++;
42347 (imm_values
->imm64
)++;
42351 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
42353 (imm_values
->imm
)++;
42354 (imm_values
->imm32
)++;
42365 /* Compute number of immediate operands of an instruction. */
42368 find_constant (rtx in_rtx
, imm_info
*imm_values
)
42370 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
42371 (rtx_function
) find_constant_1
, (void *) imm_values
);
42374 /* Return total size of immediate operands of an instruction along with number
42375 of corresponding immediate-operands. It initializes its parameters to zero
42376 befor calling FIND_CONSTANT.
42377 INSN is the input instruction. IMM is the total of immediates.
42378 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
42382 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
42384 imm_info imm_values
= {0, 0, 0};
42386 find_constant (insn
, &imm_values
);
42387 *imm
= imm_values
.imm
;
42388 *imm32
= imm_values
.imm32
;
42389 *imm64
= imm_values
.imm64
;
42390 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
42393 /* This function indicates if an operand of an instruction is an
42397 has_immediate (rtx insn
)
42399 int num_imm_operand
;
42400 int num_imm32_operand
;
42401 int num_imm64_operand
;
42404 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42405 &num_imm64_operand
);
42409 /* Return single or double path for instructions. */
42411 static enum insn_path
42412 get_insn_path (rtx insn
)
42414 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
42416 if ((int)path
== 0)
42417 return path_single
;
42419 if ((int)path
== 1)
42420 return path_double
;
42425 /* Return insn dispatch group. */
42427 static enum dispatch_group
42428 get_insn_group (rtx insn
)
42430 enum dispatch_group group
= get_mem_group (insn
);
42434 if (is_branch (insn
))
42435 return disp_branch
;
42440 if (has_immediate (insn
))
42443 if (is_prefetch (insn
))
42444 return disp_prefetch
;
42446 return disp_no_group
;
42449 /* Count number of GROUP restricted instructions in a dispatch
42450 window WINDOW_LIST. */
42453 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
42455 enum dispatch_group group
= get_insn_group (insn
);
42457 int num_imm_operand
;
42458 int num_imm32_operand
;
42459 int num_imm64_operand
;
42461 if (group
== disp_no_group
)
42464 if (group
== disp_imm
)
42466 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42467 &num_imm64_operand
);
42468 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
42469 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
42470 || (num_imm32_operand
> 0
42471 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
42472 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
42473 || (num_imm64_operand
> 0
42474 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
42475 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
42476 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
42477 && num_imm64_operand
> 0
42478 && ((window_list
->num_imm_64
> 0
42479 && window_list
->num_insn
>= 2)
42480 || window_list
->num_insn
>= 3)))
42486 if ((group
== disp_load_store
42487 && (window_list
->num_loads
>= MAX_LOAD
42488 || window_list
->num_stores
>= MAX_STORE
))
42489 || ((group
== disp_load
42490 || group
== disp_prefetch
)
42491 && window_list
->num_loads
>= MAX_LOAD
)
42492 || (group
== disp_store
42493 && window_list
->num_stores
>= MAX_STORE
))
42499 /* This function returns true if insn satisfies dispatch rules on the
42500 last window scheduled. */
42503 fits_dispatch_window (rtx insn
)
42505 dispatch_windows
*window_list
= dispatch_window_list
;
42506 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
42507 unsigned int num_restrict
;
42508 enum dispatch_group group
= get_insn_group (insn
);
42509 enum insn_path path
= get_insn_path (insn
);
42512 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
42513 instructions should be given the lowest priority in the
42514 scheduling process in Haifa scheduler to make sure they will be
42515 scheduled in the same dispatch window as the reference to them. */
42516 if (group
== disp_jcc
|| group
== disp_cmp
)
42519 /* Check nonrestricted. */
42520 if (group
== disp_no_group
|| group
== disp_branch
)
42523 /* Get last dispatch window. */
42524 if (window_list_next
)
42525 window_list
= window_list_next
;
42527 if (window_list
->window_num
== 1)
42529 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
42532 || (min_insn_size (insn
) + sum
) >= 48)
42533 /* Window 1 is full. Go for next window. */
42537 num_restrict
= count_num_restricted (insn
, window_list
);
42539 if (num_restrict
> num_allowable_groups
[group
])
42542 /* See if it fits in the first window. */
42543 if (window_list
->window_num
== 0)
42545 /* The first widow should have only single and double path
42547 if (path
== path_double
42548 && (window_list
->num_uops
+ 2) > MAX_INSN
)
42550 else if (path
!= path_single
)
42556 /* Add an instruction INSN with NUM_UOPS micro-operations to the
42557 dispatch window WINDOW_LIST. */
42560 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
42562 int byte_len
= min_insn_size (insn
);
42563 int num_insn
= window_list
->num_insn
;
42565 sched_insn_info
*window
= window_list
->window
;
42566 enum dispatch_group group
= get_insn_group (insn
);
42567 enum insn_path path
= get_insn_path (insn
);
42568 int num_imm_operand
;
42569 int num_imm32_operand
;
42570 int num_imm64_operand
;
42572 if (!window_list
->violation
&& group
!= disp_cmp
42573 && !fits_dispatch_window (insn
))
42574 window_list
->violation
= true;
42576 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42577 &num_imm64_operand
);
42579 /* Initialize window with new instruction. */
42580 window
[num_insn
].insn
= insn
;
42581 window
[num_insn
].byte_len
= byte_len
;
42582 window
[num_insn
].group
= group
;
42583 window
[num_insn
].path
= path
;
42584 window
[num_insn
].imm_bytes
= imm_size
;
42586 window_list
->window_size
+= byte_len
;
42587 window_list
->num_insn
= num_insn
+ 1;
42588 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
42589 window_list
->imm_size
+= imm_size
;
42590 window_list
->num_imm
+= num_imm_operand
;
42591 window_list
->num_imm_32
+= num_imm32_operand
;
42592 window_list
->num_imm_64
+= num_imm64_operand
;
42594 if (group
== disp_store
)
42595 window_list
->num_stores
+= 1;
42596 else if (group
== disp_load
42597 || group
== disp_prefetch
)
42598 window_list
->num_loads
+= 1;
42599 else if (group
== disp_load_store
)
42601 window_list
->num_stores
+= 1;
42602 window_list
->num_loads
+= 1;
42606 /* Adds a scheduled instruction, INSN, to the current dispatch window.
42607 If the total bytes of instructions or the number of instructions in
42608 the window exceed allowable, it allocates a new window. */
42611 add_to_dispatch_window (rtx insn
)
42614 dispatch_windows
*window_list
;
42615 dispatch_windows
*next_list
;
42616 dispatch_windows
*window0_list
;
42617 enum insn_path path
;
42618 enum dispatch_group insn_group
;
42626 if (INSN_CODE (insn
) < 0)
42629 byte_len
= min_insn_size (insn
);
42630 window_list
= dispatch_window_list
;
42631 next_list
= window_list
->next
;
42632 path
= get_insn_path (insn
);
42633 insn_group
= get_insn_group (insn
);
42635 /* Get the last dispatch window. */
42637 window_list
= dispatch_window_list
->next
;
42639 if (path
== path_single
)
42641 else if (path
== path_double
)
42644 insn_num_uops
= (int) path
;
42646 /* If current window is full, get a new window.
42647 Window number zero is full, if MAX_INSN uops are scheduled in it.
42648 Window number one is full, if window zero's bytes plus window
42649 one's bytes is 32, or if the bytes of the new instruction added
42650 to the total makes it greater than 48, or it has already MAX_INSN
42651 instructions in it. */
42652 num_insn
= window_list
->num_insn
;
42653 num_uops
= window_list
->num_uops
;
42654 window_num
= window_list
->window_num
;
42655 insn_fits
= fits_dispatch_window (insn
);
42657 if (num_insn
>= MAX_INSN
42658 || num_uops
+ insn_num_uops
> MAX_INSN
42661 window_num
= ~window_num
& 1;
42662 window_list
= allocate_next_window (window_num
);
42665 if (window_num
== 0)
42667 add_insn_window (insn
, window_list
, insn_num_uops
);
42668 if (window_list
->num_insn
>= MAX_INSN
42669 && insn_group
== disp_branch
)
42671 process_end_window ();
42675 else if (window_num
== 1)
42677 window0_list
= window_list
->prev
;
42678 sum
= window0_list
->window_size
+ window_list
->window_size
;
42680 || (byte_len
+ sum
) >= 48)
42682 process_end_window ();
42683 window_list
= dispatch_window_list
;
42686 add_insn_window (insn
, window_list
, insn_num_uops
);
42689 gcc_unreachable ();
42691 if (is_end_basic_block (insn_group
))
42693 /* End of basic block is reached do end-basic-block process. */
42694 process_end_window ();
42699 /* Print the dispatch window, WINDOW_NUM, to FILE. */
42701 DEBUG_FUNCTION
static void
42702 debug_dispatch_window_file (FILE *file
, int window_num
)
42704 dispatch_windows
*list
;
42707 if (window_num
== 0)
42708 list
= dispatch_window_list
;
42710 list
= dispatch_window_list1
;
42712 fprintf (file
, "Window #%d:\n", list
->window_num
);
42713 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
42714 list
->num_insn
, list
->num_uops
, list
->window_size
);
42715 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42716 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
42718 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
42720 fprintf (file
, " insn info:\n");
42722 for (i
= 0; i
< MAX_INSN
; i
++)
42724 if (!list
->window
[i
].insn
)
42726 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
42727 i
, group_name
[list
->window
[i
].group
],
42728 i
, (void *)list
->window
[i
].insn
,
42729 i
, list
->window
[i
].path
,
42730 i
, list
->window
[i
].byte_len
,
42731 i
, list
->window
[i
].imm_bytes
);
42735 /* Print to stdout a dispatch window. */
42737 DEBUG_FUNCTION
void
42738 debug_dispatch_window (int window_num
)
42740 debug_dispatch_window_file (stdout
, window_num
);
42743 /* Print INSN dispatch information to FILE. */
42745 DEBUG_FUNCTION
static void
42746 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
42749 enum insn_path path
;
42750 enum dispatch_group group
;
42752 int num_imm_operand
;
42753 int num_imm32_operand
;
42754 int num_imm64_operand
;
42756 if (INSN_CODE (insn
) < 0)
42759 byte_len
= min_insn_size (insn
);
42760 path
= get_insn_path (insn
);
42761 group
= get_insn_group (insn
);
42762 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
42763 &num_imm64_operand
);
42765 fprintf (file
, " insn info:\n");
42766 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
42767 group_name
[group
], path
, byte_len
);
42768 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42769 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
42772 /* Print to STDERR the status of the ready list with respect to
42773 dispatch windows. */
42775 DEBUG_FUNCTION
void
42776 debug_ready_dispatch (void)
42779 int no_ready
= number_in_ready ();
42781 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
42783 for (i
= 0; i
< no_ready
; i
++)
42784 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
42787 /* This routine is the driver of the dispatch scheduler. */
42790 do_dispatch (rtx insn
, int mode
)
42792 if (mode
== DISPATCH_INIT
)
42793 init_dispatch_sched ();
42794 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
42795 add_to_dispatch_window (insn
);
42798 /* Return TRUE if Dispatch Scheduling is supported. */
42801 has_dispatch (rtx insn
, int action
)
42803 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
42804 && flag_dispatch_scheduler
)
42810 case IS_DISPATCH_ON
:
42815 return is_cmp (insn
);
42817 case DISPATCH_VIOLATION
:
42818 return dispatch_violation ();
42820 case FITS_DISPATCH_WINDOW
:
42821 return fits_dispatch_window (insn
);
42827 /* Implementation of reassociation_width target hook used by
42828 reassoc phase to identify parallelism level in reassociated
42829 tree. Statements tree_code is passed in OPC. Arguments type
42832 Currently parallel reassociation is enabled for Atom
42833 processors only and we set reassociation width to be 2
42834 because Atom may issue up to 2 instructions per cycle.
42836 Return value should be fixed if parallel reassociation is
42837 enabled for other processors. */
42840 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42841 enum machine_mode mode
)
42845 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42847 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42853 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42854 place emms and femms instructions. */
42856 static enum machine_mode
42857 ix86_preferred_simd_mode (enum machine_mode mode
)
42865 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42867 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42869 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42871 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
42874 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42880 if (!TARGET_VECTORIZE_DOUBLE
)
42882 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42884 else if (TARGET_SSE2
)
42893 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
42896 static unsigned int
42897 ix86_autovectorize_vector_sizes (void)
42899 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42904 /* Return class of registers which could be used for pseudo of MODE
42905 and of class RCLASS for spilling instead of memory. Return NO_REGS
42906 if it is not possible or non-profitable. */
42908 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42910 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42911 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
42912 && INTEGER_CLASS_P (rclass
))
42917 /* Implement targetm.vectorize.init_cost. */
42920 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42922 unsigned *cost
= XNEWVEC (unsigned, 3);
42923 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42927 /* Implement targetm.vectorize.add_stmt_cost. */
42930 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42931 struct _stmt_vec_info
*stmt_info
, int misalign
,
42932 enum vect_cost_model_location where
)
42934 unsigned *cost
= (unsigned *) data
;
42935 unsigned retval
= 0;
42937 if (flag_vect_cost_model
)
42939 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42940 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42942 /* Statements in an inner loop relative to the loop being
42943 vectorized are weighted more heavily. The value here is
42944 arbitrary and could potentially be improved with analysis. */
42945 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42946 count
*= 50; /* FIXME. */
42948 retval
= (unsigned) (count
* stmt_cost
);
42949 cost
[where
] += retval
;
42955 /* Implement targetm.vectorize.finish_cost. */
42958 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42959 unsigned *body_cost
, unsigned *epilogue_cost
)
42961 unsigned *cost
= (unsigned *) data
;
42962 *prologue_cost
= cost
[vect_prologue
];
42963 *body_cost
= cost
[vect_body
];
42964 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42975 /* Validate target specific memory model bits in VAL. */
42977 static unsigned HOST_WIDE_INT
42978 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42980 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42983 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42985 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42987 warning (OPT_Winvalid_memory_model
,
42988 "Unknown architecture specific memory model");
42989 return MEMMODEL_SEQ_CST
;
42991 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42992 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42994 warning (OPT_Winvalid_memory_model
,
42995 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42996 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42998 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
43000 warning (OPT_Winvalid_memory_model
,
43001 "HLE_RELEASE not used with RELEASE or stronger memory model");
43002 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
43007 /* Initialize the GCC target structure. */
43008 #undef TARGET_RETURN_IN_MEMORY
43009 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
43011 #undef TARGET_LEGITIMIZE_ADDRESS
43012 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
43014 #undef TARGET_ATTRIBUTE_TABLE
43015 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
43016 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
43017 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
43018 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
43019 # undef TARGET_MERGE_DECL_ATTRIBUTES
43020 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
43023 #undef TARGET_COMP_TYPE_ATTRIBUTES
43024 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
43026 #undef TARGET_INIT_BUILTINS
43027 #define TARGET_INIT_BUILTINS ix86_init_builtins
43028 #undef TARGET_BUILTIN_DECL
43029 #define TARGET_BUILTIN_DECL ix86_builtin_decl
43030 #undef TARGET_EXPAND_BUILTIN
43031 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
43033 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
43034 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
43035 ix86_builtin_vectorized_function
43037 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
43038 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
43040 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
43041 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
43043 #undef TARGET_VECTORIZE_BUILTIN_GATHER
43044 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
43046 #undef TARGET_BUILTIN_RECIPROCAL
43047 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
43049 #undef TARGET_ASM_FUNCTION_EPILOGUE
43050 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
43052 #undef TARGET_ENCODE_SECTION_INFO
43053 #ifndef SUBTARGET_ENCODE_SECTION_INFO
43054 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
43056 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
43059 #undef TARGET_ASM_OPEN_PAREN
43060 #define TARGET_ASM_OPEN_PAREN ""
43061 #undef TARGET_ASM_CLOSE_PAREN
43062 #define TARGET_ASM_CLOSE_PAREN ""
43064 #undef TARGET_ASM_BYTE_OP
43065 #define TARGET_ASM_BYTE_OP ASM_BYTE
43067 #undef TARGET_ASM_ALIGNED_HI_OP
43068 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
43069 #undef TARGET_ASM_ALIGNED_SI_OP
43070 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
43072 #undef TARGET_ASM_ALIGNED_DI_OP
43073 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
43076 #undef TARGET_PROFILE_BEFORE_PROLOGUE
43077 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
43079 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
43080 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
43082 #undef TARGET_ASM_UNALIGNED_HI_OP
43083 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
43084 #undef TARGET_ASM_UNALIGNED_SI_OP
43085 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
43086 #undef TARGET_ASM_UNALIGNED_DI_OP
43087 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
43089 #undef TARGET_PRINT_OPERAND
43090 #define TARGET_PRINT_OPERAND ix86_print_operand
43091 #undef TARGET_PRINT_OPERAND_ADDRESS
43092 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
43093 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
43094 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
43095 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
43096 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
43098 #undef TARGET_SCHED_INIT_GLOBAL
43099 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
43100 #undef TARGET_SCHED_ADJUST_COST
43101 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
43102 #undef TARGET_SCHED_ISSUE_RATE
43103 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
43104 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
43105 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
43106 ia32_multipass_dfa_lookahead
43108 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
43109 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
43111 #undef TARGET_MEMMODEL_CHECK
43112 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
43115 #undef TARGET_HAVE_TLS
43116 #define TARGET_HAVE_TLS true
43118 #undef TARGET_CANNOT_FORCE_CONST_MEM
43119 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
43120 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
43121 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
43123 #undef TARGET_DELEGITIMIZE_ADDRESS
43124 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
43126 #undef TARGET_MS_BITFIELD_LAYOUT_P
43127 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
43130 #undef TARGET_BINDS_LOCAL_P
43131 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
43133 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
43134 #undef TARGET_BINDS_LOCAL_P
43135 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
43138 #undef TARGET_ASM_OUTPUT_MI_THUNK
43139 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
43140 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
43141 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
43143 #undef TARGET_ASM_FILE_START
43144 #define TARGET_ASM_FILE_START x86_file_start
43146 #undef TARGET_OPTION_OVERRIDE
43147 #define TARGET_OPTION_OVERRIDE ix86_option_override
43149 #undef TARGET_REGISTER_MOVE_COST
43150 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
43151 #undef TARGET_MEMORY_MOVE_COST
43152 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
43153 #undef TARGET_RTX_COSTS
43154 #define TARGET_RTX_COSTS ix86_rtx_costs
43155 #undef TARGET_ADDRESS_COST
43156 #define TARGET_ADDRESS_COST ix86_address_cost
43158 #undef TARGET_FIXED_CONDITION_CODE_REGS
43159 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
43160 #undef TARGET_CC_MODES_COMPATIBLE
43161 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
43163 #undef TARGET_MACHINE_DEPENDENT_REORG
43164 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
43166 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
43167 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
43169 #undef TARGET_BUILD_BUILTIN_VA_LIST
43170 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
43172 #undef TARGET_FOLD_BUILTIN
43173 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
43175 #undef TARGET_COMPARE_VERSION_PRIORITY
43176 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
43178 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
43179 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
43180 ix86_generate_version_dispatcher_body
43182 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
43183 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
43184 ix86_get_function_versions_dispatcher
43186 #undef TARGET_ENUM_VA_LIST_P
43187 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
43189 #undef TARGET_FN_ABI_VA_LIST
43190 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
43192 #undef TARGET_CANONICAL_VA_LIST_TYPE
43193 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
43195 #undef TARGET_EXPAND_BUILTIN_VA_START
43196 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
43198 #undef TARGET_MD_ASM_CLOBBERS
43199 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
43201 #undef TARGET_PROMOTE_PROTOTYPES
43202 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
43203 #undef TARGET_STRUCT_VALUE_RTX
43204 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
43205 #undef TARGET_SETUP_INCOMING_VARARGS
43206 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
43207 #undef TARGET_MUST_PASS_IN_STACK
43208 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
43209 #undef TARGET_FUNCTION_ARG_ADVANCE
43210 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
43211 #undef TARGET_FUNCTION_ARG
43212 #define TARGET_FUNCTION_ARG ix86_function_arg
43213 #undef TARGET_FUNCTION_ARG_BOUNDARY
43214 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
43215 #undef TARGET_PASS_BY_REFERENCE
43216 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
43217 #undef TARGET_INTERNAL_ARG_POINTER
43218 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
43219 #undef TARGET_UPDATE_STACK_BOUNDARY
43220 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
43221 #undef TARGET_GET_DRAP_RTX
43222 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
43223 #undef TARGET_STRICT_ARGUMENT_NAMING
43224 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
43225 #undef TARGET_STATIC_CHAIN
43226 #define TARGET_STATIC_CHAIN ix86_static_chain
43227 #undef TARGET_TRAMPOLINE_INIT
43228 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
43229 #undef TARGET_RETURN_POPS_ARGS
43230 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
43232 #undef TARGET_LEGITIMATE_COMBINED_INSN
43233 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
43235 #undef TARGET_ASAN_SHADOW_OFFSET
43236 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
43238 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
43239 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
43241 #undef TARGET_SCALAR_MODE_SUPPORTED_P
43242 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
43244 #undef TARGET_VECTOR_MODE_SUPPORTED_P
43245 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
43247 #undef TARGET_C_MODE_FOR_SUFFIX
43248 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
43251 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
43252 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
43255 #ifdef SUBTARGET_INSERT_ATTRIBUTES
43256 #undef TARGET_INSERT_ATTRIBUTES
43257 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
43260 #undef TARGET_MANGLE_TYPE
43261 #define TARGET_MANGLE_TYPE ix86_mangle_type
43264 #undef TARGET_STACK_PROTECT_FAIL
43265 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
43268 #undef TARGET_FUNCTION_VALUE
43269 #define TARGET_FUNCTION_VALUE ix86_function_value
43271 #undef TARGET_FUNCTION_VALUE_REGNO_P
43272 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
43274 #undef TARGET_PROMOTE_FUNCTION_MODE
43275 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
43277 #undef TARGET_MEMBER_TYPE_FORCES_BLK
43278 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
43280 #undef TARGET_INSTANTIATE_DECLS
43281 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
43283 #undef TARGET_SECONDARY_RELOAD
43284 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
43286 #undef TARGET_CLASS_MAX_NREGS
43287 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
43289 #undef TARGET_PREFERRED_RELOAD_CLASS
43290 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
43291 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
43292 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
43293 #undef TARGET_CLASS_LIKELY_SPILLED_P
43294 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
43296 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
43297 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
43298 ix86_builtin_vectorization_cost
43299 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
43300 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
43301 ix86_vectorize_vec_perm_const_ok
43302 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
43303 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
43304 ix86_preferred_simd_mode
43305 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
43306 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
43307 ix86_autovectorize_vector_sizes
43308 #undef TARGET_VECTORIZE_INIT_COST
43309 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
43310 #undef TARGET_VECTORIZE_ADD_STMT_COST
43311 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
43312 #undef TARGET_VECTORIZE_FINISH_COST
43313 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
43314 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
43315 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
43317 #undef TARGET_SET_CURRENT_FUNCTION
43318 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
43320 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
43321 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
43323 #undef TARGET_OPTION_SAVE
43324 #define TARGET_OPTION_SAVE ix86_function_specific_save
43326 #undef TARGET_OPTION_RESTORE
43327 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
43329 #undef TARGET_OPTION_PRINT
43330 #define TARGET_OPTION_PRINT ix86_function_specific_print
43332 #undef TARGET_OPTION_FUNCTION_VERSIONS
43333 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
43335 #undef TARGET_CAN_INLINE_P
43336 #define TARGET_CAN_INLINE_P ix86_can_inline_p
43338 #undef TARGET_EXPAND_TO_RTL_HOOK
43339 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
43341 #undef TARGET_LEGITIMATE_ADDRESS_P
43342 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
43344 #undef TARGET_LRA_P
43345 #define TARGET_LRA_P hook_bool_void_true
43347 #undef TARGET_REGISTER_PRIORITY
43348 #define TARGET_REGISTER_PRIORITY ix86_register_priority
43350 #undef TARGET_REGISTER_USAGE_LEVELING_P
43351 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
43353 #undef TARGET_LEGITIMATE_CONSTANT_P
43354 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
43356 #undef TARGET_FRAME_POINTER_REQUIRED
43357 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
43359 #undef TARGET_CAN_ELIMINATE
43360 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
43362 #undef TARGET_EXTRA_LIVE_ON_ENTRY
43363 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
43365 #undef TARGET_ASM_CODE_END
43366 #define TARGET_ASM_CODE_END ix86_code_end
43368 #undef TARGET_CONDITIONAL_REGISTER_USAGE
43369 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
43372 #undef TARGET_INIT_LIBFUNCS
43373 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
43376 #undef TARGET_SPILL_CLASS
43377 #define TARGET_SPILL_CLASS ix86_spill_class
43379 struct gcc_target targetm
= TARGET_INITIALIZER
;
43381 #include "gt-i386.h"