1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
26 #include "stringpool.h"
29 #include "stor-layout.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
45 #include "diagnostic-core.h"
47 #include "basic-block.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
63 #include "gimple-expr.h"
69 #include "tm-constrs.h"
73 #include "sched-int.h"
77 #include "diagnostic.h"
79 #include "tree-pass.h"
81 #include "pass_manager.h"
83 static rtx
legitimize_dllimport_symbol (rtx
, bool);
84 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
85 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
/* Default stack-limit check value; -1 disables the limit check unless the
   target configuration already provided one.  The conditional was missing
   its #endif in this copy -- restored here.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The cost
   tables below have five entries (QI, HI, SI, DI, other), so any mode not
   matched explicitly maps to the final "other" slot.  The conditional was
   left unterminated in this copy; the final ": 4)" alternative is
   restored.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
#define COSTS_N_BYTES(N) ((N) * 2)

/* A stringop_algs entry whose only strategy is a library call for any block
   size (max == -1).  Used below as the filler entry in the two-element
   per-CPU tables.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
105 static stringop_algs ix86_size_memcpy
[2] = {
106 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
107 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
108 static stringop_algs ix86_size_memset
[2] = {
109 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
110 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
113 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
114 COSTS_N_BYTES (2), /* cost of an add instruction */
115 COSTS_N_BYTES (3), /* cost of a lea instruction */
116 COSTS_N_BYTES (2), /* variable shift costs */
117 COSTS_N_BYTES (3), /* constant shift costs */
118 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
119 COSTS_N_BYTES (3), /* HI */
120 COSTS_N_BYTES (3), /* SI */
121 COSTS_N_BYTES (3), /* DI */
122 COSTS_N_BYTES (5)}, /* other */
123 0, /* cost of multiply per each bit set */
124 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 COSTS_N_BYTES (3), /* cost of movsx */
130 COSTS_N_BYTES (3), /* cost of movzx */
131 0, /* "large" insn */
133 2, /* cost for loading QImode using movzbl */
134 {2, 2, 2}, /* cost of loading integer registers
135 in QImode, HImode and SImode.
136 Relative to reg-reg move (2). */
137 {2, 2, 2}, /* cost of storing integer registers */
138 2, /* cost of reg,reg fld/fst */
139 {2, 2, 2}, /* cost of loading fp registers
140 in SFmode, DFmode and XFmode */
141 {2, 2, 2}, /* cost of storing fp registers
142 in SFmode, DFmode and XFmode */
143 3, /* cost of moving MMX register */
144 {3, 3}, /* cost of loading MMX registers
145 in SImode and DImode */
146 {3, 3}, /* cost of storing MMX registers
147 in SImode and DImode */
148 3, /* cost of moving SSE register */
149 {3, 3, 3}, /* cost of loading SSE registers
150 in SImode, DImode and TImode */
151 {3, 3, 3}, /* cost of storing SSE registers
152 in SImode, DImode and TImode */
153 3, /* MMX or SSE register to integer */
154 0, /* size of l1 cache */
155 0, /* size of l2 cache */
156 0, /* size of prefetch block */
157 0, /* number of parallel prefetches */
159 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
160 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
161 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
162 COSTS_N_BYTES (2), /* cost of FABS instruction. */
163 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
164 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
167 1, /* scalar_stmt_cost. */
168 1, /* scalar load_cost. */
169 1, /* scalar_store_cost. */
170 1, /* vec_stmt_cost. */
171 1, /* vec_to_scalar_cost. */
172 1, /* scalar_to_vec_cost. */
173 1, /* vec_align_load_cost. */
174 1, /* vec_unalign_load_cost. */
175 1, /* vec_store_cost. */
176 1, /* cond_taken_branch_cost. */
177 1, /* cond_not_taken_branch_cost. */
180 /* Processor costs (relative to an add) */
181 static stringop_algs i386_memcpy
[2] = {
182 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
183 DUMMY_STRINGOP_ALGS
};
184 static stringop_algs i386_memset
[2] = {
185 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
186 DUMMY_STRINGOP_ALGS
};
189 struct processor_costs i386_cost
= { /* 386 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (6), /* HI */
196 COSTS_N_INSNS (6), /* SI */
197 COSTS_N_INSNS (6), /* DI */
198 COSTS_N_INSNS (6)}, /* other */
199 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (23), /* HI */
202 COSTS_N_INSNS (23), /* SI */
203 COSTS_N_INSNS (23), /* DI */
204 COSTS_N_INSNS (23)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of l1 cache */
231 0, /* size of l2 cache */
232 0, /* size of prefetch block */
233 0, /* number of parallel prefetches */
235 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
236 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
237 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
238 COSTS_N_INSNS (22), /* cost of FABS instruction. */
239 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
240 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
243 1, /* scalar_stmt_cost. */
244 1, /* scalar load_cost. */
245 1, /* scalar_store_cost. */
246 1, /* vec_stmt_cost. */
247 1, /* vec_to_scalar_cost. */
248 1, /* scalar_to_vec_cost. */
249 1, /* vec_align_load_cost. */
250 2, /* vec_unalign_load_cost. */
251 1, /* vec_store_cost. */
252 3, /* cond_taken_branch_cost. */
253 1, /* cond_not_taken_branch_cost. */
256 static stringop_algs i486_memcpy
[2] = {
257 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
258 DUMMY_STRINGOP_ALGS
};
259 static stringop_algs i486_memset
[2] = {
260 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
261 DUMMY_STRINGOP_ALGS
};
264 struct processor_costs i486_cost
= { /* 486 specific costs */
265 COSTS_N_INSNS (1), /* cost of an add instruction */
266 COSTS_N_INSNS (1), /* cost of a lea instruction */
267 COSTS_N_INSNS (3), /* variable shift costs */
268 COSTS_N_INSNS (2), /* constant shift costs */
269 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
270 COSTS_N_INSNS (12), /* HI */
271 COSTS_N_INSNS (12), /* SI */
272 COSTS_N_INSNS (12), /* DI */
273 COSTS_N_INSNS (12)}, /* other */
274 1, /* cost of multiply per each bit set */
275 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
276 COSTS_N_INSNS (40), /* HI */
277 COSTS_N_INSNS (40), /* SI */
278 COSTS_N_INSNS (40), /* DI */
279 COSTS_N_INSNS (40)}, /* other */
280 COSTS_N_INSNS (3), /* cost of movsx */
281 COSTS_N_INSNS (2), /* cost of movzx */
282 15, /* "large" insn */
284 4, /* cost for loading QImode using movzbl */
285 {2, 4, 2}, /* cost of loading integer registers
286 in QImode, HImode and SImode.
287 Relative to reg-reg move (2). */
288 {2, 4, 2}, /* cost of storing integer registers */
289 2, /* cost of reg,reg fld/fst */
290 {8, 8, 8}, /* cost of loading fp registers
291 in SFmode, DFmode and XFmode */
292 {8, 8, 8}, /* cost of storing fp registers
293 in SFmode, DFmode and XFmode */
294 2, /* cost of moving MMX register */
295 {4, 8}, /* cost of loading MMX registers
296 in SImode and DImode */
297 {4, 8}, /* cost of storing MMX registers
298 in SImode and DImode */
299 2, /* cost of moving SSE register */
300 {4, 8, 16}, /* cost of loading SSE registers
301 in SImode, DImode and TImode */
302 {4, 8, 16}, /* cost of storing SSE registers
303 in SImode, DImode and TImode */
304 3, /* MMX or SSE register to integer */
305 4, /* size of l1 cache. 486 has 8kB cache
306 shared for code and data, so 4kB is
307 not really precise. */
308 4, /* size of l2 cache */
309 0, /* size of prefetch block */
310 0, /* number of parallel prefetches */
312 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
313 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
314 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
315 COSTS_N_INSNS (3), /* cost of FABS instruction. */
316 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
317 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
320 1, /* scalar_stmt_cost. */
321 1, /* scalar load_cost. */
322 1, /* scalar_store_cost. */
323 1, /* vec_stmt_cost. */
324 1, /* vec_to_scalar_cost. */
325 1, /* scalar_to_vec_cost. */
326 1, /* vec_align_load_cost. */
327 2, /* vec_unalign_load_cost. */
328 1, /* vec_store_cost. */
329 3, /* cond_taken_branch_cost. */
330 1, /* cond_not_taken_branch_cost. */
333 static stringop_algs pentium_memcpy
[2] = {
334 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
335 DUMMY_STRINGOP_ALGS
};
336 static stringop_algs pentium_memset
[2] = {
337 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
338 DUMMY_STRINGOP_ALGS
};
341 struct processor_costs pentium_cost
= {
342 COSTS_N_INSNS (1), /* cost of an add instruction */
343 COSTS_N_INSNS (1), /* cost of a lea instruction */
344 COSTS_N_INSNS (4), /* variable shift costs */
345 COSTS_N_INSNS (1), /* constant shift costs */
346 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
347 COSTS_N_INSNS (11), /* HI */
348 COSTS_N_INSNS (11), /* SI */
349 COSTS_N_INSNS (11), /* DI */
350 COSTS_N_INSNS (11)}, /* other */
351 0, /* cost of multiply per each bit set */
352 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
353 COSTS_N_INSNS (25), /* HI */
354 COSTS_N_INSNS (25), /* SI */
355 COSTS_N_INSNS (25), /* DI */
356 COSTS_N_INSNS (25)}, /* other */
357 COSTS_N_INSNS (3), /* cost of movsx */
358 COSTS_N_INSNS (2), /* cost of movzx */
359 8, /* "large" insn */
361 6, /* cost for loading QImode using movzbl */
362 {2, 4, 2}, /* cost of loading integer registers
363 in QImode, HImode and SImode.
364 Relative to reg-reg move (2). */
365 {2, 4, 2}, /* cost of storing integer registers */
366 2, /* cost of reg,reg fld/fst */
367 {2, 2, 6}, /* cost of loading fp registers
368 in SFmode, DFmode and XFmode */
369 {4, 4, 6}, /* cost of storing fp registers
370 in SFmode, DFmode and XFmode */
371 8, /* cost of moving MMX register */
372 {8, 8}, /* cost of loading MMX registers
373 in SImode and DImode */
374 {8, 8}, /* cost of storing MMX registers
375 in SImode and DImode */
376 2, /* cost of moving SSE register */
377 {4, 8, 16}, /* cost of loading SSE registers
378 in SImode, DImode and TImode */
379 {4, 8, 16}, /* cost of storing SSE registers
380 in SImode, DImode and TImode */
381 3, /* MMX or SSE register to integer */
382 8, /* size of l1 cache. */
383 8, /* size of l2 cache */
384 0, /* size of prefetch block */
385 0, /* number of parallel prefetches */
387 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
388 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
389 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
390 COSTS_N_INSNS (1), /* cost of FABS instruction. */
391 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
392 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
395 1, /* scalar_stmt_cost. */
396 1, /* scalar load_cost. */
397 1, /* scalar_store_cost. */
398 1, /* vec_stmt_cost. */
399 1, /* vec_to_scalar_cost. */
400 1, /* scalar_to_vec_cost. */
401 1, /* vec_align_load_cost. */
402 2, /* vec_unalign_load_cost. */
403 1, /* vec_store_cost. */
404 3, /* cond_taken_branch_cost. */
405 1, /* cond_not_taken_branch_cost. */
408 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
409 (we ensure the alignment). For small blocks inline loop is still a
410 noticeable win, for bigger blocks either rep movsl or rep movsb is
411 way to go. Rep movsb has apparently more expensive startup time in CPU,
412 but after 4K the difference is down in the noise. */
413 static stringop_algs pentiumpro_memcpy
[2] = {
414 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
415 {8192, rep_prefix_4_byte
, false},
416 {-1, rep_prefix_1_byte
, false}}},
417 DUMMY_STRINGOP_ALGS
};
418 static stringop_algs pentiumpro_memset
[2] = {
419 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
420 {8192, rep_prefix_4_byte
, false},
421 {-1, libcall
, false}}},
422 DUMMY_STRINGOP_ALGS
};
424 struct processor_costs pentiumpro_cost
= {
425 COSTS_N_INSNS (1), /* cost of an add instruction */
426 COSTS_N_INSNS (1), /* cost of a lea instruction */
427 COSTS_N_INSNS (1), /* variable shift costs */
428 COSTS_N_INSNS (1), /* constant shift costs */
429 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
430 COSTS_N_INSNS (4), /* HI */
431 COSTS_N_INSNS (4), /* SI */
432 COSTS_N_INSNS (4), /* DI */
433 COSTS_N_INSNS (4)}, /* other */
434 0, /* cost of multiply per each bit set */
435 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
436 COSTS_N_INSNS (17), /* HI */
437 COSTS_N_INSNS (17), /* SI */
438 COSTS_N_INSNS (17), /* DI */
439 COSTS_N_INSNS (17)}, /* other */
440 COSTS_N_INSNS (1), /* cost of movsx */
441 COSTS_N_INSNS (1), /* cost of movzx */
442 8, /* "large" insn */
444 2, /* cost for loading QImode using movzbl */
445 {4, 4, 4}, /* cost of loading integer registers
446 in QImode, HImode and SImode.
447 Relative to reg-reg move (2). */
448 {2, 2, 2}, /* cost of storing integer registers */
449 2, /* cost of reg,reg fld/fst */
450 {2, 2, 6}, /* cost of loading fp registers
451 in SFmode, DFmode and XFmode */
452 {4, 4, 6}, /* cost of storing fp registers
453 in SFmode, DFmode and XFmode */
454 2, /* cost of moving MMX register */
455 {2, 2}, /* cost of loading MMX registers
456 in SImode and DImode */
457 {2, 2}, /* cost of storing MMX registers
458 in SImode and DImode */
459 2, /* cost of moving SSE register */
460 {2, 2, 8}, /* cost of loading SSE registers
461 in SImode, DImode and TImode */
462 {2, 2, 8}, /* cost of storing SSE registers
463 in SImode, DImode and TImode */
464 3, /* MMX or SSE register to integer */
465 8, /* size of l1 cache. */
466 256, /* size of l2 cache */
467 32, /* size of prefetch block */
468 6, /* number of parallel prefetches */
470 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (2), /* cost of FABS instruction. */
474 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
478 1, /* scalar_stmt_cost. */
479 1, /* scalar load_cost. */
480 1, /* scalar_store_cost. */
481 1, /* vec_stmt_cost. */
482 1, /* vec_to_scalar_cost. */
483 1, /* scalar_to_vec_cost. */
484 1, /* vec_align_load_cost. */
485 2, /* vec_unalign_load_cost. */
486 1, /* vec_store_cost. */
487 3, /* cond_taken_branch_cost. */
488 1, /* cond_not_taken_branch_cost. */
491 static stringop_algs geode_memcpy
[2] = {
492 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
493 DUMMY_STRINGOP_ALGS
};
494 static stringop_algs geode_memset
[2] = {
495 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
496 DUMMY_STRINGOP_ALGS
};
498 struct processor_costs geode_cost
= {
499 COSTS_N_INSNS (1), /* cost of an add instruction */
500 COSTS_N_INSNS (1), /* cost of a lea instruction */
501 COSTS_N_INSNS (2), /* variable shift costs */
502 COSTS_N_INSNS (1), /* constant shift costs */
503 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
504 COSTS_N_INSNS (4), /* HI */
505 COSTS_N_INSNS (7), /* SI */
506 COSTS_N_INSNS (7), /* DI */
507 COSTS_N_INSNS (7)}, /* other */
508 0, /* cost of multiply per each bit set */
509 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
510 COSTS_N_INSNS (23), /* HI */
511 COSTS_N_INSNS (39), /* SI */
512 COSTS_N_INSNS (39), /* DI */
513 COSTS_N_INSNS (39)}, /* other */
514 COSTS_N_INSNS (1), /* cost of movsx */
515 COSTS_N_INSNS (1), /* cost of movzx */
516 8, /* "large" insn */
518 1, /* cost for loading QImode using movzbl */
519 {1, 1, 1}, /* cost of loading integer registers
520 in QImode, HImode and SImode.
521 Relative to reg-reg move (2). */
522 {1, 1, 1}, /* cost of storing integer registers */
523 1, /* cost of reg,reg fld/fst */
524 {1, 1, 1}, /* cost of loading fp registers
525 in SFmode, DFmode and XFmode */
526 {4, 6, 6}, /* cost of storing fp registers
527 in SFmode, DFmode and XFmode */
529 1, /* cost of moving MMX register */
530 {1, 1}, /* cost of loading MMX registers
531 in SImode and DImode */
532 {1, 1}, /* cost of storing MMX registers
533 in SImode and DImode */
534 1, /* cost of moving SSE register */
535 {1, 1, 1}, /* cost of loading SSE registers
536 in SImode, DImode and TImode */
537 {1, 1, 1}, /* cost of storing SSE registers
538 in SImode, DImode and TImode */
539 1, /* MMX or SSE register to integer */
540 64, /* size of l1 cache. */
541 128, /* size of l2 cache. */
542 32, /* size of prefetch block */
543 1, /* number of parallel prefetches */
545 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
546 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
547 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
548 COSTS_N_INSNS (1), /* cost of FABS instruction. */
549 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
550 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
553 1, /* scalar_stmt_cost. */
554 1, /* scalar load_cost. */
555 1, /* scalar_store_cost. */
556 1, /* vec_stmt_cost. */
557 1, /* vec_to_scalar_cost. */
558 1, /* scalar_to_vec_cost. */
559 1, /* vec_align_load_cost. */
560 2, /* vec_unalign_load_cost. */
561 1, /* vec_store_cost. */
562 3, /* cond_taken_branch_cost. */
563 1, /* cond_not_taken_branch_cost. */
566 static stringop_algs k6_memcpy
[2] = {
567 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
568 DUMMY_STRINGOP_ALGS
};
569 static stringop_algs k6_memset
[2] = {
570 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
571 DUMMY_STRINGOP_ALGS
};
573 struct processor_costs k6_cost
= {
574 COSTS_N_INSNS (1), /* cost of an add instruction */
575 COSTS_N_INSNS (2), /* cost of a lea instruction */
576 COSTS_N_INSNS (1), /* variable shift costs */
577 COSTS_N_INSNS (1), /* constant shift costs */
578 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
579 COSTS_N_INSNS (3), /* HI */
580 COSTS_N_INSNS (3), /* SI */
581 COSTS_N_INSNS (3), /* DI */
582 COSTS_N_INSNS (3)}, /* other */
583 0, /* cost of multiply per each bit set */
584 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
585 COSTS_N_INSNS (18), /* HI */
586 COSTS_N_INSNS (18), /* SI */
587 COSTS_N_INSNS (18), /* DI */
588 COSTS_N_INSNS (18)}, /* other */
589 COSTS_N_INSNS (2), /* cost of movsx */
590 COSTS_N_INSNS (2), /* cost of movzx */
591 8, /* "large" insn */
593 3, /* cost for loading QImode using movzbl */
594 {4, 5, 4}, /* cost of loading integer registers
595 in QImode, HImode and SImode.
596 Relative to reg-reg move (2). */
597 {2, 3, 2}, /* cost of storing integer registers */
598 4, /* cost of reg,reg fld/fst */
599 {6, 6, 6}, /* cost of loading fp registers
600 in SFmode, DFmode and XFmode */
601 {4, 4, 4}, /* cost of storing fp registers
602 in SFmode, DFmode and XFmode */
603 2, /* cost of moving MMX register */
604 {2, 2}, /* cost of loading MMX registers
605 in SImode and DImode */
606 {2, 2}, /* cost of storing MMX registers
607 in SImode and DImode */
608 2, /* cost of moving SSE register */
609 {2, 2, 8}, /* cost of loading SSE registers
610 in SImode, DImode and TImode */
611 {2, 2, 8}, /* cost of storing SSE registers
612 in SImode, DImode and TImode */
613 6, /* MMX or SSE register to integer */
614 32, /* size of l1 cache. */
615 32, /* size of l2 cache. Some models
616 have integrated l2 cache, but
617 optimizing for k6 is not important
618 enough to worry about that. */
619 32, /* size of prefetch block */
620 1, /* number of parallel prefetches */
622 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
623 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
624 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
625 COSTS_N_INSNS (2), /* cost of FABS instruction. */
626 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
627 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
630 1, /* scalar_stmt_cost. */
631 1, /* scalar load_cost. */
632 1, /* scalar_store_cost. */
633 1, /* vec_stmt_cost. */
634 1, /* vec_to_scalar_cost. */
635 1, /* scalar_to_vec_cost. */
636 1, /* vec_align_load_cost. */
637 2, /* vec_unalign_load_cost. */
638 1, /* vec_store_cost. */
639 3, /* cond_taken_branch_cost. */
640 1, /* cond_not_taken_branch_cost. */
643 /* For some reason, Athlon deals better with REP prefix (relative to loops)
644 compared to K8. Alignment becomes important after 8 bytes for memcpy and
645 128 bytes for memset. */
646 static stringop_algs athlon_memcpy
[2] = {
647 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
648 DUMMY_STRINGOP_ALGS
};
649 static stringop_algs athlon_memset
[2] = {
650 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
};
653 struct processor_costs athlon_cost
= {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (5), /* HI */
660 COSTS_N_INSNS (5), /* SI */
661 COSTS_N_INSNS (5), /* DI */
662 COSTS_N_INSNS (5)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (26), /* HI */
666 COSTS_N_INSNS (42), /* SI */
667 COSTS_N_INSNS (74), /* DI */
668 COSTS_N_INSNS (74)}, /* other */
669 COSTS_N_INSNS (1), /* cost of movsx */
670 COSTS_N_INSNS (1), /* cost of movzx */
671 8, /* "large" insn */
673 4, /* cost for loading QImode using movzbl */
674 {3, 4, 3}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {3, 4, 3}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {4, 4, 12}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {6, 6, 8}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {4, 4}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {4, 4}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {4, 4, 6}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {4, 4, 5}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 5, /* MMX or SSE register to integer */
694 64, /* size of l1 cache. */
695 256, /* size of l2 cache. */
696 64, /* size of prefetch block */
697 6, /* number of parallel prefetches */
699 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
700 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
701 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
702 COSTS_N_INSNS (2), /* cost of FABS instruction. */
703 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
704 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
707 1, /* scalar_stmt_cost. */
708 1, /* scalar load_cost. */
709 1, /* scalar_store_cost. */
710 1, /* vec_stmt_cost. */
711 1, /* vec_to_scalar_cost. */
712 1, /* scalar_to_vec_cost. */
713 1, /* vec_align_load_cost. */
714 2, /* vec_unalign_load_cost. */
715 1, /* vec_store_cost. */
716 3, /* cond_taken_branch_cost. */
717 1, /* cond_not_taken_branch_cost. */
720 /* K8 has optimized REP instruction for medium sized blocks, but for very
721 small blocks it is better to use loop. For large blocks, libcall can
722 do nontemporary accesses and beat inline considerably. */
723 static stringop_algs k8_memcpy
[2] = {
724 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
725 {-1, rep_prefix_4_byte
, false}}},
726 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
727 {-1, libcall
, false}}}};
728 static stringop_algs k8_memset
[2] = {
729 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
730 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
731 {libcall
, {{48, unrolled_loop
, false},
732 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
734 struct processor_costs k8_cost
= {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (26), /* HI */
747 COSTS_N_INSNS (42), /* SI */
748 COSTS_N_INSNS (74), /* DI */
749 COSTS_N_INSNS (74)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 3, 6}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 5, /* MMX or SSE register to integer */
775 64, /* size of l1 cache. */
776 512, /* size of l2 cache. */
777 64, /* size of prefetch block */
778 /* New AMD processors never drop prefetches; if they cannot be performed
779 immediately, they are queued. We set number of simultaneous prefetches
780 to a large constant to reflect this (it probably is not a good idea not
781 to limit number of prefetches at all, as their execution also takes some
783 100, /* number of parallel prefetches */
785 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
786 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
787 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
788 COSTS_N_INSNS (2), /* cost of FABS instruction. */
789 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
790 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
794 4, /* scalar_stmt_cost. */
795 2, /* scalar load_cost. */
796 2, /* scalar_store_cost. */
797 5, /* vec_stmt_cost. */
798 0, /* vec_to_scalar_cost. */
799 2, /* scalar_to_vec_cost. */
800 2, /* vec_align_load_cost. */
801 3, /* vec_unalign_load_cost. */
802 3, /* vec_store_cost. */
803 3, /* cond_taken_branch_cost. */
804 2, /* cond_not_taken_branch_cost. */
807 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
808 very small blocks it is better to use loop. For large blocks, libcall can
809 do nontemporary accesses and beat inline considerably. */
810 static stringop_algs amdfam10_memcpy
[2] = {
811 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
812 {-1, rep_prefix_4_byte
, false}}},
813 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
814 {-1, libcall
, false}}}};
815 static stringop_algs amdfam10_memset
[2] = {
816 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
817 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
818 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
819 {-1, libcall
, false}}}};
820 struct processor_costs amdfam10_cost
= {
821 COSTS_N_INSNS (1), /* cost of an add instruction */
822 COSTS_N_INSNS (2), /* cost of a lea instruction */
823 COSTS_N_INSNS (1), /* variable shift costs */
824 COSTS_N_INSNS (1), /* constant shift costs */
825 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
826 COSTS_N_INSNS (4), /* HI */
827 COSTS_N_INSNS (3), /* SI */
828 COSTS_N_INSNS (4), /* DI */
829 COSTS_N_INSNS (5)}, /* other */
830 0, /* cost of multiply per each bit set */
831 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
832 COSTS_N_INSNS (35), /* HI */
833 COSTS_N_INSNS (51), /* SI */
834 COSTS_N_INSNS (83), /* DI */
835 COSTS_N_INSNS (83)}, /* other */
836 COSTS_N_INSNS (1), /* cost of movsx */
837 COSTS_N_INSNS (1), /* cost of movzx */
838 8, /* "large" insn */
840 4, /* cost for loading QImode using movzbl */
841 {3, 4, 3}, /* cost of loading integer registers
842 in QImode, HImode and SImode.
843 Relative to reg-reg move (2). */
844 {3, 4, 3}, /* cost of storing integer registers */
845 4, /* cost of reg,reg fld/fst */
846 {4, 4, 12}, /* cost of loading fp registers
847 in SFmode, DFmode and XFmode */
848 {6, 6, 8}, /* cost of storing fp registers
849 in SFmode, DFmode and XFmode */
850 2, /* cost of moving MMX register */
851 {3, 3}, /* cost of loading MMX registers
852 in SImode and DImode */
853 {4, 4}, /* cost of storing MMX registers
854 in SImode and DImode */
855 2, /* cost of moving SSE register */
856 {4, 4, 3}, /* cost of loading SSE registers
857 in SImode, DImode and TImode */
858 {4, 4, 5}, /* cost of storing SSE registers
859 in SImode, DImode and TImode */
860 3, /* MMX or SSE register to integer */
862 MOVD reg64, xmmreg Double FSTORE 4
863 MOVD reg32, xmmreg Double FSTORE 4
865 MOVD reg64, xmmreg Double FADD 3
867 MOVD reg32, xmmreg Double FADD 3
869 64, /* size of l1 cache. */
870 512, /* size of l2 cache. */
871 64, /* size of prefetch block */
872 /* New AMD processors never drop prefetches; if they cannot be performed
873 immediately, they are queued. We set number of simultaneous prefetches
874 to a large constant to reflect this (it probably is not a good idea not
875 to limit number of prefetches at all, as their execution also takes some
877 100, /* number of parallel prefetches */
879 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
880 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
881 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
882 COSTS_N_INSNS (2), /* cost of FABS instruction. */
883 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
884 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
888 4, /* scalar_stmt_cost. */
889 2, /* scalar load_cost. */
890 2, /* scalar_store_cost. */
891 6, /* vec_stmt_cost. */
892 0, /* vec_to_scalar_cost. */
893 2, /* scalar_to_vec_cost. */
894 2, /* vec_align_load_cost. */
895 2, /* vec_unalign_load_cost. */
896 2, /* vec_store_cost. */
897 2, /* cond_taken_branch_cost. */
898 1, /* cond_not_taken_branch_cost. */
901 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
902 very small blocks it is better to use loop. For large blocks, libcall
903 can do nontemporary accesses and beat inline considerably. */
904 static stringop_algs bdver1_memcpy
[2] = {
905 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
906 {-1, rep_prefix_4_byte
, false}}},
907 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
908 {-1, libcall
, false}}}};
909 static stringop_algs bdver1_memset
[2] = {
910 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
911 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
912 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
913 {-1, libcall
, false}}}};
915 const struct processor_costs bdver1_cost
= {
916 COSTS_N_INSNS (1), /* cost of an add instruction */
917 COSTS_N_INSNS (1), /* cost of a lea instruction */
918 COSTS_N_INSNS (1), /* variable shift costs */
919 COSTS_N_INSNS (1), /* constant shift costs */
920 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
921 COSTS_N_INSNS (4), /* HI */
922 COSTS_N_INSNS (4), /* SI */
923 COSTS_N_INSNS (6), /* DI */
924 COSTS_N_INSNS (6)}, /* other */
925 0, /* cost of multiply per each bit set */
926 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
927 COSTS_N_INSNS (35), /* HI */
928 COSTS_N_INSNS (51), /* SI */
929 COSTS_N_INSNS (83), /* DI */
930 COSTS_N_INSNS (83)}, /* other */
931 COSTS_N_INSNS (1), /* cost of movsx */
932 COSTS_N_INSNS (1), /* cost of movzx */
933 8, /* "large" insn */
935 4, /* cost for loading QImode using movzbl */
936 {5, 5, 4}, /* cost of loading integer registers
937 in QImode, HImode and SImode.
938 Relative to reg-reg move (2). */
939 {4, 4, 4}, /* cost of storing integer registers */
940 2, /* cost of reg,reg fld/fst */
941 {5, 5, 12}, /* cost of loading fp registers
942 in SFmode, DFmode and XFmode */
943 {4, 4, 8}, /* cost of storing fp registers
944 in SFmode, DFmode and XFmode */
945 2, /* cost of moving MMX register */
946 {4, 4}, /* cost of loading MMX registers
947 in SImode and DImode */
948 {4, 4}, /* cost of storing MMX registers
949 in SImode and DImode */
950 2, /* cost of moving SSE register */
951 {4, 4, 4}, /* cost of loading SSE registers
952 in SImode, DImode and TImode */
953 {4, 4, 4}, /* cost of storing SSE registers
954 in SImode, DImode and TImode */
955 2, /* MMX or SSE register to integer */
957 MOVD reg64, xmmreg Double FSTORE 4
958 MOVD reg32, xmmreg Double FSTORE 4
960 MOVD reg64, xmmreg Double FADD 3
962 MOVD reg32, xmmreg Double FADD 3
964 16, /* size of l1 cache. */
965 2048, /* size of l2 cache. */
966 64, /* size of prefetch block */
967 /* New AMD processors never drop prefetches; if they cannot be performed
968 immediately, they are queued. We set number of simultaneous prefetches
969 to a large constant to reflect this (it probably is not a good idea not
970 to limit number of prefetches at all, as their execution also takes some
972 100, /* number of parallel prefetches */
974 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
975 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
976 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
977 COSTS_N_INSNS (2), /* cost of FABS instruction. */
978 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
979 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
983 6, /* scalar_stmt_cost. */
984 4, /* scalar load_cost. */
985 4, /* scalar_store_cost. */
986 6, /* vec_stmt_cost. */
987 0, /* vec_to_scalar_cost. */
988 2, /* scalar_to_vec_cost. */
989 4, /* vec_align_load_cost. */
990 4, /* vec_unalign_load_cost. */
991 4, /* vec_store_cost. */
992 2, /* cond_taken_branch_cost. */
993 1, /* cond_not_taken_branch_cost. */
996 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
997 very small blocks it is better to use loop. For large blocks, libcall
998 can do nontemporary accesses and beat inline considerably. */
1000 static stringop_algs bdver2_memcpy
[2] = {
1001 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1002 {-1, rep_prefix_4_byte
, false}}},
1003 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1004 {-1, libcall
, false}}}};
1005 static stringop_algs bdver2_memset
[2] = {
1006 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1007 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1008 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1009 {-1, libcall
, false}}}};
1011 const struct processor_costs bdver2_cost
= {
1012 COSTS_N_INSNS (1), /* cost of an add instruction */
1013 COSTS_N_INSNS (1), /* cost of a lea instruction */
1014 COSTS_N_INSNS (1), /* variable shift costs */
1015 COSTS_N_INSNS (1), /* constant shift costs */
1016 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1017 COSTS_N_INSNS (4), /* HI */
1018 COSTS_N_INSNS (4), /* SI */
1019 COSTS_N_INSNS (6), /* DI */
1020 COSTS_N_INSNS (6)}, /* other */
1021 0, /* cost of multiply per each bit set */
1022 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1023 COSTS_N_INSNS (35), /* HI */
1024 COSTS_N_INSNS (51), /* SI */
1025 COSTS_N_INSNS (83), /* DI */
1026 COSTS_N_INSNS (83)}, /* other */
1027 COSTS_N_INSNS (1), /* cost of movsx */
1028 COSTS_N_INSNS (1), /* cost of movzx */
1029 8, /* "large" insn */
1031 4, /* cost for loading QImode using movzbl */
1032 {5, 5, 4}, /* cost of loading integer registers
1033 in QImode, HImode and SImode.
1034 Relative to reg-reg move (2). */
1035 {4, 4, 4}, /* cost of storing integer registers */
1036 2, /* cost of reg,reg fld/fst */
1037 {5, 5, 12}, /* cost of loading fp registers
1038 in SFmode, DFmode and XFmode */
1039 {4, 4, 8}, /* cost of storing fp registers
1040 in SFmode, DFmode and XFmode */
1041 2, /* cost of moving MMX register */
1042 {4, 4}, /* cost of loading MMX registers
1043 in SImode and DImode */
1044 {4, 4}, /* cost of storing MMX registers
1045 in SImode and DImode */
1046 2, /* cost of moving SSE register */
1047 {4, 4, 4}, /* cost of loading SSE registers
1048 in SImode, DImode and TImode */
1049 {4, 4, 4}, /* cost of storing SSE registers
1050 in SImode, DImode and TImode */
1051 2, /* MMX or SSE register to integer */
1053 MOVD reg64, xmmreg Double FSTORE 4
1054 MOVD reg32, xmmreg Double FSTORE 4
1056 MOVD reg64, xmmreg Double FADD 3
1058 MOVD reg32, xmmreg Double FADD 3
1060 16, /* size of l1 cache. */
1061 2048, /* size of l2 cache. */
1062 64, /* size of prefetch block */
1063 /* New AMD processors never drop prefetches; if they cannot be performed
1064 immediately, they are queued. We set number of simultaneous prefetches
1065 to a large constant to reflect this (it probably is not a good idea not
1066 to limit number of prefetches at all, as their execution also takes some
1068 100, /* number of parallel prefetches */
1069 2, /* Branch cost */
1070 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1071 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1072 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1073 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1074 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1075 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1079 6, /* scalar_stmt_cost. */
1080 4, /* scalar load_cost. */
1081 4, /* scalar_store_cost. */
1082 6, /* vec_stmt_cost. */
1083 0, /* vec_to_scalar_cost. */
1084 2, /* scalar_to_vec_cost. */
1085 4, /* vec_align_load_cost. */
1086 4, /* vec_unalign_load_cost. */
1087 4, /* vec_store_cost. */
1088 2, /* cond_taken_branch_cost. */
1089 1, /* cond_not_taken_branch_cost. */
1093 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1094 very small blocks it is better to use loop. For large blocks, libcall
1095 can do nontemporary accesses and beat inline considerably. */
1096 static stringop_algs bdver3_memcpy
[2] = {
1097 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1098 {-1, rep_prefix_4_byte
, false}}},
1099 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1100 {-1, libcall
, false}}}};
1101 static stringop_algs bdver3_memset
[2] = {
1102 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1103 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1104 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1105 {-1, libcall
, false}}}};
1106 struct processor_costs bdver3_cost
= {
1107 COSTS_N_INSNS (1), /* cost of an add instruction */
1108 COSTS_N_INSNS (1), /* cost of a lea instruction */
1109 COSTS_N_INSNS (1), /* variable shift costs */
1110 COSTS_N_INSNS (1), /* constant shift costs */
1111 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1112 COSTS_N_INSNS (4), /* HI */
1113 COSTS_N_INSNS (4), /* SI */
1114 COSTS_N_INSNS (6), /* DI */
1115 COSTS_N_INSNS (6)}, /* other */
1116 0, /* cost of multiply per each bit set */
1117 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1118 COSTS_N_INSNS (35), /* HI */
1119 COSTS_N_INSNS (51), /* SI */
1120 COSTS_N_INSNS (83), /* DI */
1121 COSTS_N_INSNS (83)}, /* other */
1122 COSTS_N_INSNS (1), /* cost of movsx */
1123 COSTS_N_INSNS (1), /* cost of movzx */
1124 8, /* "large" insn */
1126 4, /* cost for loading QImode using movzbl */
1127 {5, 5, 4}, /* cost of loading integer registers
1128 in QImode, HImode and SImode.
1129 Relative to reg-reg move (2). */
1130 {4, 4, 4}, /* cost of storing integer registers */
1131 2, /* cost of reg,reg fld/fst */
1132 {5, 5, 12}, /* cost of loading fp registers
1133 in SFmode, DFmode and XFmode */
1134 {4, 4, 8}, /* cost of storing fp registers
1135 in SFmode, DFmode and XFmode */
1136 2, /* cost of moving MMX register */
1137 {4, 4}, /* cost of loading MMX registers
1138 in SImode and DImode */
1139 {4, 4}, /* cost of storing MMX registers
1140 in SImode and DImode */
1141 2, /* cost of moving SSE register */
1142 {4, 4, 4}, /* cost of loading SSE registers
1143 in SImode, DImode and TImode */
1144 {4, 4, 4}, /* cost of storing SSE registers
1145 in SImode, DImode and TImode */
1146 2, /* MMX or SSE register to integer */
1147 16, /* size of l1 cache. */
1148 2048, /* size of l2 cache. */
1149 64, /* size of prefetch block */
1150 /* New AMD processors never drop prefetches; if they cannot be performed
1151 immediately, they are queued. We set number of simultaneous prefetches
1152 to a large constant to reflect this (it probably is not a good idea not
1153 to limit number of prefetches at all, as their execution also takes some
1155 100, /* number of parallel prefetches */
1156 2, /* Branch cost */
1157 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1158 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1159 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1160 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1161 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1162 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1166 6, /* scalar_stmt_cost. */
1167 4, /* scalar load_cost. */
1168 4, /* scalar_store_cost. */
1169 6, /* vec_stmt_cost. */
1170 0, /* vec_to_scalar_cost. */
1171 2, /* scalar_to_vec_cost. */
1172 4, /* vec_align_load_cost. */
1173 4, /* vec_unalign_load_cost. */
1174 4, /* vec_store_cost. */
1175 2, /* cond_taken_branch_cost. */
1176 1, /* cond_not_taken_branch_cost. */
1179 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1180 very small blocks it is better to use loop. For large blocks, libcall
1181 can do nontemporary accesses and beat inline considerably. */
1182 static stringop_algs bdver4_memcpy
[2] = {
1183 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1184 {-1, rep_prefix_4_byte
, false}}},
1185 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1186 {-1, libcall
, false}}}};
1187 static stringop_algs bdver4_memset
[2] = {
1188 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1189 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1190 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1191 {-1, libcall
, false}}}};
1192 struct processor_costs bdver4_cost
= {
1193 COSTS_N_INSNS (1), /* cost of an add instruction */
1194 COSTS_N_INSNS (1), /* cost of a lea instruction */
1195 COSTS_N_INSNS (1), /* variable shift costs */
1196 COSTS_N_INSNS (1), /* constant shift costs */
1197 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1198 COSTS_N_INSNS (4), /* HI */
1199 COSTS_N_INSNS (4), /* SI */
1200 COSTS_N_INSNS (6), /* DI */
1201 COSTS_N_INSNS (6)}, /* other */
1202 0, /* cost of multiply per each bit set */
1203 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1204 COSTS_N_INSNS (35), /* HI */
1205 COSTS_N_INSNS (51), /* SI */
1206 COSTS_N_INSNS (83), /* DI */
1207 COSTS_N_INSNS (83)}, /* other */
1208 COSTS_N_INSNS (1), /* cost of movsx */
1209 COSTS_N_INSNS (1), /* cost of movzx */
1210 8, /* "large" insn */
1212 4, /* cost for loading QImode using movzbl */
1213 {5, 5, 4}, /* cost of loading integer registers
1214 in QImode, HImode and SImode.
1215 Relative to reg-reg move (2). */
1216 {4, 4, 4}, /* cost of storing integer registers */
1217 2, /* cost of reg,reg fld/fst */
1218 {5, 5, 12}, /* cost of loading fp registers
1219 in SFmode, DFmode and XFmode */
1220 {4, 4, 8}, /* cost of storing fp registers
1221 in SFmode, DFmode and XFmode */
1222 2, /* cost of moving MMX register */
1223 {4, 4}, /* cost of loading MMX registers
1224 in SImode and DImode */
1225 {4, 4}, /* cost of storing MMX registers
1226 in SImode and DImode */
1227 2, /* cost of moving SSE register */
1228 {4, 4, 4}, /* cost of loading SSE registers
1229 in SImode, DImode and TImode */
1230 {4, 4, 4}, /* cost of storing SSE registers
1231 in SImode, DImode and TImode */
1232 2, /* MMX or SSE register to integer */
1233 16, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 /* New AMD processors never drop prefetches; if they cannot be performed
1237 immediately, they are queued. We set number of simultaneous prefetches
1238 to a large constant to reflect this (it probably is not a good idea not
1239 to limit number of prefetches at all, as their execution also takes some
1241 100, /* number of parallel prefetches */
1242 2, /* Branch cost */
1243 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1244 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1245 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1246 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1247 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1248 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1252 6, /* scalar_stmt_cost. */
1253 4, /* scalar load_cost. */
1254 4, /* scalar_store_cost. */
1255 6, /* vec_stmt_cost. */
1256 0, /* vec_to_scalar_cost. */
1257 2, /* scalar_to_vec_cost. */
1258 4, /* vec_align_load_cost. */
1259 4, /* vec_unalign_load_cost. */
1260 4, /* vec_store_cost. */
1261 2, /* cond_taken_branch_cost. */
1262 1, /* cond_not_taken_branch_cost. */
1265 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1266 very small blocks it is better to use loop. For large blocks, libcall can
1267 do nontemporary accesses and beat inline considerably. */
1268 static stringop_algs btver1_memcpy
[2] = {
1269 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1270 {-1, rep_prefix_4_byte
, false}}},
1271 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1272 {-1, libcall
, false}}}};
1273 static stringop_algs btver1_memset
[2] = {
1274 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1275 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1276 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1277 {-1, libcall
, false}}}};
1278 const struct processor_costs btver1_cost
= {
1279 COSTS_N_INSNS (1), /* cost of an add instruction */
1280 COSTS_N_INSNS (2), /* cost of a lea instruction */
1281 COSTS_N_INSNS (1), /* variable shift costs */
1282 COSTS_N_INSNS (1), /* constant shift costs */
1283 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1284 COSTS_N_INSNS (4), /* HI */
1285 COSTS_N_INSNS (3), /* SI */
1286 COSTS_N_INSNS (4), /* DI */
1287 COSTS_N_INSNS (5)}, /* other */
1288 0, /* cost of multiply per each bit set */
1289 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1290 COSTS_N_INSNS (35), /* HI */
1291 COSTS_N_INSNS (51), /* SI */
1292 COSTS_N_INSNS (83), /* DI */
1293 COSTS_N_INSNS (83)}, /* other */
1294 COSTS_N_INSNS (1), /* cost of movsx */
1295 COSTS_N_INSNS (1), /* cost of movzx */
1296 8, /* "large" insn */
1298 4, /* cost for loading QImode using movzbl */
1299 {3, 4, 3}, /* cost of loading integer registers
1300 in QImode, HImode and SImode.
1301 Relative to reg-reg move (2). */
1302 {3, 4, 3}, /* cost of storing integer registers */
1303 4, /* cost of reg,reg fld/fst */
1304 {4, 4, 12}, /* cost of loading fp registers
1305 in SFmode, DFmode and XFmode */
1306 {6, 6, 8}, /* cost of storing fp registers
1307 in SFmode, DFmode and XFmode */
1308 2, /* cost of moving MMX register */
1309 {3, 3}, /* cost of loading MMX registers
1310 in SImode and DImode */
1311 {4, 4}, /* cost of storing MMX registers
1312 in SImode and DImode */
1313 2, /* cost of moving SSE register */
1314 {4, 4, 3}, /* cost of loading SSE registers
1315 in SImode, DImode and TImode */
1316 {4, 4, 5}, /* cost of storing SSE registers
1317 in SImode, DImode and TImode */
1318 3, /* MMX or SSE register to integer */
1320 MOVD reg64, xmmreg Double FSTORE 4
1321 MOVD reg32, xmmreg Double FSTORE 4
1323 MOVD reg64, xmmreg Double FADD 3
1325 MOVD reg32, xmmreg Double FADD 3
1327 32, /* size of l1 cache. */
1328 512, /* size of l2 cache. */
1329 64, /* size of prefetch block */
1330 100, /* number of parallel prefetches */
1331 2, /* Branch cost */
1332 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1333 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1334 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1335 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1336 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1337 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1341 4, /* scalar_stmt_cost. */
1342 2, /* scalar load_cost. */
1343 2, /* scalar_store_cost. */
1344 6, /* vec_stmt_cost. */
1345 0, /* vec_to_scalar_cost. */
1346 2, /* scalar_to_vec_cost. */
1347 2, /* vec_align_load_cost. */
1348 2, /* vec_unalign_load_cost. */
1349 2, /* vec_store_cost. */
1350 2, /* cond_taken_branch_cost. */
1351 1, /* cond_not_taken_branch_cost. */
1354 static stringop_algs btver2_memcpy
[2] = {
1355 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1356 {-1, rep_prefix_4_byte
, false}}},
1357 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1358 {-1, libcall
, false}}}};
1359 static stringop_algs btver2_memset
[2] = {
1360 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1361 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1362 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1363 {-1, libcall
, false}}}};
1364 const struct processor_costs btver2_cost
= {
1365 COSTS_N_INSNS (1), /* cost of an add instruction */
1366 COSTS_N_INSNS (2), /* cost of a lea instruction */
1367 COSTS_N_INSNS (1), /* variable shift costs */
1368 COSTS_N_INSNS (1), /* constant shift costs */
1369 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1370 COSTS_N_INSNS (4), /* HI */
1371 COSTS_N_INSNS (3), /* SI */
1372 COSTS_N_INSNS (4), /* DI */
1373 COSTS_N_INSNS (5)}, /* other */
1374 0, /* cost of multiply per each bit set */
1375 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1376 COSTS_N_INSNS (35), /* HI */
1377 COSTS_N_INSNS (51), /* SI */
1378 COSTS_N_INSNS (83), /* DI */
1379 COSTS_N_INSNS (83)}, /* other */
1380 COSTS_N_INSNS (1), /* cost of movsx */
1381 COSTS_N_INSNS (1), /* cost of movzx */
1382 8, /* "large" insn */
1384 4, /* cost for loading QImode using movzbl */
1385 {3, 4, 3}, /* cost of loading integer registers
1386 in QImode, HImode and SImode.
1387 Relative to reg-reg move (2). */
1388 {3, 4, 3}, /* cost of storing integer registers */
1389 4, /* cost of reg,reg fld/fst */
1390 {4, 4, 12}, /* cost of loading fp registers
1391 in SFmode, DFmode and XFmode */
1392 {6, 6, 8}, /* cost of storing fp registers
1393 in SFmode, DFmode and XFmode */
1394 2, /* cost of moving MMX register */
1395 {3, 3}, /* cost of loading MMX registers
1396 in SImode and DImode */
1397 {4, 4}, /* cost of storing MMX registers
1398 in SImode and DImode */
1399 2, /* cost of moving SSE register */
1400 {4, 4, 3}, /* cost of loading SSE registers
1401 in SImode, DImode and TImode */
1402 {4, 4, 5}, /* cost of storing SSE registers
1403 in SImode, DImode and TImode */
1404 3, /* MMX or SSE register to integer */
1406 MOVD reg64, xmmreg Double FSTORE 4
1407 MOVD reg32, xmmreg Double FSTORE 4
1409 MOVD reg64, xmmreg Double FADD 3
1411 MOVD reg32, xmmreg Double FADD 3
1413 32, /* size of l1 cache. */
1414 2048, /* size of l2 cache. */
1415 64, /* size of prefetch block */
1416 100, /* number of parallel prefetches */
1417 2, /* Branch cost */
1418 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1419 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1420 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1421 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1422 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1423 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1426 4, /* scalar_stmt_cost. */
1427 2, /* scalar load_cost. */
1428 2, /* scalar_store_cost. */
1429 6, /* vec_stmt_cost. */
1430 0, /* vec_to_scalar_cost. */
1431 2, /* scalar_to_vec_cost. */
1432 2, /* vec_align_load_cost. */
1433 2, /* vec_unalign_load_cost. */
1434 2, /* vec_store_cost. */
1435 2, /* cond_taken_branch_cost. */
1436 1, /* cond_not_taken_branch_cost. */
1439 static stringop_algs pentium4_memcpy
[2] = {
1440 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1441 DUMMY_STRINGOP_ALGS
};
1442 static stringop_algs pentium4_memset
[2] = {
1443 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1444 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1445 DUMMY_STRINGOP_ALGS
};
1448 struct processor_costs pentium4_cost
= {
1449 COSTS_N_INSNS (1), /* cost of an add instruction */
1450 COSTS_N_INSNS (3), /* cost of a lea instruction */
1451 COSTS_N_INSNS (4), /* variable shift costs */
1452 COSTS_N_INSNS (4), /* constant shift costs */
1453 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1454 COSTS_N_INSNS (15), /* HI */
1455 COSTS_N_INSNS (15), /* SI */
1456 COSTS_N_INSNS (15), /* DI */
1457 COSTS_N_INSNS (15)}, /* other */
1458 0, /* cost of multiply per each bit set */
1459 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1460 COSTS_N_INSNS (56), /* HI */
1461 COSTS_N_INSNS (56), /* SI */
1462 COSTS_N_INSNS (56), /* DI */
1463 COSTS_N_INSNS (56)}, /* other */
1464 COSTS_N_INSNS (1), /* cost of movsx */
1465 COSTS_N_INSNS (1), /* cost of movzx */
1466 16, /* "large" insn */
1468 2, /* cost for loading QImode using movzbl */
1469 {4, 5, 4}, /* cost of loading integer registers
1470 in QImode, HImode and SImode.
1471 Relative to reg-reg move (2). */
1472 {2, 3, 2}, /* cost of storing integer registers */
1473 2, /* cost of reg,reg fld/fst */
1474 {2, 2, 6}, /* cost of loading fp registers
1475 in SFmode, DFmode and XFmode */
1476 {4, 4, 6}, /* cost of storing fp registers
1477 in SFmode, DFmode and XFmode */
1478 2, /* cost of moving MMX register */
1479 {2, 2}, /* cost of loading MMX registers
1480 in SImode and DImode */
1481 {2, 2}, /* cost of storing MMX registers
1482 in SImode and DImode */
1483 12, /* cost of moving SSE register */
1484 {12, 12, 12}, /* cost of loading SSE registers
1485 in SImode, DImode and TImode */
1486 {2, 2, 8}, /* cost of storing SSE registers
1487 in SImode, DImode and TImode */
1488 10, /* MMX or SSE register to integer */
1489 8, /* size of l1 cache. */
1490 256, /* size of l2 cache. */
1491 64, /* size of prefetch block */
1492 6, /* number of parallel prefetches */
1493 2, /* Branch cost */
1494 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1495 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1496 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1497 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1498 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1499 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1502 1, /* scalar_stmt_cost. */
1503 1, /* scalar load_cost. */
1504 1, /* scalar_store_cost. */
1505 1, /* vec_stmt_cost. */
1506 1, /* vec_to_scalar_cost. */
1507 1, /* scalar_to_vec_cost. */
1508 1, /* vec_align_load_cost. */
1509 2, /* vec_unalign_load_cost. */
1510 1, /* vec_store_cost. */
1511 3, /* cond_taken_branch_cost. */
1512 1, /* cond_not_taken_branch_cost. */
1515 static stringop_algs nocona_memcpy
[2] = {
1516 {libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1517 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1518 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}};
1520 static stringop_algs nocona_memset
[2] = {
1521 {libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1522 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1523 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1524 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1527 struct processor_costs nocona_cost
= {
1528 COSTS_N_INSNS (1), /* cost of an add instruction */
1529 COSTS_N_INSNS (1), /* cost of a lea instruction */
1530 COSTS_N_INSNS (1), /* variable shift costs */
1531 COSTS_N_INSNS (1), /* constant shift costs */
1532 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1533 COSTS_N_INSNS (10), /* HI */
1534 COSTS_N_INSNS (10), /* SI */
1535 COSTS_N_INSNS (10), /* DI */
1536 COSTS_N_INSNS (10)}, /* other */
1537 0, /* cost of multiply per each bit set */
1538 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1539 COSTS_N_INSNS (66), /* HI */
1540 COSTS_N_INSNS (66), /* SI */
1541 COSTS_N_INSNS (66), /* DI */
1542 COSTS_N_INSNS (66)}, /* other */
1543 COSTS_N_INSNS (1), /* cost of movsx */
1544 COSTS_N_INSNS (1), /* cost of movzx */
1545 16, /* "large" insn */
1546 17, /* MOVE_RATIO */
1547 4, /* cost for loading QImode using movzbl */
1548 {4, 4, 4}, /* cost of loading integer registers
1549 in QImode, HImode and SImode.
1550 Relative to reg-reg move (2). */
1551 {4, 4, 4}, /* cost of storing integer registers */
1552 3, /* cost of reg,reg fld/fst */
1553 {12, 12, 12}, /* cost of loading fp registers
1554 in SFmode, DFmode and XFmode */
1555 {4, 4, 4}, /* cost of storing fp registers
1556 in SFmode, DFmode and XFmode */
1557 6, /* cost of moving MMX register */
1558 {12, 12}, /* cost of loading MMX registers
1559 in SImode and DImode */
1560 {12, 12}, /* cost of storing MMX registers
1561 in SImode and DImode */
1562 6, /* cost of moving SSE register */
1563 {12, 12, 12}, /* cost of loading SSE registers
1564 in SImode, DImode and TImode */
1565 {12, 12, 12}, /* cost of storing SSE registers
1566 in SImode, DImode and TImode */
1567 8, /* MMX or SSE register to integer */
1568 8, /* size of l1 cache. */
1569 1024, /* size of l2 cache. */
1570 128, /* size of prefetch block */
1571 8, /* number of parallel prefetches */
1572 1, /* Branch cost */
1573 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1574 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1575 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1576 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1577 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1578 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1581 1, /* scalar_stmt_cost. */
1582 1, /* scalar load_cost. */
1583 1, /* scalar_store_cost. */
1584 1, /* vec_stmt_cost. */
1585 1, /* vec_to_scalar_cost. */
1586 1, /* scalar_to_vec_cost. */
1587 1, /* vec_align_load_cost. */
1588 2, /* vec_unalign_load_cost. */
1589 1, /* vec_store_cost. */
1590 3, /* cond_taken_branch_cost. */
1591 1, /* cond_not_taken_branch_cost. */
1594 static stringop_algs atom_memcpy
[2] = {
1595 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1596 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1597 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1598 static stringop_algs atom_memset
[2] = {
1599 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1600 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1601 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1602 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1604 struct processor_costs atom_cost
= {
1605 COSTS_N_INSNS (1), /* cost of an add instruction */
1606 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1607 COSTS_N_INSNS (1), /* variable shift costs */
1608 COSTS_N_INSNS (1), /* constant shift costs */
1609 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1610 COSTS_N_INSNS (4), /* HI */
1611 COSTS_N_INSNS (3), /* SI */
1612 COSTS_N_INSNS (4), /* DI */
1613 COSTS_N_INSNS (2)}, /* other */
1614 0, /* cost of multiply per each bit set */
1615 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1616 COSTS_N_INSNS (26), /* HI */
1617 COSTS_N_INSNS (42), /* SI */
1618 COSTS_N_INSNS (74), /* DI */
1619 COSTS_N_INSNS (74)}, /* other */
1620 COSTS_N_INSNS (1), /* cost of movsx */
1621 COSTS_N_INSNS (1), /* cost of movzx */
1622 8, /* "large" insn */
1623 17, /* MOVE_RATIO */
1624 4, /* cost for loading QImode using movzbl */
1625 {4, 4, 4}, /* cost of loading integer registers
1626 in QImode, HImode and SImode.
1627 Relative to reg-reg move (2). */
1628 {4, 4, 4}, /* cost of storing integer registers */
1629 4, /* cost of reg,reg fld/fst */
1630 {12, 12, 12}, /* cost of loading fp registers
1631 in SFmode, DFmode and XFmode */
1632 {6, 6, 8}, /* cost of storing fp registers
1633 in SFmode, DFmode and XFmode */
1634 2, /* cost of moving MMX register */
1635 {8, 8}, /* cost of loading MMX registers
1636 in SImode and DImode */
1637 {8, 8}, /* cost of storing MMX registers
1638 in SImode and DImode */
1639 2, /* cost of moving SSE register */
1640 {8, 8, 8}, /* cost of loading SSE registers
1641 in SImode, DImode and TImode */
1642 {8, 8, 8}, /* cost of storing SSE registers
1643 in SImode, DImode and TImode */
1644 5, /* MMX or SSE register to integer */
1645 32, /* size of l1 cache. */
1646 256, /* size of l2 cache. */
1647 64, /* size of prefetch block */
1648 6, /* number of parallel prefetches */
1649 3, /* Branch cost */
1650 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1651 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1652 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1653 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1654 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1655 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1658 1, /* scalar_stmt_cost. */
1659 1, /* scalar load_cost. */
1660 1, /* scalar_store_cost. */
1661 1, /* vec_stmt_cost. */
1662 1, /* vec_to_scalar_cost. */
1663 1, /* scalar_to_vec_cost. */
1664 1, /* vec_align_load_cost. */
1665 2, /* vec_unalign_load_cost. */
1666 1, /* vec_store_cost. */
1667 3, /* cond_taken_branch_cost. */
1668 1, /* cond_not_taken_branch_cost. */
1671 static stringop_algs slm_memcpy
[2] = {
1672 {libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1673 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1674 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1675 static stringop_algs slm_memset
[2] = {
1676 {libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1677 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1678 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1679 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
1681 struct processor_costs slm_cost
= {
1682 COSTS_N_INSNS (1), /* cost of an add instruction */
1683 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1684 COSTS_N_INSNS (1), /* variable shift costs */
1685 COSTS_N_INSNS (1), /* constant shift costs */
1686 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1687 COSTS_N_INSNS (3), /* HI */
1688 COSTS_N_INSNS (3), /* SI */
1689 COSTS_N_INSNS (4), /* DI */
1690 COSTS_N_INSNS (2)}, /* other */
1691 0, /* cost of multiply per each bit set */
1692 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1693 COSTS_N_INSNS (26), /* HI */
1694 COSTS_N_INSNS (42), /* SI */
1695 COSTS_N_INSNS (74), /* DI */
1696 COSTS_N_INSNS (74)}, /* other */
1697 COSTS_N_INSNS (1), /* cost of movsx */
1698 COSTS_N_INSNS (1), /* cost of movzx */
1699 8, /* "large" insn */
1700 17, /* MOVE_RATIO */
1701 4, /* cost for loading QImode using movzbl */
1702 {4, 4, 4}, /* cost of loading integer registers
1703 in QImode, HImode and SImode.
1704 Relative to reg-reg move (2). */
1705 {4, 4, 4}, /* cost of storing integer registers */
1706 4, /* cost of reg,reg fld/fst */
1707 {12, 12, 12}, /* cost of loading fp registers
1708 in SFmode, DFmode and XFmode */
1709 {6, 6, 8}, /* cost of storing fp registers
1710 in SFmode, DFmode and XFmode */
1711 2, /* cost of moving MMX register */
1712 {8, 8}, /* cost of loading MMX registers
1713 in SImode and DImode */
1714 {8, 8}, /* cost of storing MMX registers
1715 in SImode and DImode */
1716 2, /* cost of moving SSE register */
1717 {8, 8, 8}, /* cost of loading SSE registers
1718 in SImode, DImode and TImode */
1719 {8, 8, 8}, /* cost of storing SSE registers
1720 in SImode, DImode and TImode */
1721 5, /* MMX or SSE register to integer */
1722 32, /* size of l1 cache. */
1723 256, /* size of l2 cache. */
1724 64, /* size of prefetch block */
1725 6, /* number of parallel prefetches */
1726 3, /* Branch cost */
1727 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1728 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1729 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1730 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1731 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1732 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 1, /* scalar_stmt_cost. */
1736 1, /* scalar load_cost. */
1737 1, /* scalar_store_cost. */
1738 1, /* vec_stmt_cost. */
1739 1, /* vec_to_scalar_cost. */
1740 1, /* scalar_to_vec_cost. */
1741 1, /* vec_align_load_cost. */
1742 2, /* vec_unalign_load_cost. */
1743 1, /* vec_store_cost. */
1744 3, /* cond_taken_branch_cost. */
1745 1, /* cond_not_taken_branch_cost. */
1748 /* Generic should produce code tuned for Core-i7 (and newer chips)
1749 and btver1 (and newer chips). */
1751 static stringop_algs generic_memcpy
[2] = {
1752 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1753 {-1, libcall
, false}}},
1754 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1755 {-1, libcall
, false}}}};
1756 static stringop_algs generic_memset
[2] = {
1757 {libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1758 {-1, libcall
, false}}},
1759 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1760 {-1, libcall
, false}}}};
1762 struct processor_costs generic_cost
= {
1763 COSTS_N_INSNS (1), /* cost of an add instruction */
1764 /* On all chips taken into consideration lea is 2 cycles and more. With
1765 this cost however our current implementation of synth_mult results in
1766 use of unnecessary temporary registers causing regression on several
1767 SPECfp benchmarks. */
1768 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1769 COSTS_N_INSNS (1), /* variable shift costs */
1770 COSTS_N_INSNS (1), /* constant shift costs */
1771 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1772 COSTS_N_INSNS (4), /* HI */
1773 COSTS_N_INSNS (3), /* SI */
1774 COSTS_N_INSNS (4), /* DI */
1775 COSTS_N_INSNS (2)}, /* other */
1776 0, /* cost of multiply per each bit set */
1777 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1778 COSTS_N_INSNS (26), /* HI */
1779 COSTS_N_INSNS (42), /* SI */
1780 COSTS_N_INSNS (74), /* DI */
1781 COSTS_N_INSNS (74)}, /* other */
1782 COSTS_N_INSNS (1), /* cost of movsx */
1783 COSTS_N_INSNS (1), /* cost of movzx */
1784 8, /* "large" insn */
1785 17, /* MOVE_RATIO */
1786 4, /* cost for loading QImode using movzbl */
1787 {4, 4, 4}, /* cost of loading integer registers
1788 in QImode, HImode and SImode.
1789 Relative to reg-reg move (2). */
1790 {4, 4, 4}, /* cost of storing integer registers */
1791 4, /* cost of reg,reg fld/fst */
1792 {12, 12, 12}, /* cost of loading fp registers
1793 in SFmode, DFmode and XFmode */
1794 {6, 6, 8}, /* cost of storing fp registers
1795 in SFmode, DFmode and XFmode */
1796 2, /* cost of moving MMX register */
1797 {8, 8}, /* cost of loading MMX registers
1798 in SImode and DImode */
1799 {8, 8}, /* cost of storing MMX registers
1800 in SImode and DImode */
1801 2, /* cost of moving SSE register */
1802 {8, 8, 8}, /* cost of loading SSE registers
1803 in SImode, DImode and TImode */
1804 {8, 8, 8}, /* cost of storing SSE registers
1805 in SImode, DImode and TImode */
1806 5, /* MMX or SSE register to integer */
1807 32, /* size of l1 cache. */
1808 512, /* size of l2 cache. */
1809 64, /* size of prefetch block */
1810 6, /* number of parallel prefetches */
1811 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1812 value is increased to the perhaps more appropriate value of 5. */
1813 3, /* Branch cost */
1814 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1815 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1816 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1817 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1818 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1819 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1822 1, /* scalar_stmt_cost. */
1823 1, /* scalar load_cost. */
1824 1, /* scalar_store_cost. */
1825 1, /* vec_stmt_cost. */
1826 1, /* vec_to_scalar_cost. */
1827 1, /* scalar_to_vec_cost. */
1828 1, /* vec_align_load_cost. */
1829 2, /* vec_unalign_load_cost. */
1830 1, /* vec_store_cost. */
1831 3, /* cond_taken_branch_cost. */
1832 1, /* cond_not_taken_branch_cost. */
1835 /* core_cost should produce code tuned for Core family of CPUs. */
1836 static stringop_algs core_memcpy
[2] = {
1837 {libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1838 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1839 {-1, libcall
, false}}}};
1840 static stringop_algs core_memset
[2] = {
1841 {libcall
, {{6, loop_1_byte
, true},
1843 {8192, rep_prefix_4_byte
, true},
1844 {-1, libcall
, false}}},
1845 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1846 {-1, libcall
, false}}}};
1849 struct processor_costs core_cost
= {
1850 COSTS_N_INSNS (1), /* cost of an add instruction */
1851 /* On all chips taken into consideration lea is 2 cycles and more. With
1852 this cost however our current implementation of synth_mult results in
1853 use of unnecessary temporary registers causing regression on several
1854 SPECfp benchmarks. */
1855 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1856 COSTS_N_INSNS (1), /* variable shift costs */
1857 COSTS_N_INSNS (1), /* constant shift costs */
1858 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1859 COSTS_N_INSNS (4), /* HI */
1860 COSTS_N_INSNS (3), /* SI */
1861 COSTS_N_INSNS (4), /* DI */
1862 COSTS_N_INSNS (2)}, /* other */
1863 0, /* cost of multiply per each bit set */
1864 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1865 COSTS_N_INSNS (26), /* HI */
1866 COSTS_N_INSNS (42), /* SI */
1867 COSTS_N_INSNS (74), /* DI */
1868 COSTS_N_INSNS (74)}, /* other */
1869 COSTS_N_INSNS (1), /* cost of movsx */
1870 COSTS_N_INSNS (1), /* cost of movzx */
1871 8, /* "large" insn */
1872 17, /* MOVE_RATIO */
1873 4, /* cost for loading QImode using movzbl */
1874 {4, 4, 4}, /* cost of loading integer registers
1875 in QImode, HImode and SImode.
1876 Relative to reg-reg move (2). */
1877 {4, 4, 4}, /* cost of storing integer registers */
1878 4, /* cost of reg,reg fld/fst */
1879 {12, 12, 12}, /* cost of loading fp registers
1880 in SFmode, DFmode and XFmode */
1881 {6, 6, 8}, /* cost of storing fp registers
1882 in SFmode, DFmode and XFmode */
1883 2, /* cost of moving MMX register */
1884 {8, 8}, /* cost of loading MMX registers
1885 in SImode and DImode */
1886 {8, 8}, /* cost of storing MMX registers
1887 in SImode and DImode */
1888 2, /* cost of moving SSE register */
1889 {8, 8, 8}, /* cost of loading SSE registers
1890 in SImode, DImode and TImode */
1891 {8, 8, 8}, /* cost of storing SSE registers
1892 in SImode, DImode and TImode */
1893 5, /* MMX or SSE register to integer */
1894 64, /* size of l1 cache. */
1895 512, /* size of l2 cache. */
1896 64, /* size of prefetch block */
1897 6, /* number of parallel prefetches */
1898 /* FIXME perhaps more appropriate value is 5. */
1899 3, /* Branch cost */
1900 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1901 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1902 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1903 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1904 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1905 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1908 1, /* scalar_stmt_cost. */
1909 1, /* scalar load_cost. */
1910 1, /* scalar_store_cost. */
1911 1, /* vec_stmt_cost. */
1912 1, /* vec_to_scalar_cost. */
1913 1, /* scalar_to_vec_cost. */
1914 1, /* vec_align_load_cost. */
1915 2, /* vec_unalign_load_cost. */
1916 1, /* vec_store_cost. */
1917 3, /* cond_taken_branch_cost. */
1918 1, /* cond_not_taken_branch_cost. */
1922 /* Set by -mtune. */
1923 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1925 /* Set by -mtune or -Os. */
1926 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1928 /* Processor feature/optimization bitmasks. */
1929 #define m_386 (1<<PROCESSOR_I386)
1930 #define m_486 (1<<PROCESSOR_I486)
1931 #define m_PENT (1<<PROCESSOR_PENTIUM)
1932 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1933 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1934 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1935 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1936 #define m_CORE2 (1<<PROCESSOR_CORE2)
1937 #define m_COREI7 (1<<PROCESSOR_COREI7)
1938 #define m_COREI7_AVX (1<<PROCESSOR_COREI7_AVX)
1939 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1940 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_COREI7_AVX | m_HASWELL)
1941 #define m_ATOM (1<<PROCESSOR_ATOM)
1942 #define m_SLM (1<<PROCESSOR_SLM)
1944 #define m_GEODE (1<<PROCESSOR_GEODE)
1945 #define m_K6 (1<<PROCESSOR_K6)
1946 #define m_K6_GEODE (m_K6 | m_GEODE)
1947 #define m_K8 (1<<PROCESSOR_K8)
1948 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1949 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1950 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1951 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1952 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1953 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1954 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
1955 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1956 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1957 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
1958 #define m_BTVER (m_BTVER1 | m_BTVER2)
1959 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1961 #define m_GENERIC (1<<PROCESSOR_GENERIC)
1963 const char* ix86_tune_feature_names
[X86_TUNE_LAST
] = {
1965 #define DEF_TUNE(tune, name, selector) name,
1966 #include "x86-tune.def"
1970 /* Feature tests against the various tunings. */
1971 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1973 /* Feature tests against the various tunings used to create ix86_tune_features
1974 based on the processor mask. */
1975 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1977 #define DEF_TUNE(tune, name, selector) selector,
1978 #include "x86-tune.def"
1982 /* Feature tests against the various architecture variations. */
1983 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
1985 /* Feature tests against the various architecture variations, used to create
1986 ix86_arch_features based on the processor mask. */
1987 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
1988 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
1989 ~(m_386
| m_486
| m_PENT
| m_K6
),
1991 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1994 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1997 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2000 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2004 /* In case the average insn count for single function invocation is
2005 lower than this constant, emit fast (but longer) prologue and
2007 #define FAST_PROLOGUE_INSN_COUNT 20
2009 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2010 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2011 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2012 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2014 /* Array of the smallest class containing reg number REGNO, indexed by
2015 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2017 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2019 /* ax, dx, cx, bx */
2020 AREG
, DREG
, CREG
, BREG
,
2021 /* si, di, bp, sp */
2022 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2024 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2025 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2028 /* flags, fpsr, fpcr, frame */
2029 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2031 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2034 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2037 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2038 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2039 /* SSE REX registers */
2040 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2042 /* AVX-512 SSE registers */
2043 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2044 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2045 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2046 EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
, EVEX_SSE_REGS
,
2047 /* Mask registers. */
2048 MASK_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2049 MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
, MASK_EVEX_REGS
,
2052 /* The "default" register map used in 32bit mode. */
2054 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2056 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2057 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2058 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2059 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2060 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2061 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2062 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2063 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2064 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2065 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2068 /* The "default" register map used in 64bit mode. */
2070 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2072 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2073 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2074 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2075 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2076 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2077 8,9,10,11,12,13,14,15, /* extended integer registers */
2078 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2079 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2080 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2081 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2084 /* Define the register numbers to be used in Dwarf debugging information.
2085 The SVR4 reference port C compiler uses the following register numbers
2086 in its Dwarf output code:
2087 0 for %eax (gcc regno = 0)
2088 1 for %ecx (gcc regno = 2)
2089 2 for %edx (gcc regno = 1)
2090 3 for %ebx (gcc regno = 3)
2091 4 for %esp (gcc regno = 7)
2092 5 for %ebp (gcc regno = 6)
2093 6 for %esi (gcc regno = 4)
2094 7 for %edi (gcc regno = 5)
2095 The following three DWARF register numbers are never generated by
2096 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2097 believes these numbers have these meanings.
2098 8 for %eip (no gcc equivalent)
2099 9 for %eflags (gcc regno = 17)
2100 10 for %trapno (no gcc equivalent)
2101 It is not at all clear how we should number the FP stack registers
2102 for the x86 architecture. If the version of SDB on x86/svr4 were
2103 a bit less brain dead with respect to floating-point then we would
2104 have a precedent to follow with respect to DWARF register numbers
2105 for x86 FP registers, but the SDB on x86/svr4 is so completely
2106 broken with respect to FP registers that it is hardly worth thinking
2107 of it as something to strive for compatibility with.
2108 The version of x86/svr4 SDB I have at the moment does (partially)
2109 seem to believe that DWARF register number 11 is associated with
2110 the x86 register %st(0), but that's about all. Higher DWARF
2111 register numbers don't seem to be associated with anything in
2112 particular, and even for DWARF regno 11, SDB only seems to under-
2113 stand that it should say that a variable lives in %st(0) (when
2114 asked via an `=' command) if we said it was in DWARF regno 11,
2115 but SDB still prints garbage when asked for the value of the
2116 variable in question (via a `/' command).
2117 (Also note that the labels SDB prints for various FP stack regs
2118 when doing an `x' command are all wrong.)
2119 Note that these problems generally don't affect the native SVR4
2120 C compiler because it doesn't allow the use of -O with -g and
2121 because when it is *not* optimizing, it allocates a memory
2122 location for each floating-point variable, and the memory
2123 location is what gets described in the DWARF AT_location
2124 attribute for the variable in question.
2125 Regardless of the severe mental illness of the x86/svr4 SDB, we
2126 do something sensible here and we use the following DWARF
2127 register numbers. Note that these are all stack-top-relative
2129 11 for %st(0) (gcc regno = 8)
2130 12 for %st(1) (gcc regno = 9)
2131 13 for %st(2) (gcc regno = 10)
2132 14 for %st(3) (gcc regno = 11)
2133 15 for %st(4) (gcc regno = 12)
2134 16 for %st(5) (gcc regno = 13)
2135 17 for %st(6) (gcc regno = 14)
2136 18 for %st(7) (gcc regno = 15)
2138 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2140 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2141 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2142 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2152 /* Define parameter passing and return registers. */
2154 static int const x86_64_int_parameter_registers
[6] =
2156 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2159 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2161 CX_REG
, DX_REG
, R8_REG
, R9_REG
2164 static int const x86_64_int_return_registers
[4] =
2166 AX_REG
, DX_REG
, DI_REG
, SI_REG
2169 /* Additional registers that are clobbered by SYSV calls. */
2171 int const x86_64_ms_sysv_extra_clobbered_registers
[12] =
2175 XMM8_REG
, XMM9_REG
, XMM10_REG
, XMM11_REG
,
2176 XMM12_REG
, XMM13_REG
, XMM14_REG
, XMM15_REG
2179 /* Define the structure for the machine field in struct function. */
2181 struct GTY(()) stack_local_entry
{
2182 unsigned short mode
;
2185 struct stack_local_entry
*next
;
2188 /* Structure describing stack frame layout.
2189 Stack grows downward:
2195 saved static chain if ix86_static_chain_on_stack
2197 saved frame pointer if frame_pointer_needed
2198 <- HARD_FRAME_POINTER
2204 <- sse_regs_save_offset
2207 [va_arg registers] |
2211 [padding2] | = to_allocate
2220 int outgoing_arguments_size
;
2222 /* The offsets relative to ARG_POINTER. */
2223 HOST_WIDE_INT frame_pointer_offset
;
2224 HOST_WIDE_INT hard_frame_pointer_offset
;
2225 HOST_WIDE_INT stack_pointer_offset
;
2226 HOST_WIDE_INT hfp_save_offset
;
2227 HOST_WIDE_INT reg_save_offset
;
2228 HOST_WIDE_INT sse_reg_save_offset
;
2230 /* When save_regs_using_mov is set, emit prologue using
2231 move instead of push instructions. */
2232 bool save_regs_using_mov
;
2235 /* Which cpu are we scheduling for. */
2236 enum attr_cpu ix86_schedule
;
2238 /* Which cpu are we optimizing for. */
2239 enum processor_type ix86_tune
;
2241 /* Which instruction set architecture to use. */
2242 enum processor_type ix86_arch
;
2244 /* True if processor has SSE prefetch instruction. */
2245 unsigned char x86_prefetch_sse
;
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
2251 static rtx (*ix86_gen_leave
) (void);
2252 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2253 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2254 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2255 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2256 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2257 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2258 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2259 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2260 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2261 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2262 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2264 /* Preferred alignment for stack boundary in bits. */
2265 unsigned int ix86_preferred_stack_boundary
;
2267 /* Alignment for incoming stack boundary in bits specified at
2269 static unsigned int ix86_user_incoming_stack_boundary
;
2271 /* Default alignment for incoming stack boundary in bits. */
2272 static unsigned int ix86_default_incoming_stack_boundary
;
2274 /* Alignment for incoming stack boundary in bits. */
2275 unsigned int ix86_incoming_stack_boundary
;
2277 /* Calling abi specific va_list type nodes. */
2278 static GTY(()) tree sysv_va_list_type_node
;
2279 static GTY(()) tree ms_va_list_type_node
;
2281 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2282 char internal_label_prefix
[16];
2283 int internal_label_prefix_len
;
2285 /* Fence to use after loop using movnt. */
2288 /* Register class used for passing given 64bit part of the argument.
2289 These represent classes as documented by the PS ABI, with the exception
2290 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2291 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2293 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2294 whenever possible (upper half does contain padding). */
2295 enum x86_64_reg_class
2298 X86_64_INTEGER_CLASS
,
2299 X86_64_INTEGERSI_CLASS
,
2306 X86_64_COMPLEX_X87_CLASS
,
2310 #define MAX_CLASSES 4
2312 /* Table of constants used by fldpi, fldln2, etc.... */
2313 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2314 static bool ext_80387_constants_init
= 0;
2317 static struct machine_function
* ix86_init_machine_status (void);
2318 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2319 static bool ix86_function_value_regno_p (const unsigned int);
2320 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2322 static rtx
ix86_static_chain (const_tree
, bool);
2323 static int ix86_function_regparm (const_tree
, const_tree
);
2324 static void ix86_compute_frame_layout (struct ix86_frame
*);
2325 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2327 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2328 static tree
ix86_canonical_va_list_type (tree
);
2329 static void predict_jump (int);
2330 static unsigned int split_stack_prologue_scratch_regno (void);
2331 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2333 enum ix86_function_specific_strings
2335 IX86_FUNCTION_SPECIFIC_ARCH
,
2336 IX86_FUNCTION_SPECIFIC_TUNE
,
2337 IX86_FUNCTION_SPECIFIC_MAX
2340 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2341 const char *, enum fpmath_unit
, bool);
2342 static void ix86_function_specific_save (struct cl_target_option
*,
2343 struct gcc_options
*opts
);
2344 static void ix86_function_specific_restore (struct gcc_options
*opts
,
2345 struct cl_target_option
*);
2346 static void ix86_function_specific_print (FILE *, int,
2347 struct cl_target_option
*);
2348 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2349 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2350 struct gcc_options
*,
2351 struct gcc_options
*,
2352 struct gcc_options
*);
2353 static bool ix86_can_inline_p (tree
, tree
);
2354 static void ix86_set_current_function (tree
);
2355 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2357 static enum calling_abi
ix86_function_abi (const_tree
);
2360 #ifndef SUBTARGET32_DEFAULT_CPU
2361 #define SUBTARGET32_DEFAULT_CPU "i386"
2364 /* Whether -mtune= or -march= were specified */
2365 static int ix86_tune_defaulted
;
2366 static int ix86_arch_specified
;
2368 /* Vectorization library interface and handlers. */
2369 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2371 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2372 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2374 /* Processor target table, indexed by processor number */
2377 const struct processor_costs
*cost
; /* Processor costs */
2378 const int align_loop
; /* Default alignments. */
2379 const int align_loop_max_skip
;
2380 const int align_jump
;
2381 const int align_jump_max_skip
;
2382 const int align_func
;
2385 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2387 {&i386_cost
, 4, 3, 4, 3, 4},
2388 {&i486_cost
, 16, 15, 16, 15, 16},
2389 {&pentium_cost
, 16, 7, 16, 7, 16},
2390 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2391 {&geode_cost
, 0, 0, 0, 0, 0},
2392 {&k6_cost
, 32, 7, 32, 7, 32},
2393 {&athlon_cost
, 16, 7, 16, 7, 16},
2394 {&pentium4_cost
, 0, 0, 0, 0, 0},
2395 {&k8_cost
, 16, 7, 16, 7, 16},
2396 {&nocona_cost
, 0, 0, 0, 0, 0},
2398 {&core_cost
, 16, 10, 16, 10, 16},
2400 {&core_cost
, 16, 10, 16, 10, 16},
2402 {&core_cost
, 16, 10, 16, 10, 16},
2404 {&core_cost
, 16, 10, 16, 10, 16},
2405 {&generic_cost
, 16, 10, 16, 10, 16},
2406 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2407 {&bdver1_cost
, 16, 10, 16, 7, 11},
2408 {&bdver2_cost
, 16, 10, 16, 7, 11},
2409 {&bdver3_cost
, 16, 10, 16, 7, 11},
2410 {&bdver4_cost
, 16, 10, 16, 7, 11},
2411 {&btver1_cost
, 16, 10, 16, 7, 11},
2412 {&btver2_cost
, 16, 10, 16, 7, 11},
2413 {&atom_cost
, 16, 15, 16, 7, 16},
2414 {&slm_cost
, 16, 15, 16, 7, 16}
2417 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2455 gate_insert_vzeroupper (void)
2457 return TARGET_AVX
&& !TARGET_AVX512F
&& TARGET_VZEROUPPER
;
2461 rest_of_handle_insert_vzeroupper (void)
2465 /* vzeroupper instructions are inserted immediately after reload to
2466 account for possible spills from 256bit registers. The pass
2467 reuses mode switching infrastructure by re-running mode insertion
2468 pass, so disable entities that have already been processed. */
2469 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2470 ix86_optimize_mode_switching
[i
] = 0;
2472 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2474 /* Call optimize_mode_switching. */
2475 g
->get_passes ()->execute_pass_mode_switching ();
2481 const pass_data pass_data_insert_vzeroupper
=
2483 RTL_PASS
, /* type */
2484 "vzeroupper", /* name */
2485 OPTGROUP_NONE
, /* optinfo_flags */
2486 true, /* has_gate */
2487 true, /* has_execute */
2488 TV_NONE
, /* tv_id */
2489 0, /* properties_required */
2490 0, /* properties_provided */
2491 0, /* properties_destroyed */
2492 0, /* todo_flags_start */
2493 ( TODO_df_finish
| TODO_verify_rtl_sharing
| 0 ), /* todo_flags_finish */
2496 class pass_insert_vzeroupper
: public rtl_opt_pass
2499 pass_insert_vzeroupper(gcc::context
*ctxt
)
2500 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
2503 /* opt_pass methods: */
2504 bool gate () { return gate_insert_vzeroupper (); }
2505 unsigned int execute () { return rest_of_handle_insert_vzeroupper (); }
2507 }; // class pass_insert_vzeroupper
2512 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
2514 return new pass_insert_vzeroupper (ctxt
);
2517 /* Return true if a red-zone is in use. */
2520 ix86_using_red_zone (void)
2522 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2525 /* Return a string that documents the current -m options. The caller is
2526 responsible for freeing the string. */
2529 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2530 const char *tune
, enum fpmath_unit fpmath
,
2533 struct ix86_target_opts
2535 const char *option
; /* option string */
2536 HOST_WIDE_INT mask
; /* isa mask options */
2539 /* This table is ordered so that options like -msse4.2 that imply
2540 preceding options while match those first. */
2541 static struct ix86_target_opts isa_opts
[] =
2543 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2544 { "-mfma", OPTION_MASK_ISA_FMA
},
2545 { "-mxop", OPTION_MASK_ISA_XOP
},
2546 { "-mlwp", OPTION_MASK_ISA_LWP
},
2547 { "-mavx512f", OPTION_MASK_ISA_AVX512F
},
2548 { "-mavx512er", OPTION_MASK_ISA_AVX512ER
},
2549 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD
},
2550 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF
},
2551 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2552 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2553 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2554 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2555 { "-msse3", OPTION_MASK_ISA_SSE3
},
2556 { "-msse2", OPTION_MASK_ISA_SSE2
},
2557 { "-msse", OPTION_MASK_ISA_SSE
},
2558 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2559 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2560 { "-mmmx", OPTION_MASK_ISA_MMX
},
2561 { "-mabm", OPTION_MASK_ISA_ABM
},
2562 { "-mbmi", OPTION_MASK_ISA_BMI
},
2563 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2564 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2565 { "-mhle", OPTION_MASK_ISA_HLE
},
2566 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2567 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2568 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2569 { "-madx", OPTION_MASK_ISA_ADX
},
2570 { "-mtbm", OPTION_MASK_ISA_TBM
},
2571 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2572 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2573 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2574 { "-maes", OPTION_MASK_ISA_AES
},
2575 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2576 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2577 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2578 { "-mf16c", OPTION_MASK_ISA_F16C
},
2579 { "-mrtm", OPTION_MASK_ISA_RTM
},
2580 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2581 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2585 static struct ix86_target_opts flag_opts
[] =
2587 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2588 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2589 { "-m80387", MASK_80387
},
2590 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2591 { "-malign-double", MASK_ALIGN_DOUBLE
},
2592 { "-mcld", MASK_CLD
},
2593 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2594 { "-mieee-fp", MASK_IEEE_FP
},
2595 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2596 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2597 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2598 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2599 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2600 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2601 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2602 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2603 { "-mrecip", MASK_RECIP
},
2604 { "-mrtd", MASK_RTD
},
2605 { "-msseregparm", MASK_SSEREGPARM
},
2606 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2607 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2608 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2609 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2610 { "-mvzeroupper", MASK_VZEROUPPER
},
2611 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2612 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2613 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2616 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2619 char target_other
[40];
2629 memset (opts
, '\0', sizeof (opts
));
2631 /* Add -march= option. */
2634 opts
[num
][0] = "-march=";
2635 opts
[num
++][1] = arch
;
2638 /* Add -mtune= option. */
2641 opts
[num
][0] = "-mtune=";
2642 opts
[num
++][1] = tune
;
2645 /* Add -m32/-m64/-mx32. */
2646 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2648 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2652 isa
&= ~ (OPTION_MASK_ISA_64BIT
2653 | OPTION_MASK_ABI_64
2654 | OPTION_MASK_ABI_X32
);
2658 opts
[num
++][0] = abi
;
2660 /* Pick out the options in isa options. */
2661 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2663 if ((isa
& isa_opts
[i
].mask
) != 0)
2665 opts
[num
++][0] = isa_opts
[i
].option
;
2666 isa
&= ~ isa_opts
[i
].mask
;
2670 if (isa
&& add_nl_p
)
2672 opts
[num
++][0] = isa_other
;
2673 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2677 /* Add flag options. */
2678 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2680 if ((flags
& flag_opts
[i
].mask
) != 0)
2682 opts
[num
++][0] = flag_opts
[i
].option
;
2683 flags
&= ~ flag_opts
[i
].mask
;
2687 if (flags
&& add_nl_p
)
2689 opts
[num
++][0] = target_other
;
2690 sprintf (target_other
, "(other flags: %#x)", flags
);
2693 /* Add -fpmath= option. */
2696 opts
[num
][0] = "-mfpmath=";
2697 switch ((int) fpmath
)
2700 opts
[num
++][1] = "387";
2704 opts
[num
++][1] = "sse";
2707 case FPMATH_387
| FPMATH_SSE
:
2708 opts
[num
++][1] = "sse+387";
2720 gcc_assert (num
< ARRAY_SIZE (opts
));
2722 /* Size the string. */
2724 sep_len
= (add_nl_p
) ? 3 : 1;
2725 for (i
= 0; i
< num
; i
++)
2728 for (j
= 0; j
< 2; j
++)
2730 len
+= strlen (opts
[i
][j
]);
2733 /* Build the string. */
2734 ret
= ptr
= (char *) xmalloc (len
);
2737 for (i
= 0; i
< num
; i
++)
2741 for (j
= 0; j
< 2; j
++)
2742 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2749 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2757 for (j
= 0; j
< 2; j
++)
2760 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2762 line_len
+= len2
[j
];
2767 gcc_assert (ret
+ len
>= ptr
);
2772 /* Return true, if profiling code should be emitted before
2773 prologue. Otherwise it returns false.
2774 Note: For x86 with "hotfix" it is sorried. */
2776 ix86_profile_before_prologue (void)
2778 return flag_fentry
!= 0;
2781 /* Function that is callable from the debugger to print the current
   options to stderr.  */
/* NOTE(review): lossy extraction -- the remaining arguments of the
   ix86_target_string call, the if/else selecting between the two
   output paths, and any cleanup of OPTS are not visible here; confirm
   against the original file before compiling.  */
2783 void ATTRIBUTE_UNUSED
2784 ix86_debug_options (void)
2786 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2787 ix86_arch_string
, ix86_tune_string
,
/* Emit the reconstructed option string when one was built.  */
2792 fprintf (stderr
, "%s\n\n", opts
);
/* Fallback output when no option string could be built.  */
2796 fputs ("<no options>\n\n", stderr
);
/* Table of stringop algorithm names, generated from stringop.def via
   the DEF_ALG x-macro; its index order is what the (stringop_alg)
   cast in the strategy parser relies on.
   NOTE(review): the closing of this initializer (and presumably an
   #undef DEF_ALG) is not visible in this extraction.  */
2801 static const char *stringop_alg_names
[] = {
2803 #define DEF_ALG(alg, name) #name,
2804 #include "stringop.def"
2809 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2810 The string is of the following form (or comma separated list of it):
2812 strategy_alg:max_size:[align|noalign]
2814 where the full size range for the strategy is either [0, max_size] or
2815 [min_size, max_size], in which min_size is the max_size + 1 of the
2816 preceding range. The last size range must have max_size == -1.
2821 -mmemcpy-strategy=libcall:-1:noalign
2823 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2827 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2829 This is to tell the compiler to use the following strategy for memset
2830 1) when the expected size is between [1, 16], use rep_8byte strategy;
2831 2) when the size is between [17, 2048], use vector_loop;
2832 3) when the size is > 2048, use libcall. */
/* Parse the value of -mmemcpy-strategy= / -mmemset-strategy= (format
   documented in the comment above) and override, range by range, the
   otherwise-const default algorithm table taken from ix86_cost.
   NOTE(review): lossy extraction -- the function's return type, the
   local declarations (n, i, maxs, alg, alg_name[], align[]), braces
   and the do/while opening are not visible here; the const_cast<>
   usage below shows this translation unit is compiled as C++.  */
2834 struct stringop_size_range
2842 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
2844 const struct stringop_algs
*default_algs
;
2845 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
2846 char *curr_range_str
, *next_range_str
;
/* Pick the memset or memcpy cost table for the current bitness.  */
2850 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
2852 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
2854 curr_range_str
= strategy_str
;
/* Split the comma-separated list in place, one range per iteration.  */
2862 next_range_str
= strchr (curr_range_str
, ',');
2864 *next_range_str
++ = '\0';
/* Each range is "alg:max:align"; the %20 / %10 field widths bound
   the alg_name and align buffers.  */
2866 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
2867 alg_name
, &maxs
, align
))
2869 error ("wrong arg %s to option %s", curr_range_str
,
2870 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Ranges must be strictly increasing, except -1 which means
   "unbounded" and is only legal for the last range.  */
2874 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
2876 error ("size ranges of option %s should be increasing",
2877 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
/* Translate the textual algorithm name into the enum value via the
   generated name table.  */
2881 for (i
= 0; i
< last_alg
; i
++)
2883 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
2885 alg
= (stringop_alg
) i
;
2892 error ("wrong stringop strategy name %s specified for option %s",
2894 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2898 input_ranges
[n
].max
= maxs
;
2899 input_ranges
[n
].alg
= alg
;
2900 if (!strcmp (align
, "align"))
2901 input_ranges
[n
].noalign
= false;
2902 else if (!strcmp (align
, "noalign"))
2903 input_ranges
[n
].noalign
= true;
2906 error ("unknown alignment %s specified for option %s",
2907 align
, is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2911 curr_range_str
= next_range_str
;
2913 while (curr_range_str
);
/* The user must terminate the list with an unbounded (-1) range.  */
2915 if (input_ranges
[n
- 1].max
!= -1)
2917 error ("the max value for the last size range should be -1"
2919 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 if (n
> MAX_STRINGOP_ALGS
)
2925 error ("too many size ranges specified in option %s",
2926 is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 /* Now override the default algs array. */
2931 for (i
= 0; i
< n
; i
++)
2933 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
2934 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
2935 = input_ranges
[i
].alg
;
2936 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
2937 = input_ranges
[i
].noalign
;
2942 /* parse -mtune-ctrl= option. When DUMP is true,
2943 print the features that are explicitly set. */
/* NOTE(review): lossy extraction -- the return type, the declarations
   of i and clear, the statement that sets CLEAR on a '^' prefix, the
   do/while opening, braces, and the free of ORIG are not visible
   here; confirm against the original file before compiling.  */
2946 parse_mtune_ctrl_str (bool dump
)
/* Nothing to do when the user did not pass -mtune-ctrl=.  */
2948 if (!ix86_tune_ctrl_string
)
2951 char *next_feature_string
= NULL
;
/* Work on a writable copy so the list can be split in place.  */
2952 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
)
;
2953 char *orig
= curr_feature_string
;
2959 next_feature_string
= strchr (curr_feature_string
, ',');
2960 if (next_feature_string
)
2961 *next_feature_string
++ = '\0';
/* A leading '^' negates the feature (CLEAR is set -- assignment line
   not visible in this extraction); skip past the marker.  */
2962 if (*curr_feature_string
== '^')
2964 curr_feature_string
++;
/* Linear search of the known tuning-feature names.  */
2967 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
2969 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
2971 ix86_tune_features
[i
] = !clear
;
2973 fprintf (stderr
, "Explicitly %s feature %s\n",
2974 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
/* Loop ran off the end: the name was not recognized.  The "- 1"
   backs up over the stripped '^' so the error echoes the exact
   token the user wrote.  */
2978 if (i
== X86_TUNE_LAST
)
2979 error ("Unknown parameter to option -mtune-ctrl: %s",
2980 clear
? curr_feature_string
- 1 : curr_feature_string
);
2981 curr_feature_string
= next_feature_string
;
2983 while (curr_feature_string
);
2987 /* Helper function to set ix86_tune_features. IX86_TUNE is the
   selected processor; each feature is enabled when its bit for that
   processor is set in initial_ix86_tune_features.  When DUMP is true
   the resulting on/off table is printed to stderr.  */
/* NOTE(review): lossy extraction -- the return type, the declaration
   of i, braces, and the if/else structure around the code below are
   not visible here; confirm against the original file.  */
2991 set_ix86_tune_features (enum processor_type ix86_tune
, bool dump
)
/* One bit per processor in each initial_ix86_tune_features entry.  */
2993 unsigned int ix86_tune_mask
= 1u << ix86_tune
;
2996 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
/* -mtune=no-default zeroes everything instead of using defaults.  */
2998 if (ix86_tune_no_default
)
2999 ix86_tune_features
[i
] = 0;
3001 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3006 fprintf (stderr
, "List of x86 specific tuning parameter names:\n");
3007 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
3008 fprintf (stderr
, "%s : %s\n", ix86_tune_feature_names
[i
],
3009 ix86_tune_features
[i
] ? "on" : "off");
/* Finally apply any explicit -mtune-ctrl= overrides.  */
3012 parse_mtune_ctrl_str (dump
);
3016 /* Override various settings based on options. If MAIN_ARGS_P, the
3017 options are from the command line, otherwise they are from
3021 ix86_option_override_internal (bool main_args_p
,
3022 struct gcc_options
*opts
,
3023 struct gcc_options
*opts_set
)
3026 unsigned int ix86_arch_mask
;
3027 const bool ix86_tune_specified
= (opts
->x_ix86_tune_string
!= NULL
);
3032 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3033 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3034 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3035 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3036 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3037 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3038 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3039 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3040 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3041 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3042 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3043 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3044 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3045 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3046 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3047 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3048 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3049 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3050 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3051 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3052 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3053 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3054 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3055 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3056 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3057 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3058 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3059 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3060 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3061 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3062 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3063 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3064 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3065 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3066 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3067 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3068 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3069 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3070 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3071 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3072 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3073 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3074 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3075 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3077 /* if this reaches 64, need to widen struct pta flags below */
3081 const char *const name
; /* processor name or nickname. */
3082 const enum processor_type processor
;
3083 const enum attr_cpu schedule
;
3084 const unsigned HOST_WIDE_INT flags
;
3086 const processor_alias_table
[] =
3088 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3089 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3090 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3091 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3092 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3093 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3094 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3095 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3096 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3097 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3098 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3099 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3100 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3101 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3102 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3103 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3104 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3105 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3106 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3107 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3108 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3109 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3110 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3111 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3112 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3113 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3114 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3115 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3116 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
3117 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3118 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3119 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
3120 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3121 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3122 {"corei7-avx", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3123 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3124 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3125 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3126 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3127 {"core-avx-i", PROCESSOR_COREI7_AVX
, CPU_COREI7
,
3128 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3129 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3130 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3131 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3132 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
3133 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3134 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3135 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3136 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3137 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3139 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3140 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3141 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3142 {"slm", PROCESSOR_SLM
, CPU_SLM
,
3143 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3144 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_AES
3145 | PTA_PCLMUL
| PTA_RDRND
| PTA_MOVBE
| PTA_FXSR
},
3146 {"intel", PROCESSOR_SLM
, CPU_SLM
,
3147 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_SSSE3
3148 | PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_POPCNT
| PTA_FXSR
},
3149 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3150 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3151 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3152 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3153 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
| PTA_PRFCHW
},
3154 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3155 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3156 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3157 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
| PTA_PRFCHW
},
3158 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3159 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3160 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3161 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3162 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3163 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_PRFCHW
| PTA_FXSR
},
3164 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3165 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
| PTA_FXSR
},
3166 {"k8", PROCESSOR_K8
, CPU_K8
,
3167 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3168 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3169 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3170 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3171 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3172 {"opteron", PROCESSOR_K8
, CPU_K8
,
3173 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3174 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3175 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3176 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3177 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3178 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3179 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3180 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3181 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3182 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3183 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3184 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3185 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3186 | PTA_SSE2
| PTA_NO_SAHF
| PTA_PRFCHW
| PTA_FXSR
},
3187 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3188 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3189 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3190 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3191 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
| PTA_SSE2
3192 | PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_PRFCHW
| PTA_FXSR
},
3193 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3194 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3195 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3196 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3197 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3198 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3199 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3200 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3201 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3202 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3203 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
3204 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
3205 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3206 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3207 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3208 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3209 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3210 | PTA_XSAVEOPT
| PTA_FSGSBASE
},
3211 {"bdver4", PROCESSOR_BDVER4
, CPU_BDVER4
,
3212 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3213 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3214 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_AVX2
3215 | PTA_FMA4
| PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_BMI2
3216 | PTA_TBM
| PTA_F16C
| PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
3217 | PTA_XSAVE
| PTA_XSAVEOPT
| PTA_FSGSBASE
},
3218 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC
,
3219 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3220 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3221 | PTA_FXSR
| PTA_XSAVE
},
3222 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
3223 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3224 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3225 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3226 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3227 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3229 {"generic", PROCESSOR_GENERIC
, CPU_GENERIC
,
3231 | PTA_HLE
/* flags are only used for -march switch. */ },
3234 /* -mrecip options. */
3237 const char *string
; /* option name */
3238 unsigned int mask
; /* mask bits to set */
3240 const recip_options
[] =
3242 { "all", RECIP_MASK_ALL
},
3243 { "none", RECIP_MASK_NONE
},
3244 { "div", RECIP_MASK_DIV
},
3245 { "sqrt", RECIP_MASK_SQRT
},
3246 { "vec-div", RECIP_MASK_VEC_DIV
},
3247 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3250 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3252 /* Set up prefix/suffix so the error messages refer to either the command
3253 line argument, or the attribute(target). */
3262 prefix
= "option(\"";
3267 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3268 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3269 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3270 opts
->x_ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3271 #ifdef TARGET_BI_ARCH
3274 #if TARGET_BI_ARCH == 1
3275 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3276 is on and OPTION_MASK_ABI_X32 is off. We turn off
3277 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3279 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3280 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3282 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3283 on and OPTION_MASK_ABI_64 is off. We turn off
3284 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3286 if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3287 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3292 if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3294 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3295 OPTION_MASK_ABI_64 for TARGET_X32. */
3296 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3297 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3299 else if (TARGET_LP64_P (opts
->x_ix86_isa_flags
))
3301 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3302 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3303 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3304 opts
->x_ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3307 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3308 SUBTARGET_OVERRIDE_OPTIONS
;
3311 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3312 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3315 /* -fPIC is the default for x86_64. */
3316 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3317 opts
->x_flag_pic
= 2;
3319 /* Need to check -mtune=generic first. */
3320 if (opts
->x_ix86_tune_string
)
3322 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3323 || !strcmp (opts
->x_ix86_tune_string
, "i686")
3324 /* As special support for cross compilers we read -mtune=native
3325 as -mtune=generic. With native compilers we won't see the
3326 -mtune=native, as it was changed by the driver. */
3327 || !strcmp (opts
->x_ix86_tune_string
, "native"))
3329 opts
->x_ix86_tune_string
= "generic";
3331 /* If this call is for setting the option attribute, allow the
3332 generic that was previously set. */
3333 else if (!main_args_p
3334 && !strcmp (opts
->x_ix86_tune_string
, "generic"))
3336 else if (!strncmp (opts
->x_ix86_tune_string
, "generic", 7))
3337 error ("bad value (%s) for %stune=%s %s",
3338 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3339 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
3340 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3341 "%stune=k8%s or %stune=generic%s instead as appropriate",
3342 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3346 if (opts
->x_ix86_arch_string
)
3347 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
3348 if (!opts
->x_ix86_tune_string
)
3350 opts
->x_ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3351 ix86_tune_defaulted
= 1;
3354 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3355 or defaulted. We need to use a sensible tune option. */
3356 if (!strcmp (opts
->x_ix86_tune_string
, "generic")
3357 || !strcmp (opts
->x_ix86_tune_string
, "x86-64")
3358 || !strcmp (opts
->x_ix86_tune_string
, "i686"))
3360 opts
->x_ix86_tune_string
= "generic";
3364 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
3365 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3367 /* rep; movq isn't available in 32-bit code. */
3368 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3369 opts
->x_ix86_stringop_alg
= no_stringop
;
3372 if (!opts
->x_ix86_arch_string
)
3373 opts
->x_ix86_arch_string
3374 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3375 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3377 ix86_arch_specified
= 1;
3379 if (opts_set
->x_ix86_pmode
)
3381 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3382 && opts
->x_ix86_pmode
== PMODE_SI
)
3383 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3384 && opts
->x_ix86_pmode
== PMODE_DI
))
3385 error ("address mode %qs not supported in the %s bit mode",
3386 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
3387 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
3390 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
3391 ? PMODE_DI
: PMODE_SI
;
3393 if (!opts_set
->x_ix86_abi
)
3394 opts
->x_ix86_abi
= DEFAULT_ABI
;
3396 /* For targets using ms ABI enable ms-extensions, if not
3397 explicit turned off. For non-ms ABI we turn off this
3399 if (!opts_set
->x_flag_ms_extensions
)
3400 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
3402 if (opts_set
->x_ix86_cmodel
)
3404 switch (opts
->x_ix86_cmodel
)
3408 if (opts
->x_flag_pic
)
3409 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
3410 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3411 error ("code model %qs not supported in the %s bit mode",
3417 if (opts
->x_flag_pic
)
3418 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
3419 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3420 error ("code model %qs not supported in the %s bit mode",
3422 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3423 error ("code model %qs not supported in x32 mode",
3429 if (opts
->x_flag_pic
)
3430 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
3431 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3432 error ("code model %qs not supported in the %s bit mode",
3434 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
3435 error ("code model %qs not supported in x32 mode",
3440 if (opts
->x_flag_pic
)
3441 error ("code model %s does not support PIC mode", "32");
3442 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3443 error ("code model %qs not supported in the %s bit mode",
3448 if (opts
->x_flag_pic
)
3450 error ("code model %s does not support PIC mode", "kernel");
3451 opts
->x_ix86_cmodel
= CM_32
;
3453 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3454 error ("code model %qs not supported in the %s bit mode",
3464 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3465 use of rip-relative addressing. This eliminates fixups that
3466 would otherwise be needed if this object is to be placed in a
3467 DLL, and is essentially just as efficient as direct addressing. */
3468 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3469 && (TARGET_RDOS
|| TARGET_PECOFF
))
3470 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
3471 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3472 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3474 opts
->x_ix86_cmodel
= CM_32
;
3476 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
3478 error ("-masm=intel not supported in this configuration");
3479 opts
->x_ix86_asm_dialect
= ASM_ATT
;
3481 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
3482 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3483 sorry ("%i-bit mode not compiled in",
3484 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3486 for (i
= 0; i
< pta_size
; i
++)
3487 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
3489 ix86_schedule
= processor_alias_table
[i
].schedule
;
3490 ix86_arch
= processor_alias_table
[i
].processor
;
3491 /* Default cpu tuning to the architecture. */
3492 ix86_tune
= ix86_arch
;
3494 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3495 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3496 error ("CPU you selected does not support x86-64 "
3499 if (processor_alias_table
[i
].flags
& PTA_MMX
3500 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3501 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3502 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3503 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3504 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3505 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3506 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3507 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3508 if (processor_alias_table
[i
].flags
& PTA_SSE
3509 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3510 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3511 if (processor_alias_table
[i
].flags
& PTA_SSE2
3512 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3513 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3514 if (processor_alias_table
[i
].flags
& PTA_SSE3
3515 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3516 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3517 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3518 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3519 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3520 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3521 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3522 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3523 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3524 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3525 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3526 if (processor_alias_table
[i
].flags
& PTA_AVX
3527 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3528 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3529 if (processor_alias_table
[i
].flags
& PTA_AVX2
3530 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3531 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3532 if (processor_alias_table
[i
].flags
& PTA_FMA
3533 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3534 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3535 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3536 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3537 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3538 if (processor_alias_table
[i
].flags
& PTA_FMA4
3539 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3540 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3541 if (processor_alias_table
[i
].flags
& PTA_XOP
3542 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3543 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3544 if (processor_alias_table
[i
].flags
& PTA_LWP
3545 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3546 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3547 if (processor_alias_table
[i
].flags
& PTA_ABM
3548 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3549 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3550 if (processor_alias_table
[i
].flags
& PTA_BMI
3551 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3552 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3553 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3554 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3555 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3556 if (processor_alias_table
[i
].flags
& PTA_TBM
3557 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3558 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3559 if (processor_alias_table
[i
].flags
& PTA_BMI2
3560 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3561 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3562 if (processor_alias_table
[i
].flags
& PTA_CX16
3563 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3564 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3565 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3566 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3567 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3568 if (!(TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3569 && (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3570 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3571 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3572 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3573 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3574 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3575 if (processor_alias_table
[i
].flags
& PTA_AES
3576 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3577 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3578 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3579 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3580 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3581 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3582 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3583 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3584 if (processor_alias_table
[i
].flags
& PTA_RDRND
3585 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3586 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3587 if (processor_alias_table
[i
].flags
& PTA_F16C
3588 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3589 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3590 if (processor_alias_table
[i
].flags
& PTA_RTM
3591 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3592 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3593 if (processor_alias_table
[i
].flags
& PTA_HLE
3594 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3595 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3596 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3597 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3598 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3599 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3600 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3601 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3602 if (processor_alias_table
[i
].flags
& PTA_ADX
3603 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3604 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3605 if (processor_alias_table
[i
].flags
& PTA_FXSR
3606 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3607 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3608 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3609 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3610 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3611 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3612 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3613 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3614 if (processor_alias_table
[i
].flags
& PTA_AVX512F
3615 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
3616 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
3617 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
3618 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
3619 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
3620 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
3621 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
3622 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
3623 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
3624 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
3625 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
3626 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3627 x86_prefetch_sse
= true;
3632 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
3633 error ("generic CPU can be used only for %stune=%s %s",
3634 prefix
, suffix
, sw
);
3635 else if (!strcmp (ix86_arch_string
, "intel"))
3636 error ("intel CPU can be used only for %stune=%s %s",
3637 prefix
, suffix
, sw
);
3638 else if (!strncmp (opts
->x_ix86_arch_string
, "generic", 7) || i
== pta_size
)
3639 error ("bad value (%s) for %sarch=%s %s",
3640 opts
->x_ix86_arch_string
, prefix
, suffix
, sw
);
3642 ix86_arch_mask
= 1u << ix86_arch
;
3643 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3644 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3646 for (i
= 0; i
< pta_size
; i
++)
3647 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
3649 ix86_schedule
= processor_alias_table
[i
].schedule
;
3650 ix86_tune
= processor_alias_table
[i
].processor
;
3651 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3653 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3655 if (ix86_tune_defaulted
)
3657 opts
->x_ix86_tune_string
= "x86-64";
3658 for (i
= 0; i
< pta_size
; i
++)
3659 if (! strcmp (opts
->x_ix86_tune_string
,
3660 processor_alias_table
[i
].name
))
3662 ix86_schedule
= processor_alias_table
[i
].schedule
;
3663 ix86_tune
= processor_alias_table
[i
].processor
;
3666 error ("CPU you selected does not support x86-64 "
3670 /* Intel CPUs have always interpreted SSE prefetch instructions as
3671 NOPs; so, we can enable SSE prefetch instructions even when
3672 -mtune (rather than -march) points us to a processor that has them.
3673 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3674 higher processors. */
3676 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3677 x86_prefetch_sse
= true;
3681 if (ix86_tune_specified
&& i
== pta_size
)
3682 error ("bad value (%s) for %stune=%s %s",
3683 opts
->x_ix86_tune_string
, prefix
, suffix
, sw
);
3685 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
3687 #ifndef USE_IX86_FRAME_POINTER
3688 #define USE_IX86_FRAME_POINTER 0
3691 #ifndef USE_X86_64_FRAME_POINTER
3692 #define USE_X86_64_FRAME_POINTER 0
3695 /* Set the default values for switches whose default depends on TARGET_64BIT
3696 in case they weren't overwritten by command line options. */
3697 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3699 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3700 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3701 if (opts
->x_flag_asynchronous_unwind_tables
3702 && !opts_set
->x_flag_unwind_tables
3703 && TARGET_64BIT_MS_ABI
)
3704 opts
->x_flag_unwind_tables
= 1;
3705 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3706 opts
->x_flag_unwind_tables
3707 = opts
->x_flag_asynchronous_unwind_tables
= 1;
3708 if (opts
->x_flag_pcc_struct_return
== 2)
3709 opts
->x_flag_pcc_struct_return
= 0;
3713 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
3714 opts
->x_flag_omit_frame_pointer
3715 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
3716 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
3717 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3718 if (opts
->x_flag_pcc_struct_return
== 2)
3719 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3722 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3723 if (opts
->x_optimize_size
)
3724 ix86_cost
= &ix86_size_cost
;
3726 ix86_cost
= ix86_tune_cost
;
3728 /* Arrange to set up i386_stack_locals for all functions. */
3729 init_machine_status
= ix86_init_machine_status
;
3731 /* Validate -mregparm= value. */
3732 if (opts_set
->x_ix86_regparm
)
3734 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3735 warning (0, "-mregparm is ignored in 64-bit mode");
3736 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
3738 error ("-mregparm=%d is not between 0 and %d",
3739 opts
->x_ix86_regparm
, REGPARM_MAX
);
3740 opts
->x_ix86_regparm
= 0;
3743 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3744 opts
->x_ix86_regparm
= REGPARM_MAX
;
3746 /* Default align_* from the processor table. */
3747 if (opts
->x_align_loops
== 0)
3749 opts
->x_align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3750 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3752 if (opts
->x_align_jumps
== 0)
3754 opts
->x_align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3755 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3757 if (opts
->x_align_functions
== 0)
3759 opts
->x_align_functions
= processor_target_table
[ix86_tune
].align_func
;
3762 /* Provide default for -mbranch-cost= value. */
3763 if (!opts_set
->x_ix86_branch_cost
)
3764 opts
->x_ix86_branch_cost
= ix86_cost
->branch_cost
;
3766 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
3768 opts
->x_target_flags
3769 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
3771 /* Enable by default the SSE and MMX builtins. Do allow the user to
3772 explicitly disable any of these. In particular, disabling SSE and
3773 MMX for kernel code is extremely useful. */
3774 if (!ix86_arch_specified
)
3775 opts
->x_ix86_isa_flags
3776 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3777 | TARGET_SUBTARGET64_ISA_DEFAULT
)
3778 & ~opts
->x_ix86_isa_flags_explicit
);
3780 if (TARGET_RTD_P (opts
->x_target_flags
))
3781 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3785 opts
->x_target_flags
3786 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
3788 if (!ix86_arch_specified
)
3789 opts
->x_ix86_isa_flags
3790 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
3792 /* i386 ABI does not specify red zone. It still makes sense to use it
3793 when programmer takes care to stack from being destroyed. */
3794 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
3795 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
3798 /* Keep nonleaf frame pointers. */
3799 if (opts
->x_flag_omit_frame_pointer
)
3800 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3801 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
3802 opts
->x_flag_omit_frame_pointer
= 1;
3804 /* If we're doing fast math, we don't care about comparison order
3805 wrt NaNs. This lets us use a shorter comparison sequence. */
3806 if (opts
->x_flag_finite_math_only
)
3807 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
3809 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3810 since the insns won't need emulation. */
3811 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
3812 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3814 /* Likewise, if the target doesn't have a 387, or we've specified
3815 software floating point, don't use 387 inline intrinsics. */
3816 if (!TARGET_80387_P (opts
->x_target_flags
))
3817 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
3819 /* Turn on MMX builtins for -msse. */
3820 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3821 opts
->x_ix86_isa_flags
3822 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
3824 /* Enable SSE prefetch. */
3825 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
3826 || (TARGET_PRFCHW
&& !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
)))
3827 x86_prefetch_sse
= true;
3829 /* Enable prefetch{,w} instructions for -m3dnow. */
3830 if (TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
3831 opts
->x_ix86_isa_flags
3832 |= OPTION_MASK_ISA_PRFCHW
& ~opts
->x_ix86_isa_flags_explicit
;
3834 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3835 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
3836 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
3837 opts
->x_ix86_isa_flags
3838 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3840 /* Enable lzcnt instruction for -mabm. */
3841 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
3842 opts
->x_ix86_isa_flags
3843 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
3845 /* Validate -mpreferred-stack-boundary= value or default it to
3846 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3847 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3848 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
3850 int min
= (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
3851 ? (TARGET_SSE_P (opts
->x_ix86_isa_flags
) ? 4 : 3) : 2);
3852 int max
= (TARGET_SEH
? 4 : 12);
3854 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
3855 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
3858 error ("-mpreferred-stack-boundary is not supported "
3861 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3862 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
3865 ix86_preferred_stack_boundary
3866 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3869 /* Set the default value for -mstackrealign. */
3870 if (opts
->x_ix86_force_align_arg_pointer
== -1)
3871 opts
->x_ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3873 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3875 /* Validate -mincoming-stack-boundary= value or default it to
3876 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3877 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3878 if (opts_set
->x_ix86_incoming_stack_boundary_arg
)
3880 if (opts
->x_ix86_incoming_stack_boundary_arg
3881 < (TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2)
3882 || opts
->x_ix86_incoming_stack_boundary_arg
> 12)
3883 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3884 opts
->x_ix86_incoming_stack_boundary_arg
,
3885 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 4 : 2);
3888 ix86_user_incoming_stack_boundary
3889 = (1 << opts
->x_ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3890 ix86_incoming_stack_boundary
3891 = ix86_user_incoming_stack_boundary
;
3895 /* Accept -msseregparm only if at least SSE support is enabled. */
3896 if (TARGET_SSEREGPARM_P (opts
->x_target_flags
)
3897 && ! TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3898 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3900 if (opts_set
->x_ix86_fpmath
)
3902 if (opts
->x_ix86_fpmath
& FPMATH_SSE
)
3904 if (!TARGET_SSE_P (opts
->x_ix86_isa_flags
))
3906 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3907 opts
->x_ix86_fpmath
= FPMATH_387
;
3909 else if ((opts
->x_ix86_fpmath
& FPMATH_387
)
3910 && !TARGET_80387_P (opts
->x_target_flags
))
3912 warning (0, "387 instruction set disabled, using SSE arithmetics");
3913 opts
->x_ix86_fpmath
= FPMATH_SSE
;
3917 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3918 fpmath=387. The second is however default at many targets since the
3919 extra 80bit precision of temporaries is considered to be part of ABI.
3920 Overwrite the default at least for -ffast-math.
3921 TODO: -mfpmath=both seems to produce same performing code with bit
3922 smaller binaries. It is however not clear if register allocation is
3923 ready for this setting.
3924 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
3925 codegen. We may switch to 387 with -ffast-math for size optimized
3927 else if (fast_math_flags_set_p (&global_options
)
3928 && TARGET_SSE2_P (opts
->x_ix86_isa_flags
))
3929 opts
->x_ix86_fpmath
= FPMATH_SSE
;
3931 opts
->x_ix86_fpmath
= TARGET_FPMATH_DEFAULT_P (opts
->x_ix86_isa_flags
);
3933 /* If the i387 is disabled, then do not return values in it. */
3934 if (!TARGET_80387_P (opts
->x_target_flags
))
3935 opts
->x_target_flags
&= ~MASK_FLOAT_RETURNS
;
3937 /* Use external vectorized library in vectorizing intrinsics. */
3938 if (opts_set
->x_ix86_veclibabi_type
)
3939 switch (opts
->x_ix86_veclibabi_type
)
3941 case ix86_veclibabi_type_svml
:
3942 ix86_veclib_handler
= ix86_veclibabi_svml
;
3945 case ix86_veclibabi_type_acml
:
3946 ix86_veclib_handler
= ix86_veclibabi_acml
;
3953 if (ix86_tune_features
[X86_TUNE_ACCUMULATE_OUTGOING_ARGS
]
3954 && !(opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3955 && !opts
->x_optimize_size
)
3956 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3958 /* If stack probes are required, the space used for large function
3959 arguments on the stack must also be probed, so enable
3960 -maccumulate-outgoing-args so this happens in the prologue. */
3961 if (TARGET_STACK_PROBE_P (opts
->x_target_flags
)
3962 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3964 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3965 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3966 "for correctness", prefix
, suffix
);
3967 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3970 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3973 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3974 p
= strchr (internal_label_prefix
, 'X');
3975 internal_label_prefix_len
= p
- internal_label_prefix
;
3979 /* When scheduling description is not available, disable scheduler pass
3980 so it won't slow down the compilation and make x87 code slower. */
3981 if (!TARGET_SCHEDULE
)
3982 opts
->x_flag_schedule_insns_after_reload
= opts
->x_flag_schedule_insns
= 0;
3984 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3985 ix86_tune_cost
->simultaneous_prefetches
,
3986 opts
->x_param_values
,
3987 opts_set
->x_param_values
);
3988 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3989 ix86_tune_cost
->prefetch_block
,
3990 opts
->x_param_values
,
3991 opts_set
->x_param_values
);
3992 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3993 ix86_tune_cost
->l1_cache_size
,
3994 opts
->x_param_values
,
3995 opts_set
->x_param_values
);
3996 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3997 ix86_tune_cost
->l2_cache_size
,
3998 opts
->x_param_values
,
3999 opts_set
->x_param_values
);
4001 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
4002 if (opts
->x_flag_prefetch_loop_arrays
< 0
4004 && (opts
->x_optimize
>= 3 || opts
->x_flag_profile_use
)
4005 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
4006 opts
->x_flag_prefetch_loop_arrays
= 1;
4008 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4009 can be opts->x_optimized to ap = __builtin_next_arg (0). */
4010 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && !opts
->x_flag_split_stack
)
4011 targetm
.expand_builtin_va_start
= NULL
;
4013 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4015 ix86_gen_leave
= gen_leave_rex64
;
4016 if (Pmode
== DImode
)
4018 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
4019 ix86_gen_tls_local_dynamic_base_64
4020 = gen_tls_local_dynamic_base_64_di
;
4024 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
4025 ix86_gen_tls_local_dynamic_base_64
4026 = gen_tls_local_dynamic_base_64_si
;
4030 ix86_gen_leave
= gen_leave
;
4032 if (Pmode
== DImode
)
4034 ix86_gen_add3
= gen_adddi3
;
4035 ix86_gen_sub3
= gen_subdi3
;
4036 ix86_gen_sub3_carry
= gen_subdi3_carry
;
4037 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
4038 ix86_gen_andsp
= gen_anddi3
;
4039 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
4040 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
4041 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
4042 ix86_gen_monitor
= gen_sse3_monitor_di
;
4046 ix86_gen_add3
= gen_addsi3
;
4047 ix86_gen_sub3
= gen_subsi3
;
4048 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4049 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4050 ix86_gen_andsp
= gen_andsi3
;
4051 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4052 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4053 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4054 ix86_gen_monitor
= gen_sse3_monitor_si
;
4058 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4059 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
4060 opts
->x_target_flags
|= MASK_CLD
& ~opts_set
->x_target_flags
;
4063 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && opts
->x_flag_pic
)
4065 if (opts
->x_flag_fentry
> 0)
4066 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4068 opts
->x_flag_fentry
= 0;
4070 else if (TARGET_SEH
)
4072 if (opts
->x_flag_fentry
== 0)
4073 sorry ("-mno-fentry isn%'t compatible with SEH");
4074 opts
->x_flag_fentry
= 1;
4076 else if (opts
->x_flag_fentry
< 0)
4078 #if defined(PROFILE_BEFORE_PROLOGUE)
4079 opts
->x_flag_fentry
= 1;
4081 opts
->x_flag_fentry
= 0;
4085 /* When not opts->x_optimize for size, enable vzeroupper optimization for
4086 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4087 AVX unaligned load/store. */
4088 if (!opts
->x_optimize_size
)
4090 if (flag_expensive_optimizations
4091 && !(opts_set
->x_target_flags
& MASK_VZEROUPPER
))
4092 opts
->x_target_flags
|= MASK_VZEROUPPER
;
4093 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL
]
4094 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4095 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4096 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL
]
4097 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4098 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4099 /* Enable 128-bit AVX instruction generation
4100 for the auto-vectorizer. */
4101 if (TARGET_AVX128_OPTIMAL
4102 && !(opts_set
->x_target_flags
& MASK_PREFER_AVX128
))
4103 opts
->x_target_flags
|= MASK_PREFER_AVX128
;
4106 if (opts
->x_ix86_recip_name
)
4108 char *p
= ASTRDUP (opts
->x_ix86_recip_name
);
4110 unsigned int mask
, i
;
4113 while ((q
= strtok (p
, ",")) != NULL
)
4124 if (!strcmp (q
, "default"))
4125 mask
= RECIP_MASK_ALL
;
4128 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4129 if (!strcmp (q
, recip_options
[i
].string
))
4131 mask
= recip_options
[i
].mask
;
4135 if (i
== ARRAY_SIZE (recip_options
))
4137 error ("unknown option for -mrecip=%s", q
);
4139 mask
= RECIP_MASK_NONE
;
4143 opts
->x_recip_mask_explicit
|= mask
;
4145 opts
->x_recip_mask
&= ~mask
;
4147 opts
->x_recip_mask
|= mask
;
4151 if (TARGET_RECIP_P (opts
->x_target_flags
))
4152 opts
->x_recip_mask
|= RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
;
4153 else if (opts_set
->x_target_flags
& MASK_RECIP
)
4154 opts
->x_recip_mask
&= ~(RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
);
4156 /* Default long double to 64-bit for Bionic. */
4157 if (TARGET_HAS_BIONIC
4158 && !(opts_set
->x_target_flags
& MASK_LONG_DOUBLE_64
))
4159 opts
->x_target_flags
|= MASK_LONG_DOUBLE_64
;
4161 /* Save the initial options in case the user does function specific
4164 target_option_default_node
= target_option_current_node
4165 = build_target_option_node (opts
);
4167 /* Handle stack protector */
4168 if (!opts_set
->x_ix86_stack_protector_guard
)
4169 opts
->x_ix86_stack_protector_guard
4170 = TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
4172 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4173 if (opts
->x_ix86_tune_memcpy_strategy
)
4175 char *str
= xstrdup (opts
->x_ix86_tune_memcpy_strategy
);
4176 ix86_parse_stringop_strategy_string (str
, false);
4180 if (opts
->x_ix86_tune_memset_strategy
)
4182 char *str
= xstrdup (opts
->x_ix86_tune_memset_strategy
);
4183 ix86_parse_stringop_strategy_string (str
, true);
/* Implement the TARGET_OPTION_OVERRIDE hook.  Called once at start up
   after the command line has been processed.  Delegates the real option
   processing to ix86_option_override_internal and registers the
   vzeroupper insertion pass with the pass manager.  */

static void
ix86_option_override (void)
{
  /* Create the vzeroupper insertion pass and describe where to run it:
     as the first pass inserted after register allocation ("reload").
     NOTE(review): the pass object is built unconditionally here; the
     pass itself is expected to gate on the AVX/vzeroupper target flags
     when it executes -- confirm in the pass's gate function.  */
  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
  static struct register_pass_info insert_vzeroupper_info
    = { pass_insert_vzeroupper, "reload",
        1, PASS_POS_INSERT_AFTER
      };

  /* main_args_p == true: this is the command-line invocation, not a
     target attribute/pragma re-override.  */
  ix86_option_override_internal (true, &global_options, &global_options_set);

  /* This needs to be done at start up.  It's convenient to do it here.  */
  register_pass (&insert_vzeroupper_info);
}
/* Update register usage after having seen the compiler flags.
   Implements TARGET_CONDITIONAL_REGISTER_USAGE: marks registers that
   are unavailable under the selected ISA/ABI as fixed and call-used,
   and erases their names so they never appear in assembly output.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;
  unsigned int j;

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* For 32-bit targets, squash the REX registers.  */
  if (! TARGET_64BIT)
    {
      /* r8-r15, xmm8-xmm15 and the AVX-512 extension registers only
         exist in 64-bit mode; clearing reg_names removes them from
         debug/assembly output entirely.  */
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h: entries > 1
     there encode a per-ABI bitmask; pick the bit for the current ABI.  */
  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
            : TARGET_64BIT ? (1 << 2)
            : (1 << 1));

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
         CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
        call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
         as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
          && call_used_regs[i])
        SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If AVX512F is disabled, squash the registers.  */
  if (! TARGET_AVX512F)
    {
      /* Both the upper sixteen zmm registers and the opmask
         registers k0-k7 require AVX-512.  */
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
        fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
    }
}
/* Save the current options into PTR so they can later be restored by
   ix86_function_specific_restore (TARGET_OPTION_SAVE).  Copies both the
   cached global state (ix86_arch, ix86_tune, ...) and the per-function
   option fields out of OPTS.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr,
                             struct gcc_options *opts)
{
  /* Globals computed by ix86_option_override_internal rather than
     stored in gcc_options.  */
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;

  /* Straight copies of the option variables; field names mirror the
     x_* members of gcc_options.  */
  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
  ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
  ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
  ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
  ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
  ptr->x_ix86_abi = opts->x_ix86_abi;
  ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
  ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
  ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
  ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
  ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
  ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
  ptr->x_ix86_pmode = opts->x_ix86_pmode;
  ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
  ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
  ptr->x_ix86_regparm = opts->x_ix86_regparm;
  ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
  ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
  ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
  ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
  ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
  ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
  ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
  ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
  ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
  ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options from PTR into OPTS
   (TARGET_OPTION_RESTORE).  Inverse of ix86_function_specific_save;
   additionally recomputes the cached arch/tune feature tables when the
   saved arch or tune differs from the current one.  */

static void
ix86_function_specific_restore (struct gcc_options *opts,
                                struct cl_target_option *ptr)
{
  /* Remember the old values so we only rebuild the feature tables if
     they actually changed.  */
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  /* We don't change -fPIC.  */
  opts->x_flag_pic = flag_pic;

  /* The saved fields are narrow (char); cast back to the enum types.  */
  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  opts->x_ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;

  /* Straight copies back into the option variables; mirrors the list
     in ix86_function_specific_save.  */
  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
  opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
  opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
  opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
  opts->x_ix86_abi = ptr->x_ix86_abi;
  opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
  opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
  opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
  opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
  opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
  opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
  opts->x_ix86_pmode = ptr->x_ix86_pmode;
  opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
  opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
  opts->x_ix86_regparm = ptr->x_ix86_regparm;
  opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
  opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
  opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
  opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
  opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
  opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
  opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
  opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
  opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
  opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
        ix86_arch_features[i]
          = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}
/* Print the current options to FILE, indented by INDENT columns
   (TARGET_OPTION_PRINT).  Used for -fdump-* output of per-function
   target options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
                              struct cl_target_option *ptr)
{
  /* Human-readable rendering of the ISA and target flags; allocated by
     ix86_target_string and freed below.  */
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
                          NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
           indent, "",
           ptr->arch,
           ((ptr->arch < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->arch]
            : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
           indent, "",
           ptr->tune,
           ((ptr->tune < TARGET_CPU_DEFAULT_max)
            ? cpu_names[ptr->tune]
            : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
4425 /* Inner function to process the attribute((target(...))), take an argument and
4426 set the current options from the argument. If we have a list, recursively go
4430 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4431 struct gcc_options
*opts
,
4432 struct gcc_options
*opts_set
,
4433 struct gcc_options
*enum_opts_set
)
4438 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4439 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4440 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4441 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4442 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4458 enum ix86_opt_type type
;
4463 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4464 IX86_ATTR_ISA ("abm", OPT_mabm
),
4465 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4466 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4467 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4468 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4469 IX86_ATTR_ISA ("aes", OPT_maes
),
4470 IX86_ATTR_ISA ("avx", OPT_mavx
),
4471 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4472 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
4473 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
4474 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
4475 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
4476 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4477 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4478 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4479 IX86_ATTR_ISA ("sse", OPT_msse
),
4480 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4481 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4482 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4483 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4484 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4485 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4486 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4487 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4488 IX86_ATTR_ISA ("fma", OPT_mfma
),
4489 IX86_ATTR_ISA ("xop", OPT_mxop
),
4490 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4491 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4492 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4493 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4494 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4495 IX86_ATTR_ISA ("hle", OPT_mhle
),
4496 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4497 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4498 IX86_ATTR_ISA ("adx", OPT_madx
),
4499 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4500 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4501 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4504 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4506 /* string options */
4507 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4508 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4511 IX86_ATTR_YES ("cld",
4515 IX86_ATTR_NO ("fancy-math-387",
4516 OPT_mfancy_math_387
,
4517 MASK_NO_FANCY_MATH_387
),
4519 IX86_ATTR_YES ("ieee-fp",
4523 IX86_ATTR_YES ("inline-all-stringops",
4524 OPT_minline_all_stringops
,
4525 MASK_INLINE_ALL_STRINGOPS
),
4527 IX86_ATTR_YES ("inline-stringops-dynamically",
4528 OPT_minline_stringops_dynamically
,
4529 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4531 IX86_ATTR_NO ("align-stringops",
4532 OPT_mno_align_stringops
,
4533 MASK_NO_ALIGN_STRINGOPS
),
4535 IX86_ATTR_YES ("recip",
4541 /* If this is a list, recurse to get the options. */
4542 if (TREE_CODE (args
) == TREE_LIST
)
4546 for (; args
; args
= TREE_CHAIN (args
))
4547 if (TREE_VALUE (args
)
4548 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4549 p_strings
, opts
, opts_set
,
4556 else if (TREE_CODE (args
) != STRING_CST
)
4558 error ("attribute %<target%> argument not a string");
4562 /* Handle multiple arguments separated by commas. */
4563 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4565 while (next_optstr
&& *next_optstr
!= '\0')
4567 char *p
= next_optstr
;
4569 char *comma
= strchr (next_optstr
, ',');
4570 const char *opt_string
;
4571 size_t len
, opt_len
;
4576 enum ix86_opt_type type
= ix86_opt_unknown
;
4582 len
= comma
- next_optstr
;
4583 next_optstr
= comma
+ 1;
4591 /* Recognize no-xxx. */
4592 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4601 /* Find the option. */
4604 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4606 type
= attrs
[i
].type
;
4607 opt_len
= attrs
[i
].len
;
4608 if (ch
== attrs
[i
].string
[0]
4609 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4612 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4615 mask
= attrs
[i
].mask
;
4616 opt_string
= attrs
[i
].string
;
4621 /* Process the option. */
4624 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4628 else if (type
== ix86_opt_isa
)
4630 struct cl_decoded_option decoded
;
4632 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4633 ix86_handle_option (opts
, opts_set
,
4634 &decoded
, input_location
);
4637 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4639 if (type
== ix86_opt_no
)
4640 opt_set_p
= !opt_set_p
;
4643 opts
->x_target_flags
|= mask
;
4645 opts
->x_target_flags
&= ~mask
;
4648 else if (type
== ix86_opt_str
)
4652 error ("option(\"%s\") was already specified", opt_string
);
4656 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4659 else if (type
== ix86_opt_enum
)
4664 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4666 set_option (opts
, enum_opts_set
, opt
, value
,
4667 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4671 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4683 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4686 ix86_valid_target_attribute_tree (tree args
,
4687 struct gcc_options
*opts
,
4688 struct gcc_options
*opts_set
)
4690 const char *orig_arch_string
= opts
->x_ix86_arch_string
;
4691 const char *orig_tune_string
= opts
->x_ix86_tune_string
;
4692 enum fpmath_unit orig_fpmath_set
= opts_set
->x_ix86_fpmath
;
4693 int orig_tune_defaulted
= ix86_tune_defaulted
;
4694 int orig_arch_specified
= ix86_arch_specified
;
4695 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4698 struct cl_target_option
*def
4699 = TREE_TARGET_OPTION (target_option_default_node
);
4700 struct gcc_options enum_opts_set
;
4702 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4704 /* Process each of the options on the chain. */
4705 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
, opts
,
4706 opts_set
, &enum_opts_set
))
4707 return error_mark_node
;
4709 /* If the changed options are different from the default, rerun
4710 ix86_option_override_internal, and then save the options away.
4711 The string options are are attribute options, and will be undone
4712 when we copy the save structure. */
4713 if (opts
->x_ix86_isa_flags
!= def
->x_ix86_isa_flags
4714 || opts
->x_target_flags
!= def
->x_target_flags
4715 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4716 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4717 || enum_opts_set
.x_ix86_fpmath
)
4719 /* If we are using the default tune= or arch=, undo the string assigned,
4720 and use the default. */
4721 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4722 opts
->x_ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4723 else if (!orig_arch_specified
)
4724 opts
->x_ix86_arch_string
= NULL
;
4726 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4727 opts
->x_ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4728 else if (orig_tune_defaulted
)
4729 opts
->x_ix86_tune_string
= NULL
;
4731 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4732 if (enum_opts_set
.x_ix86_fpmath
)
4733 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4734 else if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
4735 && TARGET_SSE_P (opts
->x_ix86_isa_flags
))
4737 opts
->x_ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4738 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
4741 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4742 ix86_option_override_internal (false, opts
, opts_set
);
4744 /* Add any builtin functions with the new isa if any. */
4745 ix86_add_new_builtins (opts
->x_ix86_isa_flags
);
4747 /* Save the current options unless we are validating options for
4749 t
= build_target_option_node (opts
);
4751 opts
->x_ix86_arch_string
= orig_arch_string
;
4752 opts
->x_ix86_tune_string
= orig_tune_string
;
4753 opts_set
->x_ix86_fpmath
= orig_fpmath_set
;
4755 /* Free up memory allocated to hold the strings */
4756 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4757 free (option_strings
[i
]);
4763 /* Hook to validate attribute((target("string"))). */
4766 ix86_valid_target_attribute_p (tree fndecl
,
4767 tree
ARG_UNUSED (name
),
4769 int ARG_UNUSED (flags
))
4771 struct gcc_options func_options
;
4772 tree new_target
, new_optimize
;
4775 /* attribute((target("default"))) does nothing, beyond
4776 affecting multi-versioning. */
4777 if (TREE_VALUE (args
)
4778 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4779 && TREE_CHAIN (args
) == NULL_TREE
4780 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4783 tree old_optimize
= build_optimization_node (&global_options
);
4785 /* Get the optimization options of the current function. */
4786 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4789 func_optimize
= old_optimize
;
4791 /* Init func_options. */
4792 memset (&func_options
, 0, sizeof (func_options
));
4793 init_options_struct (&func_options
, NULL
);
4794 lang_hooks
.init_options_struct (&func_options
);
4796 cl_optimization_restore (&func_options
,
4797 TREE_OPTIMIZATION (func_optimize
));
4799 /* Initialize func_options to the default before its target options can
4801 cl_target_option_restore (&func_options
,
4802 TREE_TARGET_OPTION (target_option_default_node
));
4804 new_target
= ix86_valid_target_attribute_tree (args
, &func_options
,
4805 &global_options_set
);
4807 new_optimize
= build_optimization_node (&func_options
);
4809 if (new_target
== error_mark_node
)
4812 else if (fndecl
&& new_target
)
4814 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4816 if (old_optimize
!= new_optimize
)
4817 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4824 /* Hook to determine if one function can safely inline another. */
4827 ix86_can_inline_p (tree caller
, tree callee
)
4830 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4831 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4833 /* If callee has no option attributes, then it is ok to inline. */
4837 /* If caller has no option attributes, but callee does then it is not ok to
4839 else if (!caller_tree
)
4844 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4845 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4847 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4848 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4850 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4851 != callee_opts
->x_ix86_isa_flags
)
4854 /* See if we have the same non-isa options. */
4855 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4858 /* See if arch, tune, etc. are the same. */
4859 else if (caller_opts
->arch
!= callee_opts
->arch
)
4862 else if (caller_opts
->tune
!= callee_opts
->tune
)
4865 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4868 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4879 /* Remember the last target of ix86_set_current_function. */
4880 static GTY(()) tree ix86_previous_fndecl
;
4882 /* Invalidate ix86_previous_fndecl cache. */
4884 ix86_reset_previous_fndecl (void)
4886 ix86_previous_fndecl
= NULL_TREE
;
4889 /* Establish appropriate back-end context for processing the function
4890 FNDECL. The argument might be NULL to indicate processing at top
4891 level, outside of any function scope. */
4893 ix86_set_current_function (tree fndecl
)
4895 /* Only change the context if the function changes. This hook is called
4896 several times in the course of compiling a function, and we don't want to
4897 slow things down too much or call target_reinit when it isn't safe. */
4898 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4900 tree old_tree
= (ix86_previous_fndecl
4901 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4904 tree new_tree
= (fndecl
4905 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4908 ix86_previous_fndecl
= fndecl
;
4909 if (old_tree
== new_tree
)
4914 cl_target_option_restore (&global_options
,
4915 TREE_TARGET_OPTION (new_tree
));
4921 struct cl_target_option
*def
4922 = TREE_TARGET_OPTION (target_option_current_node
);
4924 cl_target_option_restore (&global_options
, def
);
4931 /* Return true if this goes in large data/bss. */
4934 ix86_in_large_data_p (tree exp
)
4936 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4939 /* Functions are never large data. */
4940 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4943 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4945 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4946 if (strcmp (section
, ".ldata") == 0
4947 || strcmp (section
, ".lbss") == 0)
4953 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4955 /* If this is an incomplete type with size 0, then we can't put it
4956 in data because it might be too big when completed. */
4957 if (!size
|| size
> ix86_section_threshold
)
4964 /* Switch to the appropriate section for output of DECL.
4965 DECL is either a `VAR_DECL' node or a constant of some sort.
4966 RELOC indicates whether forming the initial value of DECL requires
4967 link-time relocations. */
4969 ATTRIBUTE_UNUSED
static section
*
4970 x86_64_elf_select_section (tree decl
, int reloc
,
4971 unsigned HOST_WIDE_INT align
)
4973 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4974 && ix86_in_large_data_p (decl
))
4976 const char *sname
= NULL
;
4977 unsigned int flags
= SECTION_WRITE
;
4978 switch (categorize_decl_for_section (decl
, reloc
))
4983 case SECCAT_DATA_REL
:
4984 sname
= ".ldata.rel";
4986 case SECCAT_DATA_REL_LOCAL
:
4987 sname
= ".ldata.rel.local";
4989 case SECCAT_DATA_REL_RO
:
4990 sname
= ".ldata.rel.ro";
4992 case SECCAT_DATA_REL_RO_LOCAL
:
4993 sname
= ".ldata.rel.ro.local";
4997 flags
|= SECTION_BSS
;
5000 case SECCAT_RODATA_MERGE_STR
:
5001 case SECCAT_RODATA_MERGE_STR_INIT
:
5002 case SECCAT_RODATA_MERGE_CONST
:
5006 case SECCAT_SRODATA
:
5013 /* We don't split these for medium model. Place them into
5014 default sections and hope for best. */
5019 /* We might get called with string constants, but get_named_section
5020 doesn't like them as they are not DECLs. Also, we need to set
5021 flags in that case. */
5023 return get_section (sname
, flags
, NULL
);
5024 return get_named_section (decl
, sname
, reloc
);
5027 return default_elf_select_section (decl
, reloc
, align
);
5030 /* Select a set of attributes for section NAME based on the properties
5031 of DECL and whether or not RELOC indicates that DECL's initializer
5032 might contain runtime relocations. */
5034 static unsigned int ATTRIBUTE_UNUSED
5035 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
5037 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
5039 if (decl
== NULL_TREE
5040 && (strcmp (name
, ".ldata.rel.ro") == 0
5041 || strcmp (name
, ".ldata.rel.ro.local") == 0))
5042 flags
|= SECTION_RELRO
;
5044 if (strcmp (name
, ".lbss") == 0
5045 || strncmp (name
, ".lbss.", 5) == 0
5046 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
5047 flags
|= SECTION_BSS
;
5052 /* Build up a unique section name, expressed as a
5053 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5054 RELOC indicates whether the initial value of EXP requires
5055 link-time relocations. */
5057 static void ATTRIBUTE_UNUSED
5058 x86_64_elf_unique_section (tree decl
, int reloc
)
5060 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5061 && ix86_in_large_data_p (decl
))
5063 const char *prefix
= NULL
;
5064 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5065 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
5067 switch (categorize_decl_for_section (decl
, reloc
))
5070 case SECCAT_DATA_REL
:
5071 case SECCAT_DATA_REL_LOCAL
:
5072 case SECCAT_DATA_REL_RO
:
5073 case SECCAT_DATA_REL_RO_LOCAL
:
5074 prefix
= one_only
? ".ld" : ".ldata";
5077 prefix
= one_only
? ".lb" : ".lbss";
5080 case SECCAT_RODATA_MERGE_STR
:
5081 case SECCAT_RODATA_MERGE_STR_INIT
:
5082 case SECCAT_RODATA_MERGE_CONST
:
5083 prefix
= one_only
? ".lr" : ".lrodata";
5085 case SECCAT_SRODATA
:
5092 /* We don't split these for medium model. Place them into
5093 default sections and hope for best. */
5098 const char *name
, *linkonce
;
5101 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
5102 name
= targetm
.strip_name_encoding (name
);
5104 /* If we're using one_only, then there needs to be a .gnu.linkonce
5105 prefix to the section name. */
5106 linkonce
= one_only
? ".gnu.linkonce" : "";
5108 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
5110 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
5114 default_unique_section (decl
, reloc
);
5117 #ifdef COMMON_ASM_OP
5118 /* This says how to output assembler code to declare an
5119 uninitialized external linkage data object.
5121 For medium model x86-64 we need to use .largecomm opcode for
5124 x86_elf_aligned_common (FILE *file
,
5125 const char *name
, unsigned HOST_WIDE_INT size
,
5128 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5129 && size
> (unsigned int)ix86_section_threshold
)
5130 fputs (".largecomm\t", file
);
5132 fputs (COMMON_ASM_OP
, file
);
5133 assemble_name (file
, name
);
5134 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
5135 size
, align
/ BITS_PER_UNIT
);
5139 /* Utility function for targets to use in implementing
5140 ASM_OUTPUT_ALIGNED_BSS. */
5143 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
5144 const char *name
, unsigned HOST_WIDE_INT size
,
5147 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
5148 && size
> (unsigned int)ix86_section_threshold
)
5149 switch_to_section (get_named_section (decl
, ".lbss", 0));
5151 switch_to_section (bss_section
);
5152 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
5153 #ifdef ASM_DECLARE_OBJECT_NAME
5154 last_assemble_variable_decl
= decl
;
5155 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
5157 /* Standard thing is just output label for the object. */
5158 ASM_OUTPUT_LABEL (file
, name
);
5159 #endif /* ASM_DECLARE_OBJECT_NAME */
5160 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
5163 /* Decide whether we must probe the stack before any space allocation
5164 on this target. It's essentially TARGET_STACK_PROBE except when
5165 -fstack-check causes the stack to be already probed differently. */
5168 ix86_target_stack_probe (void)
5170 /* Do not probe the stack twice if static stack checking is enabled. */
5171 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5174 return TARGET_STACK_PROBE
;
5177 /* Decide whether we can make a sibling call to a function. DECL is the
5178 declaration of the function being targeted by the call and EXP is the
5179 CALL_EXPR representing the call. */
5182 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5184 tree type
, decl_or_type
;
5187 /* If we are generating position-independent code, we cannot sibcall
5188 optimize any indirect call, or a direct call to a global function,
5189 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5193 && (!decl
|| !targetm
.binds_local_p (decl
)))
5196 /* If we need to align the outgoing stack, then sibcalling would
5197 unalign the stack, which may break the called function. */
5198 if (ix86_minimum_incoming_stack_boundary (true)
5199 < PREFERRED_STACK_BOUNDARY
)
5204 decl_or_type
= decl
;
5205 type
= TREE_TYPE (decl
);
5209 /* We're looking at the CALL_EXPR, we need the type of the function. */
5210 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5211 type
= TREE_TYPE (type
); /* pointer type */
5212 type
= TREE_TYPE (type
); /* function type */
5213 decl_or_type
= type
;
5216 /* Check that the return value locations are the same. Like
5217 if we are returning floats on the 80387 register stack, we cannot
5218 make a sibcall from a function that doesn't return a float to a
5219 function that does or, conversely, from a function that does return
5220 a float to a function that doesn't; the necessary stack adjustment
5221 would not be executed. This is also the place we notice
5222 differences in the return value ABI. Note that it is ok for one
5223 of the functions to have void return type as long as the return
5224 value of the other is passed in a register. */
5225 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5226 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5228 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5230 if (!rtx_equal_p (a
, b
))
5233 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5235 else if (!rtx_equal_p (a
, b
))
5240 /* The SYSV ABI has more call-clobbered registers;
5241 disallow sibcalls from MS to SYSV. */
5242 if (cfun
->machine
->call_abi
== MS_ABI
5243 && ix86_function_type_abi (type
) == SYSV_ABI
)
5248 /* If this call is indirect, we'll need to be able to use a
5249 call-clobbered register for the address of the target function.
5250 Make sure that all such registers are not used for passing
5251 parameters. Note that DLLIMPORT functions are indirect. */
5253 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5255 if (ix86_function_regparm (type
, NULL
) >= 3)
5257 /* ??? Need to count the actual number of registers to be used,
5258 not the possible number of registers. Fix later. */
5264 /* Otherwise okay. That also includes certain types of indirect calls. */
5268 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5269 and "sseregparm" calling convention attributes;
5270 arguments as in struct attribute_spec.handler. */
5273 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5275 int flags ATTRIBUTE_UNUSED
,
5278 if (TREE_CODE (*node
) != FUNCTION_TYPE
5279 && TREE_CODE (*node
) != METHOD_TYPE
5280 && TREE_CODE (*node
) != FIELD_DECL
5281 && TREE_CODE (*node
) != TYPE_DECL
)
5283 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5285 *no_add_attrs
= true;
5289 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5290 if (is_attribute_p ("regparm", name
))
5294 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5296 error ("fastcall and regparm attributes are not compatible");
5299 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5301 error ("regparam and thiscall attributes are not compatible");
5304 cst
= TREE_VALUE (args
);
5305 if (TREE_CODE (cst
) != INTEGER_CST
)
5307 warning (OPT_Wattributes
,
5308 "%qE attribute requires an integer constant argument",
5310 *no_add_attrs
= true;
5312 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5314 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5316 *no_add_attrs
= true;
5324 /* Do not warn when emulating the MS ABI. */
5325 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5326 && TREE_CODE (*node
) != METHOD_TYPE
)
5327 || ix86_function_type_abi (*node
) != MS_ABI
)
5328 warning (OPT_Wattributes
, "%qE attribute ignored",
5330 *no_add_attrs
= true;
5334 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5335 if (is_attribute_p ("fastcall", name
))
5337 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5339 error ("fastcall and cdecl attributes are not compatible");
5341 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5343 error ("fastcall and stdcall attributes are not compatible");
5345 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5347 error ("fastcall and regparm attributes are not compatible");
5349 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5351 error ("fastcall and thiscall attributes are not compatible");
5355 /* Can combine stdcall with fastcall (redundant), regparm and
5357 else if (is_attribute_p ("stdcall", name
))
5359 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5361 error ("stdcall and cdecl attributes are not compatible");
5363 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5365 error ("stdcall and fastcall attributes are not compatible");
5367 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5369 error ("stdcall and thiscall attributes are not compatible");
5373 /* Can combine cdecl with regparm and sseregparm. */
5374 else if (is_attribute_p ("cdecl", name
))
5376 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5378 error ("stdcall and cdecl attributes are not compatible");
5380 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5382 error ("fastcall and cdecl attributes are not compatible");
5384 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5386 error ("cdecl and thiscall attributes are not compatible");
5389 else if (is_attribute_p ("thiscall", name
))
5391 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5392 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5394 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5396 error ("stdcall and thiscall attributes are not compatible");
5398 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5400 error ("fastcall and thiscall attributes are not compatible");
5402 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5404 error ("cdecl and thiscall attributes are not compatible");
5408 /* Can combine sseregparm with all attributes. */
5413 /* The transactional memory builtins are implicitly regparm or fastcall
5414 depending on the ABI. Override the generic do-nothing attribute that
5415 these builtins were declared with, and replace it with one of the two
5416 attributes that we expect elsewhere. */
5419 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5420 tree args ATTRIBUTE_UNUSED
,
5421 int flags
, bool *no_add_attrs
)
5425 /* In no case do we want to add the placeholder attribute. */
5426 *no_add_attrs
= true;
5428 /* The 64-bit ABI is unchanged for transactional memory. */
5432 /* ??? Is there a better way to validate 32-bit windows? We have
5433 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5434 if (CHECK_STACK_LIMIT
> 0)
5435 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5438 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5439 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5441 decl_attributes (node
, alt
, flags
);
5446 /* This function determines from TYPE the calling-convention. */
5449 ix86_get_callcvt (const_tree type
)
5451 unsigned int ret
= 0;
5456 return IX86_CALLCVT_CDECL
;
5458 attrs
= TYPE_ATTRIBUTES (type
);
5459 if (attrs
!= NULL_TREE
)
5461 if (lookup_attribute ("cdecl", attrs
))
5462 ret
|= IX86_CALLCVT_CDECL
;
5463 else if (lookup_attribute ("stdcall", attrs
))
5464 ret
|= IX86_CALLCVT_STDCALL
;
5465 else if (lookup_attribute ("fastcall", attrs
))
5466 ret
|= IX86_CALLCVT_FASTCALL
;
5467 else if (lookup_attribute ("thiscall", attrs
))
5468 ret
|= IX86_CALLCVT_THISCALL
;
5470 /* Regparam isn't allowed for thiscall and fastcall. */
5471 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5473 if (lookup_attribute ("regparm", attrs
))
5474 ret
|= IX86_CALLCVT_REGPARM
;
5475 if (lookup_attribute ("sseregparm", attrs
))
5476 ret
|= IX86_CALLCVT_SSEREGPARM
;
5479 if (IX86_BASE_CALLCVT(ret
) != 0)
5483 is_stdarg
= stdarg_p (type
);
5484 if (TARGET_RTD
&& !is_stdarg
)
5485 return IX86_CALLCVT_STDCALL
| ret
;
5489 || TREE_CODE (type
) != METHOD_TYPE
5490 || ix86_function_type_abi (type
) != MS_ABI
)
5491 return IX86_CALLCVT_CDECL
| ret
;
5493 return IX86_CALLCVT_THISCALL
;
5496 /* Return 0 if the attributes for two types are incompatible, 1 if they
5497 are compatible, and 2 if they are nearly compatible (which causes a
5498 warning to be generated). */
5501 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5503 unsigned int ccvt1
, ccvt2
;
5505 if (TREE_CODE (type1
) != FUNCTION_TYPE
5506 && TREE_CODE (type1
) != METHOD_TYPE
)
5509 ccvt1
= ix86_get_callcvt (type1
);
5510 ccvt2
= ix86_get_callcvt (type2
);
5513 if (ix86_function_regparm (type1
, NULL
)
5514 != ix86_function_regparm (type2
, NULL
))
5520 /* Return the regparm value for a function with the indicated TYPE and DECL.
5521 DECL may be NULL when calling function indirectly
5522 or considering a libcall. */
5525 ix86_function_regparm (const_tree type
, const_tree decl
)
5532 return (ix86_function_type_abi (type
) == SYSV_ABI
5533 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5534 ccvt
= ix86_get_callcvt (type
);
5535 regparm
= ix86_regparm
;
5537 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5539 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5542 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5546 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5548 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5551 /* Use register calling convention for local functions when possible. */
5553 && TREE_CODE (decl
) == FUNCTION_DECL
5555 && !(profile_flag
&& !flag_fentry
))
5557 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5558 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5559 if (i
&& i
->local
&& i
->can_change_signature
)
5561 int local_regparm
, globals
= 0, regno
;
5563 /* Make sure no regparm register is taken by a
5564 fixed register variable. */
5565 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5566 if (fixed_regs
[local_regparm
])
5569 /* We don't want to use regparm(3) for nested functions as
5570 these use a static chain pointer in the third argument. */
5571 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5574 /* In 32-bit mode save a register for the split stack. */
5575 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5578 /* Each fixed register usage increases register pressure,
5579 so less registers should be used for argument passing.
5580 This functionality can be overriden by an explicit
5582 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5583 if (fixed_regs
[regno
])
5587 = globals
< local_regparm
? local_regparm
- globals
: 0;
5589 if (local_regparm
> regparm
)
5590 regparm
= local_regparm
;
5597 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5598 DFmode (2) arguments in SSE registers for a function with the
5599 indicated TYPE and DECL. DECL may be NULL when calling function
5600 indirectly or considering a libcall. Otherwise return 0. */
5603 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5605 gcc_assert (!TARGET_64BIT
);
5607 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5608 by the sseregparm attribute. */
5609 if (TARGET_SSEREGPARM
5610 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5617 error ("calling %qD with attribute sseregparm without "
5618 "SSE/SSE2 enabled", decl
);
5620 error ("calling %qT with attribute sseregparm without "
5621 "SSE/SSE2 enabled", type
);
5629 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5630 (and DFmode for SSE2) arguments in SSE registers. */
5631 if (decl
&& TARGET_SSE_MATH
&& optimize
5632 && !(profile_flag
&& !flag_fentry
))
5634 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5635 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5636 if (i
&& i
->local
&& i
->can_change_signature
)
5637 return TARGET_SSE2
? 2 : 1;
5643 /* Return true if EAX is live at the start of the function. Used by
5644 ix86_expand_prologue to determine if we need special help before
5645 calling allocate_stack_worker. */
5648 ix86_eax_live_at_start_p (void)
5650 /* Cheat. Don't bother working forward from ix86_function_regparm
5651 to the function type to whether an actual argument is located in
5652 eax. Instead just look at cfg info, which is still close enough
5653 to correct at this point. This gives false positives for broken
5654 functions that might use uninitialized data that happens to be
5655 allocated in eax, but who cares? */
5656 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
5660 ix86_keep_aggregate_return_pointer (tree fntype
)
5666 attr
= lookup_attribute ("callee_pop_aggregate_return",
5667 TYPE_ATTRIBUTES (fntype
));
5669 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5671 /* For 32-bit MS-ABI the default is to keep aggregate
5673 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5676 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5679 /* Value is the number of bytes of arguments automatically
5680 popped when returning from a subroutine call.
5681 FUNDECL is the declaration node of the function (as a tree),
5682 FUNTYPE is the data type of the function (as a tree),
5683 or for a library call it is an identifier node for the subroutine name.
5684 SIZE is the number of bytes of arguments passed on the stack.
5686 On the 80386, the RTD insn may be used to pop them if the number
5687 of args is fixed, but if the number is variable then the caller
5688 must pop them all. RTD can't be used for library calls now
5689 because the library is compiled with the Unix compiler.
5690 Use of RTD is a selectable option, since it is incompatible with
5691 standard Unix calling sequences. If the option is not selected,
5692 the caller must always pop the args.
5694 The attribute stdcall is equivalent to RTD on a per module basis. */
5697 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5701 /* None of the 64-bit ABIs pop arguments. */
5705 ccvt
= ix86_get_callcvt (funtype
);
5707 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5708 | IX86_CALLCVT_THISCALL
)) != 0
5709 && ! stdarg_p (funtype
))
5712 /* Lose any fake structure return argument if it is passed on the stack. */
5713 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5714 && !ix86_keep_aggregate_return_pointer (funtype
))
5716 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5718 return GET_MODE_SIZE (Pmode
);
5724 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5727 ix86_legitimate_combined_insn (rtx insn
)
5729 /* Check operand constraints in case hard registers were propagated
5730 into insn pattern. This check prevents combine pass from
5731 generating insn patterns with invalid hard register operands.
5732 These invalid insns can eventually confuse reload to error out
5733 with a spill failure. See also PRs 46829 and 46843. */
5734 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5738 extract_insn (insn
);
5739 preprocess_constraints ();
5741 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5743 rtx op
= recog_data
.operand
[i
];
5744 enum machine_mode mode
= GET_MODE (op
);
5745 struct operand_alternative
*op_alt
;
5750 /* For pre-AVX disallow unaligned loads/stores where the
5751 instructions don't support it. */
5753 && VECTOR_MODE_P (GET_MODE (op
))
5754 && misaligned_operand (op
, GET_MODE (op
)))
5756 int min_align
= get_attr_ssememalign (insn
);
5761 /* A unary operator may be accepted by the predicate, but it
5762 is irrelevant for matching constraints. */
5766 if (GET_CODE (op
) == SUBREG
)
5768 if (REG_P (SUBREG_REG (op
))
5769 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5770 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5771 GET_MODE (SUBREG_REG (op
)),
5774 op
= SUBREG_REG (op
);
5777 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5780 op_alt
= recog_op_alt
[i
];
5782 /* Operand has no constraints, anything is OK. */
5783 win
= !recog_data
.n_alternatives
;
5785 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5787 if (op_alt
[j
].anything_ok
5788 || (op_alt
[j
].matches
!= -1
5790 (recog_data
.operand
[i
],
5791 recog_data
.operand
[op_alt
[j
].matches
]))
5792 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5807 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5809 static unsigned HOST_WIDE_INT
5810 ix86_asan_shadow_offset (void)
5812 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
5813 : HOST_WIDE_INT_C (0x7fff8000))
5814 : (HOST_WIDE_INT_1
<< 29);
5817 /* Argument support functions. */
5819 /* Return true when register may be used to pass function parameters. */
5821 ix86_function_arg_regno_p (int regno
)
5824 const int *parm_regs
;
5829 return (regno
< REGPARM_MAX
5830 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5832 return (regno
< REGPARM_MAX
5833 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5834 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5835 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5836 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5839 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5840 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5843 /* TODO: The function should depend on current function ABI but
5844 builtins.c would need updating then. Therefore we use the
5847 /* RAX is used as hidden argument to va_arg functions. */
5848 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5851 if (ix86_abi
== MS_ABI
)
5852 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5854 parm_regs
= x86_64_int_parameter_registers
;
5855 for (i
= 0; i
< (ix86_abi
== MS_ABI
5856 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5857 if (regno
== parm_regs
[i
])
5862 /* Return if we do not know how to pass TYPE solely in registers. */
5865 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5867 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5870 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5871 The layout_type routine is crafty and tries to trick us into passing
5872 currently unsupported vector types on the stack by using TImode. */
5873 return (!TARGET_64BIT
&& mode
== TImode
5874 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5877 /* It returns the size, in bytes, of the area reserved for arguments passed
5878 in registers for the function represented by fndecl dependent to the used
5881 ix86_reg_parm_stack_space (const_tree fndecl
)
5883 enum calling_abi call_abi
= SYSV_ABI
;
5884 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5885 call_abi
= ix86_function_abi (fndecl
);
5887 call_abi
= ix86_function_type_abi (fndecl
);
5888 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5893 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5896 ix86_function_type_abi (const_tree fntype
)
5898 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5900 enum calling_abi abi
= ix86_abi
;
5901 if (abi
== SYSV_ABI
)
5903 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5906 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5913 /* We add this as a workaround in order to use libc_has_function
5916 ix86_libc_has_function (enum function_class fn_class
)
5918 return targetm
.libc_has_function (fn_class
);
5922 ix86_function_ms_hook_prologue (const_tree fn
)
5924 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5926 if (decl_function_context (fn
) != NULL_TREE
)
5927 error_at (DECL_SOURCE_LOCATION (fn
),
5928 "ms_hook_prologue is not compatible with nested function");
5935 static enum calling_abi
5936 ix86_function_abi (const_tree fndecl
)
5940 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5943 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5946 ix86_cfun_abi (void)
5950 return cfun
->machine
->call_abi
;
5953 /* Write the extra assembler code needed to declare a function properly. */
5956 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5959 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5963 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5964 unsigned int filler_cc
= 0xcccccccc;
5966 for (i
= 0; i
< filler_count
; i
+= 4)
5967 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5970 #ifdef SUBTARGET_ASM_UNWIND_INIT
5971 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5974 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5976 /* Output magic byte marker, if hot-patch attribute is set. */
5981 /* leaq [%rsp + 0], %rsp */
5982 asm_fprintf (asm_out_file
, ASM_BYTE
5983 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5987 /* movl.s %edi, %edi
5989 movl.s %esp, %ebp */
5990 asm_fprintf (asm_out_file
, ASM_BYTE
5991 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5997 extern void init_regs (void);
5999 /* Implementation of call abi switching target hook. Specific to FNDECL
6000 the specific call register sets are set. See also
6001 ix86_conditional_register_usage for more details. */
6003 ix86_call_abi_override (const_tree fndecl
)
6005 if (fndecl
== NULL_TREE
)
6006 cfun
->machine
->call_abi
= ix86_abi
;
6008 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
6011 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
6012 expensive re-initialization of init_regs each time we switch function context
6013 since this is needed only during RTL expansion. */
6015 ix86_maybe_switch_abi (void)
6018 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
6022 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6023 for a call to a function whose data type is FNTYPE.
6024 For a library call, FNTYPE is 0. */
6027 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
6028 tree fntype
, /* tree ptr for function decl */
6029 rtx libname
, /* SYMBOL_REF of library name or 0 */
6033 struct cgraph_local_info
*i
;
6035 memset (cum
, 0, sizeof (*cum
));
6039 i
= cgraph_local_info (fndecl
);
6040 cum
->call_abi
= ix86_function_abi (fndecl
);
6045 cum
->call_abi
= ix86_function_type_abi (fntype
);
6048 cum
->caller
= caller
;
6050 /* Set up the number of registers to use for passing arguments. */
6052 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
6053 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
6054 "or subtarget optimization implying it");
6055 cum
->nregs
= ix86_regparm
;
6058 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
6059 ? X86_64_REGPARM_MAX
6060 : X86_64_MS_REGPARM_MAX
);
6064 cum
->sse_nregs
= SSE_REGPARM_MAX
;
6067 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
6068 ? X86_64_SSE_REGPARM_MAX
6069 : X86_64_MS_SSE_REGPARM_MAX
);
6073 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
6074 cum
->warn_avx
= true;
6075 cum
->warn_sse
= true;
6076 cum
->warn_mmx
= true;
6078 /* Because type might mismatch in between caller and callee, we need to
6079 use actual type of function for local calls.
6080 FIXME: cgraph_analyze can be told to actually record if function uses
6081 va_start so for local functions maybe_vaarg can be made aggressive
6083 FIXME: once typesytem is fixed, we won't need this code anymore. */
6084 if (i
&& i
->local
&& i
->can_change_signature
)
6085 fntype
= TREE_TYPE (fndecl
);
6086 cum
->maybe_vaarg
= (fntype
6087 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
6092 /* If there are variable arguments, then we won't pass anything
6093 in registers in 32-bit mode. */
6094 if (stdarg_p (fntype
))
6105 /* Use ecx and edx registers if function has fastcall attribute,
6106 else look for regparm information. */
6109 unsigned int ccvt
= ix86_get_callcvt (fntype
);
6110 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
6113 cum
->fastcall
= 1; /* Same first register as in fastcall. */
6115 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
6121 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
6124 /* Set up the number of SSE registers used for passing SFmode
6125 and DFmode arguments. Warn for mismatching ABI. */
6126 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
6130 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6131 But in the case of vector types, it is some vector mode.
6133 When we have only some of our vector isa extensions enabled, then there
6134 are some modes for which vector_mode_supported_p is false. For these
6135 modes, the generic vector support in gcc will choose some non-vector mode
6136 in order to implement the type. By computing the natural mode, we'll
6137 select the proper ABI location for the operand and not depend on whatever
6138 the middle-end decides to do with these vector types.
6140 The midde-end can't deal with the vector types > 16 bytes. In this
6141 case, we return the original mode and warn ABI change if CUM isn't
6144 static enum machine_mode
6145 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
6147 enum machine_mode mode
= TYPE_MODE (type
);
6149 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
6151 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6152 if ((size
== 8 || size
== 16 || size
== 32)
6153 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6154 && TYPE_VECTOR_SUBPARTS (type
) > 1)
6156 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
6158 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
6159 mode
= MIN_MODE_VECTOR_FLOAT
;
6161 mode
= MIN_MODE_VECTOR_INT
;
6163 /* Get the mode which has this inner mode and number of units. */
6164 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
6165 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
6166 && GET_MODE_INNER (mode
) == innermode
)
6168 if (size
== 32 && !TARGET_AVX
)
6170 static bool warnedavx
;
6177 warning (0, "AVX vector argument without AVX "
6178 "enabled changes the ABI");
6180 return TYPE_MODE (type
);
6182 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
6185 static bool warnedsse
;
6192 warning (0, "SSE vector argument without SSE "
6193 "enabled changes the ABI");
6196 else if ((size
== 8 && !TARGET_64BIT
) && !TARGET_MMX
)
6198 static bool warnedmmx
;
6205 warning (0, "MMX vector argument without MMX "
6206 "enabled changes the ABI");
6219 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6220 this may not agree with the mode that the type system has chosen for the
6221 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6222 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6225 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6230 if (orig_mode
!= BLKmode
)
6231 tmp
= gen_rtx_REG (orig_mode
, regno
);
6234 tmp
= gen_rtx_REG (mode
, regno
);
6235 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6236 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6242 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6243 of this code is to classify each 8bytes of incoming argument by the register
6244 class and assign registers accordingly. */
6246 /* Return the union class of CLASS1 and CLASS2.
6247 See the x86-64 PS ABI for details. */
6249 static enum x86_64_reg_class
6250 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6252 /* Rule #1: If both classes are equal, this is the resulting class. */
6253 if (class1
== class2
)
6256 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6258 if (class1
== X86_64_NO_CLASS
)
6260 if (class2
== X86_64_NO_CLASS
)
6263 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6264 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6265 return X86_64_MEMORY_CLASS
;
6267 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6268 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6269 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6270 return X86_64_INTEGERSI_CLASS
;
6271 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6272 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6273 return X86_64_INTEGER_CLASS
;
6275 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6277 if (class1
== X86_64_X87_CLASS
6278 || class1
== X86_64_X87UP_CLASS
6279 || class1
== X86_64_COMPLEX_X87_CLASS
6280 || class2
== X86_64_X87_CLASS
6281 || class2
== X86_64_X87UP_CLASS
6282 || class2
== X86_64_COMPLEX_X87_CLASS
)
6283 return X86_64_MEMORY_CLASS
;
6285 /* Rule #6: Otherwise class SSE is used. */
6286 return X86_64_SSE_CLASS
;
6289 /* Classify the argument of type TYPE and mode MODE.
6290 CLASSES will be filled by the register class used to pass each word
6291 of the operand. The number of words is returned. In case the parameter
6292 should be passed in memory, 0 is returned. As a special case for zero
6293 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6295 BIT_OFFSET is used internally for handling records and specifies offset
6296 of the offset in bits modulo 256 to avoid overflow cases.
6298 See the x86-64 PS ABI for details.
6302 classify_argument (enum machine_mode mode
, const_tree type
,
6303 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6305 HOST_WIDE_INT bytes
=
6306 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6308 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6310 /* Variable sized entities are always passed/returned in memory. */
6314 if (mode
!= VOIDmode
6315 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6318 if (type
&& AGGREGATE_TYPE_P (type
))
6322 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6324 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6328 for (i
= 0; i
< words
; i
++)
6329 classes
[i
] = X86_64_NO_CLASS
;
6331 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6332 signalize memory class, so handle it as special case. */
6335 classes
[0] = X86_64_NO_CLASS
;
6339 /* Classify each field of record and merge classes. */
6340 switch (TREE_CODE (type
))
6343 /* And now merge the fields of structure. */
6344 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6346 if (TREE_CODE (field
) == FIELD_DECL
)
6350 if (TREE_TYPE (field
) == error_mark_node
)
6353 /* Bitfields are always classified as integer. Handle them
6354 early, since later code would consider them to be
6355 misaligned integers. */
6356 if (DECL_BIT_FIELD (field
))
6358 for (i
= (int_bit_position (field
)
6359 + (bit_offset
% 64)) / 8 / 8;
6360 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6361 + tree_to_shwi (DECL_SIZE (field
))
6364 merge_classes (X86_64_INTEGER_CLASS
,
6371 type
= TREE_TYPE (field
);
6373 /* Flexible array member is ignored. */
6374 if (TYPE_MODE (type
) == BLKmode
6375 && TREE_CODE (type
) == ARRAY_TYPE
6376 && TYPE_SIZE (type
) == NULL_TREE
6377 && TYPE_DOMAIN (type
) != NULL_TREE
6378 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6383 if (!warned
&& warn_psabi
)
6386 inform (input_location
,
6387 "the ABI of passing struct with"
6388 " a flexible array member has"
6389 " changed in GCC 4.4");
6393 num
= classify_argument (TYPE_MODE (type
), type
,
6395 (int_bit_position (field
)
6396 + bit_offset
) % 256);
6399 pos
= (int_bit_position (field
)
6400 + (bit_offset
% 64)) / 8 / 8;
6401 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6403 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6410 /* Arrays are handled as small records. */
6413 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6414 TREE_TYPE (type
), subclasses
, bit_offset
);
6418 /* The partial classes are now full classes. */
6419 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6420 subclasses
[0] = X86_64_SSE_CLASS
;
6421 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6422 && !((bit_offset
% 64) == 0 && bytes
== 4))
6423 subclasses
[0] = X86_64_INTEGER_CLASS
;
6425 for (i
= 0; i
< words
; i
++)
6426 classes
[i
] = subclasses
[i
% num
];
6431 case QUAL_UNION_TYPE
:
6432 /* Unions are similar to RECORD_TYPE but offset is always 0.
6434 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6436 if (TREE_CODE (field
) == FIELD_DECL
)
6440 if (TREE_TYPE (field
) == error_mark_node
)
6443 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6444 TREE_TYPE (field
), subclasses
,
6448 for (i
= 0; i
< num
; i
++)
6449 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6460 /* When size > 16 bytes, if the first one isn't
6461 X86_64_SSE_CLASS or any other ones aren't
6462 X86_64_SSEUP_CLASS, everything should be passed in
6464 if (classes
[0] != X86_64_SSE_CLASS
)
6467 for (i
= 1; i
< words
; i
++)
6468 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6472 /* Final merger cleanup. */
6473 for (i
= 0; i
< words
; i
++)
6475 /* If one class is MEMORY, everything should be passed in
6477 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6480 /* The X86_64_SSEUP_CLASS should be always preceded by
6481 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6482 if (classes
[i
] == X86_64_SSEUP_CLASS
6483 && classes
[i
- 1] != X86_64_SSE_CLASS
6484 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6486 /* The first one should never be X86_64_SSEUP_CLASS. */
6487 gcc_assert (i
!= 0);
6488 classes
[i
] = X86_64_SSE_CLASS
;
6491 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6492 everything should be passed in memory. */
6493 if (classes
[i
] == X86_64_X87UP_CLASS
6494 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6498 /* The first one should never be X86_64_X87UP_CLASS. */
6499 gcc_assert (i
!= 0);
6500 if (!warned
&& warn_psabi
)
6503 inform (input_location
,
6504 "the ABI of passing union with long double"
6505 " has changed in GCC 4.4");
6513 /* Compute alignment needed. We align all types to natural boundaries with
6514 exception of XFmode that is aligned to 64bits. */
6515 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6517 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6520 mode_alignment
= 128;
6521 else if (mode
== XCmode
)
6522 mode_alignment
= 256;
6523 if (COMPLEX_MODE_P (mode
))
6524 mode_alignment
/= 2;
6525 /* Misaligned fields are always returned in memory. */
6526 if (bit_offset
% mode_alignment
)
6530 /* for V1xx modes, just use the base mode */
6531 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6532 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6533 mode
= GET_MODE_INNER (mode
);
6535 /* Classification of atomic types. */
6540 classes
[0] = X86_64_SSE_CLASS
;
6543 classes
[0] = X86_64_SSE_CLASS
;
6544 classes
[1] = X86_64_SSEUP_CLASS
;
6554 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6558 classes
[0] = X86_64_INTEGERSI_CLASS
;
6561 else if (size
<= 64)
6563 classes
[0] = X86_64_INTEGER_CLASS
;
6566 else if (size
<= 64+32)
6568 classes
[0] = X86_64_INTEGER_CLASS
;
6569 classes
[1] = X86_64_INTEGERSI_CLASS
;
6572 else if (size
<= 64+64)
6574 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6582 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6586 /* OImode shouldn't be used directly. */
6591 if (!(bit_offset
% 64))
6592 classes
[0] = X86_64_SSESF_CLASS
;
6594 classes
[0] = X86_64_SSE_CLASS
;
6597 classes
[0] = X86_64_SSEDF_CLASS
;
6600 classes
[0] = X86_64_X87_CLASS
;
6601 classes
[1] = X86_64_X87UP_CLASS
;
6604 classes
[0] = X86_64_SSE_CLASS
;
6605 classes
[1] = X86_64_SSEUP_CLASS
;
6608 classes
[0] = X86_64_SSE_CLASS
;
6609 if (!(bit_offset
% 64))
6615 if (!warned
&& warn_psabi
)
6618 inform (input_location
,
6619 "the ABI of passing structure with complex float"
6620 " member has changed in GCC 4.4");
6622 classes
[1] = X86_64_SSESF_CLASS
;
6626 classes
[0] = X86_64_SSEDF_CLASS
;
6627 classes
[1] = X86_64_SSEDF_CLASS
;
6630 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6633 /* This modes is larger than 16 bytes. */
6641 classes
[0] = X86_64_SSE_CLASS
;
6642 classes
[1] = X86_64_SSEUP_CLASS
;
6643 classes
[2] = X86_64_SSEUP_CLASS
;
6644 classes
[3] = X86_64_SSEUP_CLASS
;
6652 classes
[0] = X86_64_SSE_CLASS
;
6653 classes
[1] = X86_64_SSEUP_CLASS
;
6661 classes
[0] = X86_64_SSE_CLASS
;
6667 gcc_assert (VECTOR_MODE_P (mode
));
6672 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6674 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6675 classes
[0] = X86_64_INTEGERSI_CLASS
;
6677 classes
[0] = X86_64_INTEGER_CLASS
;
6678 classes
[1] = X86_64_INTEGER_CLASS
;
6679 return 1 + (bytes
> 8);
6683 /* Examine the argument and return set number of register required in each
6684 class. Return 0 iff parameter should be passed in memory. */
6686 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6687 int *int_nregs
, int *sse_nregs
)
6689 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6690 int n
= classify_argument (mode
, type
, regclass
, 0);
6696 for (n
--; n
>= 0; n
--)
6697 switch (regclass
[n
])
6699 case X86_64_INTEGER_CLASS
:
6700 case X86_64_INTEGERSI_CLASS
:
6703 case X86_64_SSE_CLASS
:
6704 case X86_64_SSESF_CLASS
:
6705 case X86_64_SSEDF_CLASS
:
6708 case X86_64_NO_CLASS
:
6709 case X86_64_SSEUP_CLASS
:
6711 case X86_64_X87_CLASS
:
6712 case X86_64_X87UP_CLASS
:
6716 case X86_64_COMPLEX_X87_CLASS
:
6717 return in_return
? 2 : 0;
6718 case X86_64_MEMORY_CLASS
:
6724 /* Construct container for the argument used by GCC interface. See
6725 FUNCTION_ARG for the detailed description. */
6728 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6729 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6730 const int *intreg
, int sse_regno
)
6732 /* The following variables hold the static issued_error state. */
6733 static bool issued_sse_arg_error
;
6734 static bool issued_sse_ret_error
;
6735 static bool issued_x87_ret_error
;
6737 enum machine_mode tmpmode
;
6739 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6740 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6744 int needed_sseregs
, needed_intregs
;
6745 rtx exp
[MAX_CLASSES
];
6748 n
= classify_argument (mode
, type
, regclass
, 0);
6751 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6754 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6757 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6758 some less clueful developer tries to use floating-point anyway. */
6759 if (needed_sseregs
&& !TARGET_SSE
)
6763 if (!issued_sse_ret_error
)
6765 error ("SSE register return with SSE disabled");
6766 issued_sse_ret_error
= true;
6769 else if (!issued_sse_arg_error
)
6771 error ("SSE register argument with SSE disabled");
6772 issued_sse_arg_error
= true;
6777 /* Likewise, error if the ABI requires us to return values in the
6778 x87 registers and the user specified -mno-80387. */
6779 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
6780 for (i
= 0; i
< n
; i
++)
6781 if (regclass
[i
] == X86_64_X87_CLASS
6782 || regclass
[i
] == X86_64_X87UP_CLASS
6783 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6785 if (!issued_x87_ret_error
)
6787 error ("x87 register return with x87 disabled");
6788 issued_x87_ret_error
= true;
6793 /* First construct simple cases. Avoid SCmode, since we want to use
6794 single register to pass this type. */
6795 if (n
== 1 && mode
!= SCmode
)
6796 switch (regclass
[0])
6798 case X86_64_INTEGER_CLASS
:
6799 case X86_64_INTEGERSI_CLASS
:
6800 return gen_rtx_REG (mode
, intreg
[0]);
6801 case X86_64_SSE_CLASS
:
6802 case X86_64_SSESF_CLASS
:
6803 case X86_64_SSEDF_CLASS
:
6804 if (mode
!= BLKmode
)
6805 return gen_reg_or_parallel (mode
, orig_mode
,
6806 SSE_REGNO (sse_regno
));
6808 case X86_64_X87_CLASS
:
6809 case X86_64_COMPLEX_X87_CLASS
:
6810 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6811 case X86_64_NO_CLASS
:
6812 /* Zero sized array, struct or class. */
6818 && regclass
[0] == X86_64_SSE_CLASS
6819 && regclass
[1] == X86_64_SSEUP_CLASS
6821 return gen_reg_or_parallel (mode
, orig_mode
,
6822 SSE_REGNO (sse_regno
));
6824 && regclass
[0] == X86_64_SSE_CLASS
6825 && regclass
[1] == X86_64_SSEUP_CLASS
6826 && regclass
[2] == X86_64_SSEUP_CLASS
6827 && regclass
[3] == X86_64_SSEUP_CLASS
6829 return gen_reg_or_parallel (mode
, orig_mode
,
6830 SSE_REGNO (sse_regno
));
6832 && regclass
[0] == X86_64_X87_CLASS
6833 && regclass
[1] == X86_64_X87UP_CLASS
)
6834 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6837 && regclass
[0] == X86_64_INTEGER_CLASS
6838 && regclass
[1] == X86_64_INTEGER_CLASS
6839 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6840 && intreg
[0] + 1 == intreg
[1])
6841 return gen_rtx_REG (mode
, intreg
[0]);
6843 /* Otherwise figure out the entries of the PARALLEL. */
6844 for (i
= 0; i
< n
; i
++)
6848 switch (regclass
[i
])
6850 case X86_64_NO_CLASS
:
6852 case X86_64_INTEGER_CLASS
:
6853 case X86_64_INTEGERSI_CLASS
:
6854 /* Merge TImodes on aligned occasions here too. */
6855 if (i
* 8 + 8 > bytes
)
6857 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6858 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6862 /* We've requested 24 bytes we
6863 don't have mode for. Use DImode. */
6864 if (tmpmode
== BLKmode
)
6867 = gen_rtx_EXPR_LIST (VOIDmode
,
6868 gen_rtx_REG (tmpmode
, *intreg
),
6872 case X86_64_SSESF_CLASS
:
6874 = gen_rtx_EXPR_LIST (VOIDmode
,
6875 gen_rtx_REG (SFmode
,
6876 SSE_REGNO (sse_regno
)),
6880 case X86_64_SSEDF_CLASS
:
6882 = gen_rtx_EXPR_LIST (VOIDmode
,
6883 gen_rtx_REG (DFmode
,
6884 SSE_REGNO (sse_regno
)),
6888 case X86_64_SSE_CLASS
:
6896 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6906 && regclass
[1] == X86_64_SSEUP_CLASS
6907 && regclass
[2] == X86_64_SSEUP_CLASS
6908 && regclass
[3] == X86_64_SSEUP_CLASS
);
6916 = gen_rtx_EXPR_LIST (VOIDmode
,
6917 gen_rtx_REG (tmpmode
,
6918 SSE_REGNO (sse_regno
)),
6927 /* Empty aligned struct, union or class. */
6931 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6932 for (i
= 0; i
< nexps
; i
++)
6933 XVECEXP (ret
, 0, i
) = exp
[i
];
6937 /* Update the data in CUM to advance over an argument of mode MODE
6938 and data type TYPE. (TYPE is null for libcalls where that information
6939 may not be available.) */
6942 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6943 const_tree type
, HOST_WIDE_INT bytes
,
6944 HOST_WIDE_INT words
)
6960 cum
->words
+= words
;
6961 cum
->nregs
-= words
;
6962 cum
->regno
+= words
;
6964 if (cum
->nregs
<= 0)
6972 /* OImode shouldn't be used directly. */
6976 if (cum
->float_in_sse
< 2)
6979 if (cum
->float_in_sse
< 1)
6996 if (!type
|| !AGGREGATE_TYPE_P (type
))
6998 cum
->sse_words
+= words
;
6999 cum
->sse_nregs
-= 1;
7000 cum
->sse_regno
+= 1;
7001 if (cum
->sse_nregs
<= 0)
7015 if (!type
|| !AGGREGATE_TYPE_P (type
))
7017 cum
->mmx_words
+= words
;
7018 cum
->mmx_nregs
-= 1;
7019 cum
->mmx_regno
+= 1;
7020 if (cum
->mmx_nregs
<= 0)
7031 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7032 const_tree type
, HOST_WIDE_INT words
, bool named
)
7034 int int_nregs
, sse_nregs
;
7036 /* Unnamed 256bit vector mode parameters are passed on stack. */
7037 if (!named
&& VALID_AVX256_REG_MODE (mode
))
7040 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
7041 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
7043 cum
->nregs
-= int_nregs
;
7044 cum
->sse_nregs
-= sse_nregs
;
7045 cum
->regno
+= int_nregs
;
7046 cum
->sse_regno
+= sse_nregs
;
7050 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
7051 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
7052 cum
->words
+= words
;
7057 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
7058 HOST_WIDE_INT words
)
7060 /* Otherwise, this should be passed indirect. */
7061 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
7063 cum
->words
+= words
;
7071 /* Update the data in CUM to advance over an argument of mode MODE and
7072 data type TYPE. (TYPE is null for libcalls where that information
7073 may not be available.) */
7076 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
7077 const_tree type
, bool named
)
7079 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7080 HOST_WIDE_INT bytes
, words
;
7082 if (mode
== BLKmode
)
7083 bytes
= int_size_in_bytes (type
);
7085 bytes
= GET_MODE_SIZE (mode
);
7086 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7089 mode
= type_natural_mode (type
, NULL
);
7091 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7092 function_arg_advance_ms_64 (cum
, bytes
, words
);
7093 else if (TARGET_64BIT
)
7094 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
7096 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
7099 /* Define where to put the arguments to a function.
7100 Value is zero to push the argument on the stack,
7101 or a hard register in which to store the argument.
7103 MODE is the argument's machine mode.
7104 TYPE is the data type of the argument (as a tree).
7105 This is null for libcalls where that information may
7107 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7108 the preceding args and about the function being called.
7109 NAMED is nonzero if this argument is a named parameter
7110 (otherwise it is an extra parameter matching an ellipsis). */
7113 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7114 enum machine_mode orig_mode
, const_tree type
,
7115 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
7117 static bool warnedsse
, warnedmmx
;
7119 /* Avoid the AL settings for the Unix64 ABI. */
7120 if (mode
== VOIDmode
)
7136 if (words
<= cum
->nregs
)
7138 int regno
= cum
->regno
;
7140 /* Fastcall allocates the first two DWORD (SImode) or
7141 smaller arguments to ECX and EDX if it isn't an
7147 || (type
&& AGGREGATE_TYPE_P (type
)))
7150 /* ECX not EAX is the first allocated register. */
7151 if (regno
== AX_REG
)
7154 return gen_rtx_REG (mode
, regno
);
7159 if (cum
->float_in_sse
< 2)
7162 if (cum
->float_in_sse
< 1)
7166 /* In 32bit, we pass TImode in xmm registers. */
7173 if (!type
|| !AGGREGATE_TYPE_P (type
))
7175 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
7178 warning (0, "SSE vector argument without SSE enabled "
7182 return gen_reg_or_parallel (mode
, orig_mode
,
7183 cum
->sse_regno
+ FIRST_SSE_REG
);
7188 /* OImode shouldn't be used directly. */
7197 if (!type
|| !AGGREGATE_TYPE_P (type
))
7200 return gen_reg_or_parallel (mode
, orig_mode
,
7201 cum
->sse_regno
+ FIRST_SSE_REG
);
7211 if (!type
|| !AGGREGATE_TYPE_P (type
))
7213 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7216 warning (0, "MMX vector argument without MMX enabled "
7220 return gen_reg_or_parallel (mode
, orig_mode
,
7221 cum
->mmx_regno
+ FIRST_MMX_REG
);
7230 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7231 enum machine_mode orig_mode
, const_tree type
, bool named
)
7233 /* Handle a hidden AL argument containing number of registers
7234 for varargs x86-64 functions. */
7235 if (mode
== VOIDmode
)
7236 return GEN_INT (cum
->maybe_vaarg
7237 ? (cum
->sse_nregs
< 0
7238 ? X86_64_SSE_REGPARM_MAX
7253 /* Unnamed 256bit vector mode parameters are passed on stack. */
7259 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7261 &x86_64_int_parameter_registers
[cum
->regno
],
7266 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7267 enum machine_mode orig_mode
, bool named
,
7268 HOST_WIDE_INT bytes
)
7272 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7273 We use value of -2 to specify that current function call is MSABI. */
7274 if (mode
== VOIDmode
)
7275 return GEN_INT (-2);
7277 /* If we've run out of registers, it goes on the stack. */
7278 if (cum
->nregs
== 0)
7281 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7283 /* Only floating point modes are passed in anything but integer regs. */
7284 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7287 regno
= cum
->regno
+ FIRST_SSE_REG
;
7292 /* Unnamed floating parameters are passed in both the
7293 SSE and integer registers. */
7294 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7295 t2
= gen_rtx_REG (mode
, regno
);
7296 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7297 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7298 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7301 /* Handle aggregated types passed in register. */
7302 if (orig_mode
== BLKmode
)
7304 if (bytes
> 0 && bytes
<= 8)
7305 mode
= (bytes
> 4 ? DImode
: SImode
);
7306 if (mode
== BLKmode
)
7310 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7313 /* Return where to put the arguments to a function.
7314 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7316 MODE is the argument's machine mode. TYPE is the data type of the
7317 argument. It is null for libcalls where that information may not be
7318 available. CUM gives information about the preceding args and about
7319 the function being called. NAMED is nonzero if this argument is a
7320 named parameter (otherwise it is an extra parameter matching an
7324 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7325 const_tree type
, bool named
)
7327 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7328 enum machine_mode mode
= omode
;
7329 HOST_WIDE_INT bytes
, words
;
7332 if (mode
== BLKmode
)
7333 bytes
= int_size_in_bytes (type
);
7335 bytes
= GET_MODE_SIZE (mode
);
7336 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7338 /* To simplify the code below, represent vector types with a vector mode
7339 even if MMX/SSE are not active. */
7340 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7341 mode
= type_natural_mode (type
, cum
);
7343 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7344 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7345 else if (TARGET_64BIT
)
7346 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7348 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7353 /* A C expression that indicates when an argument must be passed by
7354 reference. If nonzero for an argument, a copy of that argument is
7355 made in memory and a pointer to the argument is passed instead of
7356 the argument itself. The pointer is passed in whatever way is
7357 appropriate for passing a pointer to that type. */
7360 ix86_pass_by_reference (cumulative_args_t cum_v
, enum machine_mode mode
,
7361 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7363 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7365 /* See Windows x64 Software Convention. */
7366 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7368 int msize
= (int) GET_MODE_SIZE (mode
);
7371 /* Arrays are passed by reference. */
7372 if (TREE_CODE (type
) == ARRAY_TYPE
)
7375 if (AGGREGATE_TYPE_P (type
))
7377 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7378 are passed by reference. */
7379 msize
= int_size_in_bytes (type
);
7383 /* __m128 is passed by reference. */
7385 case 1: case 2: case 4: case 8:
7391 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7397 /* Return true when TYPE should be 128bit aligned for 32bit argument
7398 passing ABI. XXX: This function is obsolete and is only used for
7399 checking psABI compatibility with previous versions of GCC. */
7402 ix86_compat_aligned_value_p (const_tree type
)
7404 enum machine_mode mode
= TYPE_MODE (type
);
7405 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7409 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7411 if (TYPE_ALIGN (type
) < 128)
7414 if (AGGREGATE_TYPE_P (type
))
7416 /* Walk the aggregates recursively. */
7417 switch (TREE_CODE (type
))
7421 case QUAL_UNION_TYPE
:
7425 /* Walk all the structure fields. */
7426 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7428 if (TREE_CODE (field
) == FIELD_DECL
7429 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7436 /* Just for use if some languages passes arrays by value. */
7437 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7448 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7449 XXX: This function is obsolete and is only used for checking psABI
7450 compatibility with previous versions of GCC. */
7453 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7454 const_tree type
, unsigned int align
)
7456 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7457 natural boundaries. */
7458 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7460 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7461 make an exception for SSE modes since these require 128bit
7464 The handling here differs from field_alignment. ICC aligns MMX
7465 arguments to 4 byte boundaries, while structure fields are aligned
7466 to 8 byte boundaries. */
7469 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7470 align
= PARM_BOUNDARY
;
7474 if (!ix86_compat_aligned_value_p (type
))
7475 align
= PARM_BOUNDARY
;
7478 if (align
> BIGGEST_ALIGNMENT
)
7479 align
= BIGGEST_ALIGNMENT
;
7483 /* Return true when TYPE should be 128bit aligned for 32bit argument
7487 ix86_contains_aligned_value_p (const_tree type
)
7489 enum machine_mode mode
= TYPE_MODE (type
);
7491 if (mode
== XFmode
|| mode
== XCmode
)
7494 if (TYPE_ALIGN (type
) < 128)
7497 if (AGGREGATE_TYPE_P (type
))
7499 /* Walk the aggregates recursively. */
7500 switch (TREE_CODE (type
))
7504 case QUAL_UNION_TYPE
:
7508 /* Walk all the structure fields. */
7509 for (field
= TYPE_FIELDS (type
);
7511 field
= DECL_CHAIN (field
))
7513 if (TREE_CODE (field
) == FIELD_DECL
7514 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7521 /* Just for use if some languages passes arrays by value. */
7522 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7531 return TYPE_ALIGN (type
) >= 128;
7536 /* Gives the alignment boundary, in bits, of an argument with the
7537 specified mode and type. */
7540 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7545 /* Since the main variant type is used for call, we convert it to
7546 the main variant type. */
7547 type
= TYPE_MAIN_VARIANT (type
);
7548 align
= TYPE_ALIGN (type
);
7551 align
= GET_MODE_ALIGNMENT (mode
);
7552 if (align
< PARM_BOUNDARY
)
7553 align
= PARM_BOUNDARY
;
7557 unsigned int saved_align
= align
;
7561 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7564 if (mode
== XFmode
|| mode
== XCmode
)
7565 align
= PARM_BOUNDARY
;
7567 else if (!ix86_contains_aligned_value_p (type
))
7568 align
= PARM_BOUNDARY
;
7571 align
= PARM_BOUNDARY
;
7576 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7580 inform (input_location
,
7581 "The ABI for passing parameters with %d-byte"
7582 " alignment has changed in GCC 4.6",
7583 align
/ BITS_PER_UNIT
);
7590 /* Return true if N is a possible register number of function value. */
7593 ix86_function_value_regno_p (const unsigned int regno
)
7602 return TARGET_64BIT
&& ix86_abi
!= MS_ABI
;
7604 /* Complex values are returned in %st(0)/%st(1) pair. */
7607 /* TODO: The function should depend on current function ABI but
7608 builtins.c would need updating then. Therefore we use the
7610 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7612 return TARGET_FLOAT_RETURNS_IN_80387
;
7614 /* Complex values are returned in %xmm0/%xmm1 pair. */
7620 if (TARGET_MACHO
|| TARGET_64BIT
)
7628 /* Define how to find the value returned by a function.
7629 VALTYPE is the data type of the value (as a tree).
7630 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7631 otherwise, FUNC is 0. */
7634 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7635 const_tree fntype
, const_tree fn
)
7639 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7640 we normally prevent this case when mmx is not available. However
7641 some ABIs may require the result to be returned like DImode. */
7642 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7643 regno
= FIRST_MMX_REG
;
7645 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7646 we prevent this case when sse is not available. However some ABIs
7647 may require the result to be returned like integer TImode. */
7648 else if (mode
== TImode
7649 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7650 regno
= FIRST_SSE_REG
;
7652 /* 32-byte vector modes in %ymm0. */
7653 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7654 regno
= FIRST_SSE_REG
;
7656 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7657 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7658 regno
= FIRST_FLOAT_REG
;
7660 /* Most things go in %eax. */
7663 /* Override FP return register with %xmm0 for local functions when
7664 SSE math is enabled or for functions with sseregparm attribute. */
7665 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7667 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7668 if ((sse_level
>= 1 && mode
== SFmode
)
7669 || (sse_level
== 2 && mode
== DFmode
))
7670 regno
= FIRST_SSE_REG
;
7673 /* OImode shouldn't be used directly. */
7674 gcc_assert (mode
!= OImode
);
7676 return gen_rtx_REG (orig_mode
, regno
);
7680 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7685 /* Handle libcalls, which don't provide a type node. */
7686 if (valtype
== NULL
)
7700 regno
= FIRST_SSE_REG
;
7704 regno
= FIRST_FLOAT_REG
;
7712 return gen_rtx_REG (mode
, regno
);
7714 else if (POINTER_TYPE_P (valtype
))
7716 /* Pointers are always returned in word_mode. */
7720 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7721 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7722 x86_64_int_return_registers
, 0);
7724 /* For zero sized structures, construct_container returns NULL, but we
7725 need to keep rest of compiler happy by returning meaningful value. */
7727 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7733 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7736 unsigned int regno
= AX_REG
;
7740 switch (GET_MODE_SIZE (mode
))
7743 if (valtype
!= NULL_TREE
7744 && !VECTOR_INTEGER_TYPE_P (valtype
)
7745 && !VECTOR_INTEGER_TYPE_P (valtype
)
7746 && !INTEGRAL_TYPE_P (valtype
)
7747 && !VECTOR_FLOAT_TYPE_P (valtype
))
7749 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7750 && !COMPLEX_MODE_P (mode
))
7751 regno
= FIRST_SSE_REG
;
7755 if (mode
== SFmode
|| mode
== DFmode
)
7756 regno
= FIRST_SSE_REG
;
7762 return gen_rtx_REG (orig_mode
, regno
);
7766 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7767 enum machine_mode orig_mode
, enum machine_mode mode
)
7769 const_tree fn
, fntype
;
7772 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7773 fn
= fntype_or_decl
;
7774 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7776 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7777 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7778 else if (TARGET_64BIT
)
7779 return function_value_64 (orig_mode
, mode
, valtype
);
7781 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7785 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7786 bool outgoing ATTRIBUTE_UNUSED
)
7788 enum machine_mode mode
, orig_mode
;
7790 orig_mode
= TYPE_MODE (valtype
);
7791 mode
= type_natural_mode (valtype
, NULL
);
7792 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7795 /* Pointer function arguments and return values are promoted to
7798 static enum machine_mode
7799 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7800 int *punsignedp
, const_tree fntype
,
7803 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7805 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7808 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7812 /* Return true if a structure, union or array with MODE containing FIELD
7813 should be accessed using BLKmode. */
7816 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7818 /* Union with XFmode must be in BLKmode. */
7819 return (mode
== XFmode
7820 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7821 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7825 ix86_libcall_value (enum machine_mode mode
)
7827 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7830 /* Return true iff type is returned in memory. */
7832 static bool ATTRIBUTE_UNUSED
7833 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7837 if (mode
== BLKmode
)
7840 size
= int_size_in_bytes (type
);
7842 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7845 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7847 /* User-created vectors small enough to fit in EAX. */
7851 /* MMX/3dNow values are returned in MM0,
7852 except when it doesn't exits or the ABI prescribes otherwise. */
7854 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7856 /* SSE values are returned in XMM0, except when it doesn't exist. */
7860 /* AVX values are returned in YMM0, except when it doesn't exist. */
7871 /* OImode shouldn't be used directly. */
7872 gcc_assert (mode
!= OImode
);
7877 static bool ATTRIBUTE_UNUSED
7878 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7880 int needed_intregs
, needed_sseregs
;
7881 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7884 static bool ATTRIBUTE_UNUSED
7885 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7887 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7889 /* __m128 is returned in xmm0. */
7890 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7891 || VECTOR_FLOAT_TYPE_P (type
))
7892 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7893 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7896 /* Otherwise, the size must be exactly in [1248]. */
7897 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7901 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7903 #ifdef SUBTARGET_RETURN_IN_MEMORY
7904 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7906 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7910 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7911 return return_in_memory_ms_64 (type
, mode
);
7913 return return_in_memory_64 (type
, mode
);
7916 return return_in_memory_32 (type
, mode
);
7920 /* When returning SSE vector types, we have a choice of either
7921 (1) being abi incompatible with a -march switch, or
7922 (2) generating an error.
7923 Given no good solution, I think the safest thing is one warning.
7924 The user won't be able to use -Werror, but....
7926 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7927 called in response to actually generating a caller or callee that
7928 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7929 via aggregate_value_p for general type probing from tree-ssa. */
7932 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7934 static bool warnedsse
, warnedmmx
;
7936 if (!TARGET_64BIT
&& type
)
7938 /* Look at the return type of the function, not the function type. */
7939 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7941 if (!TARGET_SSE
&& !warnedsse
)
7944 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7947 warning (0, "SSE vector return without SSE enabled "
7952 if (!TARGET_MMX
&& !warnedmmx
)
7954 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7957 warning (0, "MMX vector return without MMX enabled "
7967 /* Create the va_list data type. */
7969 /* Returns the calling convention specific va_list date type.
7970 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7973 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7975 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7977 /* For i386 we use plain pointer to argument area. */
7978 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7979 return build_pointer_type (char_type_node
);
7981 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7982 type_decl
= build_decl (BUILTINS_LOCATION
,
7983 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7985 f_gpr
= build_decl (BUILTINS_LOCATION
,
7986 FIELD_DECL
, get_identifier ("gp_offset"),
7987 unsigned_type_node
);
7988 f_fpr
= build_decl (BUILTINS_LOCATION
,
7989 FIELD_DECL
, get_identifier ("fp_offset"),
7990 unsigned_type_node
);
7991 f_ovf
= build_decl (BUILTINS_LOCATION
,
7992 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7994 f_sav
= build_decl (BUILTINS_LOCATION
,
7995 FIELD_DECL
, get_identifier ("reg_save_area"),
7998 va_list_gpr_counter_field
= f_gpr
;
7999 va_list_fpr_counter_field
= f_fpr
;
8001 DECL_FIELD_CONTEXT (f_gpr
) = record
;
8002 DECL_FIELD_CONTEXT (f_fpr
) = record
;
8003 DECL_FIELD_CONTEXT (f_ovf
) = record
;
8004 DECL_FIELD_CONTEXT (f_sav
) = record
;
8006 TYPE_STUB_DECL (record
) = type_decl
;
8007 TYPE_NAME (record
) = type_decl
;
8008 TYPE_FIELDS (record
) = f_gpr
;
8009 DECL_CHAIN (f_gpr
) = f_fpr
;
8010 DECL_CHAIN (f_fpr
) = f_ovf
;
8011 DECL_CHAIN (f_ovf
) = f_sav
;
8013 layout_type (record
);
8015 /* The correct type is an array type of one element. */
8016 return build_array_type (record
, build_index_type (size_zero_node
));
8019 /* Setup the builtin va_list data type and for 64-bit the additional
8020 calling convention specific va_list data types. */
8023 ix86_build_builtin_va_list (void)
8025 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
8027 /* Initialize abi specific va_list builtin types. */
8031 if (ix86_abi
== MS_ABI
)
8033 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
8034 if (TREE_CODE (t
) != RECORD_TYPE
)
8035 t
= build_variant_type_copy (t
);
8036 sysv_va_list_type_node
= t
;
8041 if (TREE_CODE (t
) != RECORD_TYPE
)
8042 t
= build_variant_type_copy (t
);
8043 sysv_va_list_type_node
= t
;
8045 if (ix86_abi
!= MS_ABI
)
8047 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
8048 if (TREE_CODE (t
) != RECORD_TYPE
)
8049 t
= build_variant_type_copy (t
);
8050 ms_va_list_type_node
= t
;
8055 if (TREE_CODE (t
) != RECORD_TYPE
)
8056 t
= build_variant_type_copy (t
);
8057 ms_va_list_type_node
= t
;
8064 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8067 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
8073 /* GPR size of varargs save area. */
8074 if (cfun
->va_list_gpr_size
)
8075 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
8077 ix86_varargs_gpr_size
= 0;
8079 /* FPR size of varargs save area. We don't need it if we don't pass
8080 anything in SSE registers. */
8081 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8082 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
8084 ix86_varargs_fpr_size
= 0;
8086 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
8089 save_area
= frame_pointer_rtx
;
8090 set
= get_varargs_alias_set ();
8092 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
8093 if (max
> X86_64_REGPARM_MAX
)
8094 max
= X86_64_REGPARM_MAX
;
8096 for (i
= cum
->regno
; i
< max
; i
++)
8098 mem
= gen_rtx_MEM (word_mode
,
8099 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
8100 MEM_NOTRAP_P (mem
) = 1;
8101 set_mem_alias_set (mem
, set
);
8102 emit_move_insn (mem
,
8103 gen_rtx_REG (word_mode
,
8104 x86_64_int_parameter_registers
[i
]));
8107 if (ix86_varargs_fpr_size
)
8109 enum machine_mode smode
;
8112 /* Now emit code to save SSE registers. The AX parameter contains number
8113 of SSE parameter registers used to call this function, though all we
8114 actually check here is the zero/non-zero status. */
8116 label
= gen_label_rtx ();
8117 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
8118 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
8121 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8122 we used movdqa (i.e. TImode) instead? Perhaps even better would
8123 be if we could determine the real mode of the data, via a hook
8124 into pass_stdarg. Ignore all that for now. */
8126 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
8127 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
8129 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
8130 if (max
> X86_64_SSE_REGPARM_MAX
)
8131 max
= X86_64_SSE_REGPARM_MAX
;
8133 for (i
= cum
->sse_regno
; i
< max
; ++i
)
8135 mem
= plus_constant (Pmode
, save_area
,
8136 i
* 16 + ix86_varargs_gpr_size
);
8137 mem
= gen_rtx_MEM (smode
, mem
);
8138 MEM_NOTRAP_P (mem
) = 1;
8139 set_mem_alias_set (mem
, set
);
8140 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
8142 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
8150 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
8152 alias_set_type set
= get_varargs_alias_set ();
8155 /* Reset to zero, as there might be a sysv vaarg used
8157 ix86_varargs_gpr_size
= 0;
8158 ix86_varargs_fpr_size
= 0;
8160 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
8164 mem
= gen_rtx_MEM (Pmode
,
8165 plus_constant (Pmode
, virtual_incoming_args_rtx
,
8166 i
* UNITS_PER_WORD
));
8167 MEM_NOTRAP_P (mem
) = 1;
8168 set_mem_alias_set (mem
, set
);
8170 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
8171 emit_move_insn (mem
, reg
);
8176 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
8177 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8180 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8181 CUMULATIVE_ARGS next_cum
;
8184 /* This argument doesn't appear to be used anymore. Which is good,
8185 because the old code here didn't suppress rtl generation. */
8186 gcc_assert (!no_rtl
);
8191 fntype
= TREE_TYPE (current_function_decl
);
8193 /* For varargs, we do not want to skip the dummy va_dcl argument.
8194 For stdargs, we do want to skip the last named argument. */
8196 if (stdarg_p (fntype
))
8197 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
8200 if (cum
->call_abi
== MS_ABI
)
8201 setup_incoming_varargs_ms_64 (&next_cum
);
8203 setup_incoming_varargs_64 (&next_cum
);
8206 /* Checks if TYPE is of kind va_list char *. */
8209 is_va_list_char_pointer (tree type
)
8213 /* For 32-bit it is always true. */
8216 canonic
= ix86_canonical_va_list_type (type
);
8217 return (canonic
== ms_va_list_type_node
8218 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8221 /* Implement va_start. */
8224 ix86_va_start (tree valist
, rtx nextarg
)
8226 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8227 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8228 tree gpr
, fpr
, ovf
, sav
, t
;
8232 if (flag_split_stack
8233 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8235 unsigned int scratch_regno
;
8237 /* When we are splitting the stack, we can't refer to the stack
8238 arguments using internal_arg_pointer, because they may be on
8239 the old stack. The split stack prologue will arrange to
8240 leave a pointer to the old stack arguments in a scratch
8241 register, which we here copy to a pseudo-register. The split
8242 stack prologue can't set the pseudo-register directly because
8243 it (the prologue) runs before any registers have been saved. */
8245 scratch_regno
= split_stack_prologue_scratch_regno ();
8246 if (scratch_regno
!= INVALID_REGNUM
)
8250 reg
= gen_reg_rtx (Pmode
);
8251 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8254 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8258 push_topmost_sequence ();
8259 emit_insn_after (seq
, entry_of_function ());
8260 pop_topmost_sequence ();
8264 /* Only 64bit target needs something special. */
8265 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8267 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8268 std_expand_builtin_va_start (valist
, nextarg
);
8273 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8274 next
= expand_binop (ptr_mode
, add_optab
,
8275 cfun
->machine
->split_stack_varargs_pointer
,
8276 crtl
->args
.arg_offset_rtx
,
8277 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8278 convert_move (va_r
, next
, 0);
8283 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8284 f_fpr
= DECL_CHAIN (f_gpr
);
8285 f_ovf
= DECL_CHAIN (f_fpr
);
8286 f_sav
= DECL_CHAIN (f_ovf
);
8288 valist
= build_simple_mem_ref (valist
);
8289 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8290 /* The following should be folded into the MEM_REF offset. */
8291 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8293 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8295 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8297 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8300 /* Count number of gp and fp argument registers used. */
8301 words
= crtl
->args
.info
.words
;
8302 n_gpr
= crtl
->args
.info
.regno
;
8303 n_fpr
= crtl
->args
.info
.sse_regno
;
8305 if (cfun
->va_list_gpr_size
)
8307 type
= TREE_TYPE (gpr
);
8308 t
= build2 (MODIFY_EXPR
, type
,
8309 gpr
, build_int_cst (type
, n_gpr
* 8));
8310 TREE_SIDE_EFFECTS (t
) = 1;
8311 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8314 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8316 type
= TREE_TYPE (fpr
);
8317 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8318 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8319 TREE_SIDE_EFFECTS (t
) = 1;
8320 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8323 /* Find the overflow area. */
8324 type
= TREE_TYPE (ovf
);
8325 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8326 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8328 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8329 t
= make_tree (type
, ovf_rtx
);
8331 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8332 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8333 TREE_SIDE_EFFECTS (t
) = 1;
8334 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8336 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8338 /* Find the register save area.
8339 Prologue of the function save it right above stack frame. */
8340 type
= TREE_TYPE (sav
);
8341 t
= make_tree (type
, frame_pointer_rtx
);
8342 if (!ix86_varargs_gpr_size
)
8343 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8344 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8345 TREE_SIDE_EFFECTS (t
) = 1;
8346 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8350 /* Implement va_arg. */
8353 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8356 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8357 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8358 tree gpr
, fpr
, ovf
, sav
, t
;
8360 tree lab_false
, lab_over
= NULL_TREE
;
8365 enum machine_mode nat_mode
;
8366 unsigned int arg_boundary
;
8368 /* Only 64bit target needs something special. */
8369 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8370 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8372 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8373 f_fpr
= DECL_CHAIN (f_gpr
);
8374 f_ovf
= DECL_CHAIN (f_fpr
);
8375 f_sav
= DECL_CHAIN (f_ovf
);
8377 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8378 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8379 valist
= build_va_arg_indirect_ref (valist
);
8380 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8381 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8382 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8384 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8386 type
= build_pointer_type (type
);
8387 size
= int_size_in_bytes (type
);
8388 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8390 nat_mode
= type_natural_mode (type
, NULL
);
8399 /* Unnamed 256bit vector mode parameters are passed on stack. */
8400 if (!TARGET_64BIT_MS_ABI
)
8407 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8408 type
, 0, X86_64_REGPARM_MAX
,
8409 X86_64_SSE_REGPARM_MAX
, intreg
,
8414 /* Pull the value out of the saved registers. */
8416 addr
= create_tmp_var (ptr_type_node
, "addr");
8420 int needed_intregs
, needed_sseregs
;
8422 tree int_addr
, sse_addr
;
8424 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8425 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8427 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8429 need_temp
= (!REG_P (container
)
8430 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8431 || TYPE_ALIGN (type
) > 128));
8433 /* In case we are passing structure, verify that it is consecutive block
8434 on the register save area. If not we need to do moves. */
8435 if (!need_temp
&& !REG_P (container
))
8437 /* Verify that all registers are strictly consecutive */
8438 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8442 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8444 rtx slot
= XVECEXP (container
, 0, i
);
8445 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8446 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8454 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8456 rtx slot
= XVECEXP (container
, 0, i
);
8457 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8458 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8470 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8471 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8474 /* First ensure that we fit completely in registers. */
8477 t
= build_int_cst (TREE_TYPE (gpr
),
8478 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8479 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8480 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8481 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8482 gimplify_and_add (t
, pre_p
);
8486 t
= build_int_cst (TREE_TYPE (fpr
),
8487 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8488 + X86_64_REGPARM_MAX
* 8);
8489 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8490 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8491 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8492 gimplify_and_add (t
, pre_p
);
8495 /* Compute index to start of area used for integer regs. */
8498 /* int_addr = gpr + sav; */
8499 t
= fold_build_pointer_plus (sav
, gpr
);
8500 gimplify_assign (int_addr
, t
, pre_p
);
8504 /* sse_addr = fpr + sav; */
8505 t
= fold_build_pointer_plus (sav
, fpr
);
8506 gimplify_assign (sse_addr
, t
, pre_p
);
8510 int i
, prev_size
= 0;
8511 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8514 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8515 gimplify_assign (addr
, t
, pre_p
);
8517 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8519 rtx slot
= XVECEXP (container
, 0, i
);
8520 rtx reg
= XEXP (slot
, 0);
8521 enum machine_mode mode
= GET_MODE (reg
);
8527 tree dest_addr
, dest
;
8528 int cur_size
= GET_MODE_SIZE (mode
);
8530 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8531 prev_size
= INTVAL (XEXP (slot
, 1));
8532 if (prev_size
+ cur_size
> size
)
8534 cur_size
= size
- prev_size
;
8535 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8536 if (mode
== BLKmode
)
8539 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8540 if (mode
== GET_MODE (reg
))
8541 addr_type
= build_pointer_type (piece_type
);
8543 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8545 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8548 if (SSE_REGNO_P (REGNO (reg
)))
8550 src_addr
= sse_addr
;
8551 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8555 src_addr
= int_addr
;
8556 src_offset
= REGNO (reg
) * 8;
8558 src_addr
= fold_convert (addr_type
, src_addr
);
8559 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8561 dest_addr
= fold_convert (daddr_type
, addr
);
8562 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8563 if (cur_size
== GET_MODE_SIZE (mode
))
8565 src
= build_va_arg_indirect_ref (src_addr
);
8566 dest
= build_va_arg_indirect_ref (dest_addr
);
8568 gimplify_assign (dest
, src
, pre_p
);
8573 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8574 3, dest_addr
, src_addr
,
8575 size_int (cur_size
));
8576 gimplify_and_add (copy
, pre_p
);
8578 prev_size
+= cur_size
;
8584 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8585 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8586 gimplify_assign (gpr
, t
, pre_p
);
8591 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8592 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8593 gimplify_assign (fpr
, t
, pre_p
);
8596 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8598 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8601 /* ... otherwise out of the overflow area. */
8603 /* When we align parameter on stack for caller, if the parameter
8604 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8605 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8606 here with caller. */
8607 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8608 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8609 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8611 /* Care for on-stack alignment if needed. */
8612 if (arg_boundary
<= 64 || size
== 0)
8616 HOST_WIDE_INT align
= arg_boundary
/ 8;
8617 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8618 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8619 build_int_cst (TREE_TYPE (t
), -align
));
8622 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8623 gimplify_assign (addr
, t
, pre_p
);
8625 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8626 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8629 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8631 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8632 addr
= fold_convert (ptrtype
, addr
);
8635 addr
= build_va_arg_indirect_ref (addr
);
8636 return build_va_arg_indirect_ref (addr
);
8639 /* Return true if OPNUM's MEM should be matched
8640 in movabs* patterns. */
8643 ix86_check_movabs (rtx insn
, int opnum
)
8647 set
= PATTERN (insn
);
8648 if (GET_CODE (set
) == PARALLEL
)
8649 set
= XVECEXP (set
, 0, 0);
8650 gcc_assert (GET_CODE (set
) == SET
);
8651 mem
= XEXP (set
, opnum
);
8652 while (GET_CODE (mem
) == SUBREG
)
8653 mem
= SUBREG_REG (mem
);
8654 gcc_assert (MEM_P (mem
));
8655 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8658 /* Initialize the table of extra 80387 mathematical constants. */
8661 init_ext_80387_constants (void)
8663 static const char * cst
[5] =
8665 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8666 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8667 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8668 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8669 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8673 for (i
= 0; i
< 5; i
++)
8675 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8676 /* Ensure each constant is rounded to XFmode precision. */
8677 real_convert (&ext_80387_constants_table
[i
],
8678 XFmode
, &ext_80387_constants_table
[i
]);
8681 ext_80387_constants_init
= 1;
8684 /* Return non-zero if the constant is something that
8685 can be loaded with a special instruction. */
8688 standard_80387_constant_p (rtx x
)
8690 enum machine_mode mode
= GET_MODE (x
);
8694 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8697 if (x
== CONST0_RTX (mode
))
8699 if (x
== CONST1_RTX (mode
))
8702 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8704 /* For XFmode constants, try to find a special 80387 instruction when
8705 optimizing for size or on those CPUs that benefit from them. */
8707 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8711 if (! ext_80387_constants_init
)
8712 init_ext_80387_constants ();
8714 for (i
= 0; i
< 5; i
++)
8715 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8719 /* Load of the constant -0.0 or -1.0 will be split as
8720 fldz;fchs or fld1;fchs sequence. */
8721 if (real_isnegzero (&r
))
8723 if (real_identical (&r
, &dconstm1
))
8729 /* Return the opcode of the special instruction to be used to load
8733 standard_80387_constant_opcode (rtx x
)
8735 switch (standard_80387_constant_p (x
))
8759 /* Return the CONST_DOUBLE representing the 80387 constant that is
8760 loaded by the specified special instruction. The argument IDX
8761 matches the return value from standard_80387_constant_p. */
8764 standard_80387_constant_rtx (int idx
)
8768 if (! ext_80387_constants_init
)
8769 init_ext_80387_constants ();
8785 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8789 /* Return 1 if X is all 0s and 2 if x is all 1s
8790 in supported SSE/AVX vector mode. */
8793 standard_sse_constant_p (rtx x
)
8795 enum machine_mode mode
= GET_MODE (x
);
8797 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8799 if (vector_all_ones_operand (x
, mode
))
8821 /* Return the opcode of the special instruction to be used to load
8825 standard_sse_constant_opcode (rtx insn
, rtx x
)
8827 switch (standard_sse_constant_p (x
))
8830 switch (get_attr_mode (insn
))
8833 return "%vpxor\t%0, %d0";
8835 return "%vxorpd\t%0, %d0";
8837 return "%vxorps\t%0, %d0";
8840 return "vpxor\t%x0, %x0, %x0";
8842 return "vxorpd\t%x0, %x0, %x0";
8844 return "vxorps\t%x0, %x0, %x0";
8851 if (get_attr_mode (insn
) == MODE_XI
8852 || get_attr_mode (insn
) == MODE_V8DF
8853 || get_attr_mode (insn
) == MODE_V16SF
)
8854 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8856 return "vpcmpeqd\t%0, %0, %0";
8858 return "pcmpeqd\t%0, %0";
8866 /* Returns true if OP contains a symbol reference */
8869 symbolic_reference_mentioned_p (rtx op
)
8874 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8877 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8878 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8884 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8885 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8889 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8896 /* Return true if it is appropriate to emit `ret' instructions in the
8897 body of a function. Do this only if the epilogue is simple, needing a
8898 couple of insns. Prior to reloading, we can't tell how many registers
8899 must be saved, so return false then. Return false if there is no frame
8900 marker to de-allocate. */
8903 ix86_can_use_return_insn_p (void)
8905 struct ix86_frame frame
;
8907 if (! reload_completed
|| frame_pointer_needed
)
8910 /* Don't allow more than 32k pop, since that's all we can do
8911 with one instruction. */
8912 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8915 ix86_compute_frame_layout (&frame
);
8916 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8917 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8920 /* Value should be nonzero if functions must have frame pointers.
8921 Zero means the frame pointer need not be set up (and parms may
8922 be accessed via the stack pointer) in functions that seem suitable. */
8925 ix86_frame_pointer_required (void)
8927 /* If we accessed previous frames, then the generated code expects
8928 to be able to access the saved ebp value in our frame. */
8929 if (cfun
->machine
->accesses_prev_frame
)
8932 /* Several x86 os'es need a frame pointer for other reasons,
8933 usually pertaining to setjmp. */
8934 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8937 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8938 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8941 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8942 allocation is 4GB. */
8943 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8946 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8947 turns off the frame pointer by default. Turn it back on now if
8948 we've not got a leaf function. */
8949 if (TARGET_OMIT_LEAF_FRAME_POINTER
8951 || ix86_current_function_calls_tls_descriptor
))
8954 if (crtl
->profile
&& !flag_fentry
)
8960 /* Record that the current function accesses previous call frames. */
8963 ix86_setup_frame_addresses (void)
8965 cfun
->machine
->accesses_prev_frame
= 1;
8968 #ifndef USE_HIDDEN_LINKONCE
8969 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8970 # define USE_HIDDEN_LINKONCE 1
8972 # define USE_HIDDEN_LINKONCE 0
8976 static int pic_labels_used
;
8978 /* Fills in the label name that should be used for a pc thunk for
8979 the given register. */
8982 get_pc_thunk_name (char name
[32], unsigned int regno
)
8984 gcc_assert (!TARGET_64BIT
);
8986 if (USE_HIDDEN_LINKONCE
)
8987 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8989 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8993 /* This function generates code for -fpic that loads %ebx with
8994 the return address of the caller and then returns. */
8997 ix86_code_end (void)
9002 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
9007 if (!(pic_labels_used
& (1 << regno
)))
9010 get_pc_thunk_name (name
, regno
);
9012 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
9013 get_identifier (name
),
9014 build_function_type_list (void_type_node
, NULL_TREE
));
9015 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
9016 NULL_TREE
, void_type_node
);
9017 TREE_PUBLIC (decl
) = 1;
9018 TREE_STATIC (decl
) = 1;
9019 DECL_IGNORED_P (decl
) = 1;
9024 switch_to_section (darwin_sections
[text_coal_section
]);
9025 fputs ("\t.weak_definition\t", asm_out_file
);
9026 assemble_name (asm_out_file
, name
);
9027 fputs ("\n\t.private_extern\t", asm_out_file
);
9028 assemble_name (asm_out_file
, name
);
9029 putc ('\n', asm_out_file
);
9030 ASM_OUTPUT_LABEL (asm_out_file
, name
);
9031 DECL_WEAK (decl
) = 1;
9035 if (USE_HIDDEN_LINKONCE
)
9037 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
9039 targetm
.asm_out
.unique_section (decl
, 0);
9040 switch_to_section (get_named_section (decl
, NULL
, 0));
9042 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
9043 fputs ("\t.hidden\t", asm_out_file
);
9044 assemble_name (asm_out_file
, name
);
9045 putc ('\n', asm_out_file
);
9046 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
9050 switch_to_section (text_section
);
9051 ASM_OUTPUT_LABEL (asm_out_file
, name
);
9054 DECL_INITIAL (decl
) = make_node (BLOCK
);
9055 current_function_decl
= decl
;
9056 init_function_start (decl
);
9057 first_function_block_is_cold
= false;
9058 /* Make sure unwind info is emitted for the thunk if needed. */
9059 final_start_function (emit_barrier (), asm_out_file
, 1);
9061 /* Pad stack IP move with 4 instructions (two NOPs count
9062 as one instruction). */
9063 if (TARGET_PAD_SHORT_FUNCTION
)
9068 fputs ("\tnop\n", asm_out_file
);
9071 xops
[0] = gen_rtx_REG (Pmode
, regno
);
9072 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
9073 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
9074 fputs ("\tret\n", asm_out_file
);
9075 final_end_function ();
9076 init_insn_lengths ();
9077 free_after_compilation (cfun
);
9079 current_function_decl
= NULL
;
9082 if (flag_split_stack
)
9083 file_end_indicate_split_stack ();
9086 /* Emit code for the SET_GOT patterns. */
9089 output_set_got (rtx dest
, rtx label
)
9095 if (TARGET_VXWORKS_RTP
&& flag_pic
)
9097 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9098 xops
[2] = gen_rtx_MEM (Pmode
,
9099 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
9100 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
9102 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9103 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9104 an unadorned address. */
9105 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
9106 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
9107 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
9111 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
9116 /* We don't need a pic base, we're not producing pic. */
9119 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
9120 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
9121 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9122 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
9127 get_pc_thunk_name (name
, REGNO (dest
));
9128 pic_labels_used
|= 1 << REGNO (dest
);
9130 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
9131 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
9132 output_asm_insn ("call\t%X2", xops
);
9135 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9136 This is what will be referenced by the Mach-O PIC subsystem. */
9137 if (machopic_should_output_picbase_label () || !label
)
9138 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
9140 /* When we are restoring the pic base at the site of a nonlocal label,
9141 and we decided to emit the pic base above, we will still output a
9142 local label used for calculating the correction offset (even though
9143 the offset will be 0 in that case). */
9145 targetm
.asm_out
.internal_label (asm_out_file
, "L",
9146 CODE_LABEL_NUMBER (label
));
9151 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
9156 /* Generate an "push" pattern for input ARG. */
9161 struct machine_function
*m
= cfun
->machine
;
9163 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9164 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
9165 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
9167 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9168 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9170 return gen_rtx_SET (VOIDmode
,
9171 gen_rtx_MEM (word_mode
,
9172 gen_rtx_PRE_DEC (Pmode
,
9173 stack_pointer_rtx
)),
9177 /* Generate an "pop" pattern for input ARG. */
9182 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9183 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9185 return gen_rtx_SET (VOIDmode
,
9187 gen_rtx_MEM (word_mode
,
9188 gen_rtx_POST_INC (Pmode
,
9189 stack_pointer_rtx
)));
9192 /* Return >= 0 if there is an unused call-clobbered register available
9193 for the entire function. */
9196 ix86_select_alt_pic_regnum (void)
9200 && !ix86_current_function_calls_tls_descriptor
)
9203 /* Can't use the same register for both PIC and DRAP. */
9205 drap
= REGNO (crtl
->drap_reg
);
9208 for (i
= 2; i
>= 0; --i
)
9209 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9213 return INVALID_REGNUM
;
9216 /* Return TRUE if we need to save REGNO. */
9219 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9221 if (pic_offset_table_rtx
9222 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9223 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9225 || crtl
->calls_eh_return
9226 || crtl
->uses_const_pool
9227 || cfun
->has_nonlocal_label
))
9228 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9230 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9235 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9236 if (test
== INVALID_REGNUM
)
9243 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9246 return (df_regs_ever_live_p (regno
)
9247 && !call_used_regs
[regno
]
9248 && !fixed_regs
[regno
]
9249 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9252 /* Return number of saved general prupose registers. */
9255 ix86_nsaved_regs (void)
9260 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9261 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9266 /* Return number of saved SSE registrers. */
9269 ix86_nsaved_sseregs (void)
9274 if (!TARGET_64BIT_MS_ABI
)
9276 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9277 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9282 /* Given FROM and TO register numbers, say whether this elimination is
9283 allowed. If stack alignment is needed, we can only replace argument
9284 pointer with hard frame pointer, or replace frame pointer with stack
9285 pointer. Otherwise, frame pointer elimination is automatically
9286 handled and all other eliminations are valid. */
9289 ix86_can_eliminate (const int from
, const int to
)
9291 if (stack_realign_fp
)
9292 return ((from
== ARG_POINTER_REGNUM
9293 && to
== HARD_FRAME_POINTER_REGNUM
)
9294 || (from
== FRAME_POINTER_REGNUM
9295 && to
== STACK_POINTER_REGNUM
));
9297 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9300 /* Return the offset between two registers, one to be eliminated, and the other
9301 its replacement, at the start of a routine. */
9304 ix86_initial_elimination_offset (int from
, int to
)
9306 struct ix86_frame frame
;
9307 ix86_compute_frame_layout (&frame
);
9309 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9310 return frame
.hard_frame_pointer_offset
;
9311 else if (from
== FRAME_POINTER_REGNUM
9312 && to
== HARD_FRAME_POINTER_REGNUM
)
9313 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9316 gcc_assert (to
== STACK_POINTER_REGNUM
);
9318 if (from
== ARG_POINTER_REGNUM
)
9319 return frame
.stack_pointer_offset
;
9321 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9322 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9326 /* In a dynamically-aligned function, we can't know the offset from
9327 stack pointer to frame pointer, so we must ensure that setjmp
9328 eliminates fp against the hard fp (%ebp) rather than trying to
9329 index from %esp up to the top of the frame across a gap that is
9330 of unknown (at compile-time) size. */
9332 ix86_builtin_setjmp_frame_value (void)
9334 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9337 /* When using -fsplit-stack, the allocation routines set a field in
9338 the TCB to the bottom of the stack plus this much space, measured
9341 #define SPLIT_STACK_AVAILABLE 256
9343 /* Fill structure ix86_frame about frame of currently computed function. */
9346 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9348 unsigned HOST_WIDE_INT stack_alignment_needed
;
9349 HOST_WIDE_INT offset
;
9350 unsigned HOST_WIDE_INT preferred_alignment
;
9351 HOST_WIDE_INT size
= get_frame_size ();
9352 HOST_WIDE_INT to_allocate
;
9354 frame
->nregs
= ix86_nsaved_regs ();
9355 frame
->nsseregs
= ix86_nsaved_sseregs ();
9357 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9358 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9360 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9361 function prologues and leaf. */
9362 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9363 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9364 || ix86_current_function_calls_tls_descriptor
))
9366 preferred_alignment
= 16;
9367 stack_alignment_needed
= 16;
9368 crtl
->preferred_stack_boundary
= 128;
9369 crtl
->stack_alignment_needed
= 128;
9372 gcc_assert (!size
|| stack_alignment_needed
);
9373 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9374 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9376 /* For SEH we have to limit the amount of code movement into the prologue.
9377 At present we do this via a BLOCKAGE, at which point there's very little
9378 scheduling that can be done, which means that there's very little point
9379 in doing anything except PUSHs. */
9381 cfun
->machine
->use_fast_prologue_epilogue
= false;
9383 /* During reload iteration the amount of registers saved can change.
9384 Recompute the value as needed. Do not recompute when amount of registers
9385 didn't change as reload does multiple calls to the function and does not
9386 expect the decision to change within single iteration. */
9387 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
))
9388 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9390 int count
= frame
->nregs
;
9391 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9393 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9395 /* The fast prologue uses move instead of push to save registers. This
9396 is significantly longer, but also executes faster as modern hardware
9397 can execute the moves in parallel, but can't do that for push/pop.
9399 Be careful about choosing what prologue to emit: When function takes
9400 many instructions to execute we may use slow version as well as in
9401 case function is known to be outside hot spot (this is known with
9402 feedback only). Weight the size of function by number of registers
9403 to save as it is cheap to use one or two push instructions but very
9404 slow to use many of them. */
9406 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9407 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9408 || (flag_branch_probabilities
9409 && node
->frequency
< NODE_FREQUENCY_HOT
))
9410 cfun
->machine
->use_fast_prologue_epilogue
= false;
9412 cfun
->machine
->use_fast_prologue_epilogue
9413 = !expensive_function_p (count
);
9416 frame
->save_regs_using_mov
9417 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9418 /* If static stack checking is enabled and done with probes,
9419 the registers need to be saved before allocating the frame. */
9420 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9422 /* Skip return address. */
9423 offset
= UNITS_PER_WORD
;
9425 /* Skip pushed static chain. */
9426 if (ix86_static_chain_on_stack
)
9427 offset
+= UNITS_PER_WORD
;
9429 /* Skip saved base pointer. */
9430 if (frame_pointer_needed
)
9431 offset
+= UNITS_PER_WORD
;
9432 frame
->hfp_save_offset
= offset
;
9434 /* The traditional frame pointer location is at the top of the frame. */
9435 frame
->hard_frame_pointer_offset
= offset
;
9437 /* Register save area */
9438 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9439 frame
->reg_save_offset
= offset
;
9441 /* On SEH target, registers are pushed just before the frame pointer
9444 frame
->hard_frame_pointer_offset
= offset
;
9446 /* Align and set SSE register save area. */
9447 if (frame
->nsseregs
)
9449 /* The only ABI that has saved SSE registers (Win64) also has a
9450 16-byte aligned default stack, and thus we don't need to be
9451 within the re-aligned local stack frame to save them. */
9452 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9453 offset
= (offset
+ 16 - 1) & -16;
9454 offset
+= frame
->nsseregs
* 16;
9456 frame
->sse_reg_save_offset
= offset
;
9458 /* The re-aligned stack starts here. Values before this point are not
9459 directly comparable with values below this point. In order to make
9460 sure that no value happens to be the same before and after, force
9461 the alignment computation below to add a non-zero value. */
9462 if (stack_realign_fp
)
9463 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9466 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9467 offset
+= frame
->va_arg_size
;
9469 /* Align start of frame for local function. */
9470 if (stack_realign_fp
9471 || offset
!= frame
->sse_reg_save_offset
9474 || cfun
->calls_alloca
9475 || ix86_current_function_calls_tls_descriptor
)
9476 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9478 /* Frame pointer points here. */
9479 frame
->frame_pointer_offset
= offset
;
9483 /* Add outgoing arguments area. Can be skipped if we eliminated
9484 all the function calls as dead code.
9485 Skipping is however impossible when function calls alloca. Alloca
9486 expander assumes that last crtl->outgoing_args_size
9487 of stack frame are unused. */
9488 if (ACCUMULATE_OUTGOING_ARGS
9489 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9490 || ix86_current_function_calls_tls_descriptor
))
9492 offset
+= crtl
->outgoing_args_size
;
9493 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9496 frame
->outgoing_arguments_size
= 0;
9498 /* Align stack boundary. Only needed if we're calling another function
9500 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9501 || ix86_current_function_calls_tls_descriptor
)
9502 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9504 /* We've reached end of stack frame. */
9505 frame
->stack_pointer_offset
= offset
;
9507 /* Size prologue needs to allocate. */
9508 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9510 if ((!to_allocate
&& frame
->nregs
<= 1)
9511 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9512 frame
->save_regs_using_mov
= false;
9514 if (ix86_using_red_zone ()
9515 && crtl
->sp_is_unchanging
9517 && !ix86_current_function_calls_tls_descriptor
)
9519 frame
->red_zone_size
= to_allocate
;
9520 if (frame
->save_regs_using_mov
)
9521 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9522 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9523 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9526 frame
->red_zone_size
= 0;
9527 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9529 /* The SEH frame pointer location is near the bottom of the frame.
9530 This is enforced by the fact that the difference between the
9531 stack pointer and the frame pointer is limited to 240 bytes in
9532 the unwind data structure. */
9537 /* If we can leave the frame pointer where it is, do so. Also, returns
9538 the establisher frame for __builtin_frame_address (0). */
9539 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9540 if (diff
<= SEH_MAX_FRAME_SIZE
9541 && (diff
> 240 || (diff
& 15) != 0)
9542 && !crtl
->accesses_prior_frames
)
9544 /* Ideally we'd determine what portion of the local stack frame
9545 (within the constraint of the lowest 240) is most heavily used.
9546 But without that complication, simply bias the frame pointer
9547 by 128 bytes so as to maximize the amount of the local stack
9548 frame that is addressable with 8-bit offsets. */
9549 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9554 /* This is semi-inlined memory_address_length, but simplified
9555 since we know that we're always dealing with reg+offset, and
9556 to avoid having to create and discard all that rtl. */
9559 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9565 /* EBP and R13 cannot be encoded without an offset. */
9566 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9568 else if (IN_RANGE (offset
, -128, 127))
9571 /* ESP and R12 must be encoded with a SIB byte. */
9572 if (regno
== SP_REG
|| regno
== R12_REG
)
9578 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9579 The valid base registers are taken from CFUN->MACHINE->FS. */
9582 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9584 const struct machine_function
*m
= cfun
->machine
;
9585 rtx base_reg
= NULL
;
9586 HOST_WIDE_INT base_offset
= 0;
9588 if (m
->use_fast_prologue_epilogue
)
9590 /* Choose the base register most likely to allow the most scheduling
9591 opportunities. Generally FP is valid throughout the function,
9592 while DRAP must be reloaded within the epilogue. But choose either
9593 over the SP due to increased encoding size. */
9597 base_reg
= hard_frame_pointer_rtx
;
9598 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9600 else if (m
->fs
.drap_valid
)
9602 base_reg
= crtl
->drap_reg
;
9603 base_offset
= 0 - cfa_offset
;
9605 else if (m
->fs
.sp_valid
)
9607 base_reg
= stack_pointer_rtx
;
9608 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9613 HOST_WIDE_INT toffset
;
9616 /* Choose the base register with the smallest address encoding.
9617 With a tie, choose FP > DRAP > SP. */
9620 base_reg
= stack_pointer_rtx
;
9621 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9622 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9624 if (m
->fs
.drap_valid
)
9626 toffset
= 0 - cfa_offset
;
9627 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9630 base_reg
= crtl
->drap_reg
;
9631 base_offset
= toffset
;
9637 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9638 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9641 base_reg
= hard_frame_pointer_rtx
;
9642 base_offset
= toffset
;
9647 gcc_assert (base_reg
!= NULL
);
9649 return plus_constant (Pmode
, base_reg
, base_offset
);
9652 /* Emit code to save registers in the prologue. */
9655 ix86_emit_save_regs (void)
9660 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9661 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9663 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9664 RTX_FRAME_RELATED_P (insn
) = 1;
9668 /* Emit a single register save at CFA - CFA_OFFSET. */
9671 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9672 HOST_WIDE_INT cfa_offset
)
9674 struct machine_function
*m
= cfun
->machine
;
9675 rtx reg
= gen_rtx_REG (mode
, regno
);
9676 rtx mem
, addr
, base
, insn
;
9678 addr
= choose_baseaddr (cfa_offset
);
9679 mem
= gen_frame_mem (mode
, addr
);
9681 /* For SSE saves, we need to indicate the 128-bit alignment. */
9682 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9684 insn
= emit_move_insn (mem
, reg
);
9685 RTX_FRAME_RELATED_P (insn
) = 1;
9688 if (GET_CODE (base
) == PLUS
)
9689 base
= XEXP (base
, 0);
9690 gcc_checking_assert (REG_P (base
));
9692 /* When saving registers into a re-aligned local stack frame, avoid
9693 any tricky guessing by dwarf2out. */
9694 if (m
->fs
.realigned
)
9696 gcc_checking_assert (stack_realign_drap
);
9698 if (regno
== REGNO (crtl
->drap_reg
))
9700 /* A bit of a hack. We force the DRAP register to be saved in
9701 the re-aligned stack frame, which provides us with a copy
9702 of the CFA that will last past the prologue. Install it. */
9703 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9704 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9705 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9706 mem
= gen_rtx_MEM (mode
, addr
);
9707 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9711 /* The frame pointer is a stable reference within the
9712 aligned frame. Use it. */
9713 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9714 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9715 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9716 mem
= gen_rtx_MEM (mode
, addr
);
9717 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9718 gen_rtx_SET (VOIDmode
, mem
, reg
));
9722 /* The memory may not be relative to the current CFA register,
9723 which means that we may need to generate a new pattern for
9724 use by the unwind info. */
9725 else if (base
!= m
->fs
.cfa_reg
)
9727 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9728 m
->fs
.cfa_offset
- cfa_offset
);
9729 mem
= gen_rtx_MEM (mode
, addr
);
9730 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9734 /* Emit code to save registers using MOV insns.
9735 First register is stored at CFA - CFA_OFFSET. */
9737 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9741 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9742 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9744 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9745 cfa_offset
-= UNITS_PER_WORD
;
9749 /* Emit code to save SSE registers using MOV insns.
9750 First register is stored at CFA - CFA_OFFSET. */
9752 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9756 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9757 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9759 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9764 static GTY(()) rtx queued_cfa_restores
;
9766 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9767 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9768 Don't add the note if the previously saved value will be left untouched
9769 within stack red-zone till return, as unwinders can find the same value
9770 in the register and on the stack. */
9773 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9775 if (!crtl
->shrink_wrapped
9776 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9781 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9782 RTX_FRAME_RELATED_P (insn
) = 1;
9786 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9789 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9792 ix86_add_queued_cfa_restore_notes (rtx insn
)
9795 if (!queued_cfa_restores
)
9797 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9799 XEXP (last
, 1) = REG_NOTES (insn
);
9800 REG_NOTES (insn
) = queued_cfa_restores
;
9801 queued_cfa_restores
= NULL_RTX
;
9802 RTX_FRAME_RELATED_P (insn
) = 1;
9805 /* Expand prologue or epilogue stack adjustment.
9806 The pattern exist to put a dependency on all ebp-based memory accesses.
9807 STYLE should be negative if instructions should be marked as frame related,
9808 zero if %r11 register is live and cannot be freely used and positive
9812 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9813 int style
, bool set_cfa
)
9815 struct machine_function
*m
= cfun
->machine
;
9817 bool add_frame_related_expr
= false;
9819 if (Pmode
== SImode
)
9820 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9821 else if (x86_64_immediate_operand (offset
, DImode
))
9822 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9826 /* r11 is used by indirect sibcall return as well, set before the
9827 epilogue and used after the epilogue. */
9829 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9832 gcc_assert (src
!= hard_frame_pointer_rtx
9833 && dest
!= hard_frame_pointer_rtx
);
9834 tmp
= hard_frame_pointer_rtx
;
9836 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9838 add_frame_related_expr
= true;
9840 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9843 insn
= emit_insn (insn
);
9845 ix86_add_queued_cfa_restore_notes (insn
);
9851 gcc_assert (m
->fs
.cfa_reg
== src
);
9852 m
->fs
.cfa_offset
+= INTVAL (offset
);
9853 m
->fs
.cfa_reg
= dest
;
9855 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9856 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9857 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9858 RTX_FRAME_RELATED_P (insn
) = 1;
9862 RTX_FRAME_RELATED_P (insn
) = 1;
9863 if (add_frame_related_expr
)
9865 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9866 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9867 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9871 if (dest
== stack_pointer_rtx
)
9873 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9874 bool valid
= m
->fs
.sp_valid
;
9876 if (src
== hard_frame_pointer_rtx
)
9878 valid
= m
->fs
.fp_valid
;
9879 ooffset
= m
->fs
.fp_offset
;
9881 else if (src
== crtl
->drap_reg
)
9883 valid
= m
->fs
.drap_valid
;
9888 /* Else there are two possibilities: SP itself, which we set
9889 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9890 taken care of this by hand along the eh_return path. */
9891 gcc_checking_assert (src
== stack_pointer_rtx
9892 || offset
== const0_rtx
);
9895 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9896 m
->fs
.sp_valid
= valid
;
9900 /* Find an available register to be used as dynamic realign argument
9901 pointer regsiter. Such a register will be written in prologue and
9902 used in begin of body, so it must not be
9903 1. parameter passing register.
9905 We reuse static-chain register if it is available. Otherwise, we
9906 use DI for i386 and R13 for x86-64. We chose R13 since it has
9909 Return: the regno of chosen register. */
9912 find_drap_reg (void)
9914 tree decl
= cfun
->decl
;
9918 /* Use R13 for nested function or function need static chain.
9919 Since function with tail call may use any caller-saved
9920 registers in epilogue, DRAP must not use caller-saved
9921 register in such case. */
9922 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9929 /* Use DI for nested function or function need static chain.
9930 Since function with tail call may use any caller-saved
9931 registers in epilogue, DRAP must not use caller-saved
9932 register in such case. */
9933 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9936 /* Reuse static chain register if it isn't used for parameter
9938 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9940 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9941 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9948 /* Return minimum incoming stack alignment. */
9951 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9953 unsigned int incoming_stack_boundary
;
9955 /* Prefer the one specified at command line. */
9956 if (ix86_user_incoming_stack_boundary
)
9957 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9958 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9959 if -mstackrealign is used, it isn't used for sibcall check and
9960 estimated stack alignment is 128bit. */
9963 && ix86_force_align_arg_pointer
9964 && crtl
->stack_alignment_estimated
== 128)
9965 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9967 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9969 /* Incoming stack alignment can be changed on individual functions
9970 via force_align_arg_pointer attribute. We use the smallest
9971 incoming stack boundary. */
9972 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9973 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9974 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9975 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9977 /* The incoming stack frame has to be aligned at least at
9978 parm_stack_boundary. */
9979 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9980 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9982 /* Stack at entrance of main is aligned by runtime. We use the
9983 smallest incoming stack boundary. */
9984 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9985 && DECL_NAME (current_function_decl
)
9986 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9987 && DECL_FILE_SCOPE_P (current_function_decl
))
9988 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9990 return incoming_stack_boundary
;
9993 /* Update incoming stack boundary and estimated stack alignment. */
9996 ix86_update_stack_boundary (void)
9998 ix86_incoming_stack_boundary
9999 = ix86_minimum_incoming_stack_boundary (false);
10001 /* x86_64 vararg needs 16byte stack alignment for register save
10005 && crtl
->stack_alignment_estimated
< 128)
10006 crtl
->stack_alignment_estimated
= 128;
10009 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10010 needed or an rtx for DRAP otherwise. */
10013 ix86_get_drap_rtx (void)
10015 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
10016 crtl
->need_drap
= true;
10018 if (stack_realign_drap
)
10020 /* Assign DRAP to vDRAP and returns vDRAP */
10021 unsigned int regno
= find_drap_reg ();
10026 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
10027 crtl
->drap_reg
= arg_ptr
;
10030 drap_vreg
= copy_to_reg (arg_ptr
);
10031 seq
= get_insns ();
10034 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
10037 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
10038 RTX_FRAME_RELATED_P (insn
) = 1;
10046 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10049 ix86_internal_arg_pointer (void)
10051 return virtual_incoming_args_rtx
;
10054 struct scratch_reg
{
10059 /* Return a short-lived scratch register for use on function entry.
10060 In 32-bit mode, it is valid only after the registers are saved
10061 in the prologue. This register must be released by means of
10062 release_scratch_register_on_entry once it is dead. */
10065 get_scratch_register_on_entry (struct scratch_reg
*sr
)
10073 /* We always use R11 in 64-bit mode. */
10078 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
10080 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10082 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
10083 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
10084 int regparm
= ix86_function_regparm (fntype
, decl
);
10086 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
10088 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10089 for the static chain register. */
10090 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
10091 && drap_regno
!= AX_REG
)
10093 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10094 for the static chain register. */
10095 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
10097 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
10099 /* ecx is the static chain register. */
10100 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
10102 && drap_regno
!= CX_REG
)
10104 else if (ix86_save_reg (BX_REG
, true))
10106 /* esi is the static chain register. */
10107 else if (!(regparm
== 3 && static_chain_p
)
10108 && ix86_save_reg (SI_REG
, true))
10110 else if (ix86_save_reg (DI_REG
, true))
10114 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
10119 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
10122 rtx insn
= emit_insn (gen_push (sr
->reg
));
10123 RTX_FRAME_RELATED_P (insn
) = 1;
10127 /* Release a scratch register obtained from the preceding function. */
10130 release_scratch_register_on_entry (struct scratch_reg
*sr
)
10134 struct machine_function
*m
= cfun
->machine
;
10135 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
10137 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10138 RTX_FRAME_RELATED_P (insn
) = 1;
10139 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
10140 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10141 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
10142 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10146 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10148 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10151 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
10153 /* We skip the probe for the first interval + a small dope of 4 words and
10154 probe that many bytes past the specified size to maintain a protection
10155 area at the botton of the stack. */
10156 const int dope
= 4 * UNITS_PER_WORD
;
10157 rtx size_rtx
= GEN_INT (size
), last
;
10159 /* See if we have a constant small number of probes to generate. If so,
10160 that's the easy case. The run-time loop is made up of 11 insns in the
10161 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10162 for n # of intervals. */
10163 if (size
<= 5 * PROBE_INTERVAL
)
10165 HOST_WIDE_INT i
, adjust
;
10166 bool first_probe
= true;
10168 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10169 values of N from 1 until it exceeds SIZE. If only one probe is
10170 needed, this will not generate any code. Then adjust and probe
10171 to PROBE_INTERVAL + SIZE. */
10172 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10176 adjust
= 2 * PROBE_INTERVAL
+ dope
;
10177 first_probe
= false;
10180 adjust
= PROBE_INTERVAL
;
10182 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10183 plus_constant (Pmode
, stack_pointer_rtx
,
10185 emit_stack_probe (stack_pointer_rtx
);
10189 adjust
= size
+ PROBE_INTERVAL
+ dope
;
10191 adjust
= size
+ PROBE_INTERVAL
- i
;
10193 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10194 plus_constant (Pmode
, stack_pointer_rtx
,
10196 emit_stack_probe (stack_pointer_rtx
);
10198 /* Adjust back to account for the additional first interval. */
10199 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10200 plus_constant (Pmode
, stack_pointer_rtx
,
10201 PROBE_INTERVAL
+ dope
)));
10204 /* Otherwise, do the same as above, but in a loop. Note that we must be
10205 extra careful with variables wrapping around because we might be at
10206 the very top (or the very bottom) of the address space and we have
10207 to be able to handle this case properly; in particular, we use an
10208 equality test for the loop condition. */
10211 HOST_WIDE_INT rounded_size
;
10212 struct scratch_reg sr
;
10214 get_scratch_register_on_entry (&sr
);
10217 /* Step 1: round SIZE to the previous multiple of the interval. */
10219 rounded_size
= size
& -PROBE_INTERVAL
;
10222 /* Step 2: compute initial and final value of the loop counter. */
10224 /* SP = SP_0 + PROBE_INTERVAL. */
10225 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10226 plus_constant (Pmode
, stack_pointer_rtx
,
10227 - (PROBE_INTERVAL
+ dope
))));
10229 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10230 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10231 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10232 gen_rtx_PLUS (Pmode
, sr
.reg
,
10233 stack_pointer_rtx
)));
10236 /* Step 3: the loop
10238 while (SP != LAST_ADDR)
10240 SP = SP + PROBE_INTERVAL
10244 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10245 values of N from 1 until it is equal to ROUNDED_SIZE. */
10247 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10250 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10251 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10253 if (size
!= rounded_size
)
10255 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10256 plus_constant (Pmode
, stack_pointer_rtx
,
10257 rounded_size
- size
)));
10258 emit_stack_probe (stack_pointer_rtx
);
10261 /* Adjust back to account for the additional first interval. */
10262 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10263 plus_constant (Pmode
, stack_pointer_rtx
,
10264 PROBE_INTERVAL
+ dope
)));
10266 release_scratch_register_on_entry (&sr
);
10269 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10271 /* Even if the stack pointer isn't the CFA register, we need to correctly
10272 describe the adjustments made to it, in particular differentiate the
10273 frame-related ones from the frame-unrelated ones. */
10276 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10277 XVECEXP (expr
, 0, 0)
10278 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10279 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10280 XVECEXP (expr
, 0, 1)
10281 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10282 plus_constant (Pmode
, stack_pointer_rtx
,
10283 PROBE_INTERVAL
+ dope
+ size
));
10284 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10285 RTX_FRAME_RELATED_P (last
) = 1;
10287 cfun
->machine
->fs
.sp_offset
+= size
;
10290 /* Make sure nothing is scheduled before we are done. */
10291 emit_insn (gen_blockage ());
10294 /* Adjust the stack pointer up to REG while probing it. */
10297 output_adjust_stack_and_probe (rtx reg
)
10299 static int labelno
= 0;
10300 char loop_lab
[32], end_lab
[32];
10303 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10304 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10306 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10308 /* Jump to END_LAB if SP == LAST_ADDR. */
10309 xops
[0] = stack_pointer_rtx
;
10311 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10312 fputs ("\tje\t", asm_out_file
);
10313 assemble_name_raw (asm_out_file
, end_lab
);
10314 fputc ('\n', asm_out_file
);
10316 /* SP = SP + PROBE_INTERVAL. */
10317 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10318 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10321 xops
[1] = const0_rtx
;
10322 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10324 fprintf (asm_out_file
, "\tjmp\t");
10325 assemble_name_raw (asm_out_file
, loop_lab
);
10326 fputc ('\n', asm_out_file
);
10328 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10333 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10334 inclusive. These are offsets from the current stack pointer. */
10337 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10339 /* See if we have a constant small number of probes to generate. If so,
10340 that's the easy case. The run-time loop is made up of 7 insns in the
10341 generic case while the compile-time loop is made up of n insns for n #
10343 if (size
<= 7 * PROBE_INTERVAL
)
10347 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10348 it exceeds SIZE. If only one probe is needed, this will not
10349 generate any code. Then probe at FIRST + SIZE. */
10350 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10351 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10354 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10358 /* Otherwise, do the same as above, but in a loop. Note that we must be
10359 extra careful with variables wrapping around because we might be at
10360 the very top (or the very bottom) of the address space and we have
10361 to be able to handle this case properly; in particular, we use an
10362 equality test for the loop condition. */
10365 HOST_WIDE_INT rounded_size
, last
;
10366 struct scratch_reg sr
;
10368 get_scratch_register_on_entry (&sr
);
10371 /* Step 1: round SIZE to the previous multiple of the interval. */
10373 rounded_size
= size
& -PROBE_INTERVAL
;
10376 /* Step 2: compute initial and final value of the loop counter. */
10378 /* TEST_OFFSET = FIRST. */
10379 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10381 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10382 last
= first
+ rounded_size
;
10385 /* Step 3: the loop
10387 while (TEST_ADDR != LAST_ADDR)
10389 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10393 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10394 until it is equal to ROUNDED_SIZE. */
10396 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10399 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10400 that SIZE is equal to ROUNDED_SIZE. */
10402 if (size
!= rounded_size
)
10403 emit_stack_probe (plus_constant (Pmode
,
10404 gen_rtx_PLUS (Pmode
,
10407 rounded_size
- size
));
10409 release_scratch_register_on_entry (&sr
);
10412 /* Make sure nothing is scheduled before we are done. */
10413 emit_insn (gen_blockage ());
10416 /* Probe a range of stack addresses from REG to END, inclusive. These are
10417 offsets from the current stack pointer. */
10420 output_probe_stack_range (rtx reg
, rtx end
)
10422 static int labelno
= 0;
10423 char loop_lab
[32], end_lab
[32];
10426 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10427 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10429 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10431 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10434 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10435 fputs ("\tje\t", asm_out_file
);
10436 assemble_name_raw (asm_out_file
, end_lab
);
10437 fputc ('\n', asm_out_file
);
10439 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10440 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10441 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10443 /* Probe at TEST_ADDR. */
10444 xops
[0] = stack_pointer_rtx
;
10446 xops
[2] = const0_rtx
;
10447 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10449 fprintf (asm_out_file
, "\tjmp\t");
10450 assemble_name_raw (asm_out_file
, loop_lab
);
10451 fputc ('\n', asm_out_file
);
10453 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10458 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10459 to be generated in correct form. */
10461 ix86_finalize_stack_realign_flags (void)
10463 /* Check if stack realign is really needed after reload, and
10464 stores result in cfun */
10465 unsigned int incoming_stack_boundary
10466 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10467 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10468 unsigned int stack_realign
= (incoming_stack_boundary
10470 ? crtl
->max_used_stack_slot_alignment
10471 : crtl
->stack_alignment_needed
));
10473 if (crtl
->stack_realign_finalized
)
10475 /* After stack_realign_needed is finalized, we can't no longer
10477 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10481 /* If the only reason for frame_pointer_needed is that we conservatively
10482 assumed stack realignment might be needed, but in the end nothing that
10483 needed the stack alignment had been spilled, clear frame_pointer_needed
10484 and say we don't need stack realignment. */
10486 && !crtl
->need_drap
10487 && frame_pointer_needed
10489 && flag_omit_frame_pointer
10490 && crtl
->sp_is_unchanging
10491 && !ix86_current_function_calls_tls_descriptor
10492 && !crtl
->accesses_prior_frames
10493 && !cfun
->calls_alloca
10494 && !crtl
->calls_eh_return
10495 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10496 && !ix86_frame_pointer_required ()
10497 && get_frame_size () == 0
10498 && ix86_nsaved_sseregs () == 0
10499 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10501 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10504 CLEAR_HARD_REG_SET (prologue_used
);
10505 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10506 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10507 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10508 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10509 HARD_FRAME_POINTER_REGNUM
);
10510 FOR_EACH_BB_FN (bb
, cfun
)
10513 FOR_BB_INSNS (bb
, insn
)
10514 if (NONDEBUG_INSN_P (insn
)
10515 && requires_stack_frame_p (insn
, prologue_used
,
10516 set_up_by_prologue
))
10518 crtl
->stack_realign_needed
= stack_realign
;
10519 crtl
->stack_realign_finalized
= true;
10524 frame_pointer_needed
= false;
10525 stack_realign
= false;
10526 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10527 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10528 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10529 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10530 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10531 df_finish_pass (true);
10532 df_scan_alloc (NULL
);
10534 df_compute_regs_ever_live (true);
10538 crtl
->stack_realign_needed
= stack_realign
;
10539 crtl
->stack_realign_finalized
= true;
10542 /* Expand the prologue into a bunch of separate insns. */
10545 ix86_expand_prologue (void)
10547 struct machine_function
*m
= cfun
->machine
;
10550 struct ix86_frame frame
;
10551 HOST_WIDE_INT allocate
;
10552 bool int_registers_saved
;
10553 bool sse_registers_saved
;
10555 ix86_finalize_stack_realign_flags ();
10557 /* DRAP should not coexist with stack_realign_fp */
10558 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10560 memset (&m
->fs
, 0, sizeof (m
->fs
));
10562 /* Initialize CFA state for before the prologue. */
10563 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10564 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10566 /* Track SP offset to the CFA. We continue tracking this after we've
10567 swapped the CFA register away from SP. In the case of re-alignment
10568 this is fudged; we're interested to offsets within the local frame. */
10569 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10570 m
->fs
.sp_valid
= true;
10572 ix86_compute_frame_layout (&frame
);
10574 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10576 /* We should have already generated an error for any use of
10577 ms_hook on a nested function. */
10578 gcc_checking_assert (!ix86_static_chain_on_stack
);
10580 /* Check if profiling is active and we shall use profiling before
10581 prologue variant. If so sorry. */
10582 if (crtl
->profile
&& flag_fentry
!= 0)
10583 sorry ("ms_hook_prologue attribute isn%'t compatible "
10584 "with -mfentry for 32-bit");
10586 /* In ix86_asm_output_function_label we emitted:
10587 8b ff movl.s %edi,%edi
10589 8b ec movl.s %esp,%ebp
10591 This matches the hookable function prologue in Win32 API
10592 functions in Microsoft Windows XP Service Pack 2 and newer.
10593 Wine uses this to enable Windows apps to hook the Win32 API
10594 functions provided by Wine.
10596 What that means is that we've already set up the frame pointer. */
10598 if (frame_pointer_needed
10599 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10603 /* We've decided to use the frame pointer already set up.
10604 Describe this to the unwinder by pretending that both
10605 push and mov insns happen right here.
10607 Putting the unwind info here at the end of the ms_hook
10608 is done so that we can make absolutely certain we get
10609 the required byte sequence at the start of the function,
10610 rather than relying on an assembler that can produce
10611 the exact encoding required.
10613 However it does mean (in the unpatched case) that we have
10614 a 1 insn window where the asynchronous unwind info is
10615 incorrect. However, if we placed the unwind info at
10616 its correct location we would have incorrect unwind info
10617 in the patched case. Which is probably all moot since
10618 I don't expect Wine generates dwarf2 unwind info for the
10619 system libraries that use this feature. */
10621 insn
= emit_insn (gen_blockage ());
10623 push
= gen_push (hard_frame_pointer_rtx
);
10624 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10625 stack_pointer_rtx
);
10626 RTX_FRAME_RELATED_P (push
) = 1;
10627 RTX_FRAME_RELATED_P (mov
) = 1;
10629 RTX_FRAME_RELATED_P (insn
) = 1;
10630 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10631 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10633 /* Note that gen_push incremented m->fs.cfa_offset, even
10634 though we didn't emit the push insn here. */
10635 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10636 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10637 m
->fs
.fp_valid
= true;
10641 /* The frame pointer is not needed so pop %ebp again.
10642 This leaves us with a pristine state. */
10643 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10647 /* The first insn of a function that accepts its static chain on the
10648 stack is to push the register that would be filled in by a direct
10649 call. This insn will be skipped by the trampoline. */
10650 else if (ix86_static_chain_on_stack
)
10652 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10653 emit_insn (gen_blockage ());
10655 /* We don't want to interpret this push insn as a register save,
10656 only as a stack adjustment. The real copy of the register as
10657 a save will be done later, if needed. */
10658 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10659 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10660 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10661 RTX_FRAME_RELATED_P (insn
) = 1;
10664 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10665 of DRAP is needed and stack realignment is really needed after reload */
10666 if (stack_realign_drap
)
10668 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10670 /* Only need to push parameter pointer reg if it is caller saved. */
10671 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10673 /* Push arg pointer reg */
10674 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10675 RTX_FRAME_RELATED_P (insn
) = 1;
10678 /* Grab the argument pointer. */
10679 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10680 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10681 RTX_FRAME_RELATED_P (insn
) = 1;
10682 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10683 m
->fs
.cfa_offset
= 0;
10685 /* Align the stack. */
10686 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10688 GEN_INT (-align_bytes
)));
10689 RTX_FRAME_RELATED_P (insn
) = 1;
10691 /* Replicate the return address on the stack so that return
10692 address can be reached via (argp - 1) slot. This is needed
10693 to implement macro RETURN_ADDR_RTX and intrinsic function
10694 expand_builtin_return_addr etc. */
10695 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10696 t
= gen_frame_mem (word_mode
, t
);
10697 insn
= emit_insn (gen_push (t
));
10698 RTX_FRAME_RELATED_P (insn
) = 1;
10700 /* For the purposes of frame and register save area addressing,
10701 we've started over with a new frame. */
10702 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10703 m
->fs
.realigned
= true;
10706 int_registers_saved
= (frame
.nregs
== 0);
10707 sse_registers_saved
= (frame
.nsseregs
== 0);
10709 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10711 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10712 slower on all targets. Also sdb doesn't like it. */
10713 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10714 RTX_FRAME_RELATED_P (insn
) = 1;
10716 /* Push registers now, before setting the frame pointer
10718 if (!int_registers_saved
10720 && !frame
.save_regs_using_mov
)
10722 ix86_emit_save_regs ();
10723 int_registers_saved
= true;
10724 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10727 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10729 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10730 RTX_FRAME_RELATED_P (insn
) = 1;
10732 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10733 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10734 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10735 m
->fs
.fp_valid
= true;
10739 if (!int_registers_saved
)
10741 /* If saving registers via PUSH, do so now. */
10742 if (!frame
.save_regs_using_mov
)
10744 ix86_emit_save_regs ();
10745 int_registers_saved
= true;
10746 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10749 /* When using red zone we may start register saving before allocating
10750 the stack frame saving one cycle of the prologue. However, avoid
10751 doing this if we have to probe the stack; at least on x86_64 the
10752 stack probe can turn into a call that clobbers a red zone location. */
10753 else if (ix86_using_red_zone ()
10754 && (! TARGET_STACK_PROBE
10755 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10757 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10758 int_registers_saved
= true;
10762 if (stack_realign_fp
)
10764 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10765 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10767 /* The computation of the size of the re-aligned stack frame means
10768 that we must allocate the size of the register save area before
10769 performing the actual alignment. Otherwise we cannot guarantee
10770 that there's enough storage above the realignment point. */
10771 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10772 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10773 GEN_INT (m
->fs
.sp_offset
10774 - frame
.sse_reg_save_offset
),
10777 /* Align the stack. */
10778 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10780 GEN_INT (-align_bytes
)));
10782 /* For the purposes of register save area addressing, the stack
10783 pointer is no longer valid. As for the value of sp_offset,
10784 see ix86_compute_frame_layout, which we need to match in order
10785 to pass verification of stack_pointer_offset at the end. */
10786 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10787 m
->fs
.sp_valid
= false;
10790 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10792 if (flag_stack_usage_info
)
10794 /* We start to count from ARG_POINTER. */
10795 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10797 /* If it was realigned, take into account the fake frame. */
10798 if (stack_realign_drap
)
10800 if (ix86_static_chain_on_stack
)
10801 stack_size
+= UNITS_PER_WORD
;
10803 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10804 stack_size
+= UNITS_PER_WORD
;
10806 /* This over-estimates by 1 minimal-stack-alignment-unit but
10807 mitigates that by counting in the new return address slot. */
10808 current_function_dynamic_stack_size
10809 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10812 current_function_static_stack_size
= stack_size
;
10815 /* On SEH target with very large frame size, allocate an area to save
10816 SSE registers (as the very large allocation won't be described). */
10818 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10819 && !sse_registers_saved
)
10821 HOST_WIDE_INT sse_size
=
10822 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10824 gcc_assert (int_registers_saved
);
10826 /* No need to do stack checking as the area will be immediately
10828 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10829 GEN_INT (-sse_size
), -1,
10830 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10831 allocate
-= sse_size
;
10832 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10833 sse_registers_saved
= true;
10836 /* The stack has already been decremented by the instruction calling us
10837 so probe if the size is non-negative to preserve the protection area. */
10838 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10840 /* We expect the registers to be saved when probes are used. */
10841 gcc_assert (int_registers_saved
);
10843 if (STACK_CHECK_MOVING_SP
)
10845 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
10846 && allocate
<= PROBE_INTERVAL
))
10848 ix86_adjust_stack_and_probe (allocate
);
10854 HOST_WIDE_INT size
= allocate
;
10856 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10857 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10859 if (TARGET_STACK_PROBE
)
10861 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10863 if (size
> PROBE_INTERVAL
)
10864 ix86_emit_probe_stack_range (0, size
);
10867 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10871 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
10873 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
10874 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
,
10875 size
- STACK_CHECK_PROTECT
);
10878 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10885 else if (!ix86_target_stack_probe ()
10886 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10888 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10889 GEN_INT (-allocate
), -1,
10890 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10894 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10896 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10897 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10898 bool eax_live
= false;
10899 bool r10_live
= false;
10902 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10903 if (!TARGET_64BIT_MS_ABI
)
10904 eax_live
= ix86_eax_live_at_start_p ();
10906 /* Note that SEH directives need to continue tracking the stack
10907 pointer even after the frame pointer has been set up. */
10910 insn
= emit_insn (gen_push (eax
));
10911 allocate
-= UNITS_PER_WORD
;
10912 if (sp_is_cfa_reg
|| TARGET_SEH
)
10915 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10916 RTX_FRAME_RELATED_P (insn
) = 1;
10922 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10923 insn
= emit_insn (gen_push (r10
));
10924 allocate
-= UNITS_PER_WORD
;
10925 if (sp_is_cfa_reg
|| TARGET_SEH
)
10928 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10929 RTX_FRAME_RELATED_P (insn
) = 1;
10933 emit_move_insn (eax
, GEN_INT (allocate
));
10934 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10936 /* Use the fact that AX still contains ALLOCATE. */
10937 adjust_stack_insn
= (Pmode
== DImode
10938 ? gen_pro_epilogue_adjust_stack_di_sub
10939 : gen_pro_epilogue_adjust_stack_si_sub
);
10941 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10942 stack_pointer_rtx
, eax
));
10944 if (sp_is_cfa_reg
|| TARGET_SEH
)
10947 m
->fs
.cfa_offset
+= allocate
;
10948 RTX_FRAME_RELATED_P (insn
) = 1;
10949 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10950 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10951 plus_constant (Pmode
, stack_pointer_rtx
,
10954 m
->fs
.sp_offset
+= allocate
;
10956 /* Use stack_pointer_rtx for relative addressing so that code
10957 works for realigned stack, too. */
10958 if (r10_live
&& eax_live
)
10960 t
= plus_constant (Pmode
, stack_pointer_rtx
, allocate
);
10961 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10962 gen_frame_mem (word_mode
, t
));
10963 t
= plus_constant (Pmode
, stack_pointer_rtx
,
10964 allocate
- UNITS_PER_WORD
);
10965 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10966 gen_frame_mem (word_mode
, t
));
10968 else if (eax_live
|| r10_live
)
10970 t
= plus_constant (Pmode
, stack_pointer_rtx
, allocate
);
10971 emit_move_insn (gen_rtx_REG (word_mode
,
10972 (eax_live
? AX_REG
: R10_REG
)),
10973 gen_frame_mem (word_mode
, t
));
10976 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10978 /* If we havn't already set up the frame pointer, do so now. */
10979 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10981 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10982 GEN_INT (frame
.stack_pointer_offset
10983 - frame
.hard_frame_pointer_offset
));
10984 insn
= emit_insn (insn
);
10985 RTX_FRAME_RELATED_P (insn
) = 1;
10986 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10988 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10989 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10990 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10991 m
->fs
.fp_valid
= true;
10994 if (!int_registers_saved
)
10995 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10996 if (!sse_registers_saved
)
10997 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10999 pic_reg_used
= false;
11000 /* We don't use pic-register for pe-coff target. */
11001 if (pic_offset_table_rtx
11003 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
11006 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
11008 if (alt_pic_reg_used
!= INVALID_REGNUM
)
11009 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
11011 pic_reg_used
= true;
11018 if (ix86_cmodel
== CM_LARGE_PIC
)
11020 rtx label
, tmp_reg
;
11022 gcc_assert (Pmode
== DImode
);
11023 label
= gen_label_rtx ();
11024 emit_label (label
);
11025 LABEL_PRESERVE_P (label
) = 1;
11026 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
11027 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
11028 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
11030 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
11031 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
11032 pic_offset_table_rtx
, tmp_reg
));
11035 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
11039 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
11040 RTX_FRAME_RELATED_P (insn
) = 1;
11041 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
11045 /* In the pic_reg_used case, make sure that the got load isn't deleted
11046 when mcount needs it. Blockage to avoid call movement across mcount
11047 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11049 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
11050 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
11052 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
11054 /* vDRAP is setup but after reload it turns out stack realign
11055 isn't necessary, here we will emit prologue to setup DRAP
11056 without stack realign adjustment */
11057 t
= choose_baseaddr (0);
11058 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
11061 /* Prevent instructions from being scheduled into register save push
11062 sequence when access to the redzone area is done through frame pointer.
11063 The offset between the frame pointer and the stack pointer is calculated
11064 relative to the value of the stack pointer at the end of the function
11065 prologue, and moving instructions that access redzone area via frame
11066 pointer inside push sequence violates this assumption. */
11067 if (frame_pointer_needed
&& frame
.red_zone_size
)
11068 emit_insn (gen_memory_blockage ());
11070 /* Emit cld instruction if stringops are used in the function. */
11071 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
11072 emit_insn (gen_cld ());
11074 /* SEH requires that the prologue end within 256 bytes of the start of
11075 the function. Prevent instruction schedules that would extend that.
11076 Further, prevent alloca modifications to the stack pointer from being
11077 combined with prologue modifications. */
11079 emit_insn (gen_prologue_use (stack_pointer_rtx
));
11082 /* Emit code to restore REG using a POP insn. */
11085 ix86_emit_restore_reg_using_pop (rtx reg
)
11087 struct machine_function
*m
= cfun
->machine
;
11088 rtx insn
= emit_insn (gen_pop (reg
));
11090 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
11091 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11093 if (m
->fs
.cfa_reg
== crtl
->drap_reg
11094 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
11096 /* Previously we'd represented the CFA as an expression
11097 like *(%ebp - 8). We've just popped that value from
11098 the stack, which means we need to reset the CFA to
11099 the drap register. This will remain until we restore
11100 the stack pointer. */
11101 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11102 RTX_FRAME_RELATED_P (insn
) = 1;
11104 /* This means that the DRAP register is valid for addressing too. */
11105 m
->fs
.drap_valid
= true;
11109 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
11111 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
11112 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
11113 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
11114 RTX_FRAME_RELATED_P (insn
) = 1;
11116 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11119 /* When the frame pointer is the CFA, and we pop it, we are
11120 swapping back to the stack pointer as the CFA. This happens
11121 for stack frames that don't allocate other data, so we assume
11122 the stack pointer is now pointing at the return address, i.e.
11123 the function entry state, which makes the offset be 1 word. */
11124 if (reg
== hard_frame_pointer_rtx
)
11126 m
->fs
.fp_valid
= false;
11127 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11129 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11130 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11132 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11133 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11134 GEN_INT (m
->fs
.cfa_offset
)));
11135 RTX_FRAME_RELATED_P (insn
) = 1;
11140 /* Emit code to restore saved registers using POP insns. */
11143 ix86_emit_restore_regs_using_pop (void)
11145 unsigned int regno
;
11147 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11148 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
11149 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
11152 /* Emit code and notes for the LEAVE instruction. */
11155 ix86_emit_leave (void)
11157 struct machine_function
*m
= cfun
->machine
;
11158 rtx insn
= emit_insn (ix86_gen_leave ());
11160 ix86_add_queued_cfa_restore_notes (insn
);
11162 gcc_assert (m
->fs
.fp_valid
);
11163 m
->fs
.sp_valid
= true;
11164 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
11165 m
->fs
.fp_valid
= false;
11167 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
11169 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11170 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
11172 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11173 plus_constant (Pmode
, stack_pointer_rtx
,
11175 RTX_FRAME_RELATED_P (insn
) = 1;
11177 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
11181 /* Emit code to restore saved registers using MOV insns.
11182 First register is restored from CFA - CFA_OFFSET. */
11184 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11185 bool maybe_eh_return
)
11187 struct machine_function
*m
= cfun
->machine
;
11188 unsigned int regno
;
11190 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11191 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11193 rtx reg
= gen_rtx_REG (word_mode
, regno
);
11196 mem
= choose_baseaddr (cfa_offset
);
11197 mem
= gen_frame_mem (word_mode
, mem
);
11198 insn
= emit_move_insn (reg
, mem
);
11200 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
11202 /* Previously we'd represented the CFA as an expression
11203 like *(%ebp - 8). We've just popped that value from
11204 the stack, which means we need to reset the CFA to
11205 the drap register. This will remain until we restore
11206 the stack pointer. */
11207 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
11208 RTX_FRAME_RELATED_P (insn
) = 1;
11210 /* This means that the DRAP register is valid for addressing. */
11211 m
->fs
.drap_valid
= true;
11214 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11216 cfa_offset
-= UNITS_PER_WORD
;
11220 /* Emit code to restore saved registers using MOV insns.
11221 First register is restored from CFA - CFA_OFFSET. */
11223 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11224 bool maybe_eh_return
)
11226 unsigned int regno
;
11228 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11229 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11231 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
11234 mem
= choose_baseaddr (cfa_offset
);
11235 mem
= gen_rtx_MEM (V4SFmode
, mem
);
11236 set_mem_align (mem
, 128);
11237 emit_move_insn (reg
, mem
);
11239 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11245 /* Restore function stack, frame, and registers. */
11248 ix86_expand_epilogue (int style
)
11250 struct machine_function
*m
= cfun
->machine
;
11251 struct machine_frame_state frame_state_save
= m
->fs
;
11252 struct ix86_frame frame
;
11253 bool restore_regs_via_mov
;
11256 ix86_finalize_stack_realign_flags ();
11257 ix86_compute_frame_layout (&frame
);
11259 m
->fs
.sp_valid
= (!frame_pointer_needed
11260 || (crtl
->sp_is_unchanging
11261 && !stack_realign_fp
));
11262 gcc_assert (!m
->fs
.sp_valid
11263 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11265 /* The FP must be valid if the frame pointer is present. */
11266 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11267 gcc_assert (!m
->fs
.fp_valid
11268 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11270 /* We must have *some* valid pointer to the stack frame. */
11271 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11273 /* The DRAP is never valid at this point. */
11274 gcc_assert (!m
->fs
.drap_valid
);
11276 /* See the comment about red zone and frame
11277 pointer usage in ix86_expand_prologue. */
11278 if (frame_pointer_needed
&& frame
.red_zone_size
)
11279 emit_insn (gen_memory_blockage ());
11281 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11282 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11284 /* Determine the CFA offset of the end of the red-zone. */
11285 m
->fs
.red_zone_offset
= 0;
11286 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11288 /* The red-zone begins below the return address. */
11289 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11291 /* When the register save area is in the aligned portion of
11292 the stack, determine the maximum runtime displacement that
11293 matches up with the aligned frame. */
11294 if (stack_realign_drap
)
11295 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11299 /* Special care must be taken for the normal return case of a function
11300 using eh_return: the eax and edx registers are marked as saved, but
11301 not restored along this path. Adjust the save location to match. */
11302 if (crtl
->calls_eh_return
&& style
!= 2)
11303 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
11305 /* EH_RETURN requires the use of moves to function properly. */
11306 if (crtl
->calls_eh_return
)
11307 restore_regs_via_mov
= true;
11308 /* SEH requires the use of pops to identify the epilogue. */
11309 else if (TARGET_SEH
)
11310 restore_regs_via_mov
= false;
11311 /* If we're only restoring one register and sp is not valid then
11312 using a move instruction to restore the register since it's
11313 less work than reloading sp and popping the register. */
11314 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11315 restore_regs_via_mov
= true;
11316 else if (TARGET_EPILOGUE_USING_MOVE
11317 && cfun
->machine
->use_fast_prologue_epilogue
11318 && (frame
.nregs
> 1
11319 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11320 restore_regs_via_mov
= true;
11321 else if (frame_pointer_needed
11323 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11324 restore_regs_via_mov
= true;
11325 else if (frame_pointer_needed
11326 && TARGET_USE_LEAVE
11327 && cfun
->machine
->use_fast_prologue_epilogue
11328 && frame
.nregs
== 1)
11329 restore_regs_via_mov
= true;
11331 restore_regs_via_mov
= false;
11333 if (restore_regs_via_mov
|| frame
.nsseregs
)
11335 /* Ensure that the entire register save area is addressable via
11336 the stack pointer, if we will restore via sp. */
11338 && m
->fs
.sp_offset
> 0x7fffffff
11339 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11340 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11342 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11343 GEN_INT (m
->fs
.sp_offset
11344 - frame
.sse_reg_save_offset
),
11346 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11350 /* If there are any SSE registers to restore, then we have to do it
11351 via moves, since there's obviously no pop for SSE regs. */
11352 if (frame
.nsseregs
)
11353 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11356 if (restore_regs_via_mov
)
11361 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11363 /* eh_return epilogues need %ecx added to the stack pointer. */
11366 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11368 /* Stack align doesn't work with eh_return. */
11369 gcc_assert (!stack_realign_drap
);
11370 /* Neither does regparm nested functions. */
11371 gcc_assert (!ix86_static_chain_on_stack
);
11373 if (frame_pointer_needed
)
11375 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11376 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11377 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11379 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11380 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11382 /* Note that we use SA as a temporary CFA, as the return
11383 address is at the proper place relative to it. We
11384 pretend this happens at the FP restore insn because
11385 prior to this insn the FP would be stored at the wrong
11386 offset relative to SA, and after this insn we have no
11387 other reasonable register to use for the CFA. We don't
11388 bother resetting the CFA to the SP for the duration of
11389 the return insn. */
11390 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11391 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11392 ix86_add_queued_cfa_restore_notes (insn
);
11393 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11394 RTX_FRAME_RELATED_P (insn
) = 1;
11396 m
->fs
.cfa_reg
= sa
;
11397 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11398 m
->fs
.fp_valid
= false;
11400 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11401 const0_rtx
, style
, false);
11405 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11406 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11407 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11408 ix86_add_queued_cfa_restore_notes (insn
);
11410 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11411 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11413 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11414 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11415 plus_constant (Pmode
, stack_pointer_rtx
,
11417 RTX_FRAME_RELATED_P (insn
) = 1;
11420 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11421 m
->fs
.sp_valid
= true;
11426 /* SEH requires that the function end with (1) a stack adjustment
11427 if necessary, (2) a sequence of pops, and (3) a return or
11428 jump instruction. Prevent insns from the function body from
11429 being scheduled into this sequence. */
11432 /* Prevent a catch region from being adjacent to the standard
11433 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11434 several other flags that would be interesting to test are
11436 if (flag_non_call_exceptions
)
11437 emit_insn (gen_nops (const1_rtx
));
11439 emit_insn (gen_blockage ());
11442 /* First step is to deallocate the stack frame so that we can
11443 pop the registers. Also do it on SEH target for very large
11444 frame as the emitted instructions aren't allowed by the ABI in
11446 if (!m
->fs
.sp_valid
11448 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11449 >= SEH_MAX_FRAME_SIZE
)))
11451 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11452 GEN_INT (m
->fs
.fp_offset
11453 - frame
.reg_save_offset
),
11456 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11458 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11459 GEN_INT (m
->fs
.sp_offset
11460 - frame
.reg_save_offset
),
11462 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11465 ix86_emit_restore_regs_using_pop ();
11468 /* If we used a stack pointer and haven't already got rid of it,
11470 if (m
->fs
.fp_valid
)
11472 /* If the stack pointer is valid and pointing at the frame
11473 pointer store address, then we only need a pop. */
11474 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11475 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11476 /* Leave results in shorter dependency chains on CPUs that are
11477 able to grok it fast. */
11478 else if (TARGET_USE_LEAVE
11479 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
11480 || !cfun
->machine
->use_fast_prologue_epilogue
)
11481 ix86_emit_leave ();
11484 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11485 hard_frame_pointer_rtx
,
11486 const0_rtx
, style
, !using_drap
);
11487 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11493 int param_ptr_offset
= UNITS_PER_WORD
;
11496 gcc_assert (stack_realign_drap
);
11498 if (ix86_static_chain_on_stack
)
11499 param_ptr_offset
+= UNITS_PER_WORD
;
11500 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11501 param_ptr_offset
+= UNITS_PER_WORD
;
11503 insn
= emit_insn (gen_rtx_SET
11504 (VOIDmode
, stack_pointer_rtx
,
11505 gen_rtx_PLUS (Pmode
,
11507 GEN_INT (-param_ptr_offset
))));
11508 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11509 m
->fs
.cfa_offset
= param_ptr_offset
;
11510 m
->fs
.sp_offset
= param_ptr_offset
;
11511 m
->fs
.realigned
= false;
11513 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11514 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11515 GEN_INT (param_ptr_offset
)));
11516 RTX_FRAME_RELATED_P (insn
) = 1;
11518 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11519 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11522 /* At this point the stack pointer must be valid, and we must have
11523 restored all of the registers. We may not have deallocated the
11524 entire stack frame. We've delayed this until now because it may
11525 be possible to merge the local stack deallocation with the
11526 deallocation forced by ix86_static_chain_on_stack. */
11527 gcc_assert (m
->fs
.sp_valid
);
11528 gcc_assert (!m
->fs
.fp_valid
);
11529 gcc_assert (!m
->fs
.realigned
);
11530 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11532 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11533 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11537 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11539 /* Sibcall epilogues don't want a return instruction. */
11542 m
->fs
= frame_state_save
;
11546 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11548 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11550 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11551 address, do explicit add, and jump indirectly to the caller. */
11553 if (crtl
->args
.pops_args
>= 65536)
11555 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11558 /* There is no "pascal" calling convention in any 64bit ABI. */
11559 gcc_assert (!TARGET_64BIT
);
11561 insn
= emit_insn (gen_pop (ecx
));
11562 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11563 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11565 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11566 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11567 add_reg_note (insn
, REG_CFA_REGISTER
,
11568 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11569 RTX_FRAME_RELATED_P (insn
) = 1;
11571 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11573 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11576 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11579 emit_jump_insn (gen_simple_return_internal ());
11581 /* Restore the state back to the state from the prologue,
11582 so that it's correct for the next epilogue. */
11583 m
->fs
= frame_state_save
;
11586 /* Reset from the function's potential modifications. */
11589 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11590 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11592 if (pic_offset_table_rtx
)
11593 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11595 /* Mach-O doesn't support labels at the end of objects, so if
11596 it looks like we might want one, insert a NOP. */
11598 rtx insn
= get_last_insn ();
11599 rtx deleted_debug_label
= NULL_RTX
;
11602 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11604 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11605 notes only, instead set their CODE_LABEL_NUMBER to -1,
11606 otherwise there would be code generation differences
11607 in between -g and -g0. */
11608 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11609 deleted_debug_label
= insn
;
11610 insn
= PREV_INSN (insn
);
11615 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11616 fputs ("\tnop\n", file
);
11617 else if (deleted_debug_label
)
11618 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11619 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11620 CODE_LABEL_NUMBER (insn
) = -1;
11626 /* Return a scratch register to use in the split stack prologue. The
11627 split stack prologue is used for -fsplit-stack. It is the first
11628 instructions in the function, even before the regular prologue.
11629 The scratch register can be any caller-saved register which is not
11630 used for parameters or for the static chain. */
11632 static unsigned int
11633 split_stack_prologue_scratch_regno (void)
11639 bool is_fastcall
, is_thiscall
;
11642 is_fastcall
= (lookup_attribute ("fastcall",
11643 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11645 is_thiscall
= (lookup_attribute ("thiscall",
11646 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11648 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11652 if (DECL_STATIC_CHAIN (cfun
->decl
))
11654 sorry ("-fsplit-stack does not support fastcall with "
11655 "nested function");
11656 return INVALID_REGNUM
;
11660 else if (is_thiscall
)
11662 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11666 else if (regparm
< 3)
11668 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11674 sorry ("-fsplit-stack does not support 2 register "
11675 " parameters for a nested function");
11676 return INVALID_REGNUM
;
11683 /* FIXME: We could make this work by pushing a register
11684 around the addition and comparison. */
11685 sorry ("-fsplit-stack does not support 3 register parameters");
11686 return INVALID_REGNUM
;
11691 /* A SYMBOL_REF for the function which allocates new stackspace for
11694 static GTY(()) rtx split_stack_fn
;
11696 /* A SYMBOL_REF for the more stack function when using the large
11699 static GTY(()) rtx split_stack_fn_large
;
11701 /* Handle -fsplit-stack. These are the first instructions in the
11702 function, even before the regular prologue. */
11705 ix86_expand_split_stack_prologue (void)
11707 struct ix86_frame frame
;
11708 HOST_WIDE_INT allocate
;
11709 unsigned HOST_WIDE_INT args_size
;
11710 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11711 rtx scratch_reg
= NULL_RTX
;
11712 rtx varargs_label
= NULL_RTX
;
11715 gcc_assert (flag_split_stack
&& reload_completed
);
11717 ix86_finalize_stack_realign_flags ();
11718 ix86_compute_frame_layout (&frame
);
11719 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11721 /* This is the label we will branch to if we have enough stack
11722 space. We expect the basic block reordering pass to reverse this
11723 branch if optimizing, so that we branch in the unlikely case. */
11724 label
= gen_label_rtx ();
11726 /* We need to compare the stack pointer minus the frame size with
11727 the stack boundary in the TCB. The stack boundary always gives
11728 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11729 can compare directly. Otherwise we need to do an addition. */
11731 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11732 UNSPEC_STACK_CHECK
);
11733 limit
= gen_rtx_CONST (Pmode
, limit
);
11734 limit
= gen_rtx_MEM (Pmode
, limit
);
11735 if (allocate
< SPLIT_STACK_AVAILABLE
)
11736 current
= stack_pointer_rtx
;
11739 unsigned int scratch_regno
;
11742 /* We need a scratch register to hold the stack pointer minus
11743 the required frame size. Since this is the very start of the
11744 function, the scratch register can be any caller-saved
11745 register which is not used for parameters. */
11746 offset
= GEN_INT (- allocate
);
11747 scratch_regno
= split_stack_prologue_scratch_regno ();
11748 if (scratch_regno
== INVALID_REGNUM
)
11750 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11751 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11753 /* We don't use ix86_gen_add3 in this case because it will
11754 want to split to lea, but when not optimizing the insn
11755 will not be split after this point. */
11756 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11757 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11762 emit_move_insn (scratch_reg
, offset
);
11763 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11764 stack_pointer_rtx
));
11766 current
= scratch_reg
;
11769 ix86_expand_branch (GEU
, current
, limit
, label
);
11770 jump_insn
= get_last_insn ();
11771 JUMP_LABEL (jump_insn
) = label
;
11773 /* Mark the jump as very likely to be taken. */
11774 add_int_reg_note (jump_insn
, REG_BR_PROB
,
11775 REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100);
11777 if (split_stack_fn
== NULL_RTX
)
11778 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11779 fn
= split_stack_fn
;
11781 /* Get more stack space. We pass in the desired stack space and the
11782 size of the arguments to copy to the new stack. In 32-bit mode
11783 we push the parameters; __morestack will return on a new stack
11784 anyhow. In 64-bit mode we pass the parameters in r10 and
11786 allocate_rtx
= GEN_INT (allocate
);
11787 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11788 call_fusage
= NULL_RTX
;
11793 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11794 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11796 /* If this function uses a static chain, it will be in %r10.
11797 Preserve it across the call to __morestack. */
11798 if (DECL_STATIC_CHAIN (cfun
->decl
))
11802 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11803 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11804 use_reg (&call_fusage
, rax
);
11807 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11810 HOST_WIDE_INT argval
;
11812 gcc_assert (Pmode
== DImode
);
11813 /* When using the large model we need to load the address
11814 into a register, and we've run out of registers. So we
11815 switch to a different calling convention, and we call a
11816 different function: __morestack_large. We pass the
11817 argument size in the upper 32 bits of r10 and pass the
11818 frame size in the lower 32 bits. */
11819 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11820 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11822 if (split_stack_fn_large
== NULL_RTX
)
11823 split_stack_fn_large
=
11824 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11826 if (ix86_cmodel
== CM_LARGE_PIC
)
11830 label
= gen_label_rtx ();
11831 emit_label (label
);
11832 LABEL_PRESERVE_P (label
) = 1;
11833 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11834 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11835 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11836 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11838 x
= gen_rtx_CONST (Pmode
, x
);
11839 emit_move_insn (reg11
, x
);
11840 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11841 x
= gen_const_mem (Pmode
, x
);
11842 emit_move_insn (reg11
, x
);
11845 emit_move_insn (reg11
, split_stack_fn_large
);
11849 argval
= ((args_size
<< 16) << 16) + allocate
;
11850 emit_move_insn (reg10
, GEN_INT (argval
));
11854 emit_move_insn (reg10
, allocate_rtx
);
11855 emit_move_insn (reg11
, GEN_INT (args_size
));
11856 use_reg (&call_fusage
, reg11
);
11859 use_reg (&call_fusage
, reg10
);
11863 emit_insn (gen_push (GEN_INT (args_size
)));
11864 emit_insn (gen_push (allocate_rtx
));
11866 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11867 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11869 add_function_usage_to (call_insn
, call_fusage
);
11871 /* In order to make call/return prediction work right, we now need
11872 to execute a return instruction. See
11873 libgcc/config/i386/morestack.S for the details on how this works.
11875 For flow purposes gcc must not see this as a return
11876 instruction--we need control flow to continue at the subsequent
11877 label. Therefore, we use an unspec. */
11878 gcc_assert (crtl
->args
.pops_args
< 65536);
11879 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11881 /* If we are in 64-bit mode and this function uses a static chain,
11882 we saved %r10 in %rax before calling _morestack. */
11883 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11884 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11885 gen_rtx_REG (word_mode
, AX_REG
));
11887 /* If this function calls va_start, we need to store a pointer to
11888 the arguments on the old stack, because they may not have been
11889 all copied to the new stack. At this point the old stack can be
11890 found at the frame pointer value used by __morestack, because
11891 __morestack has set that up before calling back to us. Here we
11892 store that pointer in a scratch register, and in
11893 ix86_expand_prologue we store the scratch register in a stack
11895 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11897 unsigned int scratch_regno
;
11901 scratch_regno
= split_stack_prologue_scratch_regno ();
11902 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11903 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11907 return address within this function
11908 return address of caller of this function
11910 So we add three words to get to the stack arguments.
11914 return address within this function
11915 first argument to __morestack
11916 second argument to __morestack
11917 return address of caller of this function
11919 So we add five words to get to the stack arguments.
11921 words
= TARGET_64BIT
? 3 : 5;
11922 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11923 gen_rtx_PLUS (Pmode
, frame_reg
,
11924 GEN_INT (words
* UNITS_PER_WORD
))));
11926 varargs_label
= gen_label_rtx ();
11927 emit_jump_insn (gen_jump (varargs_label
));
11928 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11933 emit_label (label
);
11934 LABEL_NUSES (label
) = 1;
11936 /* If this function calls va_start, we now have to set the scratch
11937 register for the case where we do not call __morestack. In this
11938 case we need to set it based on the stack pointer. */
11939 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11941 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11942 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11943 GEN_INT (UNITS_PER_WORD
))));
11945 emit_label (varargs_label
);
11946 LABEL_NUSES (varargs_label
) = 1;
11950 /* We may have to tell the dataflow pass that the split stack prologue
11951 is initializing a scratch register. */
11954 ix86_live_on_entry (bitmap regs
)
11956 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11958 gcc_assert (flag_split_stack
);
11959 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11963 /* Extract the parts of an RTL expression that is a valid memory address
11964 for an instruction. Return 0 if the structure of the address is
11965 grossly off. Return -1 if the address contains ASHIFT, so it is not
11966 strictly valid, but still used for computing length of lea instruction. */
11969 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11971 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11972 rtx base_reg
, index_reg
;
11973 HOST_WIDE_INT scale
= 1;
11974 rtx scale_rtx
= NULL_RTX
;
11977 enum ix86_address_seg seg
= SEG_DEFAULT
;
11979 /* Allow zero-extended SImode addresses,
11980 they will be emitted with addr32 prefix. */
11981 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11983 if (GET_CODE (addr
) == ZERO_EXTEND
11984 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11986 addr
= XEXP (addr
, 0);
11987 if (CONST_INT_P (addr
))
11990 else if (GET_CODE (addr
) == AND
11991 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11993 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11994 if (addr
== NULL_RTX
)
11997 if (CONST_INT_P (addr
))
12002 /* Allow SImode subregs of DImode addresses,
12003 they will be emitted with addr32 prefix. */
12004 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
12006 if (GET_CODE (addr
) == SUBREG
12007 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
12009 addr
= SUBREG_REG (addr
);
12010 if (CONST_INT_P (addr
))
12017 else if (GET_CODE (addr
) == SUBREG
)
12019 if (REG_P (SUBREG_REG (addr
)))
12024 else if (GET_CODE (addr
) == PLUS
)
12026 rtx addends
[4], op
;
12034 addends
[n
++] = XEXP (op
, 1);
12037 while (GET_CODE (op
) == PLUS
);
12042 for (i
= n
; i
>= 0; --i
)
12045 switch (GET_CODE (op
))
12050 index
= XEXP (op
, 0);
12051 scale_rtx
= XEXP (op
, 1);
12057 index
= XEXP (op
, 0);
12058 tmp
= XEXP (op
, 1);
12059 if (!CONST_INT_P (tmp
))
12061 scale
= INTVAL (tmp
);
12062 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12064 scale
= 1 << scale
;
12069 if (GET_CODE (op
) != UNSPEC
)
12074 if (XINT (op
, 1) == UNSPEC_TP
12075 && TARGET_TLS_DIRECT_SEG_REFS
12076 && seg
== SEG_DEFAULT
)
12077 seg
= DEFAULT_TLS_SEG_REG
;
12083 if (!REG_P (SUBREG_REG (op
)))
12110 else if (GET_CODE (addr
) == MULT
)
12112 index
= XEXP (addr
, 0); /* index*scale */
12113 scale_rtx
= XEXP (addr
, 1);
12115 else if (GET_CODE (addr
) == ASHIFT
)
12117 /* We're called for lea too, which implements ashift on occasion. */
12118 index
= XEXP (addr
, 0);
12119 tmp
= XEXP (addr
, 1);
12120 if (!CONST_INT_P (tmp
))
12122 scale
= INTVAL (tmp
);
12123 if ((unsigned HOST_WIDE_INT
) scale
> 3)
12125 scale
= 1 << scale
;
12129 disp
= addr
; /* displacement */
12135 else if (GET_CODE (index
) == SUBREG
12136 && REG_P (SUBREG_REG (index
)))
12142 /* Extract the integral value of scale. */
12145 if (!CONST_INT_P (scale_rtx
))
12147 scale
= INTVAL (scale_rtx
);
12150 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
12151 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
12153 /* Avoid useless 0 displacement. */
12154 if (disp
== const0_rtx
&& (base
|| index
))
12157 /* Allow arg pointer and stack pointer as index if there is not scaling. */
12158 if (base_reg
&& index_reg
&& scale
== 1
12159 && (index_reg
== arg_pointer_rtx
12160 || index_reg
== frame_pointer_rtx
12161 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
12164 tmp
= base
, base
= index
, index
= tmp
;
12165 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
12168 /* Special case: %ebp cannot be encoded as a base without a displacement.
12172 && (base_reg
== hard_frame_pointer_rtx
12173 || base_reg
== frame_pointer_rtx
12174 || base_reg
== arg_pointer_rtx
12175 || (REG_P (base_reg
)
12176 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
12177 || REGNO (base_reg
) == R13_REG
))))
12180 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12181 Avoid this by transforming to [%esi+0].
12182 Reload calls address legitimization without cfun defined, so we need
12183 to test cfun for being non-NULL. */
12184 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
12185 && base_reg
&& !index_reg
&& !disp
12186 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
12189 /* Special case: encode reg+reg instead of reg*2. */
12190 if (!base
&& index
&& scale
== 2)
12191 base
= index
, base_reg
= index_reg
, scale
= 1;
12193 /* Special case: scaling cannot be encoded without base or displacement. */
12194 if (!base
&& !disp
&& index
&& scale
!= 1)
12198 out
->index
= index
;
12200 out
->scale
= scale
;
12206 /* Return cost of the memory address x.
12207 For i386, it is better to use a complex address than let gcc copy
12208 the address into a reg and make a new pseudo. But not if the address
12209 requires to two regs - that would mean more pseudos with longer
12212 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12213 addr_space_t as ATTRIBUTE_UNUSED
,
12214 bool speed ATTRIBUTE_UNUSED
)
12216 struct ix86_address parts
;
12218 int ok
= ix86_decompose_address (x
, &parts
);
12222 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12223 parts
.base
= SUBREG_REG (parts
.base
);
12224 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12225 parts
.index
= SUBREG_REG (parts
.index
);
12227 /* Attempt to minimize number of registers in the address. */
12229 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12231 && (!REG_P (parts
.index
)
12232 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12236 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12238 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12239 && parts
.base
!= parts
.index
)
12242 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12243 since it's predecode logic can't detect the length of instructions
12244 and it degenerates to vector decoded. Increase cost of such
12245 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12246 to split such addresses or even refuse such addresses at all.
12248 Following addressing modes are affected:
12253 The first and last case may be avoidable by explicitly coding the zero in
12254 memory address, but I don't have AMD-K6 machine handy to check this
12258 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12259 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12260 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12266 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12267 this is used for to form addresses to local data when -fPIC is in
12271 darwin_local_data_pic (rtx disp
)
12273 return (GET_CODE (disp
) == UNSPEC
12274 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12277 /* Determine if a given RTX is a valid constant. We already know this
12278 satisfies CONSTANT_P. */
12281 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12283 switch (GET_CODE (x
))
12288 if (GET_CODE (x
) == PLUS
)
12290 if (!CONST_INT_P (XEXP (x
, 1)))
12295 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12298 /* Only some unspecs are valid as "constants". */
12299 if (GET_CODE (x
) == UNSPEC
)
12300 switch (XINT (x
, 1))
12303 case UNSPEC_GOTOFF
:
12304 case UNSPEC_PLTOFF
:
12305 return TARGET_64BIT
;
12307 case UNSPEC_NTPOFF
:
12308 x
= XVECEXP (x
, 0, 0);
12309 return (GET_CODE (x
) == SYMBOL_REF
12310 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12311 case UNSPEC_DTPOFF
:
12312 x
= XVECEXP (x
, 0, 0);
12313 return (GET_CODE (x
) == SYMBOL_REF
12314 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12319 /* We must have drilled down to a symbol. */
12320 if (GET_CODE (x
) == LABEL_REF
)
12322 if (GET_CODE (x
) != SYMBOL_REF
)
12327 /* TLS symbols are never valid. */
12328 if (SYMBOL_REF_TLS_MODEL (x
))
12331 /* DLLIMPORT symbols are never valid. */
12332 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12333 && SYMBOL_REF_DLLIMPORT_P (x
))
12337 /* mdynamic-no-pic */
12338 if (MACHO_DYNAMIC_NO_PIC_P
)
12339 return machopic_symbol_defined_p (x
);
12344 if (GET_MODE (x
) == TImode
12345 && x
!= CONST0_RTX (TImode
)
12351 if (!standard_sse_constant_p (x
))
12358 /* Otherwise we handle everything else in the move patterns. */
12362 /* Determine if it's legal to put X into the constant pool. This
12363 is not possible for the address of thread-local symbols, which
12364 is checked above. */
12367 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12369 /* We can always put integral constants and vectors in memory. */
12370 switch (GET_CODE (x
))
12380 return !ix86_legitimate_constant_p (mode
, x
);
12383 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
12387 is_imported_p (rtx x
)
12389 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12390 || GET_CODE (x
) != SYMBOL_REF
)
12393 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
12397 /* Nonzero if the constant value X is a legitimate general operand
12398 when generating PIC code. It is given that flag_pic is on and
12399 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12402 legitimate_pic_operand_p (rtx x
)
12406 switch (GET_CODE (x
))
12409 inner
= XEXP (x
, 0);
12410 if (GET_CODE (inner
) == PLUS
12411 && CONST_INT_P (XEXP (inner
, 1)))
12412 inner
= XEXP (inner
, 0);
12414 /* Only some unspecs are valid as "constants". */
12415 if (GET_CODE (inner
) == UNSPEC
)
12416 switch (XINT (inner
, 1))
12419 case UNSPEC_GOTOFF
:
12420 case UNSPEC_PLTOFF
:
12421 return TARGET_64BIT
;
12423 x
= XVECEXP (inner
, 0, 0);
12424 return (GET_CODE (x
) == SYMBOL_REF
12425 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12426 case UNSPEC_MACHOPIC_OFFSET
:
12427 return legitimate_pic_address_disp_p (x
);
12435 return legitimate_pic_address_disp_p (x
);
12442 /* Determine if a given CONST RTX is a valid memory displacement
12446 legitimate_pic_address_disp_p (rtx disp
)
12450 /* In 64bit mode we can allow direct addresses of symbols and labels
12451 when they are not dynamic symbols. */
12454 rtx op0
= disp
, op1
;
12456 switch (GET_CODE (disp
))
12462 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12464 op0
= XEXP (XEXP (disp
, 0), 0);
12465 op1
= XEXP (XEXP (disp
, 0), 1);
12466 if (!CONST_INT_P (op1
)
12467 || INTVAL (op1
) >= 16*1024*1024
12468 || INTVAL (op1
) < -16*1024*1024)
12470 if (GET_CODE (op0
) == LABEL_REF
)
12472 if (GET_CODE (op0
) == CONST
12473 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12474 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12476 if (GET_CODE (op0
) == UNSPEC
12477 && XINT (op0
, 1) == UNSPEC_PCREL
)
12479 if (GET_CODE (op0
) != SYMBOL_REF
)
12484 /* TLS references should always be enclosed in UNSPEC.
12485 The dllimported symbol needs always to be resolved. */
12486 if (SYMBOL_REF_TLS_MODEL (op0
)
12487 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
12492 if (is_imported_p (op0
))
12495 if (SYMBOL_REF_FAR_ADDR_P (op0
)
12496 || !SYMBOL_REF_LOCAL_P (op0
))
12499 /* Function-symbols need to be resolved only for
12501 For the small-model we don't need to resolve anything
12503 if ((ix86_cmodel
!= CM_LARGE_PIC
12504 && SYMBOL_REF_FUNCTION_P (op0
))
12505 || ix86_cmodel
== CM_SMALL_PIC
)
12507 /* Non-external symbols don't need to be resolved for
12508 large, and medium-model. */
12509 if ((ix86_cmodel
== CM_LARGE_PIC
12510 || ix86_cmodel
== CM_MEDIUM_PIC
)
12511 && !SYMBOL_REF_EXTERNAL_P (op0
))
12514 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
12515 && SYMBOL_REF_LOCAL_P (op0
)
12516 && ix86_cmodel
!= CM_LARGE_PIC
)
12524 if (GET_CODE (disp
) != CONST
)
12526 disp
= XEXP (disp
, 0);
12530 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12531 of GOT tables. We should not need these anyway. */
12532 if (GET_CODE (disp
) != UNSPEC
12533 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12534 && XINT (disp
, 1) != UNSPEC_GOTOFF
12535 && XINT (disp
, 1) != UNSPEC_PCREL
12536 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12539 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12540 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12546 if (GET_CODE (disp
) == PLUS
)
12548 if (!CONST_INT_P (XEXP (disp
, 1)))
12550 disp
= XEXP (disp
, 0);
12554 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12557 if (GET_CODE (disp
) != UNSPEC
)
12560 switch (XINT (disp
, 1))
12565 /* We need to check for both symbols and labels because VxWorks loads
12566 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12568 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12569 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12570 case UNSPEC_GOTOFF
:
12571 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12572 While ABI specify also 32bit relocation but we don't produce it in
12573 small PIC model at all. */
12574 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12575 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12577 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12579 case UNSPEC_GOTTPOFF
:
12580 case UNSPEC_GOTNTPOFF
:
12581 case UNSPEC_INDNTPOFF
:
12584 disp
= XVECEXP (disp
, 0, 0);
12585 return (GET_CODE (disp
) == SYMBOL_REF
12586 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12587 case UNSPEC_NTPOFF
:
12588 disp
= XVECEXP (disp
, 0, 0);
12589 return (GET_CODE (disp
) == SYMBOL_REF
12590 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12591 case UNSPEC_DTPOFF
:
12592 disp
= XVECEXP (disp
, 0, 0);
12593 return (GET_CODE (disp
) == SYMBOL_REF
12594 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12600 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12601 replace the input X, or the original X if no replacement is called for.
12602 The output parameter *WIN is 1 if the calling macro should goto WIN,
12603 0 if it should not. */
12606 ix86_legitimize_reload_address (rtx x
,
12607 enum machine_mode mode ATTRIBUTE_UNUSED
,
12608 int opnum
, int type
,
12609 int ind_levels ATTRIBUTE_UNUSED
)
12611 /* Reload can generate:
12613 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12617 This RTX is rejected from ix86_legitimate_address_p due to
12618 non-strictness of base register 97. Following this rejection,
12619 reload pushes all three components into separate registers,
12620 creating invalid memory address RTX.
12622 Following code reloads only the invalid part of the
12623 memory address RTX. */
12625 if (GET_CODE (x
) == PLUS
12626 && REG_P (XEXP (x
, 1))
12627 && GET_CODE (XEXP (x
, 0)) == PLUS
12628 && REG_P (XEXP (XEXP (x
, 0), 1)))
12631 bool something_reloaded
= false;
12633 base
= XEXP (XEXP (x
, 0), 1);
12634 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12636 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12637 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12638 opnum
, (enum reload_type
) type
);
12639 something_reloaded
= true;
12642 index
= XEXP (x
, 1);
12643 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12645 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12646 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12647 opnum
, (enum reload_type
) type
);
12648 something_reloaded
= true;
12651 gcc_assert (something_reloaded
);
12658 /* Determine if op is suitable RTX for an address register.
12659 Return naked register if a register or a register subreg is
12660 found, otherwise return NULL_RTX. */
12663 ix86_validate_address_register (rtx op
)
12665 enum machine_mode mode
= GET_MODE (op
);
12667 /* Only SImode or DImode registers can form the address. */
12668 if (mode
!= SImode
&& mode
!= DImode
)
12673 else if (GET_CODE (op
) == SUBREG
)
12675 rtx reg
= SUBREG_REG (op
);
12680 mode
= GET_MODE (reg
);
12682 /* Don't allow SUBREGs that span more than a word. It can
12683 lead to spill failures when the register is one word out
12684 of a two word structure. */
12685 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
12688 /* Allow only SUBREGs of non-eliminable hard registers. */
12689 if (register_no_elim_operand (reg
, mode
))
12693 /* Op is not a register. */
12697 /* Recognizes RTL expressions that are valid memory addresses for an
12698 instruction. The MODE argument is the machine mode for the MEM
12699 expression that wants to use this address.
12701 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12702 convert common non-canonical forms to canonical form so that they will
12706 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12707 rtx addr
, bool strict
)
12709 struct ix86_address parts
;
12710 rtx base
, index
, disp
;
12711 HOST_WIDE_INT scale
;
12712 enum ix86_address_seg seg
;
12714 if (ix86_decompose_address (addr
, &parts
) <= 0)
12715 /* Decomposition failed. */
12719 index
= parts
.index
;
12721 scale
= parts
.scale
;
12724 /* Validate base register. */
12727 rtx reg
= ix86_validate_address_register (base
);
12729 if (reg
== NULL_RTX
)
12732 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12733 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12734 /* Base is not valid. */
12738 /* Validate index register. */
12741 rtx reg
= ix86_validate_address_register (index
);
12743 if (reg
== NULL_RTX
)
12746 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12747 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12748 /* Index is not valid. */
12752 /* Index and base should have the same mode. */
12754 && GET_MODE (base
) != GET_MODE (index
))
12757 /* Address override works only on the (%reg) part of %fs:(%reg). */
12758 if (seg
!= SEG_DEFAULT
12759 && ((base
&& GET_MODE (base
) != word_mode
)
12760 || (index
&& GET_MODE (index
) != word_mode
)))
12763 /* Validate scale factor. */
12767 /* Scale without index. */
12770 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12771 /* Scale is not a valid multiplier. */
12775 /* Validate displacement. */
12778 if (GET_CODE (disp
) == CONST
12779 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12780 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12781 switch (XINT (XEXP (disp
, 0), 1))
12783 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12784 used. While ABI specify also 32bit relocations, we don't produce
12785 them at all and use IP relative instead. */
12787 case UNSPEC_GOTOFF
:
12788 gcc_assert (flag_pic
);
12790 goto is_legitimate_pic
;
12792 /* 64bit address unspec. */
12795 case UNSPEC_GOTPCREL
:
12797 gcc_assert (flag_pic
);
12798 goto is_legitimate_pic
;
12800 case UNSPEC_GOTTPOFF
:
12801 case UNSPEC_GOTNTPOFF
:
12802 case UNSPEC_INDNTPOFF
:
12803 case UNSPEC_NTPOFF
:
12804 case UNSPEC_DTPOFF
:
12807 case UNSPEC_STACK_CHECK
:
12808 gcc_assert (flag_split_stack
);
12812 /* Invalid address unspec. */
12816 else if (SYMBOLIC_CONST (disp
)
12820 && MACHOPIC_INDIRECT
12821 && !machopic_operand_p (disp
)
12827 if (TARGET_64BIT
&& (index
|| base
))
12829 /* foo@dtpoff(%rX) is ok. */
12830 if (GET_CODE (disp
) != CONST
12831 || GET_CODE (XEXP (disp
, 0)) != PLUS
12832 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12833 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12834 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12835 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12836 /* Non-constant pic memory reference. */
12839 else if ((!TARGET_MACHO
|| flag_pic
)
12840 && ! legitimate_pic_address_disp_p (disp
))
12841 /* Displacement is an invalid pic construct. */
12844 else if (MACHO_DYNAMIC_NO_PIC_P
12845 && !ix86_legitimate_constant_p (Pmode
, disp
))
12846 /* displacment must be referenced via non_lazy_pointer */
12850 /* This code used to verify that a symbolic pic displacement
12851 includes the pic_offset_table_rtx register.
12853 While this is good idea, unfortunately these constructs may
12854 be created by "adds using lea" optimization for incorrect
12863 This code is nonsensical, but results in addressing
12864 GOT table with pic_offset_table_rtx base. We can't
12865 just refuse it easily, since it gets matched by
12866 "addsi3" pattern, that later gets split to lea in the
12867 case output register differs from input. While this
12868 can be handled by separate addsi pattern for this case
12869 that never results in lea, this seems to be easier and
12870 correct fix for crash to disable this test. */
12872 else if (GET_CODE (disp
) != LABEL_REF
12873 && !CONST_INT_P (disp
)
12874 && (GET_CODE (disp
) != CONST
12875 || !ix86_legitimate_constant_p (Pmode
, disp
))
12876 && (GET_CODE (disp
) != SYMBOL_REF
12877 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12878 /* Displacement is not constant. */
12880 else if (TARGET_64BIT
12881 && !x86_64_immediate_operand (disp
, VOIDmode
))
12882 /* Displacement is out of range. */
12884 /* In x32 mode, constant addresses are sign extended to 64bit, so
12885 we have to prevent addresses from 0x80000000 to 0xffffffff. */
12886 else if (TARGET_X32
&& !(index
|| base
)
12887 && CONST_INT_P (disp
)
12888 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
12892 /* Everything looks valid. */
12896 /* Determine if a given RTX is a valid constant address. */
12899 constant_address_p (rtx x
)
12901 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12904 /* Return a unique alias set for the GOT. */
12906 static alias_set_type
12907 ix86_GOT_alias_set (void)
12909 static alias_set_type set
= -1;
12911 set
= new_alias_set ();
12915 /* Return a legitimate reference for ORIG (an address) using the
12916 register REG. If REG is 0, a new pseudo is generated.
12918 There are two types of references that must be handled:
12920 1. Global data references must load the address from the GOT, via
12921 the PIC reg. An insn is emitted to do this load, and the reg is
12924 2. Static data references, constant pool addresses, and code labels
12925 compute the address as an offset from the GOT, whose base is in
12926 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12927 differentiate them from global data objects. The returned
12928 address is the PIC reg + an unspec constant.
12930 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12931 reg also appears in the address. */
12934 legitimize_pic_address (rtx orig
, rtx reg
)
12937 rtx new_rtx
= orig
;
12940 if (TARGET_MACHO
&& !TARGET_64BIT
)
12943 reg
= gen_reg_rtx (Pmode
);
12944 /* Use the generic Mach-O PIC machinery. */
12945 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12949 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12951 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
12956 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12958 else if (TARGET_64BIT
&& !TARGET_PECOFF
12959 && ix86_cmodel
!= CM_SMALL_PIC
&& gotoff_operand (addr
, Pmode
))
12962 /* This symbol may be referenced via a displacement from the PIC
12963 base address (@GOTOFF). */
12965 if (reload_in_progress
)
12966 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12967 if (GET_CODE (addr
) == CONST
)
12968 addr
= XEXP (addr
, 0);
12969 if (GET_CODE (addr
) == PLUS
)
12971 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12973 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12976 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12977 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12979 tmpreg
= gen_reg_rtx (Pmode
);
12982 emit_move_insn (tmpreg
, new_rtx
);
12986 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12987 tmpreg
, 1, OPTAB_DIRECT
);
12991 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12993 else if (!TARGET_64BIT
&& !TARGET_PECOFF
&& gotoff_operand (addr
, Pmode
))
12995 /* This symbol may be referenced via a displacement from the PIC
12996 base address (@GOTOFF). */
12998 if (reload_in_progress
)
12999 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13000 if (GET_CODE (addr
) == CONST
)
13001 addr
= XEXP (addr
, 0);
13002 if (GET_CODE (addr
) == PLUS
)
13004 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
13006 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
13009 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
13010 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13011 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13015 emit_move_insn (reg
, new_rtx
);
13019 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
13020 /* We can't use @GOTOFF for text labels on VxWorks;
13021 see gotoff_operand. */
13022 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
13024 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
13028 /* For x64 PE-COFF there is no GOT table. So we use address
13030 if (TARGET_64BIT
&& TARGET_PECOFF
)
13032 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
13033 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13036 reg
= gen_reg_rtx (Pmode
);
13037 emit_move_insn (reg
, new_rtx
);
13040 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
13042 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
13043 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13044 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
13045 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
13048 reg
= gen_reg_rtx (Pmode
);
13049 /* Use directly gen_movsi, otherwise the address is loaded
13050 into register for CSE. We don't want to CSE this addresses,
13051 instead we CSE addresses from the GOT table, so skip this. */
13052 emit_insn (gen_movsi (reg
, new_rtx
));
13057 /* This symbol must be referenced via a load from the
13058 Global Offset Table (@GOT). */
13060 if (reload_in_progress
)
13061 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13062 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
13063 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13065 new_rtx
= force_reg (Pmode
, new_rtx
);
13066 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13067 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
13068 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
13071 reg
= gen_reg_rtx (Pmode
);
13072 emit_move_insn (reg
, new_rtx
);
13078 if (CONST_INT_P (addr
)
13079 && !x86_64_immediate_operand (addr
, VOIDmode
))
13083 emit_move_insn (reg
, addr
);
13087 new_rtx
= force_reg (Pmode
, addr
);
13089 else if (GET_CODE (addr
) == CONST
)
13091 addr
= XEXP (addr
, 0);
13093 /* We must match stuff we generate before. Assume the only
13094 unspecs that can get here are ours. Not that we could do
13095 anything with them anyway.... */
13096 if (GET_CODE (addr
) == UNSPEC
13097 || (GET_CODE (addr
) == PLUS
13098 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
13100 gcc_assert (GET_CODE (addr
) == PLUS
);
13102 if (GET_CODE (addr
) == PLUS
)
13104 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
13106 /* Check first to see if this is a constant offset from a @GOTOFF
13107 symbol reference. */
13108 if (!TARGET_PECOFF
&& gotoff_operand (op0
, Pmode
)
13109 && CONST_INT_P (op1
))
13113 if (reload_in_progress
)
13114 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13115 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
13117 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
13118 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
13119 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
13123 emit_move_insn (reg
, new_rtx
);
13129 if (INTVAL (op1
) < -16*1024*1024
13130 || INTVAL (op1
) >= 16*1024*1024)
13132 if (!x86_64_immediate_operand (op1
, Pmode
))
13133 op1
= force_reg (Pmode
, op1
);
13134 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
13140 rtx base
= legitimize_pic_address (op0
, reg
);
13141 enum machine_mode mode
= GET_MODE (base
);
13143 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
13145 if (CONST_INT_P (new_rtx
))
13147 if (INTVAL (new_rtx
) < -16*1024*1024
13148 || INTVAL (new_rtx
) >= 16*1024*1024)
13150 if (!x86_64_immediate_operand (new_rtx
, mode
))
13151 new_rtx
= force_reg (mode
, new_rtx
);
13153 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
13156 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
13160 if (GET_CODE (new_rtx
) == PLUS
13161 && CONSTANT_P (XEXP (new_rtx
, 1)))
13163 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
13164 new_rtx
= XEXP (new_rtx
, 1);
13166 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
13174 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13177 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
13179 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
13181 if (GET_MODE (tp
) != tp_mode
)
13183 gcc_assert (GET_MODE (tp
) == SImode
);
13184 gcc_assert (tp_mode
== DImode
);
13186 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
13190 tp
= copy_to_mode_reg (tp_mode
, tp
);
13195 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13197 static GTY(()) rtx ix86_tls_symbol
;
13200 ix86_tls_get_addr (void)
13202 if (!ix86_tls_symbol
)
13205 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
13206 ? "___tls_get_addr" : "__tls_get_addr");
13208 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
13211 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
13213 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
13215 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
13216 gen_rtx_CONST (Pmode
, unspec
));
13219 return ix86_tls_symbol
;
13222 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13224 static GTY(()) rtx ix86_tls_module_base_symbol
;
13227 ix86_tls_module_base (void)
13229 if (!ix86_tls_module_base_symbol
)
13231 ix86_tls_module_base_symbol
13232 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
13234 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
13235 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
13238 return ix86_tls_module_base_symbol
;
13241 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13242 false if we expect this to be used for a memory address and true if
13243 we expect to load the address into a register. */
13246 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
13248 rtx dest
, base
, off
;
13249 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
13250 enum machine_mode tp_mode
= Pmode
;
13255 case TLS_MODEL_GLOBAL_DYNAMIC
:
13256 dest
= gen_reg_rtx (Pmode
);
13260 if (flag_pic
&& !TARGET_PECOFF
)
13261 pic
= pic_offset_table_rtx
;
13264 pic
= gen_reg_rtx (Pmode
);
13265 emit_insn (gen_set_got (pic
));
13269 if (TARGET_GNU2_TLS
)
13272 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13274 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13276 tp
= get_thread_pointer (Pmode
, true);
13277 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13279 if (GET_MODE (x
) != Pmode
)
13280 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13282 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13286 rtx caddr
= ix86_tls_get_addr ();
13290 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13295 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
13296 insns
= get_insns ();
13299 if (GET_MODE (x
) != Pmode
)
13300 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13302 RTL_CONST_CALL_P (insns
) = 1;
13303 emit_libcall_block (insns
, dest
, rax
, x
);
13306 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13310 case TLS_MODEL_LOCAL_DYNAMIC
:
13311 base
= gen_reg_rtx (Pmode
);
13316 pic
= pic_offset_table_rtx
;
13319 pic
= gen_reg_rtx (Pmode
);
13320 emit_insn (gen_set_got (pic
));
13324 if (TARGET_GNU2_TLS
)
13326 rtx tmp
= ix86_tls_module_base ();
13329 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13331 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13333 tp
= get_thread_pointer (Pmode
, true);
13334 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13335 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13339 rtx caddr
= ix86_tls_get_addr ();
13343 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
13348 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
13349 insns
= get_insns ();
13352 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13353 share the LD_BASE result with other LD model accesses. */
13354 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13355 UNSPEC_TLS_LD_BASE
);
13357 RTL_CONST_CALL_P (insns
) = 1;
13358 emit_libcall_block (insns
, base
, rax
, eqv
);
13361 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13364 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13365 off
= gen_rtx_CONST (Pmode
, off
);
13367 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13369 if (TARGET_GNU2_TLS
)
13371 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13373 if (GET_MODE (x
) != Pmode
)
13374 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
13376 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13380 case TLS_MODEL_INITIAL_EXEC
:
13383 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13385 /* The Sun linker took the AMD64 TLS spec literally
13386 and can only handle %rax as destination of the
13387 initial executable code sequence. */
13389 dest
= gen_reg_rtx (DImode
);
13390 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13394 /* Generate DImode references to avoid %fs:(%reg32)
13395 problems and linker IE->LE relaxation bug. */
13398 type
= UNSPEC_GOTNTPOFF
;
13402 if (reload_in_progress
)
13403 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13404 pic
= pic_offset_table_rtx
;
13405 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13407 else if (!TARGET_ANY_GNU_TLS
)
13409 pic
= gen_reg_rtx (Pmode
);
13410 emit_insn (gen_set_got (pic
));
13411 type
= UNSPEC_GOTTPOFF
;
13416 type
= UNSPEC_INDNTPOFF
;
13419 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13420 off
= gen_rtx_CONST (tp_mode
, off
);
13422 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13423 off
= gen_const_mem (tp_mode
, off
);
13424 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13426 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13428 base
= get_thread_pointer (tp_mode
,
13429 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13430 off
= force_reg (tp_mode
, off
);
13431 return gen_rtx_PLUS (tp_mode
, base
, off
);
13435 base
= get_thread_pointer (Pmode
, true);
13436 dest
= gen_reg_rtx (Pmode
);
13437 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13441 case TLS_MODEL_LOCAL_EXEC
:
13442 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13443 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13444 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13445 off
= gen_rtx_CONST (Pmode
, off
);
13447 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13449 base
= get_thread_pointer (Pmode
,
13450 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13451 return gen_rtx_PLUS (Pmode
, base
, off
);
13455 base
= get_thread_pointer (Pmode
, true);
13456 dest
= gen_reg_rtx (Pmode
);
13457 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13462 gcc_unreachable ();
13468 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13469 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13470 unique refptr-DECL symbol corresponding to symbol DECL. */
13472 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13473 htab_t dllimport_map
;
13476 get_dllimport_decl (tree decl
, bool beimport
)
13478 struct tree_map
*h
, in
;
13481 const char *prefix
;
13482 size_t namelen
, prefixlen
;
13487 if (!dllimport_map
)
13488 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13490 in
.hash
= htab_hash_pointer (decl
);
13491 in
.base
.from
= decl
;
13492 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13493 h
= (struct tree_map
*) *loc
;
13497 *loc
= h
= ggc_alloc_tree_map ();
13499 h
->base
.from
= decl
;
13500 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13501 VAR_DECL
, NULL
, ptr_type_node
);
13502 DECL_ARTIFICIAL (to
) = 1;
13503 DECL_IGNORED_P (to
) = 1;
13504 DECL_EXTERNAL (to
) = 1;
13505 TREE_READONLY (to
) = 1;
13507 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13508 name
= targetm
.strip_name_encoding (name
);
13510 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13511 ? "*__imp_" : "*__imp__";
13513 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
13514 namelen
= strlen (name
);
13515 prefixlen
= strlen (prefix
);
13516 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13517 memcpy (imp_name
, prefix
, prefixlen
);
13518 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13520 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13521 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13522 SET_SYMBOL_REF_DECL (rtl
, to
);
13523 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
13526 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
13527 #ifdef SUB_TARGET_RECORD_STUB
13528 SUB_TARGET_RECORD_STUB (name
);
13532 rtl
= gen_const_mem (Pmode
, rtl
);
13533 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13535 SET_DECL_RTL (to
, rtl
);
13536 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13541 /* Expand SYMBOL into its corresponding far-addresse symbol.
13542 WANT_REG is true if we require the result be a register. */
13545 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
13550 gcc_assert (SYMBOL_REF_DECL (symbol
));
13551 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
13553 x
= DECL_RTL (imp_decl
);
13555 x
= force_reg (Pmode
, x
);
13559 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13560 true if we require the result be a register. */
13563 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13568 gcc_assert (SYMBOL_REF_DECL (symbol
));
13569 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
13571 x
= DECL_RTL (imp_decl
);
13573 x
= force_reg (Pmode
, x
);
13577 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13578 is true if we require the result be a register. */
13581 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
13583 if (!TARGET_PECOFF
)
13586 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13588 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
13589 return legitimize_dllimport_symbol (addr
, inreg
);
13590 if (GET_CODE (addr
) == CONST
13591 && GET_CODE (XEXP (addr
, 0)) == PLUS
13592 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13593 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
13595 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
13596 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13600 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
13602 if (GET_CODE (addr
) == SYMBOL_REF
13603 && !is_imported_p (addr
)
13604 && SYMBOL_REF_EXTERNAL_P (addr
)
13605 && SYMBOL_REF_DECL (addr
))
13606 return legitimize_pe_coff_extern_decl (addr
, inreg
);
13608 if (GET_CODE (addr
) == CONST
13609 && GET_CODE (XEXP (addr
, 0)) == PLUS
13610 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
13611 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
13612 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
13613 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
13615 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
13616 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
13621 /* Try machine-dependent ways of modifying an illegitimate address
13622 to be legitimate. If we find one, return the new, valid address.
13623 This macro is used in only one place: `memory_address' in explow.c.
13625 OLDX is the address as it was before break_out_memory_refs was called.
13626 In some cases it is useful to look at this to decide what needs to be done.
13628 It is always safe for this macro to do nothing. It exists to recognize
13629 opportunities to optimize the output.
13631 For the 80386, we handle X+REG by loading X into a register R and
13632 using R+REG. R will go in a general reg and indexing will be used.
13633 However, if REG is a broken-out memory address or multiplication,
13634 nothing needs to be done because REG can certainly go in a general reg.
13636 When -fpic is used, special handling is needed for symbolic references.
13637 See comments by legitimize_pic_address in i386.c for details. */
13640 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13641 enum machine_mode mode
)
13646 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13648 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13649 if (GET_CODE (x
) == CONST
13650 && GET_CODE (XEXP (x
, 0)) == PLUS
13651 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13652 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13654 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13655 (enum tls_model
) log
, false);
13656 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13659 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13661 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
13666 if (flag_pic
&& SYMBOLIC_CONST (x
))
13667 return legitimize_pic_address (x
, 0);
13670 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13671 return machopic_indirect_data_reference (x
, 0);
13674 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13675 if (GET_CODE (x
) == ASHIFT
13676 && CONST_INT_P (XEXP (x
, 1))
13677 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13680 log
= INTVAL (XEXP (x
, 1));
13681 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13682 GEN_INT (1 << log
));
13685 if (GET_CODE (x
) == PLUS
)
13687 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13689 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13690 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13691 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13694 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13695 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13696 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13697 GEN_INT (1 << log
));
13700 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13701 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13702 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13705 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13706 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13707 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13708 GEN_INT (1 << log
));
13711 /* Put multiply first if it isn't already. */
13712 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13714 rtx tmp
= XEXP (x
, 0);
13715 XEXP (x
, 0) = XEXP (x
, 1);
13720 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13721 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13722 created by virtual register instantiation, register elimination, and
13723 similar optimizations. */
13724 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13727 x
= gen_rtx_PLUS (Pmode
,
13728 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13729 XEXP (XEXP (x
, 1), 0)),
13730 XEXP (XEXP (x
, 1), 1));
13734 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13735 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13736 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13737 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13738 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13739 && CONSTANT_P (XEXP (x
, 1)))
13742 rtx other
= NULL_RTX
;
13744 if (CONST_INT_P (XEXP (x
, 1)))
13746 constant
= XEXP (x
, 1);
13747 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13749 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13751 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13752 other
= XEXP (x
, 1);
13760 x
= gen_rtx_PLUS (Pmode
,
13761 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13762 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13763 plus_constant (Pmode
, other
,
13764 INTVAL (constant
)));
13768 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13771 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13774 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13777 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13780 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13784 && REG_P (XEXP (x
, 1))
13785 && REG_P (XEXP (x
, 0)))
13788 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13791 x
= legitimize_pic_address (x
, 0);
13794 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13797 if (REG_P (XEXP (x
, 0)))
13799 rtx temp
= gen_reg_rtx (Pmode
);
13800 rtx val
= force_operand (XEXP (x
, 1), temp
);
13803 val
= convert_to_mode (Pmode
, val
, 1);
13804 emit_move_insn (temp
, val
);
13807 XEXP (x
, 1) = temp
;
13811 else if (REG_P (XEXP (x
, 1)))
13813 rtx temp
= gen_reg_rtx (Pmode
);
13814 rtx val
= force_operand (XEXP (x
, 0), temp
);
13817 val
= convert_to_mode (Pmode
, val
, 1);
13818 emit_move_insn (temp
, val
);
13821 XEXP (x
, 0) = temp
;
13829 /* Print an integer constant expression in assembler syntax. Addition
13830 and subtraction are the only arithmetic that may appear in these
13831 expressions. FILE is the stdio stream to write to, X is the rtx, and
13832 CODE is the operand print code from the output string. */
13835 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13839 switch (GET_CODE (x
))
13842 gcc_assert (flag_pic
);
13847 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13848 output_addr_const (file
, x
);
13851 const char *name
= XSTR (x
, 0);
13853 /* Mark the decl as referenced so that cgraph will
13854 output the function. */
13855 if (SYMBOL_REF_DECL (x
))
13856 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13859 if (MACHOPIC_INDIRECT
13860 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13861 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13863 assemble_name (file
, name
);
13865 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
13866 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13867 fputs ("@PLT", file
);
13874 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13875 assemble_name (asm_out_file
, buf
);
13879 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13883 /* This used to output parentheses around the expression,
13884 but that does not work on the 386 (either ATT or BSD assembler). */
13885 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13889 if (GET_MODE (x
) == VOIDmode
)
13891 /* We can use %d if the number is <32 bits and positive. */
13892 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13893 fprintf (file
, "0x%lx%08lx",
13894 (unsigned long) CONST_DOUBLE_HIGH (x
),
13895 (unsigned long) CONST_DOUBLE_LOW (x
));
13897 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13900 /* We can't handle floating point constants;
13901 TARGET_PRINT_OPERAND must handle them. */
13902 output_operand_lossage ("floating constant misused");
13906 /* Some assemblers need integer constants to appear first. */
13907 if (CONST_INT_P (XEXP (x
, 0)))
13909 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13911 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13915 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13916 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13918 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13924 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13925 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13927 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13929 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13933 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13935 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13940 gcc_assert (XVECLEN (x
, 0) == 1);
13941 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13942 switch (XINT (x
, 1))
13945 fputs ("@GOT", file
);
13947 case UNSPEC_GOTOFF
:
13948 fputs ("@GOTOFF", file
);
13950 case UNSPEC_PLTOFF
:
13951 fputs ("@PLTOFF", file
);
13954 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13955 "(%rip)" : "[rip]", file
);
13957 case UNSPEC_GOTPCREL
:
13958 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13959 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13961 case UNSPEC_GOTTPOFF
:
13962 /* FIXME: This might be @TPOFF in Sun ld too. */
13963 fputs ("@gottpoff", file
);
13966 fputs ("@tpoff", file
);
13968 case UNSPEC_NTPOFF
:
13970 fputs ("@tpoff", file
);
13972 fputs ("@ntpoff", file
);
13974 case UNSPEC_DTPOFF
:
13975 fputs ("@dtpoff", file
);
13977 case UNSPEC_GOTNTPOFF
:
13979 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13980 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13982 fputs ("@gotntpoff", file
);
13984 case UNSPEC_INDNTPOFF
:
13985 fputs ("@indntpoff", file
);
13988 case UNSPEC_MACHOPIC_OFFSET
:
13990 machopic_output_function_base_name (file
);
13994 output_operand_lossage ("invalid UNSPEC as operand");
14000 output_operand_lossage ("invalid expression as operand");
14004 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14005 We need to emit DTP-relative relocations. */
14007 static void ATTRIBUTE_UNUSED
14008 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
14010 fputs (ASM_LONG
, file
);
14011 output_addr_const (file
, x
);
14012 fputs ("@dtpoff", file
);
14018 fputs (", 0", file
);
14021 gcc_unreachable ();
14025 /* Return true if X is a representation of the PIC register. This copes
14026 with calls from ix86_find_base_term, where the register might have
14027 been replaced by a cselib value. */
14030 ix86_pic_register_p (rtx x
)
14032 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
14033 return (pic_offset_table_rtx
14034 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
14036 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
14039 /* Helper function for ix86_delegitimize_address.
14040 Attempt to delegitimize TLS local-exec accesses. */
14043 ix86_delegitimize_tls_address (rtx orig_x
)
14045 rtx x
= orig_x
, unspec
;
14046 struct ix86_address addr
;
14048 if (!TARGET_TLS_DIRECT_SEG_REFS
)
14052 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
14054 if (ix86_decompose_address (x
, &addr
) == 0
14055 || addr
.seg
!= DEFAULT_TLS_SEG_REG
14056 || addr
.disp
== NULL_RTX
14057 || GET_CODE (addr
.disp
) != CONST
)
14059 unspec
= XEXP (addr
.disp
, 0);
14060 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
14061 unspec
= XEXP (unspec
, 0);
14062 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
14064 x
= XVECEXP (unspec
, 0, 0);
14065 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
14066 if (unspec
!= XEXP (addr
.disp
, 0))
14067 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
14070 rtx idx
= addr
.index
;
14071 if (addr
.scale
!= 1)
14072 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
14073 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
14076 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
14077 if (MEM_P (orig_x
))
14078 x
= replace_equiv_address_nv (orig_x
, x
);
14082 /* In the name of slightly smaller debug output, and to cater to
14083 general assembler lossage, recognize PIC+GOTOFF and turn it back
14084 into a direct symbol reference.
14086 On Darwin, this is necessary to avoid a crash, because Darwin
14087 has a different PIC label for each routine but the DWARF debugging
14088 information is not associated with any particular routine, so it's
14089 necessary to remove references to the PIC label from RTL stored by
14090 the DWARF output code. */
14093 ix86_delegitimize_address (rtx x
)
14095 rtx orig_x
= delegitimize_mem_from_attrs (x
);
14096 /* addend is NULL or some rtx if x is something+GOTOFF where
14097 something doesn't include the PIC register. */
14098 rtx addend
= NULL_RTX
;
14099 /* reg_addend is NULL or a multiple of some register. */
14100 rtx reg_addend
= NULL_RTX
;
14101 /* const_addend is NULL or a const_int. */
14102 rtx const_addend
= NULL_RTX
;
14103 /* This is the result, or NULL. */
14104 rtx result
= NULL_RTX
;
14113 if (GET_CODE (x
) == CONST
14114 && GET_CODE (XEXP (x
, 0)) == PLUS
14115 && GET_MODE (XEXP (x
, 0)) == Pmode
14116 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
14117 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
14118 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
14120 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
14121 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
14122 if (MEM_P (orig_x
))
14123 x
= replace_equiv_address_nv (orig_x
, x
);
14127 if (GET_CODE (x
) == CONST
14128 && GET_CODE (XEXP (x
, 0)) == UNSPEC
14129 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
14130 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
14131 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
14133 x
= XVECEXP (XEXP (x
, 0), 0, 0);
14134 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
14136 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
14144 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
14145 return ix86_delegitimize_tls_address (orig_x
);
14147 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14148 and -mcmodel=medium -fpic. */
14151 if (GET_CODE (x
) != PLUS
14152 || GET_CODE (XEXP (x
, 1)) != CONST
)
14153 return ix86_delegitimize_tls_address (orig_x
);
14155 if (ix86_pic_register_p (XEXP (x
, 0)))
14156 /* %ebx + GOT/GOTOFF */
14158 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
14160 /* %ebx + %reg * scale + GOT/GOTOFF */
14161 reg_addend
= XEXP (x
, 0);
14162 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
14163 reg_addend
= XEXP (reg_addend
, 1);
14164 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
14165 reg_addend
= XEXP (reg_addend
, 0);
14168 reg_addend
= NULL_RTX
;
14169 addend
= XEXP (x
, 0);
14173 addend
= XEXP (x
, 0);
14175 x
= XEXP (XEXP (x
, 1), 0);
14176 if (GET_CODE (x
) == PLUS
14177 && CONST_INT_P (XEXP (x
, 1)))
14179 const_addend
= XEXP (x
, 1);
14183 if (GET_CODE (x
) == UNSPEC
14184 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
14185 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
14186 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
14187 && !MEM_P (orig_x
) && !addend
)))
14188 result
= XVECEXP (x
, 0, 0);
14190 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
14191 && !MEM_P (orig_x
))
14192 result
= XVECEXP (x
, 0, 0);
14195 return ix86_delegitimize_tls_address (orig_x
);
14198 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
14200 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
14203 /* If the rest of original X doesn't involve the PIC register, add
14204 addend and subtract pic_offset_table_rtx. This can happen e.g.
14206 leal (%ebx, %ecx, 4), %ecx
14208 movl foo@GOTOFF(%ecx), %edx
14209 in which case we return (%ecx - %ebx) + foo. */
14210 if (pic_offset_table_rtx
)
14211 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
14212 pic_offset_table_rtx
),
14217 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
14219 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
14220 if (result
== NULL_RTX
)
14226 /* If X is a machine specific address (i.e. a symbol or label being
14227 referenced as a displacement from the GOT implemented using an
14228 UNSPEC), then return the base term. Otherwise return X. */
14231 ix86_find_base_term (rtx x
)
14237 if (GET_CODE (x
) != CONST
)
14239 term
= XEXP (x
, 0);
14240 if (GET_CODE (term
) == PLUS
14241 && (CONST_INT_P (XEXP (term
, 1))
14242 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
14243 term
= XEXP (term
, 0);
14244 if (GET_CODE (term
) != UNSPEC
14245 || (XINT (term
, 1) != UNSPEC_GOTPCREL
14246 && XINT (term
, 1) != UNSPEC_PCREL
))
14249 return XVECEXP (term
, 0, 0);
14252 return ix86_delegitimize_address (x
);
14256 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
14257 bool fp
, FILE *file
)
14259 const char *suffix
;
14261 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
14263 code
= ix86_fp_compare_code_to_integer (code
);
14267 code
= reverse_condition (code
);
14318 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
14322 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14323 Those same assemblers have the same but opposite lossage on cmov. */
14324 if (mode
== CCmode
)
14325 suffix
= fp
? "nbe" : "a";
14327 gcc_unreachable ();
14343 gcc_unreachable ();
14347 if (mode
== CCmode
)
14349 else if (mode
== CCCmode
)
14352 gcc_unreachable ();
14368 gcc_unreachable ();
14372 if (mode
== CCmode
)
14373 suffix
= fp
? "nb" : "ae";
14374 else if (mode
== CCCmode
)
14377 gcc_unreachable ();
14380 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14384 if (mode
== CCmode
)
14387 gcc_unreachable ();
14390 suffix
= fp
? "u" : "p";
14393 suffix
= fp
? "nu" : "np";
14396 gcc_unreachable ();
14398 fputs (suffix
, file
);
14401 /* Print the name of register X to FILE based on its machine mode and number.
14402 If CODE is 'w', pretend the mode is HImode.
14403 If CODE is 'b', pretend the mode is QImode.
14404 If CODE is 'k', pretend the mode is SImode.
14405 If CODE is 'q', pretend the mode is DImode.
14406 If CODE is 'x', pretend the mode is V4SFmode.
14407 If CODE is 't', pretend the mode is V8SFmode.
14408 If CODE is 'g', pretend the mode is V16SFmode.
14409 If CODE is 'h', pretend the reg is the 'high' byte register.
14410 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14411 If CODE is 'd', duplicate the operand for AVX instruction.
14415 print_reg (rtx x
, int code
, FILE *file
)
14418 unsigned int regno
;
14419 bool duplicated
= code
== 'd' && TARGET_AVX
;
14421 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14426 gcc_assert (TARGET_64BIT
);
14427 fputs ("rip", file
);
14431 regno
= true_regnum (x
);
14432 gcc_assert (regno
!= ARG_POINTER_REGNUM
14433 && regno
!= FRAME_POINTER_REGNUM
14434 && regno
!= FLAGS_REG
14435 && regno
!= FPSR_REG
14436 && regno
!= FPCR_REG
);
14438 if (code
== 'w' || MMX_REG_P (x
))
14440 else if (code
== 'b')
14442 else if (code
== 'k')
14444 else if (code
== 'q')
14446 else if (code
== 'y')
14448 else if (code
== 'h')
14450 else if (code
== 'x')
14452 else if (code
== 't')
14454 else if (code
== 'g')
14457 code
= GET_MODE_SIZE (GET_MODE (x
));
14459 /* Irritatingly, AMD extended registers use different naming convention
14460 from the normal registers: "r%d[bwd]" */
14461 if (REX_INT_REGNO_P (regno
))
14463 gcc_assert (TARGET_64BIT
);
14465 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
14469 error ("extended registers have no high halves");
14484 error ("unsupported operand size for extended register");
14494 if (STACK_TOP_P (x
))
14503 if (! ANY_FP_REG_P (x
))
14504 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14509 reg
= hi_reg_name
[regno
];
14512 if (regno
>= ARRAY_SIZE (qi_reg_name
))
14514 reg
= qi_reg_name
[regno
];
14517 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
14519 reg
= qi_high_reg_name
[regno
];
14524 gcc_assert (!duplicated
);
14526 fputs (hi_reg_name
[regno
] + 1, file
);
14532 gcc_assert (!duplicated
);
14534 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14539 gcc_unreachable ();
14545 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14546 fprintf (file
, ", %%%s", reg
);
14548 fprintf (file
, ", %s", reg
);
14552 /* Locate some local-dynamic symbol still in use by this function
14553 so that we can print its name in some tls_local_dynamic_base
14557 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14561 if (GET_CODE (x
) == SYMBOL_REF
14562 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14564 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14571 static const char *
14572 get_some_local_dynamic_name (void)
14576 if (cfun
->machine
->some_ld_name
)
14577 return cfun
->machine
->some_ld_name
;
14579 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14580 if (NONDEBUG_INSN_P (insn
)
14581 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14582 return cfun
->machine
->some_ld_name
;
14587 /* Meaning of CODE:
14588 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14589 C -- print opcode suffix for set/cmov insn.
14590 c -- like C, but print reversed condition
14591 F,f -- likewise, but for floating-point.
14592 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14594 R -- print the prefix for register names.
14595 z -- print the opcode suffix for the size of the current operand.
14596 Z -- likewise, with special suffixes for x87 instructions.
14597 * -- print a star (in certain assembler syntax)
14598 A -- print an absolute memory reference.
14599 E -- print address with DImode register names if TARGET_64BIT.
14600 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14601 s -- print a shift double count, followed by the assemblers argument
14603 b -- print the QImode name of the register for the indicated operand.
14604 %b0 would print %al if operands[0] is reg 0.
14605 w -- likewise, print the HImode name of the register.
14606 k -- likewise, print the SImode name of the register.
14607 q -- likewise, print the DImode name of the register.
14608 x -- likewise, print the V4SFmode name of the register.
14609 t -- likewise, print the V8SFmode name of the register.
14610 g -- likewise, print the V16SFmode name of the register.
14611 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14612 y -- print "st(0)" instead of "st" as a register.
14613 d -- print duplicated register operand for AVX instruction.
14614 D -- print condition for SSE cmp instruction.
14615 P -- if PIC, print an @PLT suffix.
14616 p -- print raw symbol name.
14617 X -- don't print any sort of PIC '@' suffix for a symbol.
14618 & -- print some in-use local-dynamic symbol name.
14619 H -- print a memory address offset by 8; used for sse high-parts
14620 Y -- print condition for XOP pcom* instruction.
14621 + -- print a branch hint as 'cs' or 'ds' prefix
14622 ; -- print a semicolon (after prefixes due to bug in older gas).
14623 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14624 @ -- print a segment register of thread base pointer load
14625 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14629 ix86_print_operand (FILE *file
, rtx x
, int code
)
14636 switch (ASSEMBLER_DIALECT
)
14643 /* Intel syntax. For absolute addresses, registers should not
14644 be surrounded by braces. */
14648 ix86_print_operand (file
, x
, 0);
14655 gcc_unreachable ();
14658 ix86_print_operand (file
, x
, 0);
14662 /* Wrap address in an UNSPEC to declare special handling. */
14664 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14666 output_address (x
);
14670 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14675 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14680 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14685 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14690 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14695 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14700 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14701 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14704 switch (GET_MODE_SIZE (GET_MODE (x
)))
14719 output_operand_lossage
14720 ("invalid operand size for operand code 'O'");
14729 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14731 /* Opcodes don't get size suffixes if using Intel opcodes. */
14732 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14735 switch (GET_MODE_SIZE (GET_MODE (x
)))
14754 output_operand_lossage
14755 ("invalid operand size for operand code 'z'");
14760 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14762 (0, "non-integer operand used with operand code 'z'");
14766 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14767 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14770 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14772 switch (GET_MODE_SIZE (GET_MODE (x
)))
14775 #ifdef HAVE_AS_IX86_FILDS
14785 #ifdef HAVE_AS_IX86_FILDQ
14788 fputs ("ll", file
);
14796 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14798 /* 387 opcodes don't get size suffixes
14799 if the operands are registers. */
14800 if (STACK_REG_P (x
))
14803 switch (GET_MODE_SIZE (GET_MODE (x
)))
14824 output_operand_lossage
14825 ("invalid operand type used with operand code 'Z'");
14829 output_operand_lossage
14830 ("invalid operand size for operand code 'Z'");
14849 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14851 ix86_print_operand (file
, x
, 0);
14852 fputs (", ", file
);
14857 switch (GET_CODE (x
))
14860 fputs ("neq", file
);
14863 fputs ("eq", file
);
14867 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14871 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14875 fputs ("le", file
);
14879 fputs ("lt", file
);
14882 fputs ("unord", file
);
14885 fputs ("ord", file
);
14888 fputs ("ueq", file
);
14891 fputs ("nlt", file
);
14894 fputs ("nle", file
);
14897 fputs ("ule", file
);
14900 fputs ("ult", file
);
14903 fputs ("une", file
);
14906 output_operand_lossage ("operand is not a condition code, "
14907 "invalid operand code 'Y'");
14913 /* Little bit of braindamage here. The SSE compare instructions
14914 does use completely different names for the comparisons that the
14915 fp conditional moves. */
14916 switch (GET_CODE (x
))
14921 fputs ("eq_us", file
);
14925 fputs ("eq", file
);
14930 fputs ("nge", file
);
14934 fputs ("lt", file
);
14939 fputs ("ngt", file
);
14943 fputs ("le", file
);
14946 fputs ("unord", file
);
14951 fputs ("neq_oq", file
);
14955 fputs ("neq", file
);
14960 fputs ("ge", file
);
14964 fputs ("nlt", file
);
14969 fputs ("gt", file
);
14973 fputs ("nle", file
);
14976 fputs ("ord", file
);
14979 output_operand_lossage ("operand is not a condition code, "
14980 "invalid operand code 'D'");
14987 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14988 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14994 if (!COMPARISON_P (x
))
14996 output_operand_lossage ("operand is not a condition code, "
14997 "invalid operand code '%c'", code
);
15000 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
15001 code
== 'c' || code
== 'f',
15002 code
== 'F' || code
== 'f',
15007 if (!offsettable_memref_p (x
))
15009 output_operand_lossage ("operand is not an offsettable memory "
15010 "reference, invalid operand code 'H'");
15013 /* It doesn't actually matter what mode we use here, as we're
15014 only going to use this for printing. */
15015 x
= adjust_address_nv (x
, DImode
, 8);
15016 /* Output 'qword ptr' for intel assembler dialect. */
15017 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
15022 gcc_assert (CONST_INT_P (x
));
15024 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
15025 #ifdef HAVE_AS_IX86_HLE
15026 fputs ("xacquire ", file
);
15028 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
15030 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
15031 #ifdef HAVE_AS_IX86_HLE
15032 fputs ("xrelease ", file
);
15034 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
15036 /* We do not want to print value of the operand. */
15040 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
15041 fputs ("{z}", file
);
15045 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15051 const char *name
= get_some_local_dynamic_name ();
15053 output_operand_lossage ("'%%&' used without any "
15054 "local dynamic TLS references");
15056 assemble_name (file
, name
);
15065 || optimize_function_for_size_p (cfun
)
15066 || !TARGET_BRANCH_PREDICTION_HINTS
)
15069 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
15072 int pred_val
= XINT (x
, 0);
15074 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
15075 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
15077 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
15079 = final_forward_branch_p (current_output_insn
) == 0;
15081 /* Emit hints only in the case default branch prediction
15082 heuristics would fail. */
15083 if (taken
!= cputaken
)
15085 /* We use 3e (DS) prefix for taken branches and
15086 2e (CS) prefix for not taken branches. */
15088 fputs ("ds ; ", file
);
15090 fputs ("cs ; ", file
);
15098 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15104 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15107 /* The kernel uses a different segment register for performance
15108 reasons; a system call would not have to trash the userspace
15109 segment register, which would be expensive. */
15110 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
15111 fputs ("fs", file
);
15113 fputs ("gs", file
);
15117 putc (TARGET_AVX2
? 'i' : 'f', file
);
15121 if (TARGET_64BIT
&& Pmode
!= word_mode
)
15122 fputs ("addr32 ", file
);
15126 output_operand_lossage ("invalid operand code '%c'", code
);
15131 print_reg (x
, code
, file
);
15133 else if (MEM_P (x
))
15135 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15136 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
15137 && GET_MODE (x
) != BLKmode
)
15140 switch (GET_MODE_SIZE (GET_MODE (x
)))
15142 case 1: size
= "BYTE"; break;
15143 case 2: size
= "WORD"; break;
15144 case 4: size
= "DWORD"; break;
15145 case 8: size
= "QWORD"; break;
15146 case 12: size
= "TBYTE"; break;
15148 if (GET_MODE (x
) == XFmode
)
15153 case 32: size
= "YMMWORD"; break;
15154 case 64: size
= "ZMMWORD"; break;
15156 gcc_unreachable ();
15159 /* Check for explicit size override (codes 'b', 'w', 'k',
15163 else if (code
== 'w')
15165 else if (code
== 'k')
15167 else if (code
== 'q')
15169 else if (code
== 'x')
15172 fputs (size
, file
);
15173 fputs (" PTR ", file
);
15177 /* Avoid (%rip) for call operands. */
15178 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
15179 && !CONST_INT_P (x
))
15180 output_addr_const (file
, x
);
15181 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
15182 output_operand_lossage ("invalid constraints for operand");
15184 output_address (x
);
15187 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
15192 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15193 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
15195 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15197 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15199 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
15200 (unsigned long long) (int) l
);
15202 fprintf (file
, "0x%08x", (unsigned int) l
);
15205 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
15210 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15211 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
15213 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15215 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
15218 /* These float cases don't actually occur as immediate operands. */
15219 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
15223 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
15224 fputs (dstr
, file
);
15229 /* We have patterns that allow zero sets of memory, for instance.
15230 In 64-bit mode, we should probably support all 8-byte vectors,
15231 since we can in fact encode that into an immediate. */
15232 if (GET_CODE (x
) == CONST_VECTOR
)
15234 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
15238 if (code
!= 'P' && code
!= 'p')
15240 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
15242 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15245 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
15246 || GET_CODE (x
) == LABEL_REF
)
15248 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15251 fputs ("OFFSET FLAT:", file
);
15254 if (CONST_INT_P (x
))
15255 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15256 else if (flag_pic
|| MACHOPIC_INDIRECT
)
15257 output_pic_addr_const (file
, x
, code
);
15259 output_addr_const (file
, x
);
15264 ix86_print_operand_punct_valid_p (unsigned char code
)
15266 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
15267 || code
== ';' || code
== '~' || code
== '^');
15270 /* Print a memory operand whose address is ADDR. */
15273 ix86_print_operand_address (FILE *file
, rtx addr
)
15275 struct ix86_address parts
;
15276 rtx base
, index
, disp
;
15282 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
15284 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15285 gcc_assert (parts
.index
== NULL_RTX
);
15286 parts
.index
= XVECEXP (addr
, 0, 1);
15287 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
15288 addr
= XVECEXP (addr
, 0, 0);
15291 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
15293 gcc_assert (TARGET_64BIT
);
15294 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
15298 ok
= ix86_decompose_address (addr
, &parts
);
15303 index
= parts
.index
;
15305 scale
= parts
.scale
;
15313 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15315 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
15318 gcc_unreachable ();
15321 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15322 if (TARGET_64BIT
&& !base
&& !index
)
15326 if (GET_CODE (disp
) == CONST
15327 && GET_CODE (XEXP (disp
, 0)) == PLUS
15328 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15329 symbol
= XEXP (XEXP (disp
, 0), 0);
15331 if (GET_CODE (symbol
) == LABEL_REF
15332 || (GET_CODE (symbol
) == SYMBOL_REF
15333 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
15336 if (!base
&& !index
)
15338 /* Displacement only requires special attention. */
15340 if (CONST_INT_P (disp
))
15342 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
15343 fputs ("ds:", file
);
15344 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
15347 output_pic_addr_const (file
, disp
, 0);
15349 output_addr_const (file
, disp
);
15353 /* Print SImode register names to force addr32 prefix. */
15354 if (SImode_address_operand (addr
, VOIDmode
))
15356 #ifdef ENABLE_CHECKING
15357 gcc_assert (TARGET_64BIT
);
15358 switch (GET_CODE (addr
))
15361 gcc_assert (GET_MODE (addr
) == SImode
);
15362 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15366 gcc_assert (GET_MODE (addr
) == DImode
);
15369 gcc_unreachable ();
15372 gcc_assert (!code
);
15378 && CONST_INT_P (disp
)
15379 && INTVAL (disp
) < -16*1024*1024)
15381 /* X32 runs in 64-bit mode, where displacement, DISP, in
15382 address DISP(%r64), is encoded as 32-bit immediate sign-
15383 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15384 address is %r64 + 0xffffffffbffffd00. When %r64 <
15385 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15386 which is invalid for x32. The correct address is %r64
15387 - 0x40000300 == 0xf7ffdd64. To properly encode
15388 -0x40000300(%r64) for x32, we zero-extend negative
15389 displacement by forcing addr32 prefix which truncates
15390 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15391 zero-extend all negative displacements, including -1(%rsp).
15392 However, for small negative displacements, sign-extension
15393 won't cause overflow. We only zero-extend negative
15394 displacements if they < -16*1024*1024, which is also used
15395 to check legitimate address displacements for PIC. */
15399 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15404 output_pic_addr_const (file
, disp
, 0);
15405 else if (GET_CODE (disp
) == LABEL_REF
)
15406 output_asm_label (disp
);
15408 output_addr_const (file
, disp
);
15413 print_reg (base
, code
, file
);
15417 print_reg (index
, vsib
? 0 : code
, file
);
15418 if (scale
!= 1 || vsib
)
15419 fprintf (file
, ",%d", scale
);
15425 rtx offset
= NULL_RTX
;
15429 /* Pull out the offset of a symbol; print any symbol itself. */
15430 if (GET_CODE (disp
) == CONST
15431 && GET_CODE (XEXP (disp
, 0)) == PLUS
15432 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15434 offset
= XEXP (XEXP (disp
, 0), 1);
15435 disp
= gen_rtx_CONST (VOIDmode
,
15436 XEXP (XEXP (disp
, 0), 0));
15440 output_pic_addr_const (file
, disp
, 0);
15441 else if (GET_CODE (disp
) == LABEL_REF
)
15442 output_asm_label (disp
);
15443 else if (CONST_INT_P (disp
))
15446 output_addr_const (file
, disp
);
15452 print_reg (base
, code
, file
);
15455 if (INTVAL (offset
) >= 0)
15457 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15461 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15468 print_reg (index
, vsib
? 0 : code
, file
);
15469 if (scale
!= 1 || vsib
)
15470 fprintf (file
, "*%d", scale
);
15477 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15480 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15484 if (GET_CODE (x
) != UNSPEC
)
15487 op
= XVECEXP (x
, 0, 0);
15488 switch (XINT (x
, 1))
15490 case UNSPEC_GOTTPOFF
:
15491 output_addr_const (file
, op
);
15492 /* FIXME: This might be @TPOFF in Sun ld. */
15493 fputs ("@gottpoff", file
);
15496 output_addr_const (file
, op
);
15497 fputs ("@tpoff", file
);
15499 case UNSPEC_NTPOFF
:
15500 output_addr_const (file
, op
);
15502 fputs ("@tpoff", file
);
15504 fputs ("@ntpoff", file
);
15506 case UNSPEC_DTPOFF
:
15507 output_addr_const (file
, op
);
15508 fputs ("@dtpoff", file
);
15510 case UNSPEC_GOTNTPOFF
:
15511 output_addr_const (file
, op
);
15513 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15514 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15516 fputs ("@gotntpoff", file
);
15518 case UNSPEC_INDNTPOFF
:
15519 output_addr_const (file
, op
);
15520 fputs ("@indntpoff", file
);
15523 case UNSPEC_MACHOPIC_OFFSET
:
15524 output_addr_const (file
, op
);
15526 machopic_output_function_base_name (file
);
15530 case UNSPEC_STACK_CHECK
:
15534 gcc_assert (flag_split_stack
);
15536 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15537 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15539 gcc_unreachable ();
15542 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15553 /* Split one or more double-mode RTL references into pairs of half-mode
15554 references. The RTL can be REG, offsettable MEM, integer constant, or
15555 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15556 split and "num" is its length. lo_half and hi_half are output arrays
15557 that parallel "operands". */
15560 split_double_mode (enum machine_mode mode
, rtx operands
[],
15561 int num
, rtx lo_half
[], rtx hi_half
[])
15563 enum machine_mode half_mode
;
15569 half_mode
= DImode
;
15572 half_mode
= SImode
;
15575 gcc_unreachable ();
15578 byte
= GET_MODE_SIZE (half_mode
);
15582 rtx op
= operands
[num
];
15584 /* simplify_subreg refuse to split volatile memory addresses,
15585 but we still have to handle it. */
15588 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15589 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15593 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15594 GET_MODE (op
) == VOIDmode
15595 ? mode
: GET_MODE (op
), 0);
15596 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15597 GET_MODE (op
) == VOIDmode
15598 ? mode
: GET_MODE (op
), byte
);
15603 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15604 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15605 is the expression of the binary operation. The output may either be
15606 emitted here, or returned to the caller, like all output_* functions.
15608 There is no guarantee that the operands are the same mode, as they
15609 might be within FLOAT or FLOAT_EXTEND expressions. */
15611 #ifndef SYSV386_COMPAT
15612 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15613 wants to fix the assemblers because that causes incompatibility
15614 with gcc. No-one wants to fix gcc because that causes
15615 incompatibility with assemblers... You can use the option of
15616 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15617 #define SYSV386_COMPAT 1
15621 output_387_binary_op (rtx insn
, rtx
*operands
)
15623 static char buf
[40];
15626 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15628 #ifdef ENABLE_CHECKING
15629 /* Even if we do not want to check the inputs, this documents input
15630 constraints. Which helps in understanding the following code. */
15631 if (STACK_REG_P (operands
[0])
15632 && ((REG_P (operands
[1])
15633 && REGNO (operands
[0]) == REGNO (operands
[1])
15634 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15635 || (REG_P (operands
[2])
15636 && REGNO (operands
[0]) == REGNO (operands
[2])
15637 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15638 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15641 gcc_assert (is_sse
);
15644 switch (GET_CODE (operands
[3]))
15647 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15648 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15656 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15657 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15665 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15666 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15674 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15675 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15683 gcc_unreachable ();
15690 strcpy (buf
, ssep
);
15691 if (GET_MODE (operands
[0]) == SFmode
)
15692 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15694 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15698 strcpy (buf
, ssep
+ 1);
15699 if (GET_MODE (operands
[0]) == SFmode
)
15700 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15702 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15708 switch (GET_CODE (operands
[3]))
15712 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15714 rtx temp
= operands
[2];
15715 operands
[2] = operands
[1];
15716 operands
[1] = temp
;
15719 /* know operands[0] == operands[1]. */
15721 if (MEM_P (operands
[2]))
15727 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15729 if (STACK_TOP_P (operands
[0]))
15730 /* How is it that we are storing to a dead operand[2]?
15731 Well, presumably operands[1] is dead too. We can't
15732 store the result to st(0) as st(0) gets popped on this
15733 instruction. Instead store to operands[2] (which I
15734 think has to be st(1)). st(1) will be popped later.
15735 gcc <= 2.8.1 didn't have this check and generated
15736 assembly code that the Unixware assembler rejected. */
15737 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15739 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15743 if (STACK_TOP_P (operands
[0]))
15744 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15746 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15751 if (MEM_P (operands
[1]))
15757 if (MEM_P (operands
[2]))
15763 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15766 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15767 derived assemblers, confusingly reverse the direction of
15768 the operation for fsub{r} and fdiv{r} when the
15769 destination register is not st(0). The Intel assembler
15770 doesn't have this brain damage. Read !SYSV386_COMPAT to
15771 figure out what the hardware really does. */
15772 if (STACK_TOP_P (operands
[0]))
15773 p
= "{p\t%0, %2|rp\t%2, %0}";
15775 p
= "{rp\t%2, %0|p\t%0, %2}";
15777 if (STACK_TOP_P (operands
[0]))
15778 /* As above for fmul/fadd, we can't store to st(0). */
15779 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15781 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15786 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15789 if (STACK_TOP_P (operands
[0]))
15790 p
= "{rp\t%0, %1|p\t%1, %0}";
15792 p
= "{p\t%1, %0|rp\t%0, %1}";
15794 if (STACK_TOP_P (operands
[0]))
15795 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15797 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15802 if (STACK_TOP_P (operands
[0]))
15804 if (STACK_TOP_P (operands
[1]))
15805 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15807 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15810 else if (STACK_TOP_P (operands
[1]))
15813 p
= "{\t%1, %0|r\t%0, %1}";
15815 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15821 p
= "{r\t%2, %0|\t%0, %2}";
15823 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15829 gcc_unreachable ();
15836 /* Check if a 256bit AVX register is referenced inside of EXP. */
15839 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15843 if (GET_CODE (exp
) == SUBREG
)
15844 exp
= SUBREG_REG (exp
);
15847 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15853 /* Return needed mode for entity in optimize_mode_switching pass. */
15856 ix86_avx_u128_mode_needed (rtx insn
)
15862 /* Needed mode is set to AVX_U128_CLEAN if there are
15863 no 256bit modes used in function arguments. */
15864 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15866 link
= XEXP (link
, 1))
15868 if (GET_CODE (XEXP (link
, 0)) == USE
)
15870 rtx arg
= XEXP (XEXP (link
, 0), 0);
15872 if (ix86_check_avx256_register (&arg
, NULL
))
15873 return AVX_U128_DIRTY
;
15877 return AVX_U128_CLEAN
;
15880 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15881 changes state only when a 256bit register is written to, but we need
15882 to prevent the compiler from moving optimal insertion point above
15883 eventual read from 256bit register. */
15884 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15885 return AVX_U128_DIRTY
;
15887 return AVX_U128_ANY
;
15890 /* Return mode that i387 must be switched into
15891 prior to the execution of insn. */
15894 ix86_i387_mode_needed (int entity
, rtx insn
)
15896 enum attr_i387_cw mode
;
15898 /* The mode UNINITIALIZED is used to store control word after a
15899 function call or ASM pattern. The mode ANY specify that function
15900 has no requirements on the control word and make no changes in the
15901 bits we are interested in. */
15904 || (NONJUMP_INSN_P (insn
)
15905 && (asm_noperands (PATTERN (insn
)) >= 0
15906 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15907 return I387_CW_UNINITIALIZED
;
15909 if (recog_memoized (insn
) < 0)
15910 return I387_CW_ANY
;
15912 mode
= get_attr_i387_cw (insn
);
15917 if (mode
== I387_CW_TRUNC
)
15922 if (mode
== I387_CW_FLOOR
)
15927 if (mode
== I387_CW_CEIL
)
15932 if (mode
== I387_CW_MASK_PM
)
15937 gcc_unreachable ();
15940 return I387_CW_ANY
;
15943 /* Return mode that entity must be switched into
15944 prior to the execution of insn. */
15947 ix86_mode_needed (int entity
, rtx insn
)
15952 return ix86_avx_u128_mode_needed (insn
);
15957 return ix86_i387_mode_needed (entity
, insn
);
15959 gcc_unreachable ();
15964 /* Check if a 256bit AVX register is referenced in stores. */
15967 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15969 if (ix86_check_avx256_register (&dest
, NULL
))
15971 bool *used
= (bool *) data
;
15976 /* Calculate mode of upper 128bit AVX registers after the insn. */
15979 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15981 rtx pat
= PATTERN (insn
);
15983 if (vzeroupper_operation (pat
, VOIDmode
)
15984 || vzeroall_operation (pat
, VOIDmode
))
15985 return AVX_U128_CLEAN
;
15987 /* We know that state is clean after CALL insn if there are no
15988 256bit registers used in the function return register. */
15991 bool avx_reg256_found
= false;
15992 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15994 return avx_reg256_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
15997 /* Otherwise, return current mode. Remember that if insn
15998 references AVX 256bit registers, the mode was already changed
15999 to DIRTY from MODE_NEEDED. */
16003 /* Return the mode that an insn results in. */
16006 ix86_mode_after (int entity
, int mode
, rtx insn
)
16011 return ix86_avx_u128_mode_after (mode
, insn
);
16018 gcc_unreachable ();
16023 ix86_avx_u128_mode_entry (void)
16027 /* Entry mode is set to AVX_U128_DIRTY if there are
16028 256bit modes used in function arguments. */
16029 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
16030 arg
= TREE_CHAIN (arg
))
16032 rtx incoming
= DECL_INCOMING_RTL (arg
);
16034 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
16035 return AVX_U128_DIRTY
;
16038 return AVX_U128_CLEAN
;
16041 /* Return a mode that ENTITY is assumed to be
16042 switched to at function entry. */
16045 ix86_mode_entry (int entity
)
16050 return ix86_avx_u128_mode_entry ();
16055 return I387_CW_ANY
;
16057 gcc_unreachable ();
16062 ix86_avx_u128_mode_exit (void)
16064 rtx reg
= crtl
->return_rtx
;
16066 /* Exit mode is set to AVX_U128_DIRTY if there are
16067 256bit modes used in the function return register. */
16068 if (reg
&& ix86_check_avx256_register (®
, NULL
))
16069 return AVX_U128_DIRTY
;
16071 return AVX_U128_CLEAN
;
16074 /* Return a mode that ENTITY is assumed to be
16075 switched to at function exit. */
16078 ix86_mode_exit (int entity
)
16083 return ix86_avx_u128_mode_exit ();
16088 return I387_CW_ANY
;
16090 gcc_unreachable ();
16094 /* Output code to initialize control word copies used by trunc?f?i and
16095 rounding patterns. CURRENT_MODE is set to current control word,
16096 while NEW_MODE is set to new control word. */
16099 emit_i387_cw_initialization (int mode
)
16101 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
16104 enum ix86_stack_slot slot
;
16106 rtx reg
= gen_reg_rtx (HImode
);
16108 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
16109 emit_move_insn (reg
, copy_rtx (stored_mode
));
16111 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
16112 || optimize_insn_for_size_p ())
16116 case I387_CW_TRUNC
:
16117 /* round toward zero (truncate) */
16118 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
16119 slot
= SLOT_CW_TRUNC
;
16122 case I387_CW_FLOOR
:
16123 /* round down toward -oo */
16124 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16125 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
16126 slot
= SLOT_CW_FLOOR
;
16130 /* round up toward +oo */
16131 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
16132 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
16133 slot
= SLOT_CW_CEIL
;
16136 case I387_CW_MASK_PM
:
16137 /* mask precision exception for nearbyint() */
16138 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16139 slot
= SLOT_CW_MASK_PM
;
16143 gcc_unreachable ();
16150 case I387_CW_TRUNC
:
16151 /* round toward zero (truncate) */
16152 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
16153 slot
= SLOT_CW_TRUNC
;
16156 case I387_CW_FLOOR
:
16157 /* round down toward -oo */
16158 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
16159 slot
= SLOT_CW_FLOOR
;
16163 /* round up toward +oo */
16164 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
16165 slot
= SLOT_CW_CEIL
;
16168 case I387_CW_MASK_PM
:
16169 /* mask precision exception for nearbyint() */
16170 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
16171 slot
= SLOT_CW_MASK_PM
;
16175 gcc_unreachable ();
16179 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
16181 new_mode
= assign_386_stack_local (HImode
, slot
);
16182 emit_move_insn (new_mode
, reg
);
16185 /* Emit vzeroupper. */
16188 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
16192 /* Cancel automatic vzeroupper insertion if there are
16193 live call-saved SSE registers at the insertion point. */
16195 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
16196 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16200 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
16201 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
16204 emit_insn (gen_avx_vzeroupper ());
16207 /* Generate one or more insns to set ENTITY to MODE. */
16210 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
16215 if (mode
== AVX_U128_CLEAN
)
16216 ix86_avx_emit_vzeroupper (regs_live
);
16222 if (mode
!= I387_CW_ANY
16223 && mode
!= I387_CW_UNINITIALIZED
)
16224 emit_i387_cw_initialization (mode
);
16227 gcc_unreachable ();
16231 /* Output code for INSN to convert a float to a signed int. OPERANDS
16232 are the insn operands. The output may be [HSD]Imode and the input
16233 operand may be [SDX]Fmode. */
16236 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
16238 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16239 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
16240 int round_mode
= get_attr_i387_cw (insn
);
16242 /* Jump through a hoop or two for DImode, since the hardware has no
16243 non-popping instruction. We used to do this a different way, but
16244 that was somewhat fragile and broke with post-reload splitters. */
16245 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
16246 output_asm_insn ("fld\t%y1", operands
);
16248 gcc_assert (STACK_TOP_P (operands
[1]));
16249 gcc_assert (MEM_P (operands
[0]));
16250 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
16253 output_asm_insn ("fisttp%Z0\t%0", operands
);
16256 if (round_mode
!= I387_CW_ANY
)
16257 output_asm_insn ("fldcw\t%3", operands
);
16258 if (stack_top_dies
|| dimode_p
)
16259 output_asm_insn ("fistp%Z0\t%0", operands
);
16261 output_asm_insn ("fist%Z0\t%0", operands
);
16262 if (round_mode
!= I387_CW_ANY
)
16263 output_asm_insn ("fldcw\t%2", operands
);
16269 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16270 have the values zero or one, indicates the ffreep insn's operand
16271 from the OPERANDS array. */
16273 static const char *
16274 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
16276 if (TARGET_USE_FFREEP
)
16277 #ifdef HAVE_AS_IX86_FFREEP
16278 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
16281 static char retval
[32];
16282 int regno
= REGNO (operands
[opno
]);
16284 gcc_assert (STACK_REGNO_P (regno
));
16286 regno
-= FIRST_STACK_REG
;
16288 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
16293 return opno
? "fstp\t%y1" : "fstp\t%y0";
16297 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16298 should be used. UNORDERED_P is true when fucom should be used. */
16301 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
16303 int stack_top_dies
;
16304 rtx cmp_op0
, cmp_op1
;
16305 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
16309 cmp_op0
= operands
[0];
16310 cmp_op1
= operands
[1];
16314 cmp_op0
= operands
[1];
16315 cmp_op1
= operands
[2];
16320 if (GET_MODE (operands
[0]) == SFmode
)
16322 return "%vucomiss\t{%1, %0|%0, %1}";
16324 return "%vcomiss\t{%1, %0|%0, %1}";
16327 return "%vucomisd\t{%1, %0|%0, %1}";
16329 return "%vcomisd\t{%1, %0|%0, %1}";
16332 gcc_assert (STACK_TOP_P (cmp_op0
));
16334 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
16336 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
16338 if (stack_top_dies
)
16340 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
16341 return output_387_ffreep (operands
, 1);
16344 return "ftst\n\tfnstsw\t%0";
16347 if (STACK_REG_P (cmp_op1
)
16349 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
16350 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
16352 /* If both the top of the 387 stack dies, and the other operand
16353 is also a stack register that dies, then this must be a
16354 `fcompp' float compare */
16358 /* There is no double popping fcomi variant. Fortunately,
16359 eflags is immune from the fstp's cc clobbering. */
16361 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
16363 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
16364 return output_387_ffreep (operands
, 0);
16369 return "fucompp\n\tfnstsw\t%0";
16371 return "fcompp\n\tfnstsw\t%0";
16376 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16378 static const char * const alt
[16] =
16380 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16381 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16382 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16383 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16385 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16386 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16390 "fcomi\t{%y1, %0|%0, %y1}",
16391 "fcomip\t{%y1, %0|%0, %y1}",
16392 "fucomi\t{%y1, %0|%0, %y1}",
16393 "fucomip\t{%y1, %0|%0, %y1}",
16404 mask
= eflags_p
<< 3;
16405 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
16406 mask
|= unordered_p
<< 1;
16407 mask
|= stack_top_dies
;
16409 gcc_assert (mask
< 16);
16418 ix86_output_addr_vec_elt (FILE *file
, int value
)
16420 const char *directive
= ASM_LONG
;
16424 directive
= ASM_QUAD
;
16426 gcc_assert (!TARGET_64BIT
);
16429 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
16433 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
16435 const char *directive
= ASM_LONG
;
16438 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
16439 directive
= ASM_QUAD
;
16441 gcc_assert (!TARGET_64BIT
);
16443 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16444 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
16445 fprintf (file
, "%s%s%d-%s%d\n",
16446 directive
, LPREFIX
, value
, LPREFIX
, rel
);
16447 else if (HAVE_AS_GOTOFF_IN_DATA
)
16448 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
16450 else if (TARGET_MACHO
)
16452 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
16453 machopic_output_function_base_name (file
);
16458 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
16459 GOT_SYMBOL_NAME
, LPREFIX
, value
);
16462 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16466 ix86_expand_clear (rtx dest
)
16470 /* We play register width games, which are only valid after reload. */
16471 gcc_assert (reload_completed
);
16473 /* Avoid HImode and its attendant prefix byte. */
16474 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
16475 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
16476 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
16478 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16479 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
16481 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16482 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
16488 /* X is an unchanging MEM. If it is a constant pool reference, return
16489 the constant pool rtx, else NULL. */
16492 maybe_get_pool_constant (rtx x
)
16494 x
= ix86_delegitimize_address (XEXP (x
, 0));
16496 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
16497 return get_pool_constant (x
);
16503 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
16506 enum tls_model model
;
16511 if (GET_CODE (op1
) == SYMBOL_REF
)
16515 model
= SYMBOL_REF_TLS_MODEL (op1
);
16518 op1
= legitimize_tls_address (op1
, model
, true);
16519 op1
= force_operand (op1
, op0
);
16522 op1
= convert_to_mode (mode
, op1
, 1);
16524 else if ((tmp
= legitimize_pe_coff_symbol (op1
, false)) != NULL_RTX
)
16527 else if (GET_CODE (op1
) == CONST
16528 && GET_CODE (XEXP (op1
, 0)) == PLUS
16529 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
16531 rtx addend
= XEXP (XEXP (op1
, 0), 1);
16532 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
16535 model
= SYMBOL_REF_TLS_MODEL (symbol
);
16537 tmp
= legitimize_tls_address (symbol
, model
, true);
16539 tmp
= legitimize_pe_coff_symbol (symbol
, true);
16543 tmp
= force_operand (tmp
, NULL
);
16544 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
16545 op0
, 1, OPTAB_DIRECT
);
16548 op1
= convert_to_mode (mode
, tmp
, 1);
16552 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16553 && symbolic_operand (op1
, mode
))
16555 if (TARGET_MACHO
&& !TARGET_64BIT
)
16558 /* dynamic-no-pic */
16559 if (MACHOPIC_INDIRECT
)
16561 rtx temp
= ((reload_in_progress
16562 || ((op0
&& REG_P (op0
))
16564 ? op0
: gen_reg_rtx (Pmode
));
16565 op1
= machopic_indirect_data_reference (op1
, temp
);
16567 op1
= machopic_legitimize_pic_address (op1
, mode
,
16568 temp
== op1
? 0 : temp
);
16570 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16572 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16576 if (GET_CODE (op0
) == MEM
)
16577 op1
= force_reg (Pmode
, op1
);
16581 if (GET_CODE (temp
) != REG
)
16582 temp
= gen_reg_rtx (Pmode
);
16583 temp
= legitimize_pic_address (op1
, temp
);
16588 /* dynamic-no-pic */
16594 op1
= force_reg (mode
, op1
);
16595 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16597 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16598 op1
= legitimize_pic_address (op1
, reg
);
16601 op1
= convert_to_mode (mode
, op1
, 1);
16608 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16609 || !push_operand (op0
, mode
))
16611 op1
= force_reg (mode
, op1
);
16613 if (push_operand (op0
, mode
)
16614 && ! general_no_elim_operand (op1
, mode
))
16615 op1
= copy_to_mode_reg (mode
, op1
);
16617 /* Force large constants in 64bit compilation into register
16618 to get them CSEed. */
16619 if (can_create_pseudo_p ()
16620 && (mode
== DImode
) && TARGET_64BIT
16621 && immediate_operand (op1
, mode
)
16622 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16623 && !register_operand (op0
, mode
)
16625 op1
= copy_to_mode_reg (mode
, op1
);
16627 if (can_create_pseudo_p ()
16628 && FLOAT_MODE_P (mode
)
16629 && GET_CODE (op1
) == CONST_DOUBLE
)
16631 /* If we are loading a floating point constant to a register,
16632 force the value to memory now, since we'll get better code
16633 out the back end. */
16635 op1
= validize_mem (force_const_mem (mode
, op1
));
16636 if (!register_operand (op0
, mode
))
16638 rtx temp
= gen_reg_rtx (mode
);
16639 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16640 emit_move_insn (op0
, temp
);
16646 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16650 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16652 rtx op0
= operands
[0], op1
= operands
[1];
16653 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16655 /* Force constants other than zero into memory. We do not know how
16656 the instructions used to build constants modify the upper 64 bits
16657 of the register, once we have that information we may be able
16658 to handle some of them more efficiently. */
16659 if (can_create_pseudo_p ()
16660 && register_operand (op0
, mode
)
16661 && (CONSTANT_P (op1
)
16662 || (GET_CODE (op1
) == SUBREG
16663 && CONSTANT_P (SUBREG_REG (op1
))))
16664 && !standard_sse_constant_p (op1
))
16665 op1
= validize_mem (force_const_mem (mode
, op1
));
16667 /* We need to check memory alignment for SSE mode since attribute
16668 can make operands unaligned. */
16669 if (can_create_pseudo_p ()
16670 && SSE_REG_MODE_P (mode
)
16671 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16672 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16676 /* ix86_expand_vector_move_misalign() does not like constants ... */
16677 if (CONSTANT_P (op1
)
16678 || (GET_CODE (op1
) == SUBREG
16679 && CONSTANT_P (SUBREG_REG (op1
))))
16680 op1
= validize_mem (force_const_mem (mode
, op1
));
16682 /* ... nor both arguments in memory. */
16683 if (!register_operand (op0
, mode
)
16684 && !register_operand (op1
, mode
))
16685 op1
= force_reg (mode
, op1
);
16687 tmp
[0] = op0
; tmp
[1] = op1
;
16688 ix86_expand_vector_move_misalign (mode
, tmp
);
16692 /* Make operand1 a register if it isn't already. */
16693 if (can_create_pseudo_p ()
16694 && !register_operand (op0
, mode
)
16695 && !register_operand (op1
, mode
))
16697 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16701 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16704 /* Split 32-byte AVX unaligned load and store if needed. */
16707 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16710 rtx (*extract
) (rtx
, rtx
, rtx
);
16711 rtx (*load_unaligned
) (rtx
, rtx
);
16712 rtx (*store_unaligned
) (rtx
, rtx
);
16713 enum machine_mode mode
;
16715 switch (GET_MODE (op0
))
16718 gcc_unreachable ();
16720 extract
= gen_avx_vextractf128v32qi
;
16721 load_unaligned
= gen_avx_loaddquv32qi
;
16722 store_unaligned
= gen_avx_storedquv32qi
;
16726 extract
= gen_avx_vextractf128v8sf
;
16727 load_unaligned
= gen_avx_loadups256
;
16728 store_unaligned
= gen_avx_storeups256
;
16732 extract
= gen_avx_vextractf128v4df
;
16733 load_unaligned
= gen_avx_loadupd256
;
16734 store_unaligned
= gen_avx_storeupd256
;
16741 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16743 rtx r
= gen_reg_rtx (mode
);
16744 m
= adjust_address (op1
, mode
, 0);
16745 emit_move_insn (r
, m
);
16746 m
= adjust_address (op1
, mode
, 16);
16747 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16748 emit_move_insn (op0
, r
);
16750 /* Normal *mov<mode>_internal pattern will handle
16751 unaligned loads just fine if misaligned_operand
16752 is true, and without the UNSPEC it can be combined
16753 with arithmetic instructions. */
16754 else if (misaligned_operand (op1
, GET_MODE (op1
)))
16755 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16757 emit_insn (load_unaligned (op0
, op1
));
16759 else if (MEM_P (op0
))
16761 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16763 m
= adjust_address (op0
, mode
, 0);
16764 emit_insn (extract (m
, op1
, const0_rtx
));
16765 m
= adjust_address (op0
, mode
, 16);
16766 emit_insn (extract (m
, op1
, const1_rtx
));
16769 emit_insn (store_unaligned (op0
, op1
));
16772 gcc_unreachable ();
16775 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16776 straight to ix86_expand_vector_move. */
16777 /* Code generation for scalar reg-reg moves of single and double precision data:
16778 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16782 if (x86_sse_partial_reg_dependency == true)
16787 Code generation for scalar loads of double precision data:
16788 if (x86_sse_split_regs == true)
16789 movlpd mem, reg (gas syntax)
16793 Code generation for unaligned packed loads of single precision data
16794 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16795 if (x86_sse_unaligned_move_optimal)
16798 if (x86_sse_partial_reg_dependency == true)
16810 Code generation for unaligned packed loads of double precision data
16811 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16812 if (x86_sse_unaligned_move_optimal)
16815 if (x86_sse_split_regs == true)
16828 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16830 rtx op0
, op1
, orig_op0
= NULL_RTX
, m
;
16831 rtx (*load_unaligned
) (rtx
, rtx
);
16832 rtx (*store_unaligned
) (rtx
, rtx
);
16837 if (GET_MODE_SIZE (mode
) == 64)
16839 switch (GET_MODE_CLASS (mode
))
16841 case MODE_VECTOR_INT
:
16843 if (GET_MODE (op0
) != V16SImode
)
16848 op0
= gen_reg_rtx (V16SImode
);
16851 op0
= gen_lowpart (V16SImode
, op0
);
16853 op1
= gen_lowpart (V16SImode
, op1
);
16856 case MODE_VECTOR_FLOAT
:
16857 switch (GET_MODE (op0
))
16860 gcc_unreachable ();
16862 load_unaligned
= gen_avx512f_loaddquv16si
;
16863 store_unaligned
= gen_avx512f_storedquv16si
;
16866 load_unaligned
= gen_avx512f_loadups512
;
16867 store_unaligned
= gen_avx512f_storeups512
;
16870 load_unaligned
= gen_avx512f_loadupd512
;
16871 store_unaligned
= gen_avx512f_storeupd512
;
16876 emit_insn (load_unaligned (op0
, op1
));
16877 else if (MEM_P (op0
))
16878 emit_insn (store_unaligned (op0
, op1
));
16880 gcc_unreachable ();
16882 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16886 gcc_unreachable ();
16893 && GET_MODE_SIZE (mode
) == 32)
16895 switch (GET_MODE_CLASS (mode
))
16897 case MODE_VECTOR_INT
:
16899 if (GET_MODE (op0
) != V32QImode
)
16904 op0
= gen_reg_rtx (V32QImode
);
16907 op0
= gen_lowpart (V32QImode
, op0
);
16909 op1
= gen_lowpart (V32QImode
, op1
);
16912 case MODE_VECTOR_FLOAT
:
16913 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16915 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16919 gcc_unreachable ();
16927 /* Normal *mov<mode>_internal pattern will handle
16928 unaligned loads just fine if misaligned_operand
16929 is true, and without the UNSPEC it can be combined
16930 with arithmetic instructions. */
16932 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
16933 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
16934 && misaligned_operand (op1
, GET_MODE (op1
)))
16935 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16936 /* ??? If we have typed data, then it would appear that using
16937 movdqu is the only way to get unaligned data loaded with
16939 else if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16941 if (GET_MODE (op0
) != V16QImode
)
16944 op0
= gen_reg_rtx (V16QImode
);
16946 op1
= gen_lowpart (V16QImode
, op1
);
16947 /* We will eventually emit movups based on insn attributes. */
16948 emit_insn (gen_sse2_loaddquv16qi (op0
, op1
));
16950 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
16952 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16957 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16958 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16959 || optimize_insn_for_size_p ())
16961 /* We will eventually emit movups based on insn attributes. */
16962 emit_insn (gen_sse2_loadupd (op0
, op1
));
16966 /* When SSE registers are split into halves, we can avoid
16967 writing to the top half twice. */
16968 if (TARGET_SSE_SPLIT_REGS
)
16970 emit_clobber (op0
);
16975 /* ??? Not sure about the best option for the Intel chips.
16976 The following would seem to satisfy; the register is
16977 entirely cleared, breaking the dependency chain. We
16978 then store to the upper half, with a dependency depth
16979 of one. A rumor has it that Intel recommends two movsd
16980 followed by an unpacklpd, but this is unconfirmed. And
16981 given that the dependency depth of the unpacklpd would
16982 still be one, I'm not sure why this would be better. */
16983 zero
= CONST0_RTX (V2DFmode
);
16986 m
= adjust_address (op1
, DFmode
, 0);
16987 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16988 m
= adjust_address (op1
, DFmode
, 8);
16989 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16996 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16997 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16998 || optimize_insn_for_size_p ())
17000 if (GET_MODE (op0
) != V4SFmode
)
17003 op0
= gen_reg_rtx (V4SFmode
);
17005 op1
= gen_lowpart (V4SFmode
, op1
);
17006 emit_insn (gen_sse_loadups (op0
, op1
));
17008 emit_move_insn (orig_op0
,
17009 gen_lowpart (GET_MODE (orig_op0
), op0
));
17013 if (mode
!= V4SFmode
)
17014 t
= gen_reg_rtx (V4SFmode
);
17018 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
17019 emit_move_insn (t
, CONST0_RTX (V4SFmode
));
17023 m
= adjust_address (op1
, V2SFmode
, 0);
17024 emit_insn (gen_sse_loadlps (t
, t
, m
));
17025 m
= adjust_address (op1
, V2SFmode
, 8);
17026 emit_insn (gen_sse_loadhps (t
, t
, m
));
17027 if (mode
!= V4SFmode
)
17028 emit_move_insn (op0
, gen_lowpart (mode
, t
));
17031 else if (MEM_P (op0
))
17033 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
17035 op0
= gen_lowpart (V16QImode
, op0
);
17036 op1
= gen_lowpart (V16QImode
, op1
);
17037 /* We will eventually emit movups based on insn attributes. */
17038 emit_insn (gen_sse2_storedquv16qi (op0
, op1
));
17040 else if (TARGET_SSE2
&& mode
== V2DFmode
)
17043 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17044 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17045 || optimize_insn_for_size_p ())
17046 /* We will eventually emit movups based on insn attributes. */
17047 emit_insn (gen_sse2_storeupd (op0
, op1
));
17050 m
= adjust_address (op0
, DFmode
, 0);
17051 emit_insn (gen_sse2_storelpd (m
, op1
));
17052 m
= adjust_address (op0
, DFmode
, 8);
17053 emit_insn (gen_sse2_storehpd (m
, op1
));
17058 if (mode
!= V4SFmode
)
17059 op1
= gen_lowpart (V4SFmode
, op1
);
17062 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17063 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17064 || optimize_insn_for_size_p ())
17066 op0
= gen_lowpart (V4SFmode
, op0
);
17067 emit_insn (gen_sse_storeups (op0
, op1
));
17071 m
= adjust_address (op0
, V2SFmode
, 0);
17072 emit_insn (gen_sse_storelps (m
, op1
));
17073 m
= adjust_address (op0
, V2SFmode
, 8);
17074 emit_insn (gen_sse_storehps (m
, op1
));
17079 gcc_unreachable ();
17082 /* Expand a push in MODE. This is some mode for which we do not support
17083 proper push instructions, at least from the registers that we expect
17084 the value to live in. */
17087 ix86_expand_push (enum machine_mode mode
, rtx x
)
17091 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
17092 GEN_INT (-GET_MODE_SIZE (mode
)),
17093 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
17094 if (tmp
!= stack_pointer_rtx
)
17095 emit_move_insn (stack_pointer_rtx
, tmp
);
17097 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
17099 /* When we push an operand onto stack, it has to be aligned at least
17100 at the function argument boundary. However since we don't have
17101 the argument type, we can't determine the actual argument
17103 emit_move_insn (tmp
, x
);
17106 /* Helper function of ix86_fixup_binary_operands to canonicalize
17107 operand order. Returns true if the operands should be swapped. */
17110 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
17113 rtx dst
= operands
[0];
17114 rtx src1
= operands
[1];
17115 rtx src2
= operands
[2];
17117 /* If the operation is not commutative, we can't do anything. */
17118 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
17121 /* Highest priority is that src1 should match dst. */
17122 if (rtx_equal_p (dst
, src1
))
17124 if (rtx_equal_p (dst
, src2
))
17127 /* Next highest priority is that immediate constants come second. */
17128 if (immediate_operand (src2
, mode
))
17130 if (immediate_operand (src1
, mode
))
17133 /* Lowest priority is that memory references should come second. */
17143 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17144 destination to use for the operation. If different from the true
17145 destination in operands[0], a copy operation will be required. */
17148 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
17151 rtx dst
= operands
[0];
17152 rtx src1
= operands
[1];
17153 rtx src2
= operands
[2];
17155 /* Canonicalize operand order. */
17156 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17160 /* It is invalid to swap operands of different modes. */
17161 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
17168 /* Both source operands cannot be in memory. */
17169 if (MEM_P (src1
) && MEM_P (src2
))
17171 /* Optimization: Only read from memory once. */
17172 if (rtx_equal_p (src1
, src2
))
17174 src2
= force_reg (mode
, src2
);
17177 else if (rtx_equal_p (dst
, src1
))
17178 src2
= force_reg (mode
, src2
);
17180 src1
= force_reg (mode
, src1
);
17183 /* If the destination is memory, and we do not have matching source
17184 operands, do things in registers. */
17185 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17186 dst
= gen_reg_rtx (mode
);
17188 /* Source 1 cannot be a constant. */
17189 if (CONSTANT_P (src1
))
17190 src1
= force_reg (mode
, src1
);
17192 /* Source 1 cannot be a non-matching memory. */
17193 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17194 src1
= force_reg (mode
, src1
);
17196 /* Improve address combine. */
17198 && GET_MODE_CLASS (mode
) == MODE_INT
17200 src2
= force_reg (mode
, src2
);
17202 operands
[1] = src1
;
17203 operands
[2] = src2
;
17207 /* Similarly, but assume that the destination has already been
17208 set up properly. */
17211 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
17212 enum machine_mode mode
, rtx operands
[])
17214 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17215 gcc_assert (dst
== operands
[0]);
17218 /* Attempt to expand a binary operator. Make the expansion closer to the
17219 actual machine, then just general_operand, which will allow 3 separate
17220 memory references (one output, two input) in a single insn. */
17223 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
17226 rtx src1
, src2
, dst
, op
, clob
;
17228 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
17229 src1
= operands
[1];
17230 src2
= operands
[2];
17232 /* Emit the instruction. */
17234 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
17235 if (reload_in_progress
)
17237 /* Reload doesn't know about the flags register, and doesn't know that
17238 it doesn't want to clobber it. We can only do this with PLUS. */
17239 gcc_assert (code
== PLUS
);
17242 else if (reload_completed
17244 && !rtx_equal_p (dst
, src1
))
17246 /* This is going to be an LEA; avoid splitting it later. */
17251 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17252 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17255 /* Fix up the destination if needed. */
17256 if (dst
!= operands
[0])
17257 emit_move_insn (operands
[0], dst
);
17260 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17261 the given OPERANDS. */
17264 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
17267 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
17268 if (GET_CODE (operands
[1]) == SUBREG
)
17273 else if (GET_CODE (operands
[2]) == SUBREG
)
17278 /* Optimize (__m128i) d | (__m128i) e and similar code
17279 when d and e are float vectors into float vector logical
17280 insn. In C/C++ without using intrinsics there is no other way
17281 to express vector logical operation on float vectors than
17282 to cast them temporarily to integer vectors. */
17284 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17285 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
17286 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
17287 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
17288 && SUBREG_BYTE (op1
) == 0
17289 && (GET_CODE (op2
) == CONST_VECTOR
17290 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
17291 && SUBREG_BYTE (op2
) == 0))
17292 && can_create_pseudo_p ())
17295 switch (GET_MODE (SUBREG_REG (op1
)))
17301 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
17302 if (GET_CODE (op2
) == CONST_VECTOR
)
17304 op2
= gen_lowpart (GET_MODE (dst
), op2
);
17305 op2
= force_reg (GET_MODE (dst
), op2
);
17310 op2
= SUBREG_REG (operands
[2]);
17311 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
17312 op2
= force_reg (GET_MODE (dst
), op2
);
17314 op1
= SUBREG_REG (op1
);
17315 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
17316 op1
= force_reg (GET_MODE (dst
), op1
);
17317 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
17318 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
17320 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
17326 if (!nonimmediate_operand (operands
[1], mode
))
17327 operands
[1] = force_reg (mode
, operands
[1]);
17328 if (!nonimmediate_operand (operands
[2], mode
))
17329 operands
[2] = force_reg (mode
, operands
[2]);
17330 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
17331 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
17332 gen_rtx_fmt_ee (code
, mode
, operands
[1],
17336 /* Return TRUE or FALSE depending on whether the binary operator meets the
17337 appropriate constraints. */
17340 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
17343 rtx dst
= operands
[0];
17344 rtx src1
= operands
[1];
17345 rtx src2
= operands
[2];
17347 /* Both source operands cannot be in memory. */
17348 if (MEM_P (src1
) && MEM_P (src2
))
17351 /* Canonicalize operand order for commutative operators. */
17352 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
17359 /* If the destination is memory, we must have a matching source operand. */
17360 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
17363 /* Source 1 cannot be a constant. */
17364 if (CONSTANT_P (src1
))
17367 /* Source 1 cannot be a non-matching memory. */
17368 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
17369 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17370 return (code
== AND
17373 || (TARGET_64BIT
&& mode
== DImode
))
17374 && satisfies_constraint_L (src2
));
17379 /* Attempt to expand a unary operator. Make the expansion closer to the
17380 actual machine, then just general_operand, which will allow 2 separate
17381 memory references (one output, one input) in a single insn. */
17384 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
17387 int matching_memory
;
17388 rtx src
, dst
, op
, clob
;
17393 /* If the destination is memory, and we do not have matching source
17394 operands, do things in registers. */
17395 matching_memory
= 0;
17398 if (rtx_equal_p (dst
, src
))
17399 matching_memory
= 1;
17401 dst
= gen_reg_rtx (mode
);
17404 /* When source operand is memory, destination must match. */
17405 if (MEM_P (src
) && !matching_memory
)
17406 src
= force_reg (mode
, src
);
17408 /* Emit the instruction. */
17410 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
17411 if (reload_in_progress
|| code
== NOT
)
17413 /* Reload doesn't know about the flags register, and doesn't know that
17414 it doesn't want to clobber it. */
17415 gcc_assert (code
== NOT
);
17420 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17421 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17424 /* Fix up the destination if needed. */
17425 if (dst
!= operands
[0])
17426 emit_move_insn (operands
[0], dst
);
17429 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17430 divisor are within the range [0-255]. */
17433 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
17436 rtx end_label
, qimode_label
;
17437 rtx insn
, div
, mod
;
17438 rtx scratch
, tmp0
, tmp1
, tmp2
;
17439 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
17440 rtx (*gen_zero_extend
) (rtx
, rtx
);
17441 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
17446 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
17447 gen_test_ccno_1
= gen_testsi_ccno_1
;
17448 gen_zero_extend
= gen_zero_extendqisi2
;
17451 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
17452 gen_test_ccno_1
= gen_testdi_ccno_1
;
17453 gen_zero_extend
= gen_zero_extendqidi2
;
17456 gcc_unreachable ();
17459 end_label
= gen_label_rtx ();
17460 qimode_label
= gen_label_rtx ();
17462 scratch
= gen_reg_rtx (mode
);
17464 /* Use 8bit unsigned divimod if dividend and divisor are within
17465 the range [0-255]. */
17466 emit_move_insn (scratch
, operands
[2]);
17467 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
17468 scratch
, 1, OPTAB_DIRECT
);
17469 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
17470 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
17471 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
17472 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
17473 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
17475 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
17476 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
17477 JUMP_LABEL (insn
) = qimode_label
;
17479 /* Generate original signed/unsigned divimod. */
17480 div
= gen_divmod4_1 (operands
[0], operands
[1],
17481 operands
[2], operands
[3]);
17484 /* Branch to the end. */
17485 emit_jump_insn (gen_jump (end_label
));
17488 /* Generate 8bit unsigned divide. */
17489 emit_label (qimode_label
);
17490 /* Don't use operands[0] for result of 8bit divide since not all
17491 registers support QImode ZERO_EXTRACT. */
17492 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
17493 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
17494 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
17495 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
17499 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
17500 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
17504 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
17505 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
17508 /* Extract remainder from AH. */
17509 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
17510 if (REG_P (operands
[1]))
17511 insn
= emit_move_insn (operands
[1], tmp1
);
17514 /* Need a new scratch register since the old one has result
17516 scratch
= gen_reg_rtx (mode
);
17517 emit_move_insn (scratch
, tmp1
);
17518 insn
= emit_move_insn (operands
[1], scratch
);
17520 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
17522 /* Zero extend quotient from AL. */
17523 tmp1
= gen_lowpart (QImode
, tmp0
);
17524 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
17525 set_unique_reg_note (insn
, REG_EQUAL
, div
);
17527 emit_label (end_label
);
17530 /* Whether it is OK to emit CFI directives when emitting asm code. */
17535 return dwarf2out_do_cfi_asm ();
17538 #define LEA_MAX_STALL (3)
17539 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17541 /* Increase given DISTANCE in half-cycles according to
17542 dependencies between PREV and NEXT instructions.
17543 Add 1 half-cycle if there is no dependency and
17544 go to next cycle if there is some dependecy. */
17546 static unsigned int
17547 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
17552 if (!prev
|| !next
)
17553 return distance
+ (distance
& 1) + 2;
17555 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
17556 return distance
+ 1;
17558 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
17559 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
17560 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
17561 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
17562 return distance
+ (distance
& 1) + 2;
17564 return distance
+ 1;
17567 /* Function checks if instruction INSN defines register number
17568 REGNO1 or REGNO2. */
17571 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
17576 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
17577 if (DF_REF_REG_DEF_P (*def_rec
)
17578 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
17579 && (regno1
== DF_REF_REGNO (*def_rec
)
17580 || regno2
== DF_REF_REGNO (*def_rec
)))
17588 /* Function checks if instruction INSN uses register number
17589 REGNO as a part of address expression. */
17592 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
17596 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
17597 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
17603 /* Search backward for non-agu definition of register number REGNO1
17604 or register number REGNO2 in basic block starting from instruction
17605 START up to head of basic block or instruction INSN.
17607 Function puts true value into *FOUND var if definition was found
17608 and false otherwise.
17610 Distance in half-cycles between START and found instruction or head
17611 of BB is added to DISTANCE and returned. */
17614 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
17615 rtx insn
, int distance
,
17616 rtx start
, bool *found
)
17618 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17626 && distance
< LEA_SEARCH_THRESHOLD
)
17628 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
17630 distance
= increase_distance (prev
, next
, distance
);
17631 if (insn_defines_reg (regno1
, regno2
, prev
))
17633 if (recog_memoized (prev
) < 0
17634 || get_attr_type (prev
) != TYPE_LEA
)
17643 if (prev
== BB_HEAD (bb
))
17646 prev
= PREV_INSN (prev
);
17652 /* Search backward for non-agu definition of register number REGNO1
17653 or register number REGNO2 in INSN's basic block until
17654 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17655 2. Reach neighbour BBs boundary, or
17656 3. Reach agu definition.
17657 Returns the distance between the non-agu definition point and INSN.
17658 If no definition point, returns -1. */
17661 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
17664 basic_block bb
= BLOCK_FOR_INSN (insn
);
17666 bool found
= false;
17668 if (insn
!= BB_HEAD (bb
))
17669 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17670 distance
, PREV_INSN (insn
),
17673 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17677 bool simple_loop
= false;
17679 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17682 simple_loop
= true;
17687 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17689 BB_END (bb
), &found
);
17692 int shortest_dist
= -1;
17693 bool found_in_bb
= false;
17695 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17698 = distance_non_agu_define_in_bb (regno1
, regno2
,
17704 if (shortest_dist
< 0)
17705 shortest_dist
= bb_dist
;
17706 else if (bb_dist
> 0)
17707 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17713 distance
= shortest_dist
;
17717 /* get_attr_type may modify recog data. We want to make sure
17718 that recog data is valid for instruction INSN, on which
17719 distance_non_agu_define is called. INSN is unchanged here. */
17720 extract_insn_cached (insn
);
17725 return distance
>> 1;
17728 /* Return the distance in half-cycles between INSN and the next
17729 insn that uses register number REGNO in memory address added
17730 to DISTANCE. Return -1 if REGNO0 is set.
17732 Put true value into *FOUND if register usage was found and
17734 Put true value into *REDEFINED if register redefinition was
17735 found and false otherwise. */
17738 distance_agu_use_in_bb (unsigned int regno
,
17739 rtx insn
, int distance
, rtx start
,
17740 bool *found
, bool *redefined
)
17742 basic_block bb
= NULL
;
17747 *redefined
= false;
17749 if (start
!= NULL_RTX
)
17751 bb
= BLOCK_FOR_INSN (start
);
17752 if (start
!= BB_HEAD (bb
))
17753 /* If insn and start belong to the same bb, set prev to insn,
17754 so the call to increase_distance will increase the distance
17755 between insns by 1. */
17761 && distance
< LEA_SEARCH_THRESHOLD
)
17763 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17765 distance
= increase_distance(prev
, next
, distance
);
17766 if (insn_uses_reg_mem (regno
, next
))
17768 /* Return DISTANCE if OP0 is used in memory
17769 address in NEXT. */
17774 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17776 /* Return -1 if OP0 is set in NEXT. */
17784 if (next
== BB_END (bb
))
17787 next
= NEXT_INSN (next
);
17793 /* Return the distance between INSN and the next insn that uses
17794 register number REGNO0 in memory address. Return -1 if no such
17795 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17798 distance_agu_use (unsigned int regno0
, rtx insn
)
17800 basic_block bb
= BLOCK_FOR_INSN (insn
);
17802 bool found
= false;
17803 bool redefined
= false;
17805 if (insn
!= BB_END (bb
))
17806 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17808 &found
, &redefined
);
17810 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17814 bool simple_loop
= false;
17816 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17819 simple_loop
= true;
17824 distance
= distance_agu_use_in_bb (regno0
, insn
,
17825 distance
, BB_HEAD (bb
),
17826 &found
, &redefined
);
17829 int shortest_dist
= -1;
17830 bool found_in_bb
= false;
17831 bool redefined_in_bb
= false;
17833 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17836 = distance_agu_use_in_bb (regno0
, insn
,
17837 distance
, BB_HEAD (e
->dest
),
17838 &found_in_bb
, &redefined_in_bb
);
17841 if (shortest_dist
< 0)
17842 shortest_dist
= bb_dist
;
17843 else if (bb_dist
> 0)
17844 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17850 distance
= shortest_dist
;
17854 if (!found
|| redefined
)
17857 return distance
>> 1;
17860 /* Define this macro to tune LEA priority vs ADD, it take effect when
17861 there is a dilemma of choicing LEA or ADD
17862 Negative value: ADD is more preferred than LEA
17864 Positive value: LEA is more preferred than ADD*/
17865 #define IX86_LEA_PRIORITY 0
17867 /* Return true if usage of lea INSN has performance advantage
17868 over a sequence of instructions. Instructions sequence has
17869 SPLIT_COST cycles higher latency than lea latency. */
17872 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17873 unsigned int regno2
, int split_cost
, bool has_scale
)
17875 int dist_define
, dist_use
;
17877 /* For Silvermont if using a 2-source or 3-source LEA for
17878 non-destructive destination purposes, or due to wanting
17879 ability to use SCALE, the use of LEA is justified. */
17880 if (ix86_tune
== PROCESSOR_SLM
)
17884 if (split_cost
< 1)
17886 if (regno0
== regno1
|| regno0
== regno2
)
17891 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17892 dist_use
= distance_agu_use (regno0
, insn
);
17894 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17896 /* If there is no non AGU operand definition, no AGU
17897 operand usage and split cost is 0 then both lea
17898 and non lea variants have same priority. Currently
17899 we prefer lea for 64 bit code and non lea on 32 bit
17901 if (dist_use
< 0 && split_cost
== 0)
17902 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17907 /* With longer definitions distance lea is more preferable.
17908 Here we change it to take into account splitting cost and
17910 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17912 /* If there is no use in memory addess then we just check
17913 that split cost exceeds AGU stall. */
17915 return dist_define
> LEA_MAX_STALL
;
17917 /* If this insn has both backward non-agu dependence and forward
17918 agu dependence, the one with short distance takes effect. */
17919 return dist_define
>= dist_use
;
17922 /* Return true if it is legal to clobber flags by INSN and
17923 false otherwise. */
17926 ix86_ok_to_clobber_flags (rtx insn
)
17928 basic_block bb
= BLOCK_FOR_INSN (insn
);
17934 if (NONDEBUG_INSN_P (insn
))
17936 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17937 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17940 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17944 if (insn
== BB_END (bb
))
17947 insn
= NEXT_INSN (insn
);
17950 live
= df_get_live_out(bb
);
17951 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17954 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17955 move and add to avoid AGU stalls. */
17958 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17960 unsigned int regno0
, regno1
, regno2
;
17962 /* Check if we need to optimize. */
17963 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17966 /* Check it is correct to split here. */
17967 if (!ix86_ok_to_clobber_flags(insn
))
17970 regno0
= true_regnum (operands
[0]);
17971 regno1
= true_regnum (operands
[1]);
17972 regno2
= true_regnum (operands
[2]);
17974 /* We need to split only adds with non destructive
17975 destination operand. */
17976 if (regno0
== regno1
|| regno0
== regno2
)
17979 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
17982 /* Return true if we should emit lea instruction instead of mov
17986 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17988 unsigned int regno0
, regno1
;
17990 /* Check if we need to optimize. */
17991 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17994 /* Use lea for reg to reg moves only. */
17995 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17998 regno0
= true_regnum (operands
[0]);
17999 regno1
= true_regnum (operands
[1]);
18001 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
18004 /* Return true if we need to split lea into a sequence of
18005 instructions to avoid AGU stalls. */
18008 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
18010 unsigned int regno0
, regno1
, regno2
;
18012 struct ix86_address parts
;
18015 /* Check we need to optimize. */
18016 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
18019 /* Check it is correct to split here. */
18020 if (!ix86_ok_to_clobber_flags(insn
))
18023 ok
= ix86_decompose_address (operands
[1], &parts
);
18026 /* There should be at least two components in the address. */
18027 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
18028 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
18031 /* We should not split into add if non legitimate pic
18032 operand is used as displacement. */
18033 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
18036 regno0
= true_regnum (operands
[0]) ;
18037 regno1
= INVALID_REGNUM
;
18038 regno2
= INVALID_REGNUM
;
18041 regno1
= true_regnum (parts
.base
);
18043 regno2
= true_regnum (parts
.index
);
18047 /* Compute how many cycles we will add to execution time
18048 if split lea into a sequence of instructions. */
18049 if (parts
.base
|| parts
.index
)
18051 /* Have to use mov instruction if non desctructive
18052 destination form is used. */
18053 if (regno1
!= regno0
&& regno2
!= regno0
)
18056 /* Have to add index to base if both exist. */
18057 if (parts
.base
&& parts
.index
)
18060 /* Have to use shift and adds if scale is 2 or greater. */
18061 if (parts
.scale
> 1)
18063 if (regno0
!= regno1
)
18065 else if (regno2
== regno0
)
18068 split_cost
+= parts
.scale
;
18071 /* Have to use add instruction with immediate if
18072 disp is non zero. */
18073 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18076 /* Subtract the price of lea. */
18080 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
18084 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18085 matches destination. RTX includes clobber of FLAGS_REG. */
18088 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
18093 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
18094 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18096 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
18099 /* Return true if regno1 def is nearest to the insn. */
18102 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
18105 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
18109 while (prev
&& prev
!= start
)
18111 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
18113 prev
= PREV_INSN (prev
);
18116 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
18118 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
18120 prev
= PREV_INSN (prev
);
18123 /* None of the regs is defined in the bb. */
18127 /* Split lea instructions into a sequence of instructions
18128 which are executed on ALU to avoid AGU stalls.
18129 It is assumed that it is allowed to clobber flags register
18130 at lea position. */
18133 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
18135 unsigned int regno0
, regno1
, regno2
;
18136 struct ix86_address parts
;
18140 ok
= ix86_decompose_address (operands
[1], &parts
);
18143 target
= gen_lowpart (mode
, operands
[0]);
18145 regno0
= true_regnum (target
);
18146 regno1
= INVALID_REGNUM
;
18147 regno2
= INVALID_REGNUM
;
18151 parts
.base
= gen_lowpart (mode
, parts
.base
);
18152 regno1
= true_regnum (parts
.base
);
18157 parts
.index
= gen_lowpart (mode
, parts
.index
);
18158 regno2
= true_regnum (parts
.index
);
18162 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
18164 if (parts
.scale
> 1)
18166 /* Case r1 = r1 + ... */
18167 if (regno1
== regno0
)
18169 /* If we have a case r1 = r1 + C * r1 then we
18170 should use multiplication which is very
18171 expensive. Assume cost model is wrong if we
18172 have such case here. */
18173 gcc_assert (regno2
!= regno0
);
18175 for (adds
= parts
.scale
; adds
> 0; adds
--)
18176 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
18180 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18181 if (regno0
!= regno2
)
18182 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18184 /* Use shift for scaling. */
18185 ix86_emit_binop (ASHIFT
, mode
, target
,
18186 GEN_INT (exact_log2 (parts
.scale
)));
18189 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
18191 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18192 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18195 else if (!parts
.base
&& !parts
.index
)
18197 gcc_assert(parts
.disp
);
18198 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
18204 if (regno0
!= regno2
)
18205 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
18207 else if (!parts
.index
)
18209 if (regno0
!= regno1
)
18210 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
18214 if (regno0
== regno1
)
18216 else if (regno0
== regno2
)
18222 /* Find better operand for SET instruction, depending
18223 on which definition is farther from the insn. */
18224 if (find_nearest_reg_def (insn
, regno1
, regno2
))
18225 tmp
= parts
.index
, tmp1
= parts
.base
;
18227 tmp
= parts
.base
, tmp1
= parts
.index
;
18229 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
18231 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18232 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18234 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
18238 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
18241 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
18242 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
18246 /* Return true if it is ok to optimize an ADD operation to LEA
18247 operation to avoid flag register consumation. For most processors,
18248 ADD is faster than LEA. For the processors like ATOM, if the
18249 destination register of LEA holds an actual address which will be
18250 used soon, LEA is better and otherwise ADD is better. */
18253 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
18255 unsigned int regno0
= true_regnum (operands
[0]);
18256 unsigned int regno1
= true_regnum (operands
[1]);
18257 unsigned int regno2
= true_regnum (operands
[2]);
18259 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18260 if (regno0
!= regno1
&& regno0
!= regno2
)
18263 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
18266 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
18269 /* Return true if destination reg of SET_BODY is shift count of
18273 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
18279 /* Retrieve destination of SET_BODY. */
18280 switch (GET_CODE (set_body
))
18283 set_dest
= SET_DEST (set_body
);
18284 if (!set_dest
|| !REG_P (set_dest
))
18288 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
18289 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
18297 /* Retrieve shift count of USE_BODY. */
18298 switch (GET_CODE (use_body
))
18301 shift_rtx
= XEXP (use_body
, 1);
18304 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
18305 if (ix86_dep_by_shift_count_body (set_body
,
18306 XVECEXP (use_body
, 0, i
)))
18314 && (GET_CODE (shift_rtx
) == ASHIFT
18315 || GET_CODE (shift_rtx
) == LSHIFTRT
18316 || GET_CODE (shift_rtx
) == ASHIFTRT
18317 || GET_CODE (shift_rtx
) == ROTATE
18318 || GET_CODE (shift_rtx
) == ROTATERT
))
18320 rtx shift_count
= XEXP (shift_rtx
, 1);
18322 /* Return true if shift count is dest of SET_BODY. */
18323 if (REG_P (shift_count
))
18325 /* Add check since it can be invoked before register
18326 allocation in pre-reload schedule. */
18327 if (reload_completed
18328 && true_regnum (set_dest
) == true_regnum (shift_count
))
18330 else if (REGNO(set_dest
) == REGNO(shift_count
))
18338 /* Return true if destination reg of SET_INSN is shift count of
18342 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
18344 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
18345 PATTERN (use_insn
));
18348 /* Return TRUE or FALSE depending on whether the unary operator meets the
18349 appropriate constraints. */
18352 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
18353 enum machine_mode mode ATTRIBUTE_UNUSED
,
18356 /* If one of operands is memory, source and destination must match. */
18357 if ((MEM_P (operands
[0])
18358 || MEM_P (operands
[1]))
18359 && ! rtx_equal_p (operands
[0], operands
[1]))
18364 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18365 are ok, keeping in mind the possible movddup alternative. */
18368 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
18370 if (MEM_P (operands
[0]))
18371 return rtx_equal_p (operands
[0], operands
[1 + high
]);
18372 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
18373 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
18377 /* Post-reload splitter for converting an SF or DFmode value in an
18378 SSE register into an unsigned SImode. */
18381 ix86_split_convert_uns_si_sse (rtx operands
[])
18383 enum machine_mode vecmode
;
18384 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
18386 large
= operands
[1];
18387 zero_or_two31
= operands
[2];
18388 input
= operands
[3];
18389 two31
= operands
[4];
18390 vecmode
= GET_MODE (large
);
18391 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
18393 /* Load up the value into the low element. We must ensure that the other
18394 elements are valid floats -- zero is the easiest such value. */
18397 if (vecmode
== V4SFmode
)
18398 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
18400 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
18404 input
= gen_rtx_REG (vecmode
, REGNO (input
));
18405 emit_move_insn (value
, CONST0_RTX (vecmode
));
18406 if (vecmode
== V4SFmode
)
18407 emit_insn (gen_sse_movss (value
, value
, input
));
18409 emit_insn (gen_sse2_movsd (value
, value
, input
));
18412 emit_move_insn (large
, two31
);
18413 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
18415 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
18416 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
18418 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
18419 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
18421 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
18422 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
18424 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
18425 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
18427 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
18428 if (vecmode
== V4SFmode
)
18429 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
18431 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
18434 emit_insn (gen_xorv4si3 (value
, value
, large
));
18437 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18438 Expects the 64-bit DImode to be supplied in a pair of integral
18439 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18440 -mfpmath=sse, !optimize_size only. */
18443 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
18445 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
18446 rtx int_xmm
, fp_xmm
;
18447 rtx biases
, exponents
;
18450 int_xmm
= gen_reg_rtx (V4SImode
);
18451 if (TARGET_INTER_UNIT_MOVES_TO_VEC
)
18452 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
18453 else if (TARGET_SSE_SPLIT_REGS
)
18455 emit_clobber (int_xmm
);
18456 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
18460 x
= gen_reg_rtx (V2DImode
);
18461 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
18462 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
18465 x
= gen_rtx_CONST_VECTOR (V4SImode
,
18466 gen_rtvec (4, GEN_INT (0x43300000UL
),
18467 GEN_INT (0x45300000UL
),
18468 const0_rtx
, const0_rtx
));
18469 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
18471 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18472 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
18474 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18475 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18476 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18477 (0x1.0p84 + double(fp_value_hi_xmm)).
18478 Note these exponents differ by 32. */
18480 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
18482 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18483 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18484 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
18485 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
18486 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
18487 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
18488 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
18489 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
18490 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
18492 /* Add the upper and lower DFmode values together. */
18494 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
18497 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
18498 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
18499 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
18502 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
18505 /* Not used, but eases macroization of patterns. */
18507 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
18508 rtx input ATTRIBUTE_UNUSED
)
18510 gcc_unreachable ();
18513 /* Convert an unsigned SImode value into a DFmode. Only currently used
18514 for SSE, but applicable anywhere. */
18517 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
18519 REAL_VALUE_TYPE TWO31r
;
18522 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
18523 NULL
, 1, OPTAB_DIRECT
);
18525 fp
= gen_reg_rtx (DFmode
);
18526 emit_insn (gen_floatsidf2 (fp
, x
));
18528 real_ldexp (&TWO31r
, &dconst1
, 31);
18529 x
= const_double_from_real_value (TWO31r
, DFmode
);
18531 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
18533 emit_move_insn (target
, x
);
18536 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18537 32-bit mode; otherwise we have a direct convert instruction. */
18540 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
18542 REAL_VALUE_TYPE TWO32r
;
18543 rtx fp_lo
, fp_hi
, x
;
18545 fp_lo
= gen_reg_rtx (DFmode
);
18546 fp_hi
= gen_reg_rtx (DFmode
);
18548 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
18550 real_ldexp (&TWO32r
, &dconst1
, 32);
18551 x
= const_double_from_real_value (TWO32r
, DFmode
);
18552 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
18554 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
18556 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18559 emit_move_insn (target
, x
);
18562 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18563 For x86_32, -mfpmath=sse, !optimize_size only. */
18565 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
18567 REAL_VALUE_TYPE ONE16r
;
18568 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
18570 real_ldexp (&ONE16r
, &dconst1
, 16);
18571 x
= const_double_from_real_value (ONE16r
, SFmode
);
18572 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
18573 NULL
, 0, OPTAB_DIRECT
);
18574 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
18575 NULL
, 0, OPTAB_DIRECT
);
18576 fp_hi
= gen_reg_rtx (SFmode
);
18577 fp_lo
= gen_reg_rtx (SFmode
);
18578 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
18579 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
18580 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
18582 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
18584 if (!rtx_equal_p (target
, fp_hi
))
18585 emit_move_insn (target
, fp_hi
);
18588 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18589 a vector of unsigned ints VAL to vector of floats TARGET. */
18592 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
18595 REAL_VALUE_TYPE TWO16r
;
18596 enum machine_mode intmode
= GET_MODE (val
);
18597 enum machine_mode fltmode
= GET_MODE (target
);
18598 rtx (*cvt
) (rtx
, rtx
);
18600 if (intmode
== V4SImode
)
18601 cvt
= gen_floatv4siv4sf2
;
18603 cvt
= gen_floatv8siv8sf2
;
18604 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
18605 tmp
[0] = force_reg (intmode
, tmp
[0]);
18606 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
18608 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
18609 NULL_RTX
, 1, OPTAB_DIRECT
);
18610 tmp
[3] = gen_reg_rtx (fltmode
);
18611 emit_insn (cvt (tmp
[3], tmp
[1]));
18612 tmp
[4] = gen_reg_rtx (fltmode
);
18613 emit_insn (cvt (tmp
[4], tmp
[2]));
18614 real_ldexp (&TWO16r
, &dconst1
, 16);
18615 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
18616 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
18617 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
18619 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
18621 if (tmp
[7] != target
)
18622 emit_move_insn (target
, tmp
[7]);
18625 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18626 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18627 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18628 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18631 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
18633 REAL_VALUE_TYPE TWO31r
;
18634 rtx two31r
, tmp
[4];
18635 enum machine_mode mode
= GET_MODE (val
);
18636 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
18637 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
18638 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
18641 for (i
= 0; i
< 3; i
++)
18642 tmp
[i
] = gen_reg_rtx (mode
);
18643 real_ldexp (&TWO31r
, &dconst1
, 31);
18644 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
18645 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
18646 two31r
= force_reg (mode
, two31r
);
18649 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
18650 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
18651 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
18652 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
18653 default: gcc_unreachable ();
18655 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
18656 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
18657 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
18659 if (intmode
== V4SImode
|| TARGET_AVX2
)
18660 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
18661 gen_lowpart (intmode
, tmp
[0]),
18662 GEN_INT (31), NULL_RTX
, 0,
18666 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
18667 two31
= ix86_build_const_vector (intmode
, 1, two31
);
18668 *xorp
= expand_simple_binop (intmode
, AND
,
18669 gen_lowpart (intmode
, tmp
[0]),
18670 two31
, NULL_RTX
, 0,
18673 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
18677 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18678 then replicate the value for all elements of the vector
18682 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
18686 enum machine_mode scalar_mode
;
18703 n_elt
= GET_MODE_NUNITS (mode
);
18704 v
= rtvec_alloc (n_elt
);
18705 scalar_mode
= GET_MODE_INNER (mode
);
18707 RTVEC_ELT (v
, 0) = value
;
18709 for (i
= 1; i
< n_elt
; ++i
)
18710 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18712 return gen_rtx_CONST_VECTOR (mode
, v
);
18715 gcc_unreachable ();
18719 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18720 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18721 for an SSE register. If VECT is true, then replicate the mask for
18722 all elements of the vector register. If INVERT is true, then create
18723 a mask excluding the sign bit. */
18726 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18728 enum machine_mode vec_mode
, imode
;
18729 HOST_WIDE_INT hi
, lo
;
18734 /* Find the sign bit, sign extended to 2*HWI. */
18742 mode
= GET_MODE_INNER (mode
);
18744 lo
= 0x80000000, hi
= lo
< 0;
18752 mode
= GET_MODE_INNER (mode
);
18754 if (HOST_BITS_PER_WIDE_INT
>= 64)
18755 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18757 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18762 vec_mode
= VOIDmode
;
18763 if (HOST_BITS_PER_WIDE_INT
>= 64)
18766 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18773 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18777 lo
= ~lo
, hi
= ~hi
;
18783 mask
= immed_double_const (lo
, hi
, imode
);
18785 vec
= gen_rtvec (2, v
, mask
);
18786 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18787 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18794 gcc_unreachable ();
18798 lo
= ~lo
, hi
= ~hi
;
18800 /* Force this value into the low part of a fp vector constant. */
18801 mask
= immed_double_const (lo
, hi
, imode
);
18802 mask
= gen_lowpart (mode
, mask
);
18804 if (vec_mode
== VOIDmode
)
18805 return force_reg (mode
, mask
);
18807 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18808 return force_reg (vec_mode
, v
);
18811 /* Generate code for floating point ABS or NEG. */
18814 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18817 rtx mask
, set
, dst
, src
;
18818 bool use_sse
= false;
18819 bool vector_mode
= VECTOR_MODE_P (mode
);
18820 enum machine_mode vmode
= mode
;
18824 else if (mode
== TFmode
)
18826 else if (TARGET_SSE_MATH
)
18828 use_sse
= SSE_FLOAT_MODE_P (mode
);
18829 if (mode
== SFmode
)
18831 else if (mode
== DFmode
)
18835 /* NEG and ABS performed with SSE use bitwise mask operations.
18836 Create the appropriate mask now. */
18838 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18845 set
= gen_rtx_fmt_e (code
, mode
, src
);
18846 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18853 use
= gen_rtx_USE (VOIDmode
, mask
);
18855 par
= gen_rtvec (2, set
, use
);
18858 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18859 par
= gen_rtvec (3, set
, use
, clob
);
18861 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18867 /* Expand a copysign operation. Special case operand 0 being a constant. */
18870 ix86_expand_copysign (rtx operands
[])
18872 enum machine_mode mode
, vmode
;
18873 rtx dest
, op0
, op1
, mask
, nmask
;
18875 dest
= operands
[0];
18879 mode
= GET_MODE (dest
);
18881 if (mode
== SFmode
)
18883 else if (mode
== DFmode
)
18888 if (GET_CODE (op0
) == CONST_DOUBLE
)
18890 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18892 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18893 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18895 if (mode
== SFmode
|| mode
== DFmode
)
18897 if (op0
== CONST0_RTX (mode
))
18898 op0
= CONST0_RTX (vmode
);
18901 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18903 op0
= force_reg (vmode
, v
);
18906 else if (op0
!= CONST0_RTX (mode
))
18907 op0
= force_reg (mode
, op0
);
18909 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18911 if (mode
== SFmode
)
18912 copysign_insn
= gen_copysignsf3_const
;
18913 else if (mode
== DFmode
)
18914 copysign_insn
= gen_copysigndf3_const
;
18916 copysign_insn
= gen_copysigntf3_const
;
18918 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18922 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18924 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18925 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18927 if (mode
== SFmode
)
18928 copysign_insn
= gen_copysignsf3_var
;
18929 else if (mode
== DFmode
)
18930 copysign_insn
= gen_copysigndf3_var
;
18932 copysign_insn
= gen_copysigntf3_var
;
18934 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18938 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18939 be a constant, and so has already been expanded into a vector constant. */
18942 ix86_split_copysign_const (rtx operands
[])
18944 enum machine_mode mode
, vmode
;
18945 rtx dest
, op0
, mask
, x
;
18947 dest
= operands
[0];
18949 mask
= operands
[3];
18951 mode
= GET_MODE (dest
);
18952 vmode
= GET_MODE (mask
);
18954 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18955 x
= gen_rtx_AND (vmode
, dest
, mask
);
18956 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18958 if (op0
!= CONST0_RTX (vmode
))
18960 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18961 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18965 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18966 so we have to do two masks. */
18969 ix86_split_copysign_var (rtx operands
[])
18971 enum machine_mode mode
, vmode
;
18972 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18974 dest
= operands
[0];
18975 scratch
= operands
[1];
18978 nmask
= operands
[4];
18979 mask
= operands
[5];
18981 mode
= GET_MODE (dest
);
18982 vmode
= GET_MODE (mask
);
18984 if (rtx_equal_p (op0
, op1
))
18986 /* Shouldn't happen often (it's useless, obviously), but when it does
18987 we'd generate incorrect code if we continue below. */
18988 emit_move_insn (dest
, op0
);
18992 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18994 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18996 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18997 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
19000 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
19001 x
= gen_rtx_NOT (vmode
, dest
);
19002 x
= gen_rtx_AND (vmode
, x
, op0
);
19003 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19007 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
19009 x
= gen_rtx_AND (vmode
, scratch
, mask
);
19011 else /* alternative 2,4 */
19013 gcc_assert (REGNO (mask
) == REGNO (scratch
));
19014 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
19015 x
= gen_rtx_AND (vmode
, scratch
, op1
);
19017 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
19019 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
19021 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
19022 x
= gen_rtx_AND (vmode
, dest
, nmask
);
19024 else /* alternative 3,4 */
19026 gcc_assert (REGNO (nmask
) == REGNO (dest
));
19028 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
19029 x
= gen_rtx_AND (vmode
, dest
, op0
);
19031 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19034 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
19035 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19038 /* Return TRUE or FALSE depending on whether the first SET in INSN
19039 has source and destination with matching CC modes, and that the
19040 CC mode is at least as constrained as REQ_MODE. */
19043 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
19046 enum machine_mode set_mode
;
19048 set
= PATTERN (insn
);
19049 if (GET_CODE (set
) == PARALLEL
)
19050 set
= XVECEXP (set
, 0, 0);
19051 gcc_assert (GET_CODE (set
) == SET
);
19052 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
19054 set_mode
= GET_MODE (SET_DEST (set
));
19058 if (req_mode
!= CCNOmode
19059 && (req_mode
!= CCmode
19060 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
19064 if (req_mode
== CCGCmode
)
19068 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
19072 if (req_mode
== CCZmode
)
19082 if (set_mode
!= req_mode
)
19087 gcc_unreachable ();
19090 return GET_MODE (SET_SRC (set
)) == set_mode
;
19093 /* Generate insn patterns to do an integer compare of OPERANDS. */
19096 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19098 enum machine_mode cmpmode
;
19101 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
19102 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
19104 /* This is very simple, but making the interface the same as in the
19105 FP case makes the rest of the code easier. */
19106 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
19107 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
19109 /* Return the test that should be put into the flags user, i.e.
19110 the bcc, scc, or cmov instruction. */
19111 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
19114 /* Figure out whether to use ordered or unordered fp comparisons.
19115 Return the appropriate mode to use. */
19118 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
19120 /* ??? In order to make all comparisons reversible, we do all comparisons
19121 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19122 all forms trapping and nontrapping comparisons, we can make inequality
19123 comparisons trapping again, since it results in better code when using
19124 FCOM based compares. */
19125 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
19129 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
19131 enum machine_mode mode
= GET_MODE (op0
);
19133 if (SCALAR_FLOAT_MODE_P (mode
))
19135 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19136 return ix86_fp_compare_mode (code
);
19141 /* Only zero flag is needed. */
19142 case EQ
: /* ZF=0 */
19143 case NE
: /* ZF!=0 */
19145 /* Codes needing carry flag. */
19146 case GEU
: /* CF=0 */
19147 case LTU
: /* CF=1 */
19148 /* Detect overflow checks. They need just the carry flag. */
19149 if (GET_CODE (op0
) == PLUS
19150 && rtx_equal_p (op1
, XEXP (op0
, 0)))
19154 case GTU
: /* CF=0 & ZF=0 */
19155 case LEU
: /* CF=1 | ZF=1 */
19157 /* Codes possibly doable only with sign flag when
19158 comparing against zero. */
19159 case GE
: /* SF=OF or SF=0 */
19160 case LT
: /* SF<>OF or SF=1 */
19161 if (op1
== const0_rtx
)
19164 /* For other cases Carry flag is not required. */
19166 /* Codes doable only with sign flag when comparing
19167 against zero, but we miss jump instruction for it
19168 so we need to use relational tests against overflow
19169 that thus needs to be zero. */
19170 case GT
: /* ZF=0 & SF=OF */
19171 case LE
: /* ZF=1 | SF<>OF */
19172 if (op1
== const0_rtx
)
19176 /* strcmp pattern do (use flags) and combine may ask us for proper
19181 gcc_unreachable ();
19185 /* Return the fixed registers used for condition codes. */
19188 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
19195 /* If two condition code modes are compatible, return a condition code
19196 mode which is compatible with both. Otherwise, return
19199 static enum machine_mode
19200 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
19205 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
19208 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
19209 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
19212 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
19214 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
19220 gcc_unreachable ();
19250 /* These are only compatible with themselves, which we already
19257 /* Return a comparison we can do and that it is equivalent to
19258 swap_condition (code) apart possibly from orderedness.
19259 But, never change orderedness if TARGET_IEEE_FP, returning
19260 UNKNOWN in that case if necessary. */
19262 static enum rtx_code
19263 ix86_fp_swap_condition (enum rtx_code code
)
19267 case GT
: /* GTU - CF=0 & ZF=0 */
19268 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
19269 case GE
: /* GEU - CF=0 */
19270 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
19271 case UNLT
: /* LTU - CF=1 */
19272 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
19273 case UNLE
: /* LEU - CF=1 | ZF=1 */
19274 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
19276 return swap_condition (code
);
19280 /* Return cost of comparison CODE using the best strategy for performance.
19281 All following functions do use number of instructions as a cost metrics.
19282 In future this should be tweaked to compute bytes for optimize_size and
19283 take into account performance of various instructions on various CPUs. */
19286 ix86_fp_comparison_cost (enum rtx_code code
)
19290 /* The cost of code using bit-twiddling on %ah. */
19307 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
19311 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
19314 gcc_unreachable ();
19317 switch (ix86_fp_comparison_strategy (code
))
19319 case IX86_FPCMP_COMI
:
19320 return arith_cost
> 4 ? 3 : 2;
19321 case IX86_FPCMP_SAHF
:
19322 return arith_cost
> 4 ? 4 : 3;
19328 /* Return strategy to use for floating-point. We assume that fcomi is always
19329 preferrable where available, since that is also true when looking at size
19330 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19332 enum ix86_fpcmp_strategy
19333 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
19335 /* Do fcomi/sahf based test when profitable. */
19338 return IX86_FPCMP_COMI
;
19340 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
19341 return IX86_FPCMP_SAHF
;
19343 return IX86_FPCMP_ARITH
;
19346 /* Swap, force into registers, or otherwise massage the two operands
19347 to a fp comparison. The operands are updated in place; the new
19348 comparison code is returned. */
19350 static enum rtx_code
19351 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
19353 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
19354 rtx op0
= *pop0
, op1
= *pop1
;
19355 enum machine_mode op_mode
= GET_MODE (op0
);
19356 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
19358 /* All of the unordered compare instructions only work on registers.
19359 The same is true of the fcomi compare instructions. The XFmode
19360 compare instructions require registers except when comparing
19361 against zero or when converting operand 1 from fixed point to
19365 && (fpcmp_mode
== CCFPUmode
19366 || (op_mode
== XFmode
19367 && ! (standard_80387_constant_p (op0
) == 1
19368 || standard_80387_constant_p (op1
) == 1)
19369 && GET_CODE (op1
) != FLOAT
)
19370 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
19372 op0
= force_reg (op_mode
, op0
);
19373 op1
= force_reg (op_mode
, op1
);
19377 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19378 things around if they appear profitable, otherwise force op0
19379 into a register. */
19381 if (standard_80387_constant_p (op0
) == 0
19383 && ! (standard_80387_constant_p (op1
) == 0
19386 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
19387 if (new_code
!= UNKNOWN
)
19390 tmp
= op0
, op0
= op1
, op1
= tmp
;
19396 op0
= force_reg (op_mode
, op0
);
19398 if (CONSTANT_P (op1
))
19400 int tmp
= standard_80387_constant_p (op1
);
19402 op1
= validize_mem (force_const_mem (op_mode
, op1
));
19406 op1
= force_reg (op_mode
, op1
);
19409 op1
= force_reg (op_mode
, op1
);
19413 /* Try to rearrange the comparison to make it cheaper. */
19414 if (ix86_fp_comparison_cost (code
)
19415 > ix86_fp_comparison_cost (swap_condition (code
))
19416 && (REG_P (op1
) || can_create_pseudo_p ()))
19419 tmp
= op0
, op0
= op1
, op1
= tmp
;
19420 code
= swap_condition (code
);
19422 op0
= force_reg (op_mode
, op0
);
19430 /* Convert comparison codes we use to represent FP comparison to integer
19431 code that will result in proper branch. Return UNKNOWN if no such code
19435 ix86_fp_compare_code_to_integer (enum rtx_code code
)
19464 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19467 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
19469 enum machine_mode fpcmp_mode
, intcmp_mode
;
19472 fpcmp_mode
= ix86_fp_compare_mode (code
);
19473 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
19475 /* Do fcomi/sahf based test when profitable. */
19476 switch (ix86_fp_comparison_strategy (code
))
19478 case IX86_FPCMP_COMI
:
19479 intcmp_mode
= fpcmp_mode
;
19480 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19481 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19486 case IX86_FPCMP_SAHF
:
19487 intcmp_mode
= fpcmp_mode
;
19488 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19489 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
19493 scratch
= gen_reg_rtx (HImode
);
19494 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
19495 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
19498 case IX86_FPCMP_ARITH
:
19499 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19500 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
19501 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
19503 scratch
= gen_reg_rtx (HImode
);
19504 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
19506 /* In the unordered case, we have to check C2 for NaN's, which
19507 doesn't happen to work out to anything nice combination-wise.
19508 So do some bit twiddling on the value we've got in AH to come
19509 up with an appropriate set of condition codes. */
19511 intcmp_mode
= CCNOmode
;
19516 if (code
== GT
|| !TARGET_IEEE_FP
)
19518 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19523 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19524 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19525 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
19526 intcmp_mode
= CCmode
;
19532 if (code
== LT
&& TARGET_IEEE_FP
)
19534 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19535 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
19536 intcmp_mode
= CCmode
;
19541 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
19547 if (code
== GE
|| !TARGET_IEEE_FP
)
19549 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
19554 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19555 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
19561 if (code
== LE
&& TARGET_IEEE_FP
)
19563 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19564 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
19565 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19566 intcmp_mode
= CCmode
;
19571 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
19577 if (code
== EQ
&& TARGET_IEEE_FP
)
19579 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19580 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
19581 intcmp_mode
= CCmode
;
19586 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19592 if (code
== NE
&& TARGET_IEEE_FP
)
19594 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
19595 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
19601 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
19607 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19611 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
19616 gcc_unreachable ();
19624 /* Return the test that should be put into the flags user, i.e.
19625 the bcc, scc, or cmov instruction. */
19626 return gen_rtx_fmt_ee (code
, VOIDmode
,
19627 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
19632 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
19636 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
19637 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
19639 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
19641 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
19642 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19645 ret
= ix86_expand_int_compare (code
, op0
, op1
);
19651 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
19653 enum machine_mode mode
= GET_MODE (op0
);
19665 tmp
= ix86_expand_compare (code
, op0
, op1
);
19666 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
19667 gen_rtx_LABEL_REF (VOIDmode
, label
),
19669 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
19676 /* Expand DImode branch into multiple compare+branch. */
19678 rtx lo
[2], hi
[2], label2
;
19679 enum rtx_code code1
, code2
, code3
;
19680 enum machine_mode submode
;
19682 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
19684 tmp
= op0
, op0
= op1
, op1
= tmp
;
19685 code
= swap_condition (code
);
19688 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19689 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19691 submode
= mode
== DImode
? SImode
: DImode
;
19693 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19694 avoid two branches. This costs one extra insn, so disable when
19695 optimizing for size. */
19697 if ((code
== EQ
|| code
== NE
)
19698 && (!optimize_insn_for_size_p ()
19699 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19704 if (hi
[1] != const0_rtx
)
19705 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19706 NULL_RTX
, 0, OPTAB_WIDEN
);
19709 if (lo
[1] != const0_rtx
)
19710 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19711 NULL_RTX
, 0, OPTAB_WIDEN
);
19713 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19714 NULL_RTX
, 0, OPTAB_WIDEN
);
19716 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19720 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19721 op1 is a constant and the low word is zero, then we can just
19722 examine the high word. Similarly for low word -1 and
19723 less-or-equal-than or greater-than. */
19725 if (CONST_INT_P (hi
[1]))
19728 case LT
: case LTU
: case GE
: case GEU
:
19729 if (lo
[1] == const0_rtx
)
19731 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19735 case LE
: case LEU
: case GT
: case GTU
:
19736 if (lo
[1] == constm1_rtx
)
19738 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19746 /* Otherwise, we need two or three jumps. */
19748 label2
= gen_label_rtx ();
19751 code2
= swap_condition (code
);
19752 code3
= unsigned_condition (code
);
19756 case LT
: case GT
: case LTU
: case GTU
:
19759 case LE
: code1
= LT
; code2
= GT
; break;
19760 case GE
: code1
= GT
; code2
= LT
; break;
19761 case LEU
: code1
= LTU
; code2
= GTU
; break;
19762 case GEU
: code1
= GTU
; code2
= LTU
; break;
19764 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19765 case NE
: code2
= UNKNOWN
; break;
19768 gcc_unreachable ();
19773 * if (hi(a) < hi(b)) goto true;
19774 * if (hi(a) > hi(b)) goto false;
19775 * if (lo(a) < lo(b)) goto true;
19779 if (code1
!= UNKNOWN
)
19780 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19781 if (code2
!= UNKNOWN
)
19782 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19784 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19786 if (code2
!= UNKNOWN
)
19787 emit_label (label2
);
19792 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19797 /* Split branch based on floating point condition. */
19799 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19800 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19805 if (target2
!= pc_rtx
)
19808 code
= reverse_condition_maybe_unordered (code
);
19813 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19816 /* Remove pushed operand from stack. */
19818 ix86_free_from_memory (GET_MODE (pushed
));
19820 i
= emit_jump_insn (gen_rtx_SET
19822 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19823 condition
, target1
, target2
)));
19824 if (split_branch_probability
>= 0)
19825 add_int_reg_note (i
, REG_BR_PROB
, split_branch_probability
);
19829 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19833 gcc_assert (GET_MODE (dest
) == QImode
);
19835 ret
= ix86_expand_compare (code
, op0
, op1
);
19836 PUT_MODE (ret
, QImode
);
19837 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19840 /* Expand comparison setting or clearing carry flag. Return true when
19841 successful and set pop for the operation. */
19843 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19845 enum machine_mode mode
=
19846 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19848 /* Do not handle double-mode compares that go through special path. */
19849 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19852 if (SCALAR_FLOAT_MODE_P (mode
))
19854 rtx compare_op
, compare_seq
;
19856 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19858 /* Shortcut: following common codes never translate
19859 into carry flag compares. */
19860 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19861 || code
== ORDERED
|| code
== UNORDERED
)
19864 /* These comparisons require zero flag; swap operands so they won't. */
19865 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19866 && !TARGET_IEEE_FP
)
19871 code
= swap_condition (code
);
19874 /* Try to expand the comparison and verify that we end up with
19875 carry flag based comparison. This fails to be true only when
19876 we decide to expand comparison using arithmetic that is not
19877 too common scenario. */
19879 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19880 compare_seq
= get_insns ();
19883 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19884 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19885 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19887 code
= GET_CODE (compare_op
);
19889 if (code
!= LTU
&& code
!= GEU
)
19892 emit_insn (compare_seq
);
19897 if (!INTEGRAL_MODE_P (mode
))
19906 /* Convert a==0 into (unsigned)a<1. */
19909 if (op1
!= const0_rtx
)
19912 code
= (code
== EQ
? LTU
: GEU
);
19915 /* Convert a>b into b<a or a>=b-1. */
19918 if (CONST_INT_P (op1
))
19920 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19921 /* Bail out on overflow. We still can swap operands but that
19922 would force loading of the constant into register. */
19923 if (op1
== const0_rtx
19924 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19926 code
= (code
== GTU
? GEU
: LTU
);
19933 code
= (code
== GTU
? LTU
: GEU
);
19937 /* Convert a>=0 into (unsigned)a<0x80000000. */
19940 if (mode
== DImode
|| op1
!= const0_rtx
)
19942 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19943 code
= (code
== LT
? GEU
: LTU
);
19947 if (mode
== DImode
|| op1
!= constm1_rtx
)
19949 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19950 code
= (code
== LE
? GEU
: LTU
);
19956 /* Swapping operands may cause constant to appear as first operand. */
19957 if (!nonimmediate_operand (op0
, VOIDmode
))
19959 if (!can_create_pseudo_p ())
19961 op0
= force_reg (mode
, op0
);
19963 *pop
= ix86_expand_compare (code
, op0
, op1
);
19964 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19969 ix86_expand_int_movcc (rtx operands
[])
19971 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19972 rtx compare_seq
, compare_op
;
19973 enum machine_mode mode
= GET_MODE (operands
[0]);
19974 bool sign_bit_compare_p
= false;
19975 rtx op0
= XEXP (operands
[1], 0);
19976 rtx op1
= XEXP (operands
[1], 1);
19978 if (GET_MODE (op0
) == TImode
19979 || (GET_MODE (op0
) == DImode
19984 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19985 compare_seq
= get_insns ();
19988 compare_code
= GET_CODE (compare_op
);
19990 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19991 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19992 sign_bit_compare_p
= true;
19994 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19995 HImode insns, we'd be swallowed in word prefix ops. */
19997 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19998 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19999 && CONST_INT_P (operands
[2])
20000 && CONST_INT_P (operands
[3]))
20002 rtx out
= operands
[0];
20003 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
20004 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
20005 HOST_WIDE_INT diff
;
20008 /* Sign bit compares are better done using shifts than we do by using
20010 if (sign_bit_compare_p
20011 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20013 /* Detect overlap between destination and compare sources. */
20016 if (!sign_bit_compare_p
)
20019 bool fpcmp
= false;
20021 compare_code
= GET_CODE (compare_op
);
20023 flags
= XEXP (compare_op
, 0);
20025 if (GET_MODE (flags
) == CCFPmode
20026 || GET_MODE (flags
) == CCFPUmode
)
20030 = ix86_fp_compare_code_to_integer (compare_code
);
20033 /* To simplify rest of code, restrict to the GEU case. */
20034 if (compare_code
== LTU
)
20036 HOST_WIDE_INT tmp
= ct
;
20039 compare_code
= reverse_condition (compare_code
);
20040 code
= reverse_condition (code
);
20045 PUT_CODE (compare_op
,
20046 reverse_condition_maybe_unordered
20047 (GET_CODE (compare_op
)));
20049 PUT_CODE (compare_op
,
20050 reverse_condition (GET_CODE (compare_op
)));
20054 if (reg_overlap_mentioned_p (out
, op0
)
20055 || reg_overlap_mentioned_p (out
, op1
))
20056 tmp
= gen_reg_rtx (mode
);
20058 if (mode
== DImode
)
20059 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
20061 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
20062 flags
, compare_op
));
20066 if (code
== GT
|| code
== GE
)
20067 code
= reverse_condition (code
);
20070 HOST_WIDE_INT tmp
= ct
;
20075 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
20088 tmp
= expand_simple_binop (mode
, PLUS
,
20090 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20101 tmp
= expand_simple_binop (mode
, IOR
,
20103 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20105 else if (diff
== -1 && ct
)
20115 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20117 tmp
= expand_simple_binop (mode
, PLUS
,
20118 copy_rtx (tmp
), GEN_INT (cf
),
20119 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20127 * andl cf - ct, dest
20137 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
20140 tmp
= expand_simple_binop (mode
, AND
,
20142 gen_int_mode (cf
- ct
, mode
),
20143 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20145 tmp
= expand_simple_binop (mode
, PLUS
,
20146 copy_rtx (tmp
), GEN_INT (ct
),
20147 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
20150 if (!rtx_equal_p (tmp
, out
))
20151 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
20158 enum machine_mode cmp_mode
= GET_MODE (op0
);
20161 tmp
= ct
, ct
= cf
, cf
= tmp
;
20164 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20166 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20168 /* We may be reversing unordered compare to normal compare, that
20169 is not valid in general (we may convert non-trapping condition
20170 to trapping one), however on i386 we currently emit all
20171 comparisons unordered. */
20172 compare_code
= reverse_condition_maybe_unordered (compare_code
);
20173 code
= reverse_condition_maybe_unordered (code
);
20177 compare_code
= reverse_condition (compare_code
);
20178 code
= reverse_condition (code
);
20182 compare_code
= UNKNOWN
;
20183 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
20184 && CONST_INT_P (op1
))
20186 if (op1
== const0_rtx
20187 && (code
== LT
|| code
== GE
))
20188 compare_code
= code
;
20189 else if (op1
== constm1_rtx
)
20193 else if (code
== GT
)
20198 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20199 if (compare_code
!= UNKNOWN
20200 && GET_MODE (op0
) == GET_MODE (out
)
20201 && (cf
== -1 || ct
== -1))
20203 /* If lea code below could be used, only optimize
20204 if it results in a 2 insn sequence. */
20206 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20207 || diff
== 3 || diff
== 5 || diff
== 9)
20208 || (compare_code
== LT
&& ct
== -1)
20209 || (compare_code
== GE
&& cf
== -1))
20212 * notl op1 (if necessary)
20220 code
= reverse_condition (code
);
20223 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20225 out
= expand_simple_binop (mode
, IOR
,
20227 out
, 1, OPTAB_DIRECT
);
20228 if (out
!= operands
[0])
20229 emit_move_insn (operands
[0], out
);
20236 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
20237 || diff
== 3 || diff
== 5 || diff
== 9)
20238 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
20240 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
20246 * lea cf(dest*(ct-cf)),dest
20250 * This also catches the degenerate setcc-only case.
20256 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20259 /* On x86_64 the lea instruction operates on Pmode, so we need
20260 to get arithmetics done in proper mode to match. */
20262 tmp
= copy_rtx (out
);
20266 out1
= copy_rtx (out
);
20267 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
20271 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
20277 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
20280 if (!rtx_equal_p (tmp
, out
))
20283 out
= force_operand (tmp
, copy_rtx (out
));
20285 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
20287 if (!rtx_equal_p (out
, operands
[0]))
20288 emit_move_insn (operands
[0], copy_rtx (out
));
20294 * General case: Jumpful:
20295 * xorl dest,dest cmpl op1, op2
20296 * cmpl op1, op2 movl ct, dest
20297 * setcc dest jcc 1f
20298 * decl dest movl cf, dest
20299 * andl (cf-ct),dest 1:
20302 * Size 20. Size 14.
20304 * This is reasonably steep, but branch mispredict costs are
20305 * high on modern cpus, so consider failing only if optimizing
20309 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20310 && BRANCH_COST (optimize_insn_for_speed_p (),
20315 enum machine_mode cmp_mode
= GET_MODE (op0
);
20320 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
20322 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
20324 /* We may be reversing unordered compare to normal compare,
20325 that is not valid in general (we may convert non-trapping
20326 condition to trapping one), however on i386 we currently
20327 emit all comparisons unordered. */
20328 code
= reverse_condition_maybe_unordered (code
);
20332 code
= reverse_condition (code
);
20333 if (compare_code
!= UNKNOWN
)
20334 compare_code
= reverse_condition (compare_code
);
20338 if (compare_code
!= UNKNOWN
)
20340 /* notl op1 (if needed)
20345 For x < 0 (resp. x <= -1) there will be no notl,
20346 so if possible swap the constants to get rid of the
20348 True/false will be -1/0 while code below (store flag
20349 followed by decrement) is 0/-1, so the constants need
20350 to be exchanged once more. */
20352 if (compare_code
== GE
|| !cf
)
20354 code
= reverse_condition (code
);
20359 HOST_WIDE_INT tmp
= cf
;
20364 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
20368 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
20370 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
20372 copy_rtx (out
), 1, OPTAB_DIRECT
);
20375 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
20376 gen_int_mode (cf
- ct
, mode
),
20377 copy_rtx (out
), 1, OPTAB_DIRECT
);
20379 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
20380 copy_rtx (out
), 1, OPTAB_DIRECT
);
20381 if (!rtx_equal_p (out
, operands
[0]))
20382 emit_move_insn (operands
[0], copy_rtx (out
));
20388 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
20390 /* Try a few things more with specific constants and a variable. */
20393 rtx var
, orig_out
, out
, tmp
;
20395 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20398 /* If one of the two operands is an interesting constant, load a
20399 constant with the above and mask it in with a logical operation. */
20401 if (CONST_INT_P (operands
[2]))
20404 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
20405 operands
[3] = constm1_rtx
, op
= and_optab
;
20406 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
20407 operands
[3] = const0_rtx
, op
= ior_optab
;
20411 else if (CONST_INT_P (operands
[3]))
20414 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
20415 operands
[2] = constm1_rtx
, op
= and_optab
;
20416 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
20417 operands
[2] = const0_rtx
, op
= ior_optab
;
20424 orig_out
= operands
[0];
20425 tmp
= gen_reg_rtx (mode
);
20428 /* Recurse to get the constant loaded. */
20429 if (ix86_expand_int_movcc (operands
) == 0)
20432 /* Mask in the interesting variable. */
20433 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
20435 if (!rtx_equal_p (out
, orig_out
))
20436 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
20442 * For comparison with above,
20452 if (! nonimmediate_operand (operands
[2], mode
))
20453 operands
[2] = force_reg (mode
, operands
[2]);
20454 if (! nonimmediate_operand (operands
[3], mode
))
20455 operands
[3] = force_reg (mode
, operands
[3]);
20457 if (! register_operand (operands
[2], VOIDmode
)
20459 || ! register_operand (operands
[3], VOIDmode
)))
20460 operands
[2] = force_reg (mode
, operands
[2]);
20463 && ! register_operand (operands
[3], VOIDmode
))
20464 operands
[3] = force_reg (mode
, operands
[3]);
20466 emit_insn (compare_seq
);
20467 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20468 gen_rtx_IF_THEN_ELSE (mode
,
20469 compare_op
, operands
[2],
20474 /* Swap, force into registers, or otherwise massage the two operands
20475 to an sse comparison with a mask result. Thus we differ a bit from
20476 ix86_prepare_fp_compare_args which expects to produce a flags result.
20478 The DEST operand exists to help determine whether to commute commutative
20479 operators. The POP0/POP1 operands are updated in place. The new
20480 comparison code is returned, or UNKNOWN if not implementable. */
20482 static enum rtx_code
20483 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
20484 rtx
*pop0
, rtx
*pop1
)
20492 /* AVX supports all the needed comparisons. */
20495 /* We have no LTGT as an operator. We could implement it with
20496 NE & ORDERED, but this requires an extra temporary. It's
20497 not clear that it's worth it. */
20504 /* These are supported directly. */
20511 /* AVX has 3 operand comparisons, no need to swap anything. */
20514 /* For commutative operators, try to canonicalize the destination
20515 operand to be first in the comparison - this helps reload to
20516 avoid extra moves. */
20517 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
20525 /* These are not supported directly before AVX, and furthermore
20526 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20527 comparison operands to transform into something that is
20532 code
= swap_condition (code
);
20536 gcc_unreachable ();
20542 /* Detect conditional moves that exactly match min/max operational
20543 semantics. Note that this is IEEE safe, as long as we don't
20544 interchange the operands.
20546 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20547 and TRUE if the operation is successful and instructions are emitted. */
20550 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
20551 rtx cmp_op1
, rtx if_true
, rtx if_false
)
20553 enum machine_mode mode
;
20559 else if (code
== UNGE
)
20562 if_true
= if_false
;
20568 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
20570 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
20575 mode
= GET_MODE (dest
);
20577 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20578 but MODE may be a vector mode and thus not appropriate. */
20579 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
20581 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
20584 if_true
= force_reg (mode
, if_true
);
20585 v
= gen_rtvec (2, if_true
, if_false
);
20586 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
20590 code
= is_min
? SMIN
: SMAX
;
20591 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
20594 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
20598 /* Expand an sse vector comparison. Return the register with the result. */
20601 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
20602 rtx op_true
, rtx op_false
)
20604 enum machine_mode mode
= GET_MODE (dest
);
20605 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
20608 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
20609 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
20610 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
20613 || reg_overlap_mentioned_p (dest
, op_true
)
20614 || reg_overlap_mentioned_p (dest
, op_false
))
20615 dest
= gen_reg_rtx (mode
);
20617 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
20618 if (cmp_mode
!= mode
)
20620 x
= force_reg (cmp_mode
, x
);
20621 convert_move (dest
, x
, false);
20624 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20629 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20630 operations. This is used for both scalar and vector conditional moves. */
20633 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
20635 enum machine_mode mode
= GET_MODE (dest
);
20638 if (vector_all_ones_operand (op_true
, mode
)
20639 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
20641 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
20643 else if (op_false
== CONST0_RTX (mode
))
20645 op_true
= force_reg (mode
, op_true
);
20646 x
= gen_rtx_AND (mode
, cmp
, op_true
);
20647 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20649 else if (op_true
== CONST0_RTX (mode
))
20651 op_false
= force_reg (mode
, op_false
);
20652 x
= gen_rtx_NOT (mode
, cmp
);
20653 x
= gen_rtx_AND (mode
, x
, op_false
);
20654 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20656 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
20658 op_false
= force_reg (mode
, op_false
);
20659 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
20660 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20662 else if (TARGET_XOP
)
20664 op_true
= force_reg (mode
, op_true
);
20666 if (!nonimmediate_operand (op_false
, mode
))
20667 op_false
= force_reg (mode
, op_false
);
20669 emit_insn (gen_rtx_SET (mode
, dest
,
20670 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
20676 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
20679 if (!nonimmediate_operand (op_true
, mode
))
20680 op_true
= force_reg (mode
, op_true
);
20682 op_false
= force_reg (mode
, op_false
);
20688 gen
= gen_sse4_1_blendvps
;
20692 gen
= gen_sse4_1_blendvpd
;
20700 gen
= gen_sse4_1_pblendvb
;
20701 if (mode
!= V16QImode
)
20702 d
= gen_reg_rtx (V16QImode
);
20703 op_false
= gen_lowpart (V16QImode
, op_false
);
20704 op_true
= gen_lowpart (V16QImode
, op_true
);
20705 cmp
= gen_lowpart (V16QImode
, cmp
);
20710 gen
= gen_avx_blendvps256
;
20714 gen
= gen_avx_blendvpd256
;
20722 gen
= gen_avx2_pblendvb
;
20723 if (mode
!= V32QImode
)
20724 d
= gen_reg_rtx (V32QImode
);
20725 op_false
= gen_lowpart (V32QImode
, op_false
);
20726 op_true
= gen_lowpart (V32QImode
, op_true
);
20727 cmp
= gen_lowpart (V32QImode
, cmp
);
20736 emit_insn (gen (d
, op_false
, op_true
, cmp
));
20738 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
20742 op_true
= force_reg (mode
, op_true
);
20744 t2
= gen_reg_rtx (mode
);
20746 t3
= gen_reg_rtx (mode
);
20750 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20751 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20753 x
= gen_rtx_NOT (mode
, cmp
);
20754 x
= gen_rtx_AND (mode
, x
, op_false
);
20755 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20757 x
= gen_rtx_IOR (mode
, t3
, t2
);
20758 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20763 /* Expand a floating-point conditional move. Return true if successful. */
20766 ix86_expand_fp_movcc (rtx operands
[])
20768 enum machine_mode mode
= GET_MODE (operands
[0]);
20769 enum rtx_code code
= GET_CODE (operands
[1]);
20770 rtx tmp
, compare_op
;
20771 rtx op0
= XEXP (operands
[1], 0);
20772 rtx op1
= XEXP (operands
[1], 1);
20774 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20776 enum machine_mode cmode
;
20778 /* Since we've no cmove for sse registers, don't force bad register
20779 allocation just to gain access to it. Deny movcc when the
20780 comparison mode doesn't match the move mode. */
20781 cmode
= GET_MODE (op0
);
20782 if (cmode
== VOIDmode
)
20783 cmode
= GET_MODE (op1
);
20787 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20788 if (code
== UNKNOWN
)
20791 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20792 operands
[2], operands
[3]))
20795 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20796 operands
[2], operands
[3]);
20797 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20801 if (GET_MODE (op0
) == TImode
20802 || (GET_MODE (op0
) == DImode
20806 /* The floating point conditional move instructions don't directly
20807 support conditions resulting from a signed integer comparison. */
20809 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20810 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20812 tmp
= gen_reg_rtx (QImode
);
20813 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20815 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20818 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20819 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20820 operands
[2], operands
[3])));
20825 /* Expand a floating-point vector conditional move; a vcond operation
20826 rather than a movcc operation. */
20829 ix86_expand_fp_vcond (rtx operands
[])
20831 enum rtx_code code
= GET_CODE (operands
[3]);
20834 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20835 &operands
[4], &operands
[5]);
20836 if (code
== UNKNOWN
)
20839 switch (GET_CODE (operands
[3]))
20842 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20843 operands
[5], operands
[0], operands
[0]);
20844 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20845 operands
[5], operands
[1], operands
[2]);
20849 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20850 operands
[5], operands
[0], operands
[0]);
20851 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20852 operands
[5], operands
[1], operands
[2]);
20856 gcc_unreachable ();
20858 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20860 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20864 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20865 operands
[5], operands
[1], operands
[2]))
20868 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20869 operands
[1], operands
[2]);
20870 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20874 /* Expand a signed/unsigned integral vector conditional move. */
20877 ix86_expand_int_vcond (rtx operands
[])
20879 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20880 enum machine_mode mode
= GET_MODE (operands
[4]);
20881 enum rtx_code code
= GET_CODE (operands
[3]);
20882 bool negate
= false;
20885 cop0
= operands
[4];
20886 cop1
= operands
[5];
20888 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20889 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20890 if ((code
== LT
|| code
== GE
)
20891 && data_mode
== mode
20892 && cop1
== CONST0_RTX (mode
)
20893 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20894 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20895 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20896 && (GET_MODE_SIZE (data_mode
) == 16
20897 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20899 rtx negop
= operands
[2 - (code
== LT
)];
20900 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20901 if (negop
== CONST1_RTX (data_mode
))
20903 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20904 operands
[0], 1, OPTAB_DIRECT
);
20905 if (res
!= operands
[0])
20906 emit_move_insn (operands
[0], res
);
20909 else if (GET_MODE_INNER (data_mode
) != DImode
20910 && vector_all_ones_operand (negop
, data_mode
))
20912 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20913 operands
[0], 0, OPTAB_DIRECT
);
20914 if (res
!= operands
[0])
20915 emit_move_insn (operands
[0], res
);
20920 if (!nonimmediate_operand (cop1
, mode
))
20921 cop1
= force_reg (mode
, cop1
);
20922 if (!general_operand (operands
[1], data_mode
))
20923 operands
[1] = force_reg (data_mode
, operands
[1]);
20924 if (!general_operand (operands
[2], data_mode
))
20925 operands
[2] = force_reg (data_mode
, operands
[2]);
20927 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20929 && (mode
== V16QImode
|| mode
== V8HImode
20930 || mode
== V4SImode
|| mode
== V2DImode
))
20934 /* Canonicalize the comparison to EQ, GT, GTU. */
20945 code
= reverse_condition (code
);
20951 code
= reverse_condition (code
);
20957 code
= swap_condition (code
);
20958 x
= cop0
, cop0
= cop1
, cop1
= x
;
20962 gcc_unreachable ();
20965 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20966 if (mode
== V2DImode
)
20971 /* SSE4.1 supports EQ. */
20972 if (!TARGET_SSE4_1
)
20978 /* SSE4.2 supports GT/GTU. */
20979 if (!TARGET_SSE4_2
)
20984 gcc_unreachable ();
20988 /* Unsigned parallel compare is not supported by the hardware.
20989 Play some tricks to turn this into a signed comparison
20993 cop0
= force_reg (mode
, cop0
);
21003 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
21007 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
21008 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
21009 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
21010 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
21012 gcc_unreachable ();
21014 /* Subtract (-(INT MAX) - 1) from both operands to make
21016 mask
= ix86_build_signbit_mask (mode
, true, false);
21017 t1
= gen_reg_rtx (mode
);
21018 emit_insn (gen_sub3 (t1
, cop0
, mask
));
21020 t2
= gen_reg_rtx (mode
);
21021 emit_insn (gen_sub3 (t2
, cop1
, mask
));
21033 /* Perform a parallel unsigned saturating subtraction. */
21034 x
= gen_reg_rtx (mode
);
21035 emit_insn (gen_rtx_SET (VOIDmode
, x
,
21036 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
21039 cop1
= CONST0_RTX (mode
);
21045 gcc_unreachable ();
21050 /* Allow the comparison to be done in one mode, but the movcc to
21051 happen in another mode. */
21052 if (data_mode
== mode
)
21054 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
21055 operands
[1+negate
], operands
[2-negate
]);
21059 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
21060 x
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), code
, cop0
, cop1
,
21061 operands
[1+negate
], operands
[2-negate
]);
21062 x
= gen_lowpart (data_mode
, x
);
21065 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
21066 operands
[2-negate
]);
21070 /* Expand a variable vector permutation. */
21073 ix86_expand_vec_perm (rtx operands
[])
21075 rtx target
= operands
[0];
21076 rtx op0
= operands
[1];
21077 rtx op1
= operands
[2];
21078 rtx mask
= operands
[3];
21079 rtx t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
, vt
, vt2
, vec
[32];
21080 enum machine_mode mode
= GET_MODE (op0
);
21081 enum machine_mode maskmode
= GET_MODE (mask
);
21083 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
21085 /* Number of elements in the vector. */
21086 w
= GET_MODE_NUNITS (mode
);
21087 e
= GET_MODE_UNIT_SIZE (mode
);
21088 gcc_assert (w
<= 32);
21092 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
21094 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21095 an constant shuffle operand. With a tiny bit of effort we can
21096 use VPERMD instead. A re-interpretation stall for V4DFmode is
21097 unfortunate but there's no avoiding it.
21098 Similarly for V16HImode we don't have instructions for variable
21099 shuffling, while for V32QImode we can use after preparing suitable
21100 masks vpshufb; vpshufb; vpermq; vpor. */
21102 if (mode
== V16HImode
)
21104 maskmode
= mode
= V32QImode
;
21110 maskmode
= mode
= V8SImode
;
21114 t1
= gen_reg_rtx (maskmode
);
21116 /* Replicate the low bits of the V4DImode mask into V8SImode:
21118 t1 = { A A B B C C D D }. */
21119 for (i
= 0; i
< w
/ 2; ++i
)
21120 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
21121 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21122 vt
= force_reg (maskmode
, vt
);
21123 mask
= gen_lowpart (maskmode
, mask
);
21124 if (maskmode
== V8SImode
)
21125 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
21127 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
21129 /* Multiply the shuffle indicies by two. */
21130 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
21133 /* Add one to the odd shuffle indicies:
21134 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21135 for (i
= 0; i
< w
/ 2; ++i
)
21137 vec
[i
* 2] = const0_rtx
;
21138 vec
[i
* 2 + 1] = const1_rtx
;
21140 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21141 vt
= validize_mem (force_const_mem (maskmode
, vt
));
21142 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
21145 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21146 operands
[3] = mask
= t1
;
21147 target
= gen_reg_rtx (mode
);
21148 op0
= gen_lowpart (mode
, op0
);
21149 op1
= gen_lowpart (mode
, op1
);
21155 /* The VPERMD and VPERMPS instructions already properly ignore
21156 the high bits of the shuffle elements. No need for us to
21157 perform an AND ourselves. */
21158 if (one_operand_shuffle
)
21160 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
21161 if (target
!= operands
[0])
21162 emit_move_insn (operands
[0],
21163 gen_lowpart (GET_MODE (operands
[0]), target
));
21167 t1
= gen_reg_rtx (V8SImode
);
21168 t2
= gen_reg_rtx (V8SImode
);
21169 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
21170 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
21176 mask
= gen_lowpart (V8SFmode
, mask
);
21177 if (one_operand_shuffle
)
21178 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
21181 t1
= gen_reg_rtx (V8SFmode
);
21182 t2
= gen_reg_rtx (V8SFmode
);
21183 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
21184 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
21190 /* By combining the two 128-bit input vectors into one 256-bit
21191 input vector, we can use VPERMD and VPERMPS for the full
21192 two-operand shuffle. */
21193 t1
= gen_reg_rtx (V8SImode
);
21194 t2
= gen_reg_rtx (V8SImode
);
21195 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
21196 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21197 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
21198 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
21202 t1
= gen_reg_rtx (V8SFmode
);
21203 t2
= gen_reg_rtx (V8SImode
);
21204 mask
= gen_lowpart (V4SImode
, mask
);
21205 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
21206 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
21207 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
21208 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
21212 t1
= gen_reg_rtx (V32QImode
);
21213 t2
= gen_reg_rtx (V32QImode
);
21214 t3
= gen_reg_rtx (V32QImode
);
21215 vt2
= GEN_INT (128);
21216 for (i
= 0; i
< 32; i
++)
21218 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21219 vt
= force_reg (V32QImode
, vt
);
21220 for (i
= 0; i
< 32; i
++)
21221 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
21222 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
21223 vt2
= force_reg (V32QImode
, vt2
);
21224 /* From mask create two adjusted masks, which contain the same
21225 bits as mask in the low 7 bits of each vector element.
21226 The first mask will have the most significant bit clear
21227 if it requests element from the same 128-bit lane
21228 and MSB set if it requests element from the other 128-bit lane.
21229 The second mask will have the opposite values of the MSB,
21230 and additionally will have its 128-bit lanes swapped.
21231 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21232 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21233 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21234 stands for other 12 bytes. */
21235 /* The bit whether element is from the same lane or the other
21236 lane is bit 4, so shift it up by 3 to the MSB position. */
21237 t5
= gen_reg_rtx (V4DImode
);
21238 emit_insn (gen_ashlv4di3 (t5
, gen_lowpart (V4DImode
, mask
),
21240 /* Clear MSB bits from the mask just in case it had them set. */
21241 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
21242 /* After this t1 will have MSB set for elements from other lane. */
21243 emit_insn (gen_xorv32qi3 (t1
, gen_lowpart (V32QImode
, t5
), vt2
));
21244 /* Clear bits other than MSB. */
21245 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
21246 /* Or in the lower bits from mask into t3. */
21247 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
21248 /* And invert MSB bits in t1, so MSB is set for elements from the same
21250 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
21251 /* Swap 128-bit lanes in t3. */
21252 t6
= gen_reg_rtx (V4DImode
);
21253 emit_insn (gen_avx2_permv4di_1 (t6
, gen_lowpart (V4DImode
, t3
),
21254 const2_rtx
, GEN_INT (3),
21255 const0_rtx
, const1_rtx
));
21256 /* And or in the lower bits from mask into t1. */
21257 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
21258 if (one_operand_shuffle
)
21260 /* Each of these shuffles will put 0s in places where
21261 element from the other 128-bit lane is needed, otherwise
21262 will shuffle in the requested value. */
21263 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
,
21264 gen_lowpart (V32QImode
, t6
)));
21265 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
21266 /* For t3 the 128-bit lanes are swapped again. */
21267 t7
= gen_reg_rtx (V4DImode
);
21268 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t3
),
21269 const2_rtx
, GEN_INT (3),
21270 const0_rtx
, const1_rtx
));
21271 /* And oring both together leads to the result. */
21272 emit_insn (gen_iorv32qi3 (target
, t1
,
21273 gen_lowpart (V32QImode
, t7
)));
21274 if (target
!= operands
[0])
21275 emit_move_insn (operands
[0],
21276 gen_lowpart (GET_MODE (operands
[0]), target
));
21280 t4
= gen_reg_rtx (V32QImode
);
21281 /* Similarly to the above one_operand_shuffle code,
21282 just for repeated twice for each operand. merge_two:
21283 code will merge the two results together. */
21284 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
,
21285 gen_lowpart (V32QImode
, t6
)));
21286 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
,
21287 gen_lowpart (V32QImode
, t6
)));
21288 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
21289 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
21290 t7
= gen_reg_rtx (V4DImode
);
21291 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t4
),
21292 const2_rtx
, GEN_INT (3),
21293 const0_rtx
, const1_rtx
));
21294 t8
= gen_reg_rtx (V4DImode
);
21295 emit_insn (gen_avx2_permv4di_1 (t8
, gen_lowpart (V4DImode
, t3
),
21296 const2_rtx
, GEN_INT (3),
21297 const0_rtx
, const1_rtx
));
21298 emit_insn (gen_iorv32qi3 (t4
, t2
, gen_lowpart (V32QImode
, t7
)));
21299 emit_insn (gen_iorv32qi3 (t3
, t1
, gen_lowpart (V32QImode
, t8
)));
21305 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
21312 /* The XOP VPPERM insn supports three inputs. By ignoring the
21313 one_operand_shuffle special case, we avoid creating another
21314 set of constant vectors in memory. */
21315 one_operand_shuffle
= false;
21317 /* mask = mask & {2*w-1, ...} */
21318 vt
= GEN_INT (2*w
- 1);
21322 /* mask = mask & {w-1, ...} */
21323 vt
= GEN_INT (w
- 1);
21326 for (i
= 0; i
< w
; i
++)
21328 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21329 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21330 NULL_RTX
, 0, OPTAB_DIRECT
);
21332 /* For non-QImode operations, convert the word permutation control
21333 into a byte permutation control. */
21334 if (mode
!= V16QImode
)
21336 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
21337 GEN_INT (exact_log2 (e
)),
21338 NULL_RTX
, 0, OPTAB_DIRECT
);
21340 /* Convert mask to vector of chars. */
21341 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
21343 /* Replicate each of the input bytes into byte positions:
21344 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21345 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21346 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21347 for (i
= 0; i
< 16; ++i
)
21348 vec
[i
] = GEN_INT (i
/e
* e
);
21349 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21350 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21352 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
21354 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
21356 /* Convert it into the byte positions by doing
21357 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21358 for (i
= 0; i
< 16; ++i
)
21359 vec
[i
] = GEN_INT (i
% e
);
21360 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
21361 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
21362 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
21365 /* The actual shuffle operations all operate on V16QImode. */
21366 op0
= gen_lowpart (V16QImode
, op0
);
21367 op1
= gen_lowpart (V16QImode
, op1
);
21371 if (GET_MODE (target
) != V16QImode
)
21372 target
= gen_reg_rtx (V16QImode
);
21373 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
21374 if (target
!= operands
[0])
21375 emit_move_insn (operands
[0],
21376 gen_lowpart (GET_MODE (operands
[0]), target
));
21378 else if (one_operand_shuffle
)
21380 if (GET_MODE (target
) != V16QImode
)
21381 target
= gen_reg_rtx (V16QImode
);
21382 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
21383 if (target
!= operands
[0])
21384 emit_move_insn (operands
[0],
21385 gen_lowpart (GET_MODE (operands
[0]), target
));
21392 /* Shuffle the two input vectors independently. */
21393 t1
= gen_reg_rtx (V16QImode
);
21394 t2
= gen_reg_rtx (V16QImode
);
21395 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
21396 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
21399 /* Then merge them together. The key is whether any given control
21400 element contained a bit set that indicates the second word. */
21401 mask
= operands
[3];
21403 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
21405 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21406 more shuffle to convert the V2DI input mask into a V4SI
21407 input mask. At which point the masking that expand_int_vcond
21408 will work as desired. */
21409 rtx t3
= gen_reg_rtx (V4SImode
);
21410 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
21411 const0_rtx
, const0_rtx
,
21412 const2_rtx
, const2_rtx
));
21414 maskmode
= V4SImode
;
21418 for (i
= 0; i
< w
; i
++)
21420 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
21421 vt
= force_reg (maskmode
, vt
);
21422 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
21423 NULL_RTX
, 0, OPTAB_DIRECT
);
21425 if (GET_MODE (target
) != mode
)
21426 target
= gen_reg_rtx (mode
);
21428 xops
[1] = gen_lowpart (mode
, t2
);
21429 xops
[2] = gen_lowpart (mode
, t1
);
21430 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
21433 ok
= ix86_expand_int_vcond (xops
);
21435 if (target
!= operands
[0])
21436 emit_move_insn (operands
[0],
21437 gen_lowpart (GET_MODE (operands
[0]), target
));
21441 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
21442 true if we should do zero extension, else sign extension. HIGH_P is
21443 true if we want the N/2 high elements, else the low elements. */
21446 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
21448 enum machine_mode imode
= GET_MODE (src
);
21453 rtx (*unpack
)(rtx
, rtx
);
21454 rtx (*extract
)(rtx
, rtx
) = NULL
;
21455 enum machine_mode halfmode
= BLKmode
;
21461 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
21463 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
21464 halfmode
= V16QImode
;
21466 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
21470 unpack
= gen_avx2_zero_extendv8hiv8si2
;
21472 unpack
= gen_avx2_sign_extendv8hiv8si2
;
21473 halfmode
= V8HImode
;
21475 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
21479 unpack
= gen_avx2_zero_extendv4siv4di2
;
21481 unpack
= gen_avx2_sign_extendv4siv4di2
;
21482 halfmode
= V4SImode
;
21484 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
21488 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
21490 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
21494 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
21496 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
21500 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
21502 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
21505 gcc_unreachable ();
21508 if (GET_MODE_SIZE (imode
) == 32)
21510 tmp
= gen_reg_rtx (halfmode
);
21511 emit_insn (extract (tmp
, src
));
21515 /* Shift higher 8 bytes to lower 8 bytes. */
21516 tmp
= gen_reg_rtx (V1TImode
);
21517 emit_insn (gen_sse2_lshrv1ti3 (tmp
, gen_lowpart (V1TImode
, src
),
21519 tmp
= gen_lowpart (imode
, tmp
);
21524 emit_insn (unpack (dest
, tmp
));
21528 rtx (*unpack
)(rtx
, rtx
, rtx
);
21534 unpack
= gen_vec_interleave_highv16qi
;
21536 unpack
= gen_vec_interleave_lowv16qi
;
21540 unpack
= gen_vec_interleave_highv8hi
;
21542 unpack
= gen_vec_interleave_lowv8hi
;
21546 unpack
= gen_vec_interleave_highv4si
;
21548 unpack
= gen_vec_interleave_lowv4si
;
21551 gcc_unreachable ();
21555 tmp
= force_reg (imode
, CONST0_RTX (imode
));
21557 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
21558 src
, pc_rtx
, pc_rtx
);
21560 rtx tmp2
= gen_reg_rtx (imode
);
21561 emit_insn (unpack (tmp2
, src
, tmp
));
21562 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), tmp2
));
21566 /* Expand conditional increment or decrement using adb/sbb instructions.
21567 The default case using setcc followed by the conditional move can be
21568 done by generic code. */
21570 ix86_expand_int_addcc (rtx operands
[])
21572 enum rtx_code code
= GET_CODE (operands
[1]);
21574 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
21576 rtx val
= const0_rtx
;
21577 bool fpcmp
= false;
21578 enum machine_mode mode
;
21579 rtx op0
= XEXP (operands
[1], 0);
21580 rtx op1
= XEXP (operands
[1], 1);
21582 if (operands
[3] != const1_rtx
21583 && operands
[3] != constm1_rtx
)
21585 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
21587 code
= GET_CODE (compare_op
);
21589 flags
= XEXP (compare_op
, 0);
21591 if (GET_MODE (flags
) == CCFPmode
21592 || GET_MODE (flags
) == CCFPUmode
)
21595 code
= ix86_fp_compare_code_to_integer (code
);
21602 PUT_CODE (compare_op
,
21603 reverse_condition_maybe_unordered
21604 (GET_CODE (compare_op
)));
21606 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
21609 mode
= GET_MODE (operands
[0]);
21611 /* Construct either adc or sbb insn. */
21612 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
21617 insn
= gen_subqi3_carry
;
21620 insn
= gen_subhi3_carry
;
21623 insn
= gen_subsi3_carry
;
21626 insn
= gen_subdi3_carry
;
21629 gcc_unreachable ();
21637 insn
= gen_addqi3_carry
;
21640 insn
= gen_addhi3_carry
;
21643 insn
= gen_addsi3_carry
;
21646 insn
= gen_adddi3_carry
;
21649 gcc_unreachable ();
21652 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
21658 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21659 but works for floating pointer parameters and nonoffsetable memories.
21660 For pushes, it returns just stack offsets; the values will be saved
21661 in the right order. Maximally three parts are generated. */
21664 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
21669 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
21671 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
21673 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
21674 gcc_assert (size
>= 2 && size
<= 4);
21676 /* Optimize constant pool reference to immediates. This is used by fp
21677 moves, that force all constants to memory to allow combining. */
21678 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
21680 rtx tmp
= maybe_get_pool_constant (operand
);
21685 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
21687 /* The only non-offsetable memories we handle are pushes. */
21688 int ok
= push_operand (operand
, VOIDmode
);
21692 operand
= copy_rtx (operand
);
21693 PUT_MODE (operand
, word_mode
);
21694 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
21698 if (GET_CODE (operand
) == CONST_VECTOR
)
21700 enum machine_mode imode
= int_mode_for_mode (mode
);
21701 /* Caution: if we looked through a constant pool memory above,
21702 the operand may actually have a different mode now. That's
21703 ok, since we want to pun this all the way back to an integer. */
21704 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
21705 gcc_assert (operand
!= NULL
);
21711 if (mode
== DImode
)
21712 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21717 if (REG_P (operand
))
21719 gcc_assert (reload_completed
);
21720 for (i
= 0; i
< size
; i
++)
21721 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21723 else if (offsettable_memref_p (operand
))
21725 operand
= adjust_address (operand
, SImode
, 0);
21726 parts
[0] = operand
;
21727 for (i
= 1; i
< size
; i
++)
21728 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21730 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21735 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21739 real_to_target (l
, &r
, mode
);
21740 parts
[3] = gen_int_mode (l
[3], SImode
);
21741 parts
[2] = gen_int_mode (l
[2], SImode
);
21744 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21745 long double may not be 80-bit. */
21746 real_to_target (l
, &r
, mode
);
21747 parts
[2] = gen_int_mode (l
[2], SImode
);
21750 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21753 gcc_unreachable ();
21755 parts
[1] = gen_int_mode (l
[1], SImode
);
21756 parts
[0] = gen_int_mode (l
[0], SImode
);
21759 gcc_unreachable ();
21764 if (mode
== TImode
)
21765 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21766 if (mode
== XFmode
|| mode
== TFmode
)
21768 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21769 if (REG_P (operand
))
21771 gcc_assert (reload_completed
);
21772 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21773 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21775 else if (offsettable_memref_p (operand
))
21777 operand
= adjust_address (operand
, DImode
, 0);
21778 parts
[0] = operand
;
21779 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21781 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21786 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21787 real_to_target (l
, &r
, mode
);
21789 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21790 if (HOST_BITS_PER_WIDE_INT
>= 64)
21793 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21794 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21797 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21799 if (upper_mode
== SImode
)
21800 parts
[1] = gen_int_mode (l
[2], SImode
);
21801 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21804 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21805 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21808 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21811 gcc_unreachable ();
21818 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21819 Return false when normal moves are needed; true when all required
21820 insns have been emitted. Operands 2-4 contain the input values
21821 int the correct order; operands 5-7 contain the output values. */
21824 ix86_split_long_move (rtx operands
[])
21829 int collisions
= 0;
21830 enum machine_mode mode
= GET_MODE (operands
[0]);
21831 bool collisionparts
[4];
21833 /* The DFmode expanders may ask us to move double.
21834 For 64bit target this is single move. By hiding the fact
21835 here we simplify i386.md splitters. */
21836 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21838 /* Optimize constant pool reference to immediates. This is used by
21839 fp moves, that force all constants to memory to allow combining. */
21841 if (MEM_P (operands
[1])
21842 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21843 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21844 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21845 if (push_operand (operands
[0], VOIDmode
))
21847 operands
[0] = copy_rtx (operands
[0]);
21848 PUT_MODE (operands
[0], word_mode
);
21851 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21852 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21853 emit_move_insn (operands
[0], operands
[1]);
21857 /* The only non-offsettable memory we handle is push. */
21858 if (push_operand (operands
[0], VOIDmode
))
21861 gcc_assert (!MEM_P (operands
[0])
21862 || offsettable_memref_p (operands
[0]));
21864 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21865 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21867 /* When emitting push, take care for source operands on the stack. */
21868 if (push
&& MEM_P (operands
[1])
21869 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21871 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21873 /* Compensate for the stack decrement by 4. */
21874 if (!TARGET_64BIT
&& nparts
== 3
21875 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21876 src_base
= plus_constant (Pmode
, src_base
, 4);
21878 /* src_base refers to the stack pointer and is
21879 automatically decreased by emitted push. */
21880 for (i
= 0; i
< nparts
; i
++)
21881 part
[1][i
] = change_address (part
[1][i
],
21882 GET_MODE (part
[1][i
]), src_base
);
21885 /* We need to do copy in the right order in case an address register
21886 of the source overlaps the destination. */
21887 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21891 for (i
= 0; i
< nparts
; i
++)
21894 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21895 if (collisionparts
[i
])
21899 /* Collision in the middle part can be handled by reordering. */
21900 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21902 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21903 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21905 else if (collisions
== 1
21907 && (collisionparts
[1] || collisionparts
[2]))
21909 if (collisionparts
[1])
21911 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21912 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21916 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21917 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21921 /* If there are more collisions, we can't handle it by reordering.
21922 Do an lea to the last part and use only one colliding move. */
21923 else if (collisions
> 1)
21929 base
= part
[0][nparts
- 1];
21931 /* Handle the case when the last part isn't valid for lea.
21932 Happens in 64-bit mode storing the 12-byte XFmode. */
21933 if (GET_MODE (base
) != Pmode
)
21934 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21936 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21937 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21938 for (i
= 1; i
< nparts
; i
++)
21940 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21941 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21952 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21953 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21954 stack_pointer_rtx
, GEN_INT (-4)));
21955 emit_move_insn (part
[0][2], part
[1][2]);
21957 else if (nparts
== 4)
21959 emit_move_insn (part
[0][3], part
[1][3]);
21960 emit_move_insn (part
[0][2], part
[1][2]);
21965 /* In 64bit mode we don't have 32bit push available. In case this is
21966 register, it is OK - we will just use larger counterpart. We also
21967 retype memory - these comes from attempt to avoid REX prefix on
21968 moving of second half of TFmode value. */
21969 if (GET_MODE (part
[1][1]) == SImode
)
21971 switch (GET_CODE (part
[1][1]))
21974 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21978 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21982 gcc_unreachable ();
21985 if (GET_MODE (part
[1][0]) == SImode
)
21986 part
[1][0] = part
[1][1];
21989 emit_move_insn (part
[0][1], part
[1][1]);
21990 emit_move_insn (part
[0][0], part
[1][0]);
21994 /* Choose correct order to not overwrite the source before it is copied. */
21995 if ((REG_P (part
[0][0])
21996 && REG_P (part
[1][1])
21997 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21999 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
22001 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
22003 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
22005 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
22007 operands
[2 + i
] = part
[0][j
];
22008 operands
[6 + i
] = part
[1][j
];
22013 for (i
= 0; i
< nparts
; i
++)
22015 operands
[2 + i
] = part
[0][i
];
22016 operands
[6 + i
] = part
[1][i
];
22020 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22021 if (optimize_insn_for_size_p ())
22023 for (j
= 0; j
< nparts
- 1; j
++)
22024 if (CONST_INT_P (operands
[6 + j
])
22025 && operands
[6 + j
] != const0_rtx
22026 && REG_P (operands
[2 + j
]))
22027 for (i
= j
; i
< nparts
- 1; i
++)
22028 if (CONST_INT_P (operands
[7 + i
])
22029 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
22030 operands
[7 + i
] = operands
[2 + j
];
22033 for (i
= 0; i
< nparts
; i
++)
22034 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
22039 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22040 left shift by a constant, either using a single shift or
22041 a sequence of add instructions. */
22044 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
22046 rtx (*insn
)(rtx
, rtx
, rtx
);
22049 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
22050 && !optimize_insn_for_size_p ()))
22052 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
22053 while (count
-- > 0)
22054 emit_insn (insn (operand
, operand
, operand
));
22058 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
22059 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
22064 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22066 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
22067 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
22068 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22070 rtx low
[2], high
[2];
22073 if (CONST_INT_P (operands
[2]))
22075 split_double_mode (mode
, operands
, 2, low
, high
);
22076 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22078 if (count
>= half_width
)
22080 emit_move_insn (high
[0], low
[1]);
22081 emit_move_insn (low
[0], const0_rtx
);
22083 if (count
> half_width
)
22084 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
22088 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22090 if (!rtx_equal_p (operands
[0], operands
[1]))
22091 emit_move_insn (operands
[0], operands
[1]);
22093 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
22094 ix86_expand_ashl_const (low
[0], count
, mode
);
22099 split_double_mode (mode
, operands
, 1, low
, high
);
22101 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
22103 if (operands
[1] == const1_rtx
)
22105 /* Assuming we've chosen a QImode capable registers, then 1 << N
22106 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22107 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
22109 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
22111 ix86_expand_clear (low
[0]);
22112 ix86_expand_clear (high
[0]);
22113 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
22115 d
= gen_lowpart (QImode
, low
[0]);
22116 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22117 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
22118 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22120 d
= gen_lowpart (QImode
, high
[0]);
22121 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
22122 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
22123 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
22126 /* Otherwise, we can get the same results by manually performing
22127 a bit extract operation on bit 5/6, and then performing the two
22128 shifts. The two methods of getting 0/1 into low/high are exactly
22129 the same size. Avoiding the shift in the bit extract case helps
22130 pentium4 a bit; no one else seems to care much either way. */
22133 enum machine_mode half_mode
;
22134 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
22135 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
22136 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
22137 HOST_WIDE_INT bits
;
22140 if (mode
== DImode
)
22142 half_mode
= SImode
;
22143 gen_lshr3
= gen_lshrsi3
;
22144 gen_and3
= gen_andsi3
;
22145 gen_xor3
= gen_xorsi3
;
22150 half_mode
= DImode
;
22151 gen_lshr3
= gen_lshrdi3
;
22152 gen_and3
= gen_anddi3
;
22153 gen_xor3
= gen_xordi3
;
22157 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
22158 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
22160 x
= gen_lowpart (half_mode
, operands
[2]);
22161 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
22163 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
22164 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
22165 emit_move_insn (low
[0], high
[0]);
22166 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
22169 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22170 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
22174 if (operands
[1] == constm1_rtx
)
22176 /* For -1 << N, we can avoid the shld instruction, because we
22177 know that we're shifting 0...31/63 ones into a -1. */
22178 emit_move_insn (low
[0], constm1_rtx
);
22179 if (optimize_insn_for_size_p ())
22180 emit_move_insn (high
[0], low
[0]);
22182 emit_move_insn (high
[0], constm1_rtx
);
22186 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
22188 if (!rtx_equal_p (operands
[0], operands
[1]))
22189 emit_move_insn (operands
[0], operands
[1]);
22191 split_double_mode (mode
, operands
, 1, low
, high
);
22192 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
22195 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
22197 if (TARGET_CMOVE
&& scratch
)
22199 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22200 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22202 ix86_expand_clear (scratch
);
22203 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
22207 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22208 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22210 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
22215 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22217 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
22218 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
22219 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22220 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22222 rtx low
[2], high
[2];
22225 if (CONST_INT_P (operands
[2]))
22227 split_double_mode (mode
, operands
, 2, low
, high
);
22228 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22230 if (count
== GET_MODE_BITSIZE (mode
) - 1)
22232 emit_move_insn (high
[0], high
[1]);
22233 emit_insn (gen_ashr3 (high
[0], high
[0],
22234 GEN_INT (half_width
- 1)));
22235 emit_move_insn (low
[0], high
[0]);
22238 else if (count
>= half_width
)
22240 emit_move_insn (low
[0], high
[1]);
22241 emit_move_insn (high
[0], low
[0]);
22242 emit_insn (gen_ashr3 (high
[0], high
[0],
22243 GEN_INT (half_width
- 1)));
22245 if (count
> half_width
)
22246 emit_insn (gen_ashr3 (low
[0], low
[0],
22247 GEN_INT (count
- half_width
)));
22251 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22253 if (!rtx_equal_p (operands
[0], operands
[1]))
22254 emit_move_insn (operands
[0], operands
[1]);
22256 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22257 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
22262 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22264 if (!rtx_equal_p (operands
[0], operands
[1]))
22265 emit_move_insn (operands
[0], operands
[1]);
22267 split_double_mode (mode
, operands
, 1, low
, high
);
22269 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22270 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
22272 if (TARGET_CMOVE
&& scratch
)
22274 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22275 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22277 emit_move_insn (scratch
, high
[0]);
22278 emit_insn (gen_ashr3 (scratch
, scratch
,
22279 GEN_INT (half_width
- 1)));
22280 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22285 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
22286 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
22288 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
22294 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
22296 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
22297 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
22298 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
22299 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
22301 rtx low
[2], high
[2];
22304 if (CONST_INT_P (operands
[2]))
22306 split_double_mode (mode
, operands
, 2, low
, high
);
22307 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
22309 if (count
>= half_width
)
22311 emit_move_insn (low
[0], high
[1]);
22312 ix86_expand_clear (high
[0]);
22314 if (count
> half_width
)
22315 emit_insn (gen_lshr3 (low
[0], low
[0],
22316 GEN_INT (count
- half_width
)));
22320 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22322 if (!rtx_equal_p (operands
[0], operands
[1]))
22323 emit_move_insn (operands
[0], operands
[1]);
22325 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
22326 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
22331 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
22333 if (!rtx_equal_p (operands
[0], operands
[1]))
22334 emit_move_insn (operands
[0], operands
[1]);
22336 split_double_mode (mode
, operands
, 1, low
, high
);
22338 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
22339 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
22341 if (TARGET_CMOVE
&& scratch
)
22343 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
22344 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
22346 ix86_expand_clear (scratch
);
22347 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
22352 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
22353 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
22355 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
22360 /* Predict just emitted jump instruction to be taken with probability PROB. */
22362 predict_jump (int prob
)
22364 rtx insn
= get_last_insn ();
22365 gcc_assert (JUMP_P (insn
));
22366 add_int_reg_note (insn
, REG_BR_PROB
, prob
);
22369 /* Helper function for the string operations below. Dest VARIABLE whether
22370 it is aligned to VALUE bytes. If true, jump to the label. */
22372 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
22374 rtx label
= gen_label_rtx ();
22375 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
22376 if (GET_MODE (variable
) == DImode
)
22377 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
22379 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
22380 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
22383 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
22385 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22389 /* Adjust COUNTER by the VALUE. */
22391 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
22393 rtx (*gen_add
)(rtx
, rtx
, rtx
)
22394 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
22396 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
22399 /* Zero extend possibly SImode EXP to Pmode register. */
22401 ix86_zero_extend_to_Pmode (rtx exp
)
22403 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
22406 /* Divide COUNTREG by SCALE. */
22408 scale_counter (rtx countreg
, int scale
)
22414 if (CONST_INT_P (countreg
))
22415 return GEN_INT (INTVAL (countreg
) / scale
);
22416 gcc_assert (REG_P (countreg
));
22418 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
22419 GEN_INT (exact_log2 (scale
)),
22420 NULL
, 1, OPTAB_DIRECT
);
22424 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22425 DImode for constant loop counts. */
22427 static enum machine_mode
22428 counter_mode (rtx count_exp
)
22430 if (GET_MODE (count_exp
) != VOIDmode
)
22431 return GET_MODE (count_exp
);
22432 if (!CONST_INT_P (count_exp
))
22434 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
22439 /* Copy the address to a Pmode register. This is used for x32 to
22440 truncate DImode TLS address to a SImode register. */
22443 ix86_copy_addr_to_reg (rtx addr
)
22445 if (GET_MODE (addr
) == Pmode
)
22446 return copy_addr_to_reg (addr
);
22449 gcc_assert (GET_MODE (addr
) == DImode
&& Pmode
== SImode
);
22450 return gen_rtx_SUBREG (SImode
, copy_to_mode_reg (DImode
, addr
), 0);
22454 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
22455 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
22456 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
22457 memory by VALUE (supposed to be in MODE).
22459 The size is rounded down to whole number of chunk size moved at once.
22460 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
22464 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
22465 rtx destptr
, rtx srcptr
, rtx value
,
22466 rtx count
, enum machine_mode mode
, int unroll
,
22467 int expected_size
, bool issetmem
)
22469 rtx out_label
, top_label
, iter
, tmp
;
22470 enum machine_mode iter_mode
= counter_mode (count
);
22471 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
22472 rtx piece_size
= GEN_INT (piece_size_n
);
22473 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
22477 top_label
= gen_label_rtx ();
22478 out_label
= gen_label_rtx ();
22479 iter
= gen_reg_rtx (iter_mode
);
22481 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
22482 NULL
, 1, OPTAB_DIRECT
);
22483 /* Those two should combine. */
22484 if (piece_size
== const1_rtx
)
22486 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
22488 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
22490 emit_move_insn (iter
, const0_rtx
);
22492 emit_label (top_label
);
22494 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
22496 /* This assert could be relaxed - in this case we'll need to compute
22497 smallest power of two, containing in PIECE_SIZE_N and pass it to
22499 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
22500 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
22501 destmem
= adjust_address (destmem
, mode
, 0);
22505 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
22506 srcmem
= adjust_address (srcmem
, mode
, 0);
22508 /* When unrolling for chips that reorder memory reads and writes,
22509 we can save registers by using single temporary.
22510 Also using 4 temporaries is overkill in 32bit mode. */
22511 if (!TARGET_64BIT
&& 0)
22513 for (i
= 0; i
< unroll
; i
++)
22518 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22520 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22522 emit_move_insn (destmem
, srcmem
);
22528 gcc_assert (unroll
<= 4);
22529 for (i
= 0; i
< unroll
; i
++)
22531 tmpreg
[i
] = gen_reg_rtx (mode
);
22535 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
22537 emit_move_insn (tmpreg
[i
], srcmem
);
22539 for (i
= 0; i
< unroll
; i
++)
22544 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22546 emit_move_insn (destmem
, tmpreg
[i
]);
22551 for (i
= 0; i
< unroll
; i
++)
22555 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
22556 emit_move_insn (destmem
, value
);
22559 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
22560 true, OPTAB_LIB_WIDEN
);
22562 emit_move_insn (iter
, tmp
);
22564 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
22566 if (expected_size
!= -1)
22568 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
22569 if (expected_size
== 0)
22571 else if (expected_size
> REG_BR_PROB_BASE
)
22572 predict_jump (REG_BR_PROB_BASE
- 1);
22574 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
22577 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
22578 iter
= ix86_zero_extend_to_Pmode (iter
);
22579 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
22580 true, OPTAB_LIB_WIDEN
);
22581 if (tmp
!= destptr
)
22582 emit_move_insn (destptr
, tmp
);
22585 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
22586 true, OPTAB_LIB_WIDEN
);
22588 emit_move_insn (srcptr
, tmp
);
22590 emit_label (out_label
);
22593 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22594 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22595 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22596 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
22597 ORIG_VALUE is the original value passed to memset to fill the memory with.
22598 Other arguments have same meaning as for previous function. */
22601 expand_set_or_movmem_via_rep (rtx destmem
, rtx srcmem
,
22602 rtx destptr
, rtx srcptr
, rtx value
, rtx orig_value
,
22604 enum machine_mode mode
, bool issetmem
)
22609 HOST_WIDE_INT rounded_count
;
22611 /* If possible, it is shorter to use rep movs.
22612 TODO: Maybe it is better to move this logic to decide_alg. */
22613 if (mode
== QImode
&& CONST_INT_P (count
) && !(INTVAL (count
) & 3)
22614 && (!issetmem
|| orig_value
== const0_rtx
))
22617 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
22618 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
22620 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
,
22621 GET_MODE_SIZE (mode
)));
22622 if (mode
!= QImode
)
22624 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22625 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22626 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
22629 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
22630 if ((!issetmem
|| orig_value
== const0_rtx
) && CONST_INT_P (count
))
22632 rounded_count
= (INTVAL (count
)
22633 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22634 destmem
= shallow_copy_rtx (destmem
);
22635 set_mem_size (destmem
, rounded_count
);
22637 else if (MEM_SIZE_KNOWN_P (destmem
))
22638 clear_mem_size (destmem
);
22642 value
= force_reg (mode
, gen_lowpart (mode
, value
));
22643 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
22647 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
22648 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
22649 if (mode
!= QImode
)
22651 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
22652 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
22653 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
22656 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
22657 if (CONST_INT_P (count
))
22659 rounded_count
= (INTVAL (count
)
22660 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
22661 srcmem
= shallow_copy_rtx (srcmem
);
22662 set_mem_size (srcmem
, rounded_count
);
22666 if (MEM_SIZE_KNOWN_P (srcmem
))
22667 clear_mem_size (srcmem
);
22669 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
22674 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
22676 SRC is passed by pointer to be updated on return.
22677 Return value is updated DST. */
22679 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
22680 HOST_WIDE_INT size_to_move
)
22682 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
22683 enum insn_code code
;
22684 enum machine_mode move_mode
;
22687 /* Find the widest mode in which we could perform moves.
22688 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22689 it until move of such size is supported. */
22690 piece_size
= 1 << floor_log2 (size_to_move
);
22691 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22692 code
= optab_handler (mov_optab
, move_mode
);
22693 while (code
== CODE_FOR_nothing
&& piece_size
> 1)
22696 move_mode
= mode_for_size (piece_size
* BITS_PER_UNIT
, MODE_INT
, 0);
22697 code
= optab_handler (mov_optab
, move_mode
);
22700 /* Find the corresponding vector mode with the same size as MOVE_MODE.
22701 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
22702 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
22704 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
22705 move_mode
= mode_for_vector (word_mode
, nunits
);
22706 code
= optab_handler (mov_optab
, move_mode
);
22707 if (code
== CODE_FOR_nothing
)
22709 move_mode
= word_mode
;
22710 piece_size
= GET_MODE_SIZE (move_mode
);
22711 code
= optab_handler (mov_optab
, move_mode
);
22714 gcc_assert (code
!= CODE_FOR_nothing
);
22716 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22717 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
22719 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22720 gcc_assert (size_to_move
% piece_size
== 0);
22721 adjust
= GEN_INT (piece_size
);
22722 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22724 /* We move from memory to memory, so we'll need to do it via
22725 a temporary register. */
22726 tempreg
= gen_reg_rtx (move_mode
);
22727 emit_insn (GEN_FCN (code
) (tempreg
, src
));
22728 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
22730 emit_move_insn (destptr
,
22731 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22732 emit_move_insn (srcptr
,
22733 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
22735 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22737 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
22741 /* Update DST and SRC rtx. */
22746 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
22748 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
22749 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
22752 if (CONST_INT_P (count
))
22754 HOST_WIDE_INT countval
= INTVAL (count
);
22755 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22758 /* For now MAX_SIZE should be a power of 2. This assert could be
22759 relaxed, but it'll require a bit more complicated epilogue
22761 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22762 for (i
= max_size
; i
>= 1; i
>>= 1)
22764 if (epilogue_size
& i
)
22765 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
22771 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22772 count
, 1, OPTAB_DIRECT
);
22773 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22774 count
, QImode
, 1, 4, false);
22778 /* When there are stringops, we can cheaply increase dest and src pointers.
22779 Otherwise we save code size by maintaining offset (zero is readily
22780 available from preceding rep operation) and using x86 addressing modes.
22782 if (TARGET_SINGLE_STRINGOP
)
22786 rtx label
= ix86_expand_aligntest (count
, 4, true);
22787 src
= change_address (srcmem
, SImode
, srcptr
);
22788 dest
= change_address (destmem
, SImode
, destptr
);
22789 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22790 emit_label (label
);
22791 LABEL_NUSES (label
) = 1;
22795 rtx label
= ix86_expand_aligntest (count
, 2, true);
22796 src
= change_address (srcmem
, HImode
, srcptr
);
22797 dest
= change_address (destmem
, HImode
, destptr
);
22798 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22799 emit_label (label
);
22800 LABEL_NUSES (label
) = 1;
22804 rtx label
= ix86_expand_aligntest (count
, 1, true);
22805 src
= change_address (srcmem
, QImode
, srcptr
);
22806 dest
= change_address (destmem
, QImode
, destptr
);
22807 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22808 emit_label (label
);
22809 LABEL_NUSES (label
) = 1;
22814 rtx offset
= force_reg (Pmode
, const0_rtx
);
22819 rtx label
= ix86_expand_aligntest (count
, 4, true);
22820 src
= change_address (srcmem
, SImode
, srcptr
);
22821 dest
= change_address (destmem
, SImode
, destptr
);
22822 emit_move_insn (dest
, src
);
22823 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22824 true, OPTAB_LIB_WIDEN
);
22826 emit_move_insn (offset
, tmp
);
22827 emit_label (label
);
22828 LABEL_NUSES (label
) = 1;
22832 rtx label
= ix86_expand_aligntest (count
, 2, true);
22833 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22834 src
= change_address (srcmem
, HImode
, tmp
);
22835 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22836 dest
= change_address (destmem
, HImode
, tmp
);
22837 emit_move_insn (dest
, src
);
22838 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22839 true, OPTAB_LIB_WIDEN
);
22841 emit_move_insn (offset
, tmp
);
22842 emit_label (label
);
22843 LABEL_NUSES (label
) = 1;
22847 rtx label
= ix86_expand_aligntest (count
, 1, true);
22848 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22849 src
= change_address (srcmem
, QImode
, tmp
);
22850 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22851 dest
= change_address (destmem
, QImode
, tmp
);
22852 emit_move_insn (dest
, src
);
22853 emit_label (label
);
22854 LABEL_NUSES (label
) = 1;
22859 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
22860 with value PROMOTED_VAL.
22861 SRC is passed by pointer to be updated on return.
22862 Return value is updated DST. */
22864 emit_memset (rtx destmem
, rtx destptr
, rtx promoted_val
,
22865 HOST_WIDE_INT size_to_move
)
22867 rtx dst
= destmem
, adjust
;
22868 enum insn_code code
;
22869 enum machine_mode move_mode
;
22872 /* Find the widest mode in which we could perform moves.
22873 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
22874 it until move of such size is supported. */
22875 move_mode
= GET_MODE (promoted_val
);
22876 if (move_mode
== VOIDmode
)
22877 move_mode
= QImode
;
22878 if (size_to_move
< GET_MODE_SIZE (move_mode
))
22880 move_mode
= mode_for_size (size_to_move
* BITS_PER_UNIT
, MODE_INT
, 0);
22881 promoted_val
= gen_lowpart (move_mode
, promoted_val
);
22883 piece_size
= GET_MODE_SIZE (move_mode
);
22884 code
= optab_handler (mov_optab
, move_mode
);
22885 gcc_assert (code
!= CODE_FOR_nothing
&& promoted_val
!= NULL_RTX
);
22887 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
22889 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
22890 gcc_assert (size_to_move
% piece_size
== 0);
22891 adjust
= GEN_INT (piece_size
);
22892 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
22894 if (piece_size
<= GET_MODE_SIZE (word_mode
))
22896 emit_insn (gen_strset (destptr
, dst
, promoted_val
));
22897 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22902 emit_insn (GEN_FCN (code
) (dst
, promoted_val
));
22904 emit_move_insn (destptr
,
22905 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
22907 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
22911 /* Update DST rtx. */
22914 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22916 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22917 rtx count
, int max_size
)
22920 expand_simple_binop (counter_mode (count
), AND
, count
,
22921 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22922 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22923 gen_lowpart (QImode
, value
), count
, QImode
,
22924 1, max_size
/ 2, true);
22927 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22929 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx vec_value
,
22930 rtx count
, int max_size
)
22934 if (CONST_INT_P (count
))
22936 HOST_WIDE_INT countval
= INTVAL (count
);
22937 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
22940 /* For now MAX_SIZE should be a power of 2. This assert could be
22941 relaxed, but it'll require a bit more complicated epilogue
22943 gcc_assert ((max_size
& (max_size
- 1)) == 0);
22944 for (i
= max_size
; i
>= 1; i
>>= 1)
22946 if (epilogue_size
& i
)
22948 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
22949 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
22951 destmem
= emit_memset (destmem
, destptr
, value
, i
);
22958 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22963 rtx label
= ix86_expand_aligntest (count
, 16, true);
22966 dest
= change_address (destmem
, DImode
, destptr
);
22967 emit_insn (gen_strset (destptr
, dest
, value
));
22968 dest
= adjust_automodify_address_nv (dest
, DImode
, destptr
, 8);
22969 emit_insn (gen_strset (destptr
, dest
, value
));
22973 dest
= change_address (destmem
, SImode
, destptr
);
22974 emit_insn (gen_strset (destptr
, dest
, value
));
22975 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 4);
22976 emit_insn (gen_strset (destptr
, dest
, value
));
22977 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 8);
22978 emit_insn (gen_strset (destptr
, dest
, value
));
22979 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 12);
22980 emit_insn (gen_strset (destptr
, dest
, value
));
22982 emit_label (label
);
22983 LABEL_NUSES (label
) = 1;
22987 rtx label
= ix86_expand_aligntest (count
, 8, true);
22990 dest
= change_address (destmem
, DImode
, destptr
);
22991 emit_insn (gen_strset (destptr
, dest
, value
));
22995 dest
= change_address (destmem
, SImode
, destptr
);
22996 emit_insn (gen_strset (destptr
, dest
, value
));
22997 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 4);
22998 emit_insn (gen_strset (destptr
, dest
, value
));
23000 emit_label (label
);
23001 LABEL_NUSES (label
) = 1;
23005 rtx label
= ix86_expand_aligntest (count
, 4, true);
23006 dest
= change_address (destmem
, SImode
, destptr
);
23007 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
23008 emit_label (label
);
23009 LABEL_NUSES (label
) = 1;
23013 rtx label
= ix86_expand_aligntest (count
, 2, true);
23014 dest
= change_address (destmem
, HImode
, destptr
);
23015 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
23016 emit_label (label
);
23017 LABEL_NUSES (label
) = 1;
23021 rtx label
= ix86_expand_aligntest (count
, 1, true);
23022 dest
= change_address (destmem
, QImode
, destptr
);
23023 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
23024 emit_label (label
);
23025 LABEL_NUSES (label
) = 1;
23029 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23030 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23031 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23033 Return value is updated DESTMEM. */
23035 expand_set_or_movmem_prologue (rtx destmem
, rtx srcmem
,
23036 rtx destptr
, rtx srcptr
, rtx value
,
23037 rtx vec_value
, rtx count
, int align
,
23038 int desired_alignment
, bool issetmem
)
23041 for (i
= 1; i
< desired_alignment
; i
<<= 1)
23045 rtx label
= ix86_expand_aligntest (destptr
, i
, false);
23048 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
23049 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
23051 destmem
= emit_memset (destmem
, destptr
, value
, i
);
23054 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
23055 ix86_adjust_counter (count
, i
);
23056 emit_label (label
);
23057 LABEL_NUSES (label
) = 1;
23058 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
23064 /* Test if COUNT&SIZE is nonzero and if so, expand movme
23065 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23066 and jump to DONE_LABEL. */
23068 expand_small_movmem_or_setmem (rtx destmem
, rtx srcmem
,
23069 rtx destptr
, rtx srcptr
,
23070 rtx value
, rtx vec_value
,
23071 rtx count
, int size
,
23072 rtx done_label
, bool issetmem
)
23074 rtx label
= ix86_expand_aligntest (count
, size
, false);
23075 enum machine_mode mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 1);
23079 /* If we do not have vector value to copy, we must reduce size. */
23084 if (GET_MODE (value
) == VOIDmode
&& size
> 8)
23086 else if (GET_MODE_SIZE (mode
) > GET_MODE_SIZE (GET_MODE (value
)))
23087 mode
= GET_MODE (value
);
23090 mode
= GET_MODE (vec_value
), value
= vec_value
;
23094 /* Choose appropriate vector mode. */
23096 mode
= TARGET_AVX
? V32QImode
: TARGET_SSE
? V16QImode
: DImode
;
23097 else if (size
>= 16)
23098 mode
= TARGET_SSE
? V16QImode
: DImode
;
23099 srcmem
= change_address (srcmem
, mode
, srcptr
);
23101 destmem
= change_address (destmem
, mode
, destptr
);
23102 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23103 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23104 for (n
= 0; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23107 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23110 emit_move_insn (destmem
, srcmem
);
23111 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23113 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23116 destmem
= offset_address (destmem
, count
, 1);
23117 destmem
= offset_address (destmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23118 GET_MODE_SIZE (mode
));
23120 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
23123 srcmem
= offset_address (srcmem
, count
, 1);
23124 srcmem
= offset_address (srcmem
, GEN_INT (-size
- GET_MODE_SIZE (mode
)),
23125 GET_MODE_SIZE (mode
));
23126 emit_move_insn (destmem
, srcmem
);
23128 emit_jump_insn (gen_jump (done_label
));
23131 emit_label (label
);
23132 LABEL_NUSES (label
) = 1;
23135 /* Handle small memcpy (up to SIZE that is supposed to be small power of 2.
23136 and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN
23137 bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can
23138 proceed with an loop copying SIZE bytes at once. Do moves in MODE.
23139 DONE_LABEL is a label after the whole copying sequence. The label is created
23140 on demand if *DONE_LABEL is NULL.
23141 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23142 bounds after the initial copies.
23144 DESTMEM/SRCMEM are memory expressions pointing to the copies block,
23145 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
23146 we will dispatch to a library call for large blocks.
23148 In pseudocode we do:
23152 Assume that SIZE is 4. Bigger sizes are handled analogously
23155 copy 4 bytes from SRCPTR to DESTPTR
23156 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23161 copy 1 byte from SRCPTR to DESTPTR
23164 copy 2 bytes from SRCPTR to DESTPTR
23165 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23170 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23171 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23173 OLD_DESPTR = DESTPTR;
23174 Align DESTPTR up to DESIRED_ALIGN
23175 SRCPTR += DESTPTR - OLD_DESTPTR
23176 COUNT -= DEST_PTR - OLD_DESTPTR
23178 Round COUNT down to multiple of SIZE
23179 << optional caller supplied zero size guard is here >>
23180 << optional caller suppplied dynamic check is here >>
23181 << caller supplied main copy loop is here >>
23186 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem
, rtx srcmem
,
23187 rtx
*destptr
, rtx
*srcptr
,
23188 enum machine_mode mode
,
23189 rtx value
, rtx vec_value
,
23195 unsigned HOST_WIDE_INT
*min_size
,
23196 bool dynamic_check
,
23199 rtx loop_label
= NULL
, label
;
23202 int prolog_size
= 0;
23205 /* Chose proper value to copy. */
23206 if (issetmem
&& VECTOR_MODE_P (mode
))
23207 mode_value
= vec_value
;
23209 mode_value
= value
;
23210 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
23212 /* See if block is big or small, handle small blocks. */
23213 if (!CONST_INT_P (*count
) && *min_size
< (unsigned HOST_WIDE_INT
)size
)
23216 loop_label
= gen_label_rtx ();
23219 *done_label
= gen_label_rtx ();
23221 emit_cmp_and_jump_insns (*count
, GEN_INT (size2
), GE
, 0, GET_MODE (*count
),
23225 /* Handle sizes > 3. */
23226 for (;size2
> 2; size2
>>= 1)
23227 expand_small_movmem_or_setmem (destmem
, srcmem
,
23231 size2
, *done_label
, issetmem
);
23232 /* Nothing to copy? Jump to DONE_LABEL if so */
23233 emit_cmp_and_jump_insns (*count
, const0_rtx
, EQ
, 0, GET_MODE (*count
),
23236 /* Do a byte copy. */
23237 destmem
= change_address (destmem
, QImode
, *destptr
);
23239 emit_move_insn (destmem
, gen_lowpart (QImode
, value
));
23242 srcmem
= change_address (srcmem
, QImode
, *srcptr
);
23243 emit_move_insn (destmem
, srcmem
);
23246 /* Handle sizes 2 and 3. */
23247 label
= ix86_expand_aligntest (*count
, 2, false);
23248 destmem
= change_address (destmem
, HImode
, *destptr
);
23249 destmem
= offset_address (destmem
, *count
, 1);
23250 destmem
= offset_address (destmem
, GEN_INT (-2), 2);
23252 emit_move_insn (destmem
, gen_lowpart (HImode
, value
));
23255 srcmem
= change_address (srcmem
, HImode
, *srcptr
);
23256 srcmem
= offset_address (srcmem
, *count
, 1);
23257 srcmem
= offset_address (srcmem
, GEN_INT (-2), 2);
23258 emit_move_insn (destmem
, srcmem
);
23261 emit_label (label
);
23262 LABEL_NUSES (label
) = 1;
23263 emit_jump_insn (gen_jump (*done_label
));
23267 gcc_assert (*min_size
>= (unsigned HOST_WIDE_INT
)size
23268 || UINTVAL (*count
) >= (unsigned HOST_WIDE_INT
)size
);
23270 /* Start memcpy for COUNT >= SIZE. */
23273 emit_label (loop_label
);
23274 LABEL_NUSES (loop_label
) = 1;
23277 /* Copy first desired_align bytes. */
23279 srcmem
= change_address (srcmem
, mode
, *srcptr
);
23280 destmem
= change_address (destmem
, mode
, *destptr
);
23281 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
23282 for (n
= 0; prolog_size
< desired_align
- align
; n
++)
23285 emit_move_insn (destmem
, mode_value
);
23288 emit_move_insn (destmem
, srcmem
);
23289 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
23291 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
23292 prolog_size
+= GET_MODE_SIZE (mode
);
23296 /* Copy last SIZE bytes. */
23297 destmem
= offset_address (destmem
, *count
, 1);
23298 destmem
= offset_address (destmem
,
23299 GEN_INT (-size
- prolog_size
),
23302 emit_move_insn (destmem
, mode_value
);
23305 srcmem
= offset_address (srcmem
, *count
, 1);
23306 srcmem
= offset_address (srcmem
,
23307 GEN_INT (-size
- prolog_size
),
23309 emit_move_insn (destmem
, srcmem
);
23311 for (n
= 1; n
* GET_MODE_SIZE (mode
) < size
; n
++)
23313 destmem
= offset_address (destmem
, modesize
, 1);
23315 emit_move_insn (destmem
, mode_value
);
23318 srcmem
= offset_address (srcmem
, modesize
, 1);
23319 emit_move_insn (destmem
, srcmem
);
23323 /* Align destination. */
23324 if (desired_align
> 1 && desired_align
> align
)
23326 rtx saveddest
= *destptr
;
23328 gcc_assert (desired_align
<= size
);
23329 /* Align destptr up, place it to new register. */
23330 *destptr
= expand_simple_binop (GET_MODE (*destptr
), PLUS
, *destptr
,
23331 GEN_INT (prolog_size
),
23332 NULL_RTX
, 1, OPTAB_DIRECT
);
23333 *destptr
= expand_simple_binop (GET_MODE (*destptr
), AND
, *destptr
,
23334 GEN_INT (-desired_align
),
23335 *destptr
, 1, OPTAB_DIRECT
);
23336 /* See how many bytes we skipped. */
23337 saveddest
= expand_simple_binop (GET_MODE (*destptr
), MINUS
, saveddest
,
23339 saveddest
, 1, OPTAB_DIRECT
);
23340 /* Adjust srcptr and count. */
23342 *srcptr
= expand_simple_binop (GET_MODE (*srcptr
), MINUS
, *srcptr
, saveddest
,
23343 *srcptr
, 1, OPTAB_DIRECT
);
23344 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23345 saveddest
, *count
, 1, OPTAB_DIRECT
);
23346 /* We copied at most size + prolog_size. */
23347 if (*min_size
> (unsigned HOST_WIDE_INT
)(size
+ prolog_size
))
23348 *min_size
= (*min_size
- size
) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23352 /* Our loops always round down the bock size, but for dispatch to library
23353 we need precise value. */
23355 *count
= expand_simple_binop (GET_MODE (*count
), AND
, *count
,
23356 GEN_INT (-size
), *count
, 1, OPTAB_DIRECT
);
23360 gcc_assert (prolog_size
== 0);
23361 /* Decrease count, so we won't end up copying last word twice. */
23362 if (!CONST_INT_P (*count
))
23363 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
23364 constm1_rtx
, *count
, 1, OPTAB_DIRECT
);
23366 *count
= GEN_INT ((UINTVAL (*count
) - 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1));
23368 *min_size
= (*min_size
- 1) & ~(unsigned HOST_WIDE_INT
)(size
- 1);
23373 /* This function is like the previous one, except here we know how many bytes
23374 need to be copied. That allows us to update alignment not only of DST, which
23375 is returned, but also of SRC, which is passed as a pointer for that
23378 expand_set_or_movmem_constant_prologue (rtx dst
, rtx
*srcp
, rtx destreg
,
23379 rtx srcreg
, rtx value
, rtx vec_value
,
23380 int desired_align
, int align_bytes
,
23384 rtx orig_dst
= dst
;
23385 rtx orig_src
= NULL
;
23386 int piece_size
= 1;
23387 int copied_bytes
= 0;
23391 gcc_assert (srcp
!= NULL
);
23396 for (piece_size
= 1;
23397 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
23400 if (align_bytes
& piece_size
)
23404 if (vec_value
&& piece_size
> GET_MODE_SIZE (GET_MODE (value
)))
23405 dst
= emit_memset (dst
, destreg
, vec_value
, piece_size
);
23407 dst
= emit_memset (dst
, destreg
, value
, piece_size
);
23410 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
23411 copied_bytes
+= piece_size
;
23414 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
23415 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23416 if (MEM_SIZE_KNOWN_P (orig_dst
))
23417 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
23421 int src_align_bytes
= get_mem_align_offset (src
, desired_align
23423 if (src_align_bytes
>= 0)
23424 src_align_bytes
= desired_align
- src_align_bytes
;
23425 if (src_align_bytes
>= 0)
23427 unsigned int src_align
;
23428 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
23430 if ((src_align_bytes
& (src_align
- 1))
23431 == (align_bytes
& (src_align
- 1)))
23434 if (src_align
> (unsigned int) desired_align
)
23435 src_align
= desired_align
;
23436 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
23437 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
23439 if (MEM_SIZE_KNOWN_P (orig_src
))
23440 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
23447 /* Return true if ALG can be used in current context.
23448 Assume we expand memset if MEMSET is true. */
23450 alg_usable_p (enum stringop_alg alg
, bool memset
)
23452 if (alg
== no_stringop
)
23454 if (alg
== vector_loop
)
23455 return TARGET_SSE
|| TARGET_AVX
;
23456 /* Algorithms using the rep prefix want at least edi and ecx;
23457 additionally, memset wants eax and memcpy wants esi. Don't
23458 consider such algorithms if the user has appropriated those
23459 registers for their own purposes. */
23460 if (alg
== rep_prefix_1_byte
23461 || alg
== rep_prefix_4_byte
23462 || alg
== rep_prefix_8_byte
)
23463 return !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
23464 || (memset
? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
23468 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23469 static enum stringop_alg
23470 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
,
23471 unsigned HOST_WIDE_INT min_size
, unsigned HOST_WIDE_INT max_size
,
23472 bool memset
, bool zero_memset
, int *dynamic_check
, bool *noalign
)
23474 const struct stringop_algs
* algs
;
23475 bool optimize_for_speed
;
23477 const struct processor_costs
*cost
;
23479 bool any_alg_usable_p
= false;
23482 *dynamic_check
= -1;
23484 /* Even if the string operation call is cold, we still might spend a lot
23485 of time processing large blocks. */
23486 if (optimize_function_for_size_p (cfun
)
23487 || (optimize_insn_for_size_p ()
23489 || (expected_size
!= -1 && expected_size
< 256))))
23490 optimize_for_speed
= false;
23492 optimize_for_speed
= true;
23494 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
23496 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
23498 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
23500 /* See maximal size for user defined algorithm. */
23501 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23503 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23504 bool usable
= alg_usable_p (candidate
, memset
);
23505 any_alg_usable_p
|= usable
;
23507 if (candidate
!= libcall
&& candidate
&& usable
)
23508 max
= algs
->size
[i
].max
;
23511 /* If expected size is not known but max size is small enough
23512 so inline version is a win, set expected size into
23514 if (max
> 1 && (unsigned HOST_WIDE_INT
) max
>= max_size
23515 && expected_size
== -1)
23516 expected_size
= min_size
/ 2 + max_size
/ 2;
23518 /* If user specified the algorithm, honnor it if possible. */
23519 if (ix86_stringop_alg
!= no_stringop
23520 && alg_usable_p (ix86_stringop_alg
, memset
))
23521 return ix86_stringop_alg
;
23522 /* rep; movq or rep; movl is the smallest variant. */
23523 else if (!optimize_for_speed
)
23526 if (!count
|| (count
& 3) || (memset
&& !zero_memset
))
23527 return alg_usable_p (rep_prefix_1_byte
, memset
)
23528 ? rep_prefix_1_byte
: loop_1_byte
;
23530 return alg_usable_p (rep_prefix_4_byte
, memset
)
23531 ? rep_prefix_4_byte
: loop
;
23533 /* Very tiny blocks are best handled via the loop, REP is expensive to
23535 else if (expected_size
!= -1 && expected_size
< 4)
23536 return loop_1_byte
;
23537 else if (expected_size
!= -1)
23539 enum stringop_alg alg
= libcall
;
23540 bool alg_noalign
= false;
23541 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
23543 /* We get here if the algorithms that were not libcall-based
23544 were rep-prefix based and we are unable to use rep prefixes
23545 based on global register usage. Break out of the loop and
23546 use the heuristic below. */
23547 if (algs
->size
[i
].max
== 0)
23549 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
23551 enum stringop_alg candidate
= algs
->size
[i
].alg
;
23553 if (candidate
!= libcall
&& alg_usable_p (candidate
, memset
))
23556 alg_noalign
= algs
->size
[i
].noalign
;
23558 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23559 last non-libcall inline algorithm. */
23560 if (TARGET_INLINE_ALL_STRINGOPS
)
23562 /* When the current size is best to be copied by a libcall,
23563 but we are still forced to inline, run the heuristic below
23564 that will pick code for medium sized blocks. */
23565 if (alg
!= libcall
)
23567 *noalign
= alg_noalign
;
23572 else if (alg_usable_p (candidate
, memset
))
23574 *noalign
= algs
->size
[i
].noalign
;
23580 /* When asked to inline the call anyway, try to pick meaningful choice.
23581 We look for maximal size of block that is faster to copy by hand and
23582 take blocks of at most of that size guessing that average size will
23583 be roughly half of the block.
23585 If this turns out to be bad, we might simply specify the preferred
23586 choice in ix86_costs. */
23587 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23588 && (algs
->unknown_size
== libcall
23589 || !alg_usable_p (algs
->unknown_size
, memset
)))
23591 enum stringop_alg alg
;
23593 /* If there aren't any usable algorithms, then recursing on
23594 smaller sizes isn't going to find anything. Just return the
23595 simple byte-at-a-time copy loop. */
23596 if (!any_alg_usable_p
)
23598 /* Pick something reasonable. */
23599 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23600 *dynamic_check
= 128;
23601 return loop_1_byte
;
23605 alg
= decide_alg (count
, max
/ 2, min_size
, max_size
, memset
,
23606 zero_memset
, dynamic_check
, noalign
);
23607 gcc_assert (*dynamic_check
== -1);
23608 gcc_assert (alg
!= libcall
);
23609 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
23610 *dynamic_check
= max
;
23613 return (alg_usable_p (algs
->unknown_size
, memset
)
23614 ? algs
->unknown_size
: libcall
);
23617 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23618 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23620 decide_alignment (int align
,
23621 enum stringop_alg alg
,
23623 enum machine_mode move_mode
)
23625 int desired_align
= 0;
23627 gcc_assert (alg
!= no_stringop
);
23629 if (alg
== libcall
)
23631 if (move_mode
== VOIDmode
)
23634 desired_align
= GET_MODE_SIZE (move_mode
);
23635 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
23636 copying whole cacheline at once. */
23637 if (TARGET_PENTIUMPRO
23638 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
23643 if (desired_align
< align
)
23644 desired_align
= align
;
23645 if (expected_size
!= -1 && expected_size
< 4)
23646 desired_align
= align
;
23648 return desired_align
;
23652 /* Helper function for memcpy. For QImode value 0xXY produce
23653 0xXYXYXYXY of wide specified by MODE. This is essentially
23654 a * 0x10101010, but we can do slightly better than
23655 synth_mult by unwinding the sequence by hand on CPUs with
23658 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
23660 enum machine_mode valmode
= GET_MODE (val
);
23662 int nops
= mode
== DImode
? 3 : 2;
23664 gcc_assert (mode
== SImode
|| mode
== DImode
|| val
== const0_rtx
);
23665 if (val
== const0_rtx
)
23666 return copy_to_mode_reg (mode
, CONST0_RTX (mode
));
23667 if (CONST_INT_P (val
))
23669 HOST_WIDE_INT v
= INTVAL (val
) & 255;
23673 if (mode
== DImode
)
23674 v
|= (v
<< 16) << 16;
23675 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
23678 if (valmode
== VOIDmode
)
23680 if (valmode
!= QImode
)
23681 val
= gen_lowpart (QImode
, val
);
23682 if (mode
== QImode
)
23684 if (!TARGET_PARTIAL_REG_STALL
)
23686 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
23687 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
23688 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
23689 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
23691 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23692 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
23693 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23698 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23700 if (!TARGET_PARTIAL_REG_STALL
)
23701 if (mode
== SImode
)
23702 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23704 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23707 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23708 NULL
, 1, OPTAB_DIRECT
);
23710 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23712 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23713 NULL
, 1, OPTAB_DIRECT
);
23714 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23715 if (mode
== SImode
)
23717 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23718 NULL
, 1, OPTAB_DIRECT
);
23719 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23724 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23725 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23726 alignment from ALIGN to DESIRED_ALIGN. */
23728 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
,
23734 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23735 promoted_val
= promote_duplicated_reg (DImode
, val
);
23736 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23737 promoted_val
= promote_duplicated_reg (SImode
, val
);
23738 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23739 promoted_val
= promote_duplicated_reg (HImode
, val
);
23741 promoted_val
= val
;
23743 return promoted_val
;
23746 /* Expand string move (memcpy) ot store (memset) operation. Use i386 string
23747 operations when profitable. The code depends upon architecture, block size
23748 and alignment, but always has one of the following overall structures:
23750 Aligned move sequence:
23752 1) Prologue guard: Conditional that jumps up to epilogues for small
23753 blocks that can be handled by epilogue alone. This is faster
23754 but also needed for correctness, since prologue assume the block
23755 is larger than the desired alignment.
23757 Optional dynamic check for size and libcall for large
23758 blocks is emitted here too, with -minline-stringops-dynamically.
23760 2) Prologue: copy first few bytes in order to get destination
23761 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
23762 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
23763 copied. We emit either a jump tree on power of two sized
23764 blocks, or a byte loop.
23766 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23767 with specified algorithm.
23769 4) Epilogue: code copying tail of the block that is too small to be
23770 handled by main body (or up to size guarded by prologue guard).
23772 Misaligned move sequence
23774 1) missaligned move prologue/epilogue containing:
23775 a) Prologue handling small memory blocks and jumping to done_label
23776 (skipped if blocks are known to be large enough)
23777 b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
23778 needed by single possibly misaligned move
23779 (skipped if alignment is not needed)
23780 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
23782 2) Zero size guard dispatching to done_label, if needed
23784 3) dispatch to library call, if needed,
23786 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
23787 with specified algorithm. */
23789 ix86_expand_set_or_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx val_exp
,
23790 rtx align_exp
, rtx expected_align_exp
,
23791 rtx expected_size_exp
, rtx min_size_exp
,
23792 rtx max_size_exp
, rtx probable_max_size_exp
,
23799 rtx jump_around_label
= NULL
;
23800 HOST_WIDE_INT align
= 1;
23801 unsigned HOST_WIDE_INT count
= 0;
23802 HOST_WIDE_INT expected_size
= -1;
23803 int size_needed
= 0, epilogue_size_needed
;
23804 int desired_align
= 0, align_bytes
= 0;
23805 enum stringop_alg alg
;
23806 rtx promoted_val
= NULL
;
23807 rtx vec_promoted_val
= NULL
;
23808 bool force_loopy_epilogue
= false;
23810 bool need_zero_guard
= false;
23812 enum machine_mode move_mode
= VOIDmode
;
23813 int unroll_factor
= 1;
23814 /* TODO: Once value ranges are available, fill in proper data. */
23815 unsigned HOST_WIDE_INT min_size
= 0;
23816 unsigned HOST_WIDE_INT max_size
= -1;
23817 unsigned HOST_WIDE_INT probable_max_size
= -1;
23818 bool misaligned_prologue_used
= false;
23820 if (CONST_INT_P (align_exp
))
23821 align
= INTVAL (align_exp
);
23822 /* i386 can do misaligned access on reasonably increased cost. */
23823 if (CONST_INT_P (expected_align_exp
)
23824 && INTVAL (expected_align_exp
) > align
)
23825 align
= INTVAL (expected_align_exp
);
23826 /* ALIGN is the minimum of destination and source alignment, but we care here
23827 just about destination alignment. */
23829 && MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
23830 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
23832 if (CONST_INT_P (count_exp
))
23833 min_size
= max_size
= probable_max_size
= count
= expected_size
23834 = INTVAL (count_exp
);
23838 min_size
= INTVAL (min_size_exp
);
23840 max_size
= INTVAL (max_size_exp
);
23841 if (probable_max_size_exp
)
23842 probable_max_size
= INTVAL (probable_max_size_exp
);
23843 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23844 expected_size
= INTVAL (expected_size_exp
);
23847 /* Make sure we don't need to care about overflow later on. */
23848 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23851 /* Step 0: Decide on preferred algorithm, desired alignment and
23852 size of chunks to be copied by main loop. */
23853 alg
= decide_alg (count
, expected_size
, min_size
, probable_max_size
,
23855 issetmem
&& val_exp
== const0_rtx
,
23856 &dynamic_check
, &noalign
);
23857 if (alg
== libcall
)
23859 gcc_assert (alg
!= no_stringop
);
23861 /* For now vector-version of memset is generated only for memory zeroing, as
23862 creating of promoted vector value is very cheap in this case. */
23863 if (issetmem
&& alg
== vector_loop
&& val_exp
!= const0_rtx
)
23864 alg
= unrolled_loop
;
23867 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
23868 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
23870 srcreg
= ix86_copy_addr_to_reg (XEXP (src
, 0));
23873 move_mode
= word_mode
;
23879 gcc_unreachable ();
23881 need_zero_guard
= true;
23882 move_mode
= QImode
;
23885 need_zero_guard
= true;
23887 case unrolled_loop
:
23888 need_zero_guard
= true;
23889 unroll_factor
= (TARGET_64BIT
? 4 : 2);
23892 need_zero_guard
= true;
23894 /* Find the widest supported mode. */
23895 move_mode
= word_mode
;
23896 while (optab_handler (mov_optab
, GET_MODE_WIDER_MODE (move_mode
))
23897 != CODE_FOR_nothing
)
23898 move_mode
= GET_MODE_WIDER_MODE (move_mode
);
23900 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23901 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23902 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
23904 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
23905 move_mode
= mode_for_vector (word_mode
, nunits
);
23906 if (optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
23907 move_mode
= word_mode
;
23909 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
23911 case rep_prefix_8_byte
:
23912 move_mode
= DImode
;
23914 case rep_prefix_4_byte
:
23915 move_mode
= SImode
;
23917 case rep_prefix_1_byte
:
23918 move_mode
= QImode
;
23921 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
23922 epilogue_size_needed
= size_needed
;
23924 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
23925 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23926 align
= desired_align
;
23928 /* Step 1: Prologue guard. */
23930 /* Alignment code needs count to be in register. */
23931 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23933 if (INTVAL (count_exp
) > desired_align
23934 && INTVAL (count_exp
) > size_needed
)
23937 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23938 if (align_bytes
<= 0)
23941 align_bytes
= desired_align
- align_bytes
;
23943 if (align_bytes
== 0)
23944 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
23946 gcc_assert (desired_align
>= 1 && align
>= 1);
23948 /* Misaligned move sequences handle both prologue and epilogue at once.
23949 Default code generation results in a smaller code for large alignments
23950 and also avoids redundant job when sizes are known precisely. */
23951 misaligned_prologue_used
23952 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
23953 && MAX (desired_align
, epilogue_size_needed
) <= 32
23954 && desired_align
<= epilogue_size_needed
23955 && ((desired_align
> align
&& !align_bytes
)
23956 || (!count
&& epilogue_size_needed
> 1)));
23958 /* Do the cheap promotion to allow better CSE across the
23959 main loop and epilogue (ie one load of the big constant in the
23961 For now the misaligned move sequences do not have fast path
23962 without broadcasting. */
23963 if (issetmem
&& ((CONST_INT_P (val_exp
) || misaligned_prologue_used
)))
23965 if (alg
== vector_loop
)
23967 gcc_assert (val_exp
== const0_rtx
);
23968 vec_promoted_val
= promote_duplicated_reg (move_mode
, val_exp
);
23969 promoted_val
= promote_duplicated_reg_to_size (val_exp
,
23970 GET_MODE_SIZE (word_mode
),
23971 desired_align
, align
);
23975 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23976 desired_align
, align
);
23979 /* Misaligned move sequences handles both prologues and epilogues at once.
23980 Default code generation results in smaller code for large alignments and
23981 also avoids redundant job when sizes are known precisely. */
23982 if (misaligned_prologue_used
)
23984 /* Misaligned move prologue handled small blocks by itself. */
23985 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
23986 (dst
, src
, &destreg
, &srcreg
,
23987 move_mode
, promoted_val
, vec_promoted_val
,
23989 &jump_around_label
,
23990 desired_align
< align
23991 ? MAX (desired_align
, epilogue_size_needed
) : epilogue_size_needed
,
23992 desired_align
, align
, &min_size
, dynamic_check
, issetmem
);
23994 src
= change_address (src
, BLKmode
, srcreg
);
23995 dst
= change_address (dst
, BLKmode
, destreg
);
23996 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
23997 epilogue_size_needed
= 0;
23998 if (need_zero_guard
&& !min_size
)
24000 /* It is possible that we copied enough so the main loop will not
24002 gcc_assert (size_needed
> 1);
24003 if (jump_around_label
== NULL_RTX
)
24004 jump_around_label
= gen_label_rtx ();
24005 emit_cmp_and_jump_insns (count_exp
,
24006 GEN_INT (size_needed
),
24007 LTU
, 0, counter_mode (count_exp
), 1, jump_around_label
);
24008 if (expected_size
== -1
24009 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
24010 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
24012 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
24015 /* Ensure that alignment prologue won't copy past end of block. */
24016 else if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
24018 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
24019 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24020 Make sure it is power of 2. */
24021 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
24023 /* To improve performance of small blocks, we jump around the VAL
24024 promoting mode. This mean that if the promoted VAL is not constant,
24025 we might not use it in the epilogue and have to use byte
24027 if (issetmem
&& epilogue_size_needed
> 2 && !promoted_val
)
24028 force_loopy_epilogue
= true;
24029 if ((count
&& count
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
24030 || max_size
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
24032 /* If main algorithm works on QImode, no epilogue is needed.
24033 For small sizes just don't align anything. */
24034 if (size_needed
== 1)
24035 desired_align
= align
;
24040 && min_size
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
24042 label
= gen_label_rtx ();
24043 emit_cmp_and_jump_insns (count_exp
,
24044 GEN_INT (epilogue_size_needed
),
24045 LTU
, 0, counter_mode (count_exp
), 1, label
);
24046 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
24047 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
24049 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
24053 /* Emit code to decide on runtime whether library call or inline should be
24055 if (dynamic_check
!= -1)
24057 if (!issetmem
&& CONST_INT_P (count_exp
))
24059 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
24061 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
24062 count_exp
= const0_rtx
;
24068 rtx hot_label
= gen_label_rtx ();
24069 jump_around_label
= gen_label_rtx ();
24070 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
24071 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
24072 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
24074 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
24076 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
24077 emit_jump (jump_around_label
);
24078 emit_label (hot_label
);
24082 /* Step 2: Alignment prologue. */
24083 /* Do the expensive promotion once we branched off the small blocks. */
24084 if (issetmem
&& !promoted_val
)
24085 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
24086 desired_align
, align
);
24088 if (desired_align
> align
&& !misaligned_prologue_used
)
24090 if (align_bytes
== 0)
24092 /* Except for the first move in prologue, we no longer know
24093 constant offset in aliasing info. It don't seems to worth
24094 the pain to maintain it for the first move, so throw away
24096 dst
= change_address (dst
, BLKmode
, destreg
);
24098 src
= change_address (src
, BLKmode
, srcreg
);
24099 dst
= expand_set_or_movmem_prologue (dst
, src
, destreg
, srcreg
,
24100 promoted_val
, vec_promoted_val
,
24101 count_exp
, align
, desired_align
,
24103 /* At most desired_align - align bytes are copied. */
24104 if (min_size
< (unsigned)(desired_align
- align
))
24107 min_size
-= desired_align
- align
;
24111 /* If we know how many bytes need to be stored before dst is
24112 sufficiently aligned, maintain aliasing info accurately. */
24113 dst
= expand_set_or_movmem_constant_prologue (dst
, &src
, destreg
,
24121 count_exp
= plus_constant (counter_mode (count_exp
),
24122 count_exp
, -align_bytes
);
24123 count
-= align_bytes
;
24124 min_size
-= align_bytes
;
24125 max_size
-= align_bytes
;
24127 if (need_zero_guard
24129 && (count
< (unsigned HOST_WIDE_INT
) size_needed
24130 || (align_bytes
== 0
24131 && count
< ((unsigned HOST_WIDE_INT
) size_needed
24132 + desired_align
- align
))))
24134 /* It is possible that we copied enough so the main loop will not
24136 gcc_assert (size_needed
> 1);
24137 if (label
== NULL_RTX
)
24138 label
= gen_label_rtx ();
24139 emit_cmp_and_jump_insns (count_exp
,
24140 GEN_INT (size_needed
),
24141 LTU
, 0, counter_mode (count_exp
), 1, label
);
24142 if (expected_size
== -1
24143 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
24144 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
24146 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
24149 if (label
&& size_needed
== 1)
24151 emit_label (label
);
24152 LABEL_NUSES (label
) = 1;
24154 epilogue_size_needed
= 1;
24156 promoted_val
= val_exp
;
24158 else if (label
== NULL_RTX
&& !misaligned_prologue_used
)
24159 epilogue_size_needed
= size_needed
;
24161 /* Step 3: Main loop. */
24168 gcc_unreachable ();
24171 case unrolled_loop
:
24172 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, promoted_val
,
24173 count_exp
, move_mode
, unroll_factor
,
24174 expected_size
, issetmem
);
24177 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
,
24178 vec_promoted_val
, count_exp
, move_mode
,
24179 unroll_factor
, expected_size
, issetmem
);
24181 case rep_prefix_8_byte
:
24182 case rep_prefix_4_byte
:
24183 case rep_prefix_1_byte
:
24184 expand_set_or_movmem_via_rep (dst
, src
, destreg
, srcreg
, promoted_val
,
24185 val_exp
, count_exp
, move_mode
, issetmem
);
24188 /* Adjust properly the offset of src and dest memory for aliasing. */
24189 if (CONST_INT_P (count_exp
))
24192 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
24193 (count
/ size_needed
) * size_needed
);
24194 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
24195 (count
/ size_needed
) * size_needed
);
24200 src
= change_address (src
, BLKmode
, srcreg
);
24201 dst
= change_address (dst
, BLKmode
, destreg
);
24204 /* Step 4: Epilogue to copy the remaining bytes. */
24208 /* When the main loop is done, COUNT_EXP might hold original count,
24209 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24210 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24211 bytes. Compensate if needed. */
24213 if (size_needed
< epilogue_size_needed
)
24216 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
24217 GEN_INT (size_needed
- 1), count_exp
, 1,
24219 if (tmp
!= count_exp
)
24220 emit_move_insn (count_exp
, tmp
);
24222 emit_label (label
);
24223 LABEL_NUSES (label
) = 1;
24226 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
24228 if (force_loopy_epilogue
)
24229 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
24230 epilogue_size_needed
);
24234 expand_setmem_epilogue (dst
, destreg
, promoted_val
,
24235 vec_promoted_val
, count_exp
,
24236 epilogue_size_needed
);
24238 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
24239 epilogue_size_needed
);
24242 if (jump_around_label
)
24243 emit_label (jump_around_label
);
24248 /* Expand the appropriate insns for doing strlen if not just doing
24251 out = result, initialized with the start address
24252 align_rtx = alignment of the address.
24253 scratch = scratch register, initialized with the startaddress when
24254 not aligned, otherwise undefined
24256 This is just the body. It needs the initializations mentioned above and
24257 some address computing at the end. These things are done in i386.md. */
24260 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
24264 rtx align_2_label
= NULL_RTX
;
24265 rtx align_3_label
= NULL_RTX
;
24266 rtx align_4_label
= gen_label_rtx ();
24267 rtx end_0_label
= gen_label_rtx ();
24269 rtx tmpreg
= gen_reg_rtx (SImode
);
24270 rtx scratch
= gen_reg_rtx (SImode
);
24274 if (CONST_INT_P (align_rtx
))
24275 align
= INTVAL (align_rtx
);
24277 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24279 /* Is there a known alignment and is it less than 4? */
24282 rtx scratch1
= gen_reg_rtx (Pmode
);
24283 emit_move_insn (scratch1
, out
);
24284 /* Is there a known alignment and is it not 2? */
24287 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
24288 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
24290 /* Leave just the 3 lower bits. */
24291 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
24292 NULL_RTX
, 0, OPTAB_WIDEN
);
24294 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24295 Pmode
, 1, align_4_label
);
24296 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
24297 Pmode
, 1, align_2_label
);
24298 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
24299 Pmode
, 1, align_3_label
);
24303 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24304 check if is aligned to 4 - byte. */
24306 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
24307 NULL_RTX
, 0, OPTAB_WIDEN
);
24309 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
24310 Pmode
, 1, align_4_label
);
24313 mem
= change_address (src
, QImode
, out
);
24315 /* Now compare the bytes. */
24317 /* Compare the first n unaligned byte on a byte per byte basis. */
24318 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
24319 QImode
, 1, end_0_label
);
24321 /* Increment the address. */
24322 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24324 /* Not needed with an alignment of 2 */
24327 emit_label (align_2_label
);
24329 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24332 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24334 emit_label (align_3_label
);
24337 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
24340 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
24343 /* Generate loop to check 4 bytes at a time. It is not a good idea to
24344 align this loop. It gives only huge programs, but does not help to
24346 emit_label (align_4_label
);
24348 mem
= change_address (src
, SImode
, out
);
24349 emit_move_insn (scratch
, mem
);
24350 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
24352 /* This formula yields a nonzero result iff one of the bytes is zero.
24353 This saves three branches inside loop and many cycles. */
24355 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
24356 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
24357 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
24358 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
24359 gen_int_mode (0x80808080, SImode
)));
24360 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
24365 rtx reg
= gen_reg_rtx (SImode
);
24366 rtx reg2
= gen_reg_rtx (Pmode
);
24367 emit_move_insn (reg
, tmpreg
);
24368 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
24370 /* If zero is not in the first two bytes, move two bytes forward. */
24371 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24372 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24373 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24374 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
24375 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
24378 /* Emit lea manually to avoid clobbering of flags. */
24379 emit_insn (gen_rtx_SET (SImode
, reg2
,
24380 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
24382 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24383 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
24384 emit_insn (gen_rtx_SET (VOIDmode
, out
,
24385 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
24391 rtx end_2_label
= gen_label_rtx ();
24392 /* Is zero in the first two bytes? */
24394 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
24395 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
24396 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
24397 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
24398 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
24400 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
24401 JUMP_LABEL (tmp
) = end_2_label
;
24403 /* Not in the first two. Move two bytes forward. */
24404 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
24405 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
24407 emit_label (end_2_label
);
24411 /* Avoid branch in fixing the byte. */
24412 tmpreg
= gen_lowpart (QImode
, tmpreg
);
24413 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
24414 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
24415 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
24416 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
24418 emit_label (end_0_label
);
24421 /* Expand strlen. */
24424 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
24426 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
24428 /* The generic case of strlen expander is long. Avoid it's
24429 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
24431 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24432 && !TARGET_INLINE_ALL_STRINGOPS
24433 && !optimize_insn_for_size_p ()
24434 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
24437 addr
= force_reg (Pmode
, XEXP (src
, 0));
24438 scratch1
= gen_reg_rtx (Pmode
);
24440 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
24441 && !optimize_insn_for_size_p ())
24443 /* Well it seems that some optimizer does not combine a call like
24444 foo(strlen(bar), strlen(bar));
24445 when the move and the subtraction is done here. It does calculate
24446 the length just once when these instructions are done inside of
24447 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24448 often used and I use one fewer register for the lifetime of
24449 output_strlen_unroll() this is better. */
24451 emit_move_insn (out
, addr
);
24453 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
24455 /* strlensi_unroll_1 returns the address of the zero at the end of
24456 the string, like memchr(), so compute the length by subtracting
24457 the start address. */
24458 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
24464 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24465 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
24468 scratch2
= gen_reg_rtx (Pmode
);
24469 scratch3
= gen_reg_rtx (Pmode
);
24470 scratch4
= force_reg (Pmode
, constm1_rtx
);
24472 emit_move_insn (scratch3
, addr
);
24473 eoschar
= force_reg (QImode
, eoschar
);
24475 src
= replace_equiv_address_nv (src
, scratch3
);
24477 /* If .md starts supporting :P, this can be done in .md. */
24478 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
24479 scratch4
), UNSPEC_SCAS
);
24480 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
24481 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
24482 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
24487 /* For given symbol (function) construct code to compute address of it's PLT
24488 entry in large x86-64 PIC model. */
24490 construct_plt_address (rtx symbol
)
24494 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
24495 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
);
24496 gcc_assert (Pmode
== DImode
);
24498 tmp
= gen_reg_rtx (Pmode
);
24499 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
24501 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
24502 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
24507 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
24509 rtx pop
, bool sibcall
)
24511 unsigned int const cregs_size
24512 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers
);
24513 rtx vec
[3 + cregs_size
];
24514 rtx use
= NULL
, call
;
24515 unsigned int vec_len
= 0;
24517 if (pop
== const0_rtx
)
24519 gcc_assert (!TARGET_64BIT
|| !pop
);
24521 if (TARGET_MACHO
&& !TARGET_64BIT
)
24524 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
24525 fnaddr
= machopic_indirect_call_target (fnaddr
);
24530 /* Static functions and indirect calls don't need the pic register. */
24533 || (ix86_cmodel
== CM_LARGE_PIC
24534 && DEFAULT_ABI
!= MS_ABI
))
24535 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24536 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
24537 use_reg (&use
, pic_offset_table_rtx
);
24540 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
24542 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
24543 emit_move_insn (al
, callarg2
);
24544 use_reg (&use
, al
);
24547 if (ix86_cmodel
== CM_LARGE_PIC
24550 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
24551 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
24552 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
24554 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
24555 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
24557 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
24558 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
24561 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
24563 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
24564 vec
[vec_len
++] = call
;
24568 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
24569 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
24570 vec
[vec_len
++] = pop
;
24573 if (TARGET_64BIT_MS_ABI
24574 && (!callarg2
|| INTVAL (callarg2
) != -2))
24578 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
24579 UNSPEC_MS_TO_SYSV_CALL
);
24581 for (i
= 0; i
< cregs_size
; i
++)
24583 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
24584 enum machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
24587 = gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (mode
, regno
));
24592 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
24593 call
= emit_call_insn (call
);
24595 CALL_INSN_FUNCTION_USAGE (call
) = use
;
24600 /* Output the assembly for a call instruction. */
24603 ix86_output_call_insn (rtx insn
, rtx call_op
)
24605 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
24606 bool seh_nop_p
= false;
24609 if (SIBLING_CALL_P (insn
))
24613 /* SEH epilogue detection requires the indirect branch case
24614 to include REX.W. */
24615 else if (TARGET_SEH
)
24616 xasm
= "rex.W jmp %A0";
24620 output_asm_insn (xasm
, &call_op
);
24624 /* SEH unwinding can require an extra nop to be emitted in several
24625 circumstances. Determine if we have one of those. */
24630 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
24632 /* If we get to another real insn, we don't need the nop. */
24636 /* If we get to the epilogue note, prevent a catch region from
24637 being adjacent to the standard epilogue sequence. If non-
24638 call-exceptions, we'll have done this during epilogue emission. */
24639 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
24640 && !flag_non_call_exceptions
24641 && !can_throw_internal (insn
))
24648 /* If we didn't find a real insn following the call, prevent the
24649 unwinder from looking into the next function. */
24655 xasm
= "call\t%P0";
24657 xasm
= "call\t%A0";
24659 output_asm_insn (xasm
, &call_op
);
24667 /* Clear stack slot assignments remembered from previous functions.
24668 This is called from INIT_EXPANDERS once before RTL is emitted for each
24671 static struct machine_function
*
24672 ix86_init_machine_status (void)
24674 struct machine_function
*f
;
24676 f
= ggc_alloc_cleared_machine_function ();
24677 f
->use_fast_prologue_epilogue_nregs
= -1;
24678 f
->call_abi
= ix86_abi
;
24683 /* Return a MEM corresponding to a stack slot with mode MODE.
24684 Allocate a new slot if necessary.
24686 The RTL for a function can have several slots available: N is
24687 which slot to use. */
24690 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
24692 struct stack_local_entry
*s
;
24694 gcc_assert (n
< MAX_386_STACK_LOCALS
);
24696 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24697 if (s
->mode
== mode
&& s
->n
== n
)
24698 return validize_mem (copy_rtx (s
->rtl
));
24700 s
= ggc_alloc_stack_local_entry ();
24703 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
24705 s
->next
= ix86_stack_locals
;
24706 ix86_stack_locals
= s
;
24707 return validize_mem (s
->rtl
);
24711 ix86_instantiate_decls (void)
24713 struct stack_local_entry
*s
;
24715 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
24716 if (s
->rtl
!= NULL_RTX
)
24717 instantiate_decl_rtl (s
->rtl
);
24720 /* Check whether x86 address PARTS is a pc-relative address. */
24723 rip_relative_addr_p (struct ix86_address
*parts
)
24725 rtx base
, index
, disp
;
24727 base
= parts
->base
;
24728 index
= parts
->index
;
24729 disp
= parts
->disp
;
24731 if (disp
&& !base
&& !index
)
24737 if (GET_CODE (disp
) == CONST
)
24738 symbol
= XEXP (disp
, 0);
24739 if (GET_CODE (symbol
) == PLUS
24740 && CONST_INT_P (XEXP (symbol
, 1)))
24741 symbol
= XEXP (symbol
, 0);
24743 if (GET_CODE (symbol
) == LABEL_REF
24744 || (GET_CODE (symbol
) == SYMBOL_REF
24745 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
24746 || (GET_CODE (symbol
) == UNSPEC
24747 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
24748 || XINT (symbol
, 1) == UNSPEC_PCREL
24749 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
24756 /* Calculate the length of the memory address in the instruction encoding.
24757 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24758 or other prefixes. We never generate addr32 prefix for LEA insn. */
24761 memory_address_length (rtx addr
, bool lea
)
24763 struct ix86_address parts
;
24764 rtx base
, index
, disp
;
24768 if (GET_CODE (addr
) == PRE_DEC
24769 || GET_CODE (addr
) == POST_INC
24770 || GET_CODE (addr
) == PRE_MODIFY
24771 || GET_CODE (addr
) == POST_MODIFY
)
24774 ok
= ix86_decompose_address (addr
, &parts
);
24777 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
24779 /* If this is not LEA instruction, add the length of addr32 prefix. */
24780 if (TARGET_64BIT
&& !lea
24781 && (SImode_address_operand (addr
, VOIDmode
)
24782 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
24783 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
24787 index
= parts
.index
;
24790 if (base
&& GET_CODE (base
) == SUBREG
)
24791 base
= SUBREG_REG (base
);
24792 if (index
&& GET_CODE (index
) == SUBREG
)
24793 index
= SUBREG_REG (index
);
24795 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
24796 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
24799 - esp as the base always wants an index,
24800 - ebp as the base always wants a displacement,
24801 - r12 as the base always wants an index,
24802 - r13 as the base always wants a displacement. */
24804 /* Register Indirect. */
24805 if (base
&& !index
&& !disp
)
24807 /* esp (for its index) and ebp (for its displacement) need
24808 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
24810 if (base
== arg_pointer_rtx
24811 || base
== frame_pointer_rtx
24812 || REGNO (base
) == SP_REG
24813 || REGNO (base
) == BP_REG
24814 || REGNO (base
) == R12_REG
24815 || REGNO (base
) == R13_REG
)
24819 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24820 is not disp32, but disp32(%rip), so for disp32
24821 SIB byte is needed, unless print_operand_address
24822 optimizes it into disp32(%rip) or (%rip) is implied
24824 else if (disp
&& !base
&& !index
)
24827 if (rip_relative_addr_p (&parts
))
24832 /* Find the length of the displacement constant. */
24835 if (base
&& satisfies_constraint_K (disp
))
24840 /* ebp always wants a displacement. Similarly r13. */
24841 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
24844 /* An index requires the two-byte modrm form.... */
24846 /* ...like esp (or r12), which always wants an index. */
24847 || base
== arg_pointer_rtx
24848 || base
== frame_pointer_rtx
24849 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
24856 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24857 is set, expect that insn have 8bit immediate alternative. */
24859 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
24863 extract_insn_cached (insn
);
24864 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24865 if (CONSTANT_P (recog_data
.operand
[i
]))
24867 enum attr_mode mode
= get_attr_mode (insn
);
24870 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
24872 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
24879 ival
= trunc_int_for_mode (ival
, HImode
);
24882 ival
= trunc_int_for_mode (ival
, SImode
);
24887 if (IN_RANGE (ival
, -128, 127))
24904 /* Immediates for DImode instructions are encoded
24905 as 32bit sign extended values. */
24910 fatal_insn ("unknown insn mode", insn
);
24916 /* Compute default value for "length_address" attribute. */
24918 ix86_attr_length_address_default (rtx insn
)
24922 if (get_attr_type (insn
) == TYPE_LEA
)
24924 rtx set
= PATTERN (insn
), addr
;
24926 if (GET_CODE (set
) == PARALLEL
)
24927 set
= XVECEXP (set
, 0, 0);
24929 gcc_assert (GET_CODE (set
) == SET
);
24931 addr
= SET_SRC (set
);
24933 return memory_address_length (addr
, true);
24936 extract_insn_cached (insn
);
24937 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24938 if (MEM_P (recog_data
.operand
[i
]))
24940 constrain_operands_cached (reload_completed
);
24941 if (which_alternative
!= -1)
24943 const char *constraints
= recog_data
.constraints
[i
];
24944 int alt
= which_alternative
;
24946 while (*constraints
== '=' || *constraints
== '+')
24949 while (*constraints
++ != ',')
24951 /* Skip ignored operands. */
24952 if (*constraints
== 'X')
24955 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24960 /* Compute default value for "length_vex" attribute. It includes
24961 2 or 3 byte VEX prefix and 1 opcode byte. */
24964 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24968 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24969 byte VEX prefix. */
24970 if (!has_0f_opcode
|| has_vex_w
)
24973 /* We can always use 2 byte VEX prefix in 32bit. */
24977 extract_insn_cached (insn
);
24979 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24980 if (REG_P (recog_data
.operand
[i
]))
24982 /* REX.W bit uses 3 byte VEX prefix. */
24983 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24984 && GENERAL_REG_P (recog_data
.operand
[i
]))
24989 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24990 if (MEM_P (recog_data
.operand
[i
])
24991 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24998 /* Return the maximum number of instructions a cpu can issue. */
25001 ix86_issue_rate (void)
25005 case PROCESSOR_PENTIUM
:
25006 case PROCESSOR_ATOM
:
25007 case PROCESSOR_SLM
:
25009 case PROCESSOR_BTVER2
:
25010 case PROCESSOR_PENTIUM4
:
25011 case PROCESSOR_NOCONA
:
25014 case PROCESSOR_PENTIUMPRO
:
25015 case PROCESSOR_ATHLON
:
25017 case PROCESSOR_AMDFAM10
:
25018 case PROCESSOR_GENERIC
:
25019 case PROCESSOR_BTVER1
:
25022 case PROCESSOR_BDVER1
:
25023 case PROCESSOR_BDVER2
:
25024 case PROCESSOR_BDVER3
:
25025 case PROCESSOR_BDVER4
:
25026 case PROCESSOR_CORE2
:
25027 case PROCESSOR_COREI7
:
25028 case PROCESSOR_COREI7_AVX
:
25029 case PROCESSOR_HASWELL
:
25037 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25038 by DEP_INSN and nothing set by DEP_INSN. */
25041 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
25045 /* Simplify the test for uninteresting insns. */
25046 if (insn_type
!= TYPE_SETCC
25047 && insn_type
!= TYPE_ICMOV
25048 && insn_type
!= TYPE_FCMOV
25049 && insn_type
!= TYPE_IBR
)
25052 if ((set
= single_set (dep_insn
)) != 0)
25054 set
= SET_DEST (set
);
25057 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
25058 && XVECLEN (PATTERN (dep_insn
), 0) == 2
25059 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
25060 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
25062 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
25063 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
25068 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
25071 /* This test is true if the dependent insn reads the flags but
25072 not any other potentially set register. */
25073 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
25076 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
25082 /* Return true iff USE_INSN has a memory address with operands set by
25086 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
25089 extract_insn_cached (use_insn
);
25090 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
25091 if (MEM_P (recog_data
.operand
[i
]))
25093 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
25094 return modified_in_p (addr
, set_insn
) != 0;
25099 /* Helper function for exact_store_load_dependency.
25100 Return true if addr is found in insn. */
25102 exact_dependency_1 (rtx addr
, rtx insn
)
25104 enum rtx_code code
;
25105 const char *format_ptr
;
25108 code
= GET_CODE (insn
);
25112 if (rtx_equal_p (addr
, insn
))
25127 format_ptr
= GET_RTX_FORMAT (code
);
25128 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
25130 switch (*format_ptr
++)
25133 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
25137 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
25138 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
25146 /* Return true if there exists exact dependency for store & load, i.e.
25147 the same memory address is used in them. */
25149 exact_store_load_dependency (rtx store
, rtx load
)
25153 set1
= single_set (store
);
25156 if (!MEM_P (SET_DEST (set1
)))
25158 set2
= single_set (load
);
25161 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
25167 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
25169 enum attr_type insn_type
, dep_insn_type
;
25170 enum attr_memory memory
;
25172 int dep_insn_code_number
;
25174 /* Anti and output dependencies have zero cost on all CPUs. */
25175 if (REG_NOTE_KIND (link
) != 0)
25178 dep_insn_code_number
= recog_memoized (dep_insn
);
25180 /* If we can't recognize the insns, we can't really do anything. */
25181 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
25184 insn_type
= get_attr_type (insn
);
25185 dep_insn_type
= get_attr_type (dep_insn
);
25189 case PROCESSOR_PENTIUM
:
25190 /* Address Generation Interlock adds a cycle of latency. */
25191 if (insn_type
== TYPE_LEA
)
25193 rtx addr
= PATTERN (insn
);
25195 if (GET_CODE (addr
) == PARALLEL
)
25196 addr
= XVECEXP (addr
, 0, 0);
25198 gcc_assert (GET_CODE (addr
) == SET
);
25200 addr
= SET_SRC (addr
);
25201 if (modified_in_p (addr
, dep_insn
))
25204 else if (ix86_agi_dependent (dep_insn
, insn
))
25207 /* ??? Compares pair with jump/setcc. */
25208 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
25211 /* Floating point stores require value to be ready one cycle earlier. */
25212 if (insn_type
== TYPE_FMOV
25213 && get_attr_memory (insn
) == MEMORY_STORE
25214 && !ix86_agi_dependent (dep_insn
, insn
))
25218 case PROCESSOR_PENTIUMPRO
:
25219 memory
= get_attr_memory (insn
);
25221 /* INT->FP conversion is expensive. */
25222 if (get_attr_fp_int_src (dep_insn
))
25225 /* There is one cycle extra latency between an FP op and a store. */
25226 if (insn_type
== TYPE_FMOV
25227 && (set
= single_set (dep_insn
)) != NULL_RTX
25228 && (set2
= single_set (insn
)) != NULL_RTX
25229 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
25230 && MEM_P (SET_DEST (set2
)))
25233 /* Show ability of reorder buffer to hide latency of load by executing
25234 in parallel with previous instruction in case
25235 previous instruction is not needed to compute the address. */
25236 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25237 && !ix86_agi_dependent (dep_insn
, insn
))
25239 /* Claim moves to take one cycle, as core can issue one load
25240 at time and the next load can start cycle later. */
25241 if (dep_insn_type
== TYPE_IMOV
25242 || dep_insn_type
== TYPE_FMOV
)
25250 memory
= get_attr_memory (insn
);
25252 /* The esp dependency is resolved before the instruction is really
25254 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25255 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25258 /* INT->FP conversion is expensive. */
25259 if (get_attr_fp_int_src (dep_insn
))
25262 /* Show ability of reorder buffer to hide latency of load by executing
25263 in parallel with previous instruction in case
25264 previous instruction is not needed to compute the address. */
25265 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25266 && !ix86_agi_dependent (dep_insn
, insn
))
25268 /* Claim moves to take one cycle, as core can issue one load
25269 at time and the next load can start cycle later. */
25270 if (dep_insn_type
== TYPE_IMOV
25271 || dep_insn_type
== TYPE_FMOV
)
25280 case PROCESSOR_ATHLON
:
25282 case PROCESSOR_AMDFAM10
:
25283 case PROCESSOR_BDVER1
:
25284 case PROCESSOR_BDVER2
:
25285 case PROCESSOR_BDVER3
:
25286 case PROCESSOR_BDVER4
:
25287 case PROCESSOR_BTVER1
:
25288 case PROCESSOR_BTVER2
:
25289 case PROCESSOR_GENERIC
:
25290 memory
= get_attr_memory (insn
);
25292 /* Stack engine allows to execute push&pop instructions in parall. */
25293 if (((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25294 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25295 && (ix86_tune
!= PROCESSOR_ATHLON
&& ix86_tune
!= PROCESSOR_K8
))
25298 /* Show ability of reorder buffer to hide latency of load by executing
25299 in parallel with previous instruction in case
25300 previous instruction is not needed to compute the address. */
25301 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25302 && !ix86_agi_dependent (dep_insn
, insn
))
25304 enum attr_unit unit
= get_attr_unit (insn
);
25307 /* Because of the difference between the length of integer and
25308 floating unit pipeline preparation stages, the memory operands
25309 for floating point are cheaper.
25311 ??? For Athlon it the difference is most probably 2. */
25312 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
25315 loadcost
= TARGET_ATHLON
? 2 : 0;
25317 if (cost
>= loadcost
)
25324 case PROCESSOR_CORE2
:
25325 case PROCESSOR_COREI7
:
25326 case PROCESSOR_COREI7_AVX
:
25327 case PROCESSOR_HASWELL
:
25328 memory
= get_attr_memory (insn
);
25330 /* Stack engine allows to execute push&pop instructions in parall. */
25331 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
25332 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
25335 /* Show ability of reorder buffer to hide latency of load by executing
25336 in parallel with previous instruction in case
25337 previous instruction is not needed to compute the address. */
25338 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25339 && !ix86_agi_dependent (dep_insn
, insn
))
25348 case PROCESSOR_SLM
:
25349 if (!reload_completed
)
25352 /* Increase cost of integer loads. */
25353 memory
= get_attr_memory (dep_insn
);
25354 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
25356 enum attr_unit unit
= get_attr_unit (dep_insn
);
25357 if (unit
== UNIT_INTEGER
&& cost
== 1)
25359 if (memory
== MEMORY_LOAD
)
25363 /* Increase cost of ld/st for short int types only
25364 because of store forwarding issue. */
25365 rtx set
= single_set (dep_insn
);
25366 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
25367 || GET_MODE (SET_DEST (set
)) == HImode
))
25369 /* Increase cost of store/load insn if exact
25370 dependence exists and it is load insn. */
25371 enum attr_memory insn_memory
= get_attr_memory (insn
);
25372 if (insn_memory
== MEMORY_LOAD
25373 && exact_store_load_dependency (dep_insn
, insn
))
25387 /* How many alternative schedules to try. This should be as wide as the
25388 scheduling freedom in the DFA, but no wider. Making this value too
25389 large results extra work for the scheduler. */
25392 ia32_multipass_dfa_lookahead (void)
25396 case PROCESSOR_PENTIUM
:
25399 case PROCESSOR_PENTIUMPRO
:
25403 case PROCESSOR_BDVER1
:
25404 case PROCESSOR_BDVER2
:
25405 case PROCESSOR_BDVER3
:
25406 case PROCESSOR_BDVER4
:
25407 /* We use lookahead value 4 for BD both before and after reload
25408 schedules. Plan is to have value 8 included for O3. */
25411 case PROCESSOR_CORE2
:
25412 case PROCESSOR_COREI7
:
25413 case PROCESSOR_COREI7_AVX
:
25414 case PROCESSOR_HASWELL
:
25415 case PROCESSOR_ATOM
:
25416 case PROCESSOR_SLM
:
25417 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25418 as many instructions can be executed on a cycle, i.e.,
25419 issue_rate. I wonder why tuning for many CPUs does not do this. */
25420 if (reload_completed
)
25421 return ix86_issue_rate ();
25422 /* Don't use lookahead for pre-reload schedule to save compile time. */
25430 /* Return true if target platform supports macro-fusion. */
25433 ix86_macro_fusion_p ()
25435 return TARGET_FUSE_CMP_AND_BRANCH
;
25438 /* Check whether current microarchitecture support macro fusion
25439 for insn pair "CONDGEN + CONDJMP". Refer to
25440 "Intel Architectures Optimization Reference Manual". */
25443 ix86_macro_fusion_pair_p (rtx condgen
, rtx condjmp
)
25446 rtx single_set
= single_set (condgen
);
25447 enum rtx_code ccode
;
25448 rtx compare_set
= NULL_RTX
, test_if
, cond
;
25449 rtx alu_set
= NULL_RTX
, addr
= NULL_RTX
;
25451 if (get_attr_type (condgen
) != TYPE_TEST
25452 && get_attr_type (condgen
) != TYPE_ICMP
25453 && get_attr_type (condgen
) != TYPE_INCDEC
25454 && get_attr_type (condgen
) != TYPE_ALU
)
25457 if (single_set
== NULL_RTX
25458 && !TARGET_FUSE_ALU_AND_BRANCH
)
25461 if (single_set
!= NULL_RTX
)
25462 compare_set
= single_set
;
25466 rtx pat
= PATTERN (condgen
);
25467 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
25468 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
25470 rtx set_src
= SET_SRC (XVECEXP (pat
, 0, i
));
25471 if (GET_CODE (set_src
) == COMPARE
)
25472 compare_set
= XVECEXP (pat
, 0, i
);
25474 alu_set
= XVECEXP (pat
, 0, i
);
25477 if (compare_set
== NULL_RTX
)
25479 src
= SET_SRC (compare_set
);
25480 if (GET_CODE (src
) != COMPARE
)
25483 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25485 if ((MEM_P (XEXP (src
, 0))
25486 && CONST_INT_P (XEXP (src
, 1)))
25487 || (MEM_P (XEXP (src
, 1))
25488 && CONST_INT_P (XEXP (src
, 0))))
25491 /* No fusion for RIP-relative address. */
25492 if (MEM_P (XEXP (src
, 0)))
25493 addr
= XEXP (XEXP (src
, 0), 0);
25494 else if (MEM_P (XEXP (src
, 1)))
25495 addr
= XEXP (XEXP (src
, 1), 0);
25498 ix86_address parts
;
25499 int ok
= ix86_decompose_address (addr
, &parts
);
25502 if (rip_relative_addr_p (&parts
))
25506 test_if
= SET_SRC (pc_set (condjmp
));
25507 cond
= XEXP (test_if
, 0);
25508 ccode
= GET_CODE (cond
);
25509 /* Check whether conditional jump use Sign or Overflow Flags. */
25510 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25517 /* Return true for TYPE_TEST and TYPE_ICMP. */
25518 if (get_attr_type (condgen
) == TYPE_TEST
25519 || get_attr_type (condgen
) == TYPE_ICMP
)
25522 /* The following is the case that macro-fusion for alu + jmp. */
25523 if (!TARGET_FUSE_ALU_AND_BRANCH
|| !alu_set
)
25526 /* No fusion for alu op with memory destination operand. */
25527 dest
= SET_DEST (alu_set
);
25531 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25533 if (get_attr_type (condgen
) == TYPE_INCDEC
25543 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
25544 execution. It is applied if
25545 (1) IMUL instruction is on the top of list;
25546 (2) There exists the only producer of independent IMUL instruction in
25548 Return index of IMUL producer if it was found and -1 otherwise. */
25550 do_reorder_for_imul (rtx
*ready
, int n_ready
)
25552 rtx insn
, set
, insn1
, insn2
;
25553 sd_iterator_def sd_it
;
25558 if (ix86_tune
!= PROCESSOR_ATOM
)
25561 /* Check that IMUL instruction is on the top of ready list. */
25562 insn
= ready
[n_ready
- 1];
25563 set
= single_set (insn
);
25566 if (!(GET_CODE (SET_SRC (set
)) == MULT
25567 && GET_MODE (SET_SRC (set
)) == SImode
))
25570 /* Search for producer of independent IMUL instruction. */
25571 for (i
= n_ready
- 2; i
>= 0; i
--)
25574 if (!NONDEBUG_INSN_P (insn
))
25576 /* Skip IMUL instruction. */
25577 insn2
= PATTERN (insn
);
25578 if (GET_CODE (insn2
) == PARALLEL
)
25579 insn2
= XVECEXP (insn2
, 0, 0);
25580 if (GET_CODE (insn2
) == SET
25581 && GET_CODE (SET_SRC (insn2
)) == MULT
25582 && GET_MODE (SET_SRC (insn2
)) == SImode
)
25585 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
25588 con
= DEP_CON (dep
);
25589 if (!NONDEBUG_INSN_P (con
))
25591 insn1
= PATTERN (con
);
25592 if (GET_CODE (insn1
) == PARALLEL
)
25593 insn1
= XVECEXP (insn1
, 0, 0);
25595 if (GET_CODE (insn1
) == SET
25596 && GET_CODE (SET_SRC (insn1
)) == MULT
25597 && GET_MODE (SET_SRC (insn1
)) == SImode
)
25599 sd_iterator_def sd_it1
;
25601 /* Check if there is no other dependee for IMUL. */
25603 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
25606 pro
= DEP_PRO (dep1
);
25607 if (!NONDEBUG_INSN_P (pro
))
25622 /* Try to find the best candidate on the top of ready list if two insns
25623 have the same priority - candidate is best if its dependees were
25624 scheduled earlier. Applied for Silvermont only.
25625 Return true if top 2 insns must be interchanged. */
25627 swap_top_of_ready_list (rtx
*ready
, int n_ready
)
25629 rtx top
= ready
[n_ready
- 1];
25630 rtx next
= ready
[n_ready
- 2];
25632 sd_iterator_def sd_it
;
25636 #define INSN_TICK(INSN) (HID (INSN)->tick)
25638 if (ix86_tune
!= PROCESSOR_SLM
)
25641 if (!NONDEBUG_INSN_P (top
))
25643 if (!NONJUMP_INSN_P (top
))
25645 if (!NONDEBUG_INSN_P (next
))
25647 if (!NONJUMP_INSN_P (next
))
25649 set
= single_set (top
);
25652 set
= single_set (next
);
25656 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
25658 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
25660 /* Determine winner more precise. */
25661 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
25664 pro
= DEP_PRO (dep
);
25665 if (!NONDEBUG_INSN_P (pro
))
25667 if (INSN_TICK (pro
) > clock1
)
25668 clock1
= INSN_TICK (pro
);
25670 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
25673 pro
= DEP_PRO (dep
);
25674 if (!NONDEBUG_INSN_P (pro
))
25676 if (INSN_TICK (pro
) > clock2
)
25677 clock2
= INSN_TICK (pro
);
25680 if (clock1
== clock2
)
25682 /* Determine winner - load must win. */
25683 enum attr_memory memory1
, memory2
;
25684 memory1
= get_attr_memory (top
);
25685 memory2
= get_attr_memory (next
);
25686 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
25689 return (bool) (clock2
< clock1
);
25695 /* Perform possible reodering of ready list for Atom/Silvermont only.
25696 Return issue rate. */
25698 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
25701 int issue_rate
= -1;
25702 int n_ready
= *pn_ready
;
25707 /* Set up issue rate. */
25708 issue_rate
= ix86_issue_rate ();
25710 /* Do reodering for Atom/SLM only. */
25711 if (ix86_tune
!= PROCESSOR_ATOM
&& ix86_tune
!= PROCESSOR_SLM
)
25714 /* Nothing to do if ready list contains only 1 instruction. */
25718 /* Do reodering for post-reload scheduler only. */
25719 if (!reload_completed
)
25722 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
25724 if (sched_verbose
> 1)
25725 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
25726 INSN_UID (ready
[index
]));
25728 /* Put IMUL producer (ready[index]) at the top of ready list. */
25729 insn
= ready
[index
];
25730 for (i
= index
; i
< n_ready
- 1; i
++)
25731 ready
[i
] = ready
[i
+ 1];
25732 ready
[n_ready
- 1] = insn
;
25735 if (clock_var
!= 0 && swap_top_of_ready_list (ready
, n_ready
))
25737 if (sched_verbose
> 1)
25738 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
25739 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
25740 /* Swap 2 top elements of ready list. */
25741 insn
= ready
[n_ready
- 1];
25742 ready
[n_ready
- 1] = ready
[n_ready
- 2];
25743 ready
[n_ready
- 2] = insn
;
25749 ix86_class_likely_spilled_p (reg_class_t
);
25751 /* Returns true if lhs of insn is HW function argument register and set up
25752 is_spilled to true if it is likely spilled HW register. */
25754 insn_is_function_arg (rtx insn
, bool* is_spilled
)
25758 if (!NONDEBUG_INSN_P (insn
))
25760 /* Call instructions are not movable, ignore it. */
25763 insn
= PATTERN (insn
);
25764 if (GET_CODE (insn
) == PARALLEL
)
25765 insn
= XVECEXP (insn
, 0, 0);
25766 if (GET_CODE (insn
) != SET
)
25768 dst
= SET_DEST (insn
);
25769 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
25770 && ix86_function_arg_regno_p (REGNO (dst
)))
25772 /* Is it likely spilled HW register? */
25773 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
25774 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
25775 *is_spilled
= true;
25781 /* Add output dependencies for chain of function adjacent arguments if only
25782 there is a move to likely spilled HW register. Return first argument
25783 if at least one dependence was added or NULL otherwise. */
25785 add_parameter_dependencies (rtx call
, rtx head
)
25789 rtx first_arg
= NULL
;
25790 bool is_spilled
= false;
25792 head
= PREV_INSN (head
);
25794 /* Find nearest to call argument passing instruction. */
25797 last
= PREV_INSN (last
);
25800 if (!NONDEBUG_INSN_P (last
))
25802 if (insn_is_function_arg (last
, &is_spilled
))
25810 insn
= PREV_INSN (last
);
25811 if (!INSN_P (insn
))
25815 if (!NONDEBUG_INSN_P (insn
))
25820 if (insn_is_function_arg (insn
, &is_spilled
))
25822 /* Add output depdendence between two function arguments if chain
25823 of output arguments contains likely spilled HW registers. */
25825 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25826 first_arg
= last
= insn
;
25836 /* Add output or anti dependency from insn to first_arg to restrict its code
25839 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
25844 set
= single_set (insn
);
25847 tmp
= SET_DEST (set
);
25850 /* Add output dependency to the first function argument. */
25851 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
25854 /* Add anti dependency. */
25855 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
25858 /* Avoid cross block motion of function argument through adding dependency
25859 from the first non-jump instruction in bb. */
25861 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
25863 rtx insn
= BB_END (bb
);
25867 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
25869 rtx set
= single_set (insn
);
25872 avoid_func_arg_motion (arg
, insn
);
25876 if (insn
== BB_HEAD (bb
))
25878 insn
= PREV_INSN (insn
);
25882 /* Hook for pre-reload schedule - avoid motion of function arguments
25883 passed in likely spilled HW registers. */
25885 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
25888 rtx first_arg
= NULL
;
25889 if (reload_completed
)
25891 while (head
!= tail
&& DEBUG_INSN_P (head
))
25892 head
= NEXT_INSN (head
);
25893 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
25894 if (INSN_P (insn
) && CALL_P (insn
))
25896 first_arg
= add_parameter_dependencies (insn
, head
);
25899 /* Add dependee for first argument to predecessors if only
25900 region contains more than one block. */
25901 basic_block bb
= BLOCK_FOR_INSN (insn
);
25902 int rgn
= CONTAINING_RGN (bb
->index
);
25903 int nr_blks
= RGN_NR_BLOCKS (rgn
);
25904 /* Skip trivial regions and region head blocks that can have
25905 predecessors outside of region. */
25906 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
25910 /* Assume that region is SCC, i.e. all immediate predecessors
25911 of non-head block are in the same region. */
25912 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
25914 /* Avoid creating of loop-carried dependencies through
25915 using topological odering in region. */
25916 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
25917 add_dependee_for_func_arg (first_arg
, e
->src
);
25925 else if (first_arg
)
25926 avoid_func_arg_motion (first_arg
, insn
);
25929 /* Hook for pre-reload schedule - set priority of moves from likely spilled
25930 HW registers to maximum, to schedule them at soon as possible. These are
25931 moves from function argument registers at the top of the function entry
25932 and moves from function return value registers after call. */
25934 ix86_adjust_priority (rtx insn
, int priority
)
25938 if (reload_completed
)
25941 if (!NONDEBUG_INSN_P (insn
))
25944 set
= single_set (insn
);
25947 rtx tmp
= SET_SRC (set
);
25949 && HARD_REGISTER_P (tmp
)
25950 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
25951 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
25952 return current_sched_info
->sched_max_insns_priority
;
25958 /* Model decoder of Core 2/i7.
25959 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
25960 track the instruction fetch block boundaries and make sure that long
25961 (9+ bytes) instructions are assigned to D0. */
25963 /* Maximum length of an insn that can be handled by
25964 a secondary decoder unit. '8' for Core 2/i7. */
25965 static int core2i7_secondary_decoder_max_insn_size
;
25967 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
25968 '16' for Core 2/i7. */
25969 static int core2i7_ifetch_block_size
;
25971 /* Maximum number of instructions decoder can handle per cycle.
25972 '6' for Core 2/i7. */
25973 static int core2i7_ifetch_block_max_insns
;
25975 typedef struct ix86_first_cycle_multipass_data_
*
25976 ix86_first_cycle_multipass_data_t
;
25977 typedef const struct ix86_first_cycle_multipass_data_
*
25978 const_ix86_first_cycle_multipass_data_t
;
25980 /* A variable to store target state across calls to max_issue within
25982 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
25983 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
25985 /* Initialize DATA. */
25987 core2i7_first_cycle_multipass_init (void *_data
)
25989 ix86_first_cycle_multipass_data_t data
25990 = (ix86_first_cycle_multipass_data_t
) _data
;
25992 data
->ifetch_block_len
= 0;
25993 data
->ifetch_block_n_insns
= 0;
25994 data
->ready_try_change
= NULL
;
25995 data
->ready_try_change_size
= 0;
25998 /* Advancing the cycle; reset ifetch block counts. */
26000 core2i7_dfa_post_advance_cycle (void)
26002 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
26004 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
26006 data
->ifetch_block_len
= 0;
26007 data
->ifetch_block_n_insns
= 0;
26010 static int min_insn_size (rtx
);
26012 /* Filter out insns from ready_try that the core will not be able to issue
26013 on current cycle due to decoder. */
26015 core2i7_first_cycle_multipass_filter_ready_try
26016 (const_ix86_first_cycle_multipass_data_t data
,
26017 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
26024 if (ready_try
[n_ready
])
26027 insn
= get_ready_element (n_ready
);
26028 insn_size
= min_insn_size (insn
);
26030 if (/* If this is a too long an insn for a secondary decoder ... */
26031 (!first_cycle_insn_p
26032 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
26033 /* ... or it would not fit into the ifetch block ... */
26034 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
26035 /* ... or the decoder is full already ... */
26036 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
26037 /* ... mask the insn out. */
26039 ready_try
[n_ready
] = 1;
26041 if (data
->ready_try_change
)
26042 bitmap_set_bit (data
->ready_try_change
, n_ready
);
26047 /* Prepare for a new round of multipass lookahead scheduling. */
26049 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
26050 bool first_cycle_insn_p
)
26052 ix86_first_cycle_multipass_data_t data
26053 = (ix86_first_cycle_multipass_data_t
) _data
;
26054 const_ix86_first_cycle_multipass_data_t prev_data
26055 = ix86_first_cycle_multipass_data
;
26057 /* Restore the state from the end of the previous round. */
26058 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
26059 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
26061 /* Filter instructions that cannot be issued on current cycle due to
26062 decoder restrictions. */
26063 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
26064 first_cycle_insn_p
);
26067 /* INSN is being issued in current solution. Account for its impact on
26068 the decoder model. */
26070 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
26071 rtx insn
, const void *_prev_data
)
26073 ix86_first_cycle_multipass_data_t data
26074 = (ix86_first_cycle_multipass_data_t
) _data
;
26075 const_ix86_first_cycle_multipass_data_t prev_data
26076 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
26078 int insn_size
= min_insn_size (insn
);
26080 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
26081 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
26082 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
26083 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
26085 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26086 if (!data
->ready_try_change
)
26088 data
->ready_try_change
= sbitmap_alloc (n_ready
);
26089 data
->ready_try_change_size
= n_ready
;
26091 else if (data
->ready_try_change_size
< n_ready
)
26093 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
26095 data
->ready_try_change_size
= n_ready
;
26097 bitmap_clear (data
->ready_try_change
);
26099 /* Filter out insns from ready_try that the core will not be able to issue
26100 on current cycle due to decoder. */
26101 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
26105 /* Revert the effect on ready_try. */
26107 core2i7_first_cycle_multipass_backtrack (const void *_data
,
26109 int n_ready ATTRIBUTE_UNUSED
)
26111 const_ix86_first_cycle_multipass_data_t data
26112 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26113 unsigned int i
= 0;
26114 sbitmap_iterator sbi
;
26116 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
26117 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
26123 /* Save the result of multipass lookahead scheduling for the next round. */
26125 core2i7_first_cycle_multipass_end (const void *_data
)
26127 const_ix86_first_cycle_multipass_data_t data
26128 = (const_ix86_first_cycle_multipass_data_t
) _data
;
26129 ix86_first_cycle_multipass_data_t next_data
26130 = ix86_first_cycle_multipass_data
;
26134 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
26135 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
/* NOTE(review): garbled by extraction -- fused line numbers and missing
   structural lines.  Restore from pristine i386.c before editing.
   Grounded in the visible code: frees the ready_try_change sbitmap if
   allocated, then NULLs the pointer and zeroes its recorded size so the
   data can be reused safely (defends against double-free).  */
26139 /* Deallocate target data. */
26141 core2i7_first_cycle_multipass_fini (void *_data
)
26143 ix86_first_cycle_multipass_data_t data
26144 = (ix86_first_cycle_multipass_data_t
) _data
;
26146 if (data
->ready_try_change
)
26148 sbitmap_free (data
->ready_try_change
);
26149 data
->ready_try_change
= NULL
;
26150 data
->ready_try_change_size
= 0;
/* NOTE(review): garbled by extraction -- the `switch (ix86_tune)` header,
   `break` statements, `default:` label and braces are missing (gaps in the
   fused numbering, e.g. 26163-26164, 26192-26195).  Restore from pristine
   i386.c before editing.
   Grounded in the visible code: installs the core2i7_* scheduler hooks
   into targetm.sched for PROCESSOR_CORE2/COREI7/COREI7_AVX/HASWELL -- but
   only after reload (reload_completed), to save compile time pre-reload --
   and sets the decoder parameters (secondary decoder max insn size 8,
   ifetch block size 16, max 6 insns per block); otherwise all seven hooks
   are reset to NULL.  */
26154 /* Prepare for scheduling pass. */
26156 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
26157 int verbose ATTRIBUTE_UNUSED
,
26158 int max_uid ATTRIBUTE_UNUSED
)
26160 /* Install scheduling hooks for current CPU. Some of these hooks are used
26161 in time-critical parts of the scheduler, so we only set them up when
26162 they are actually used. */
26165 case PROCESSOR_CORE2
:
26166 case PROCESSOR_COREI7
:
26167 case PROCESSOR_COREI7_AVX
:
26168 case PROCESSOR_HASWELL
:
26169 /* Do not perform multipass scheduling for pre-reload schedule
26170 to save compile time. */
26171 if (reload_completed
)
26173 targetm
.sched
.dfa_post_advance_cycle
26174 = core2i7_dfa_post_advance_cycle
;
26175 targetm
.sched
.first_cycle_multipass_init
26176 = core2i7_first_cycle_multipass_init
;
26177 targetm
.sched
.first_cycle_multipass_begin
26178 = core2i7_first_cycle_multipass_begin
;
26179 targetm
.sched
.first_cycle_multipass_issue
26180 = core2i7_first_cycle_multipass_issue
;
26181 targetm
.sched
.first_cycle_multipass_backtrack
26182 = core2i7_first_cycle_multipass_backtrack
;
26183 targetm
.sched
.first_cycle_multipass_end
26184 = core2i7_first_cycle_multipass_end
;
26185 targetm
.sched
.first_cycle_multipass_fini
26186 = core2i7_first_cycle_multipass_fini
;
26188 /* Set decoder parameters. */
26189 core2i7_secondary_decoder_max_insn_size
= 8;
26190 core2i7_ifetch_block_size
= 16;
26191 core2i7_ifetch_block_max_insns
= 6;
26194 /* ... Fall through ... */
26196 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
26197 targetm
.sched
.first_cycle_multipass_init
= NULL
;
26198 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
26199 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
26200 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
26201 targetm
.sched
.first_cycle_multipass_end
= NULL
;
26202 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
/* NOTE(review): garbled by extraction -- return type, braces and several
   `return` statements are missing (gaps at 26210, 26219, 26221, 26223-26224,
   26228-26229).  Restore from pristine i386.c before editing.
   Grounded in the visible code: for REAL_CST/VECTOR_CST/INTEGER_CST
   constants it raises alignment for DFmode (to at least 64 bits,
   presumably -- the return lines are missing; confirm against the
   original) and for 128-bit-aligned modes; long-enough string constants
   (TREE_STRING_LENGTH >= 31) get BITS_PER_WORD alignment unless
   optimizing for size.  */
26208 /* Compute the alignment given to a constant that is being placed in memory.
26209 EXP is the constant and ALIGN is the alignment that the object would
26211 The value of this function is used instead of that alignment to align
26215 ix86_constant_alignment (tree exp
, int align
)
26217 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
26218 || TREE_CODE (exp
) == INTEGER_CST
)
26220 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
26222 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
26225 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
26226 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
26227 return BITS_PER_WORD
;
/* NOTE(review): garbled by extraction -- return type, braces and the
   `return` statements after each condition are missing (gaps at 26236-26237,
   26241-26242, 26249-26250, 26260-26265, etc.).  Restore from pristine
   i386.c before editing.
   Grounded in the visible code: raises static-data alignment in stages --
   large aggregates (size >= max_align, where max_align is BITS_PER_WORD
   when optimizing for size, else min(256, MAX_OFILE_ALIGNMENT)); the
   x86-64 ABI rule that arrays/aggregates of at least 16 bytes get 128-bit
   alignment; then per-type bumps: DFmode elements to 64, 128-bit modes
   (XCmode/TCmode, ALIGN_MODE_128) to 128, for arrays, complex types,
   record/union fields, and scalar real/vector/integer types.  */
26232 /* Compute the alignment for a static variable.
26233 TYPE is the data type, and ALIGN is the alignment that
26234 the object would ordinarily have. The value of this function is used
26235 instead of that alignment to align the object. */
26238 ix86_data_alignment (tree type
, int align
, bool opt
)
26240 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
26243 && AGGREGATE_TYPE_P (type
)
26244 && TYPE_SIZE (type
)
26245 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26246 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
26247 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
26248 && align
< max_align
)
26251 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26252 to 16byte boundary. */
26255 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
26256 && TYPE_SIZE (type
)
26257 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26258 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
26259 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
26266 if (TREE_CODE (type
) == ARRAY_TYPE
)
26268 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26270 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26273 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26276 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26278 if ((TYPE_MODE (type
) == XCmode
26279 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26282 else if ((TREE_CODE (type
) == RECORD_TYPE
26283 || TREE_CODE (type
) == UNION_TYPE
26284 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26285 && TYPE_FIELDS (type
))
26287 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26289 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26292 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26293 || TREE_CODE (type
) == INTEGER_TYPE
)
26295 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26297 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* NOTE(review): garbled by extraction -- return type, local declarations
   (`tree type`, `tree decl`), braces and `return align;`/`return 128;`
   style statements are missing (gaps at 26313-26315, 26319-26326,
   26329-26330, 26335-26336, 26339-26341, etc.).  Restore from pristine
   i386.c before editing.
   Grounded in the visible code: computes alignment for locals/stack
   slots: skips realignment for DImode objects under
   -mpreferred-stack-boundary < 64 when neither type nor decl asked for
   user alignment; gives XFmode caller-save slots at least DFmode
   alignment; on TARGET_64BIT when optimizing for speed, applies the
   x86-64 ABI 16-byte rule to aggregates of size >= 16 (explicitly
   excluding va_list, per the comment); then the same per-type DFmode/64
   and 128-bit-mode/128 bumps as ix86_data_alignment for arrays, complex,
   record/union, and scalar real/vector/integer types.  */
26304 /* Compute the alignment for a local variable or a stack slot. EXP is
26305 the data type or decl itself, MODE is the widest mode available and
26306 ALIGN is the alignment that the object would ordinarily have. The
26307 value of this macro is used instead of that alignment to align the
26311 ix86_local_alignment (tree exp
, enum machine_mode mode
,
26312 unsigned int align
)
26316 if (exp
&& DECL_P (exp
))
26318 type
= TREE_TYPE (exp
);
26327 /* Don't do dynamic stack realignment for long long objects with
26328 -mpreferred-stack-boundary=2. */
26331 && ix86_preferred_stack_boundary
< 64
26332 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26333 && (!type
|| !TYPE_USER_ALIGN (type
))
26334 && (!decl
|| !DECL_USER_ALIGN (decl
)))
26337 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26338 register in MODE. We will return the largest alignment of XF
26342 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
26343 align
= GET_MODE_ALIGNMENT (DFmode
);
26347 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26348 to 16byte boundary. Exact wording is:
26350 An array uses the same alignment as its elements, except that a local or
26351 global array variable of length at least 16 bytes or
26352 a C99 variable-length array variable always has alignment of at least 16 bytes.
26354 This was added to allow use of aligned SSE instructions at arrays. This
26355 rule is meant for static storage (where compiler can not do the analysis
26356 by itself). We follow it for automatic variables only when convenient.
26357 We fully control everything in the function compiled and functions from
26358 other unit can not rely on the alignment.
26360 Exclude va_list type. It is the common case of local array where
26361 we can not benefit from the alignment.
26363 TODO: Probably one should optimize for size only when var is not escaping. */
26364 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
26367 if (AGGREGATE_TYPE_P (type
)
26368 && (va_list_type_node
== NULL_TREE
26369 || (TYPE_MAIN_VARIANT (type
)
26370 != TYPE_MAIN_VARIANT (va_list_type_node
)))
26371 && TYPE_SIZE (type
)
26372 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
26373 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
26374 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
26377 if (TREE_CODE (type
) == ARRAY_TYPE
)
26379 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
26381 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
26384 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
26386 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
26388 if ((TYPE_MODE (type
) == XCmode
26389 || TYPE_MODE (type
) == TCmode
) && align
< 128)
26392 else if ((TREE_CODE (type
) == RECORD_TYPE
26393 || TREE_CODE (type
) == UNION_TYPE
26394 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
26395 && TYPE_FIELDS (type
))
26397 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
26399 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
26402 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
26403 || TREE_CODE (type
) == INTEGER_TYPE
)
26406 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
26408 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
/* NOTE(review): garbled by extraction -- return type, local declarations,
   braces and both `return` statements are missing (gaps at 26418-26419,
   26422-26424, 26426, 26428-26435, 26437-26438, 26444-26446).  Restore
   from pristine i386.c before editing.
   Grounded in the visible code: minimum alignment for dynamic stack
   realignment.  Bails out early (presumably returning ALIGN unchanged --
   confirm against the original) on 64-bit targets, when align != 64, or
   when the preferred stack boundary is already >= 64; otherwise drops the
   requirement for DImode objects that carry no user-specified alignment,
   mirroring the realignment exclusion in ix86_local_alignment.  */
26414 /* Compute the minimum required alignment for dynamic stack realignment
26415 purposes for a local variable, parameter or a stack slot. EXP is
26416 the data type or decl itself, MODE is its mode and ALIGN is the
26417 alignment that the object would ordinarily have. */
26420 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
26421 unsigned int align
)
26425 if (exp
&& DECL_P (exp
))
26427 type
= TREE_TYPE (exp
);
26436 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
26439 /* Don't do dynamic stack realignment for long long objects with
26440 -mpreferred-stack-boundary=2. */
26441 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
26442 && (!type
|| !TYPE_USER_ALIGN (type
))
26443 && (!decl
|| !DECL_USER_ALIGN (decl
)))
/* NOTE(review): garbled by extraction -- return type, braces, the regno
   assignments after each calling-convention comment, and the early/64-bit
   returns are missing (gaps at 26451-26452, 26454-26456, 26458-26461,
   26463-26469, etc.).  Restore from pristine i386.c before editing.
   Grounded in the visible code: picks the static-chain location for a
   nested function.  R10 in 64-bit mode; ECX by default in 32-bit mode;
   EAX for fastcall/thiscall (their comments explain which argument
   registers those conventions consume); and for regparm(3), where no
   call-clobbered register is free, the chain goes on the stack -- the
   function returns a frame mem at arg_pointer - 8 and, when compiling
   the function itself, records that via ix86_static_chain_on_stack so an
   alternate ESI-pushing entry point is used (see the long comment).
   Otherwise returns the chosen register as a Pmode REG.  */
26449 /* Find a location for the static chain incoming to a nested function.
26450 This is a register, unless all free registers are used by arguments. */
26453 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
26457 if (!DECL_STATIC_CHAIN (fndecl
))
26462 /* We always use R10 in 64-bit mode. */
26470 /* By default in 32-bit mode we use ECX to pass the static chain. */
26473 fntype
= TREE_TYPE (fndecl
);
26474 ccvt
= ix86_get_callcvt (fntype
);
26475 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
26477 /* Fastcall functions use ecx/edx for arguments, which leaves
26478 us with EAX for the static chain.
26479 Thiscall functions use ecx for arguments, which also
26480 leaves us with EAX for the static chain. */
26483 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
26485 /* Thiscall functions use ecx for arguments, which leaves
26486 us with EAX and EDX for the static chain.
26487 We are using for abi-compatibility EAX. */
26490 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
26492 /* For regparm 3, we have no free call-clobbered registers in
26493 which to store the static chain. In order to implement this,
26494 we have the trampoline push the static chain to the stack.
26495 However, we can't push a value below the return address when
26496 we call the nested function directly, so we have to use an
26497 alternate entry point. For this we use ESI, and have the
26498 alternate entry point push ESI, so that things appear the
26499 same once we're executing the nested function. */
26502 if (fndecl
== current_function_decl
)
26503 ix86_static_chain_on_stack
= true;
26504 return gen_frame_mem (SImode
,
26505 plus_constant (Pmode
,
26506 arg_pointer_rtx
, -8));
26512 return gen_rtx_REG (Pmode
, regno
);
/* NOTE(review): garbled by extraction -- return type, local declarations
   (mem, fnaddr, offset, opcode, chain, disp), braces, the TARGET_64BIT
   branch structure, offset bookkeeping (`offset += ...` lines), the
   32-bit opcode selection for mov-vs-push, and the closing #endif lines
   are all missing (many gaps in the fused numbering).  Restore from
   pristine i386.c before editing.
   Grounded in the visible code: writes the trampoline's machine code
   bytes into M_TRAMP.  64-bit path: 0xbb41 = movl imm32,%r11d (short
   form, used when ptr_mode == SImode or the address zero-extends),
   0xbb49 = movabs imm64,%r11; then the static chain is stored into r10
   (shorter movl when ptr_mode == SImode), and 0x90e3ff49 encodes
   jmp *%r11 padded with a nop so the tail is a single 32-bit store.
   32-bit path: opcode 0xb8/0xb9 select mov imm,%eax / mov imm,%ecx by
   REGNO (chain), then 0xe9 = jmp rel32 whose displacement DISP is
   computed against the trampoline address; when the chain lives on the
   stack (MEM_P (chain)) the 1-byte register push at the entry is skipped
   in the offset.  Finally asserts offset <= TRAMPOLINE_SIZE and, when
   configured, calls __enable_execute_stack on the trampoline page.  */
26515 /* Emit RTL insns to initialize the variable parts of a trampoline.
26516 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26517 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26518 to be passed to the target function. */
26521 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
26527 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
26533 /* Load the function address to r11. Try to load address using
26534 the shorter movl instead of movabs. We may want to support
26535 movq for kernel mode, but kernel does not use trampolines at
26536 the moment. FNADDR is a 32bit address and may not be in
26537 DImode when ptr_mode == SImode. Always use movl in this
26539 if (ptr_mode
== SImode
26540 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
26542 fnaddr
= copy_addr_to_reg (fnaddr
);
26544 mem
= adjust_address (m_tramp
, HImode
, offset
);
26545 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
26547 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
26548 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
26553 mem
= adjust_address (m_tramp
, HImode
, offset
);
26554 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
26556 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
26557 emit_move_insn (mem
, fnaddr
);
26561 /* Load static chain using movabs to r10. Use the shorter movl
26562 instead of movabs when ptr_mode == SImode. */
26563 if (ptr_mode
== SImode
)
26574 mem
= adjust_address (m_tramp
, HImode
, offset
);
26575 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
26577 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
26578 emit_move_insn (mem
, chain_value
);
26581 /* Jump to r11; the last (unused) byte is a nop, only there to
26582 pad the write out to a single 32-bit store. */
26583 mem
= adjust_address (m_tramp
, SImode
, offset
);
26584 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
26591 /* Depending on the static chain location, either load a register
26592 with a constant, or push the constant to the stack. All of the
26593 instructions are the same size. */
26594 chain
= ix86_static_chain (fndecl
, true);
26597 switch (REGNO (chain
))
26600 opcode
= 0xb8; break;
26602 opcode
= 0xb9; break;
26604 gcc_unreachable ();
26610 mem
= adjust_address (m_tramp
, QImode
, offset
);
26611 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
26613 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26614 emit_move_insn (mem
, chain_value
);
26617 mem
= adjust_address (m_tramp
, QImode
, offset
);
26618 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
26620 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
26622 /* Compute offset from the end of the jmp to the target function.
26623 In the case in which the trampoline stores the static chain on
26624 the stack, we need to skip the first insn which pushes the
26625 (call-saved) register static chain; this push is 1 byte. */
26627 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
26628 plus_constant (Pmode
, XEXP (m_tramp
, 0),
26629 offset
- (MEM_P (chain
) ? 1 : 0)),
26630 NULL_RTX
, 1, OPTAB_DIRECT
);
26631 emit_move_insn (mem
, disp
);
26634 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
26636 #ifdef HAVE_ENABLE_EXECUTE_STACK
26637 #ifdef CHECK_EXECUTE_STACK_ENABLED
26638 if (CHECK_EXECUTE_STACK_ENABLED
)
26640 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
26641 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
/* NOTE(review): fused original line numbers from extraction; otherwise
   this declaration region is intact.
   Lazily-populated, GC-rooted (GTY) cache of the ix86 builtin non-function
   types, indexed by enum ix86_builtin_type from the generated
   i386-builtin-types.inc; filled in by ix86_get_builtin_type below.  */
26645 /* The following file contains several enumerations and data structures
26646 built from the definitions in i386-builtin-types.def. */
26648 #include "i386-builtin-types.inc"
26650 /* Table for the ix86 builtin non-function types. */
26651 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
/* NOTE(review): garbled by extraction -- return type, braces, local
   declarations (`tree type, itype; int quals;`), the cache-hit early
   return, the else branch marker, and the final `return type;` are
   missing (gaps at 26655-26656, 26658, 26660-26661, 26663, 26665-26667,
   etc.).  Restore from pristine i386.c before editing.
   Grounded in the visible code: returns the tree for builtin type TCODE,
   consulting the ix86_builtin_type_tab cache first.  Non-cached entries
   must be beyond the primitive types (gcc_assert); vector codes
   (<= IX86_BT_LAST_VECT) are built with build_vector_type_for_mode from
   the ix86_builtin_type_vect_base/_mode tables; pointer codes build a
   (possibly const-qualified, for codes past IX86_BT_LAST_PTR) pointer to
   the base type from ix86_builtin_type_ptr_base.  The result is cached
   back into the table before returning.  */
26653 /* Retrieve an element from the above table, building some of
26654 the types lazily. */
26657 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
26659 unsigned int index
;
26662 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
26664 type
= ix86_builtin_type_tab
[(int) tcode
];
26668 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
26669 if (tcode
<= IX86_BT_LAST_VECT
)
26671 enum machine_mode mode
;
26673 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
26674 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
26675 mode
= ix86_builtin_type_vect_mode
[index
];
26677 type
= build_vector_type_for_mode (itype
, mode
);
26683 index
= tcode
- IX86_BT_LAST_VECT
- 1;
26684 if (tcode
<= IX86_BT_LAST_PTR
)
26685 quals
= TYPE_UNQUALIFIED
;
26687 quals
= TYPE_QUAL_CONST
;
26689 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
26690 if (quals
!= TYPE_UNQUALIFIED
)
26691 itype
= build_qualified_type (itype
, quals
);
26693 type
= build_pointer_type (itype
);
26696 ix86_builtin_type_tab
[(int) tcode
] = type
;
/* NOTE(review): garbled by extraction -- return type, braces, the `tree
   type;` / `unsigned i;` declarations, the cache-hit early return, the
   else branch, and the final `return type;` are missing (gaps at 26702,
   26705-26706, 26708-26710, 26712, 26714-26716, 26718, etc.).  Restore
   from pristine i386.c before editing.
   Grounded in the visible code: companion of ix86_get_builtin_type for
   function types, cached in the GC-rooted ix86_builtin_func_type_tab.
   For direct codes (<= IX86_BT_LAST_FUNC) it slices
   ix86_builtin_func_args using [start, after) from
   ix86_builtin_func_start, takes args[start] as the return type, builds
   the argument TREE_LIST backwards (from after-1 down to start+1,
   terminated by void_list_node), and calls build_function_type.  Alias
   codes recurse via ix86_builtin_func_alias_base.  The result is cached
   before returning.  */
26700 /* Table for the ix86 builtin function types. */
26701 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
26703 /* Retrieve an element from the above table, building some of
26704 the types lazily. */
26707 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
26711 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
26713 type
= ix86_builtin_func_type_tab
[(int) tcode
];
26717 if (tcode
<= IX86_BT_LAST_FUNC
)
26719 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
26720 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
26721 tree rtype
, atype
, args
= void_list_node
;
26724 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
26725 for (i
= after
- 1; i
> start
; --i
)
26727 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
26728 args
= tree_cons (NULL
, atype
, args
);
26731 type
= build_function_type (rtype
, args
);
26735 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
26736 enum ix86_builtin_func_type icode
;
26738 icode
= ix86_builtin_func_alias_base
[index
];
26739 type
= ix86_get_builtin_func_type (icode
);
26742 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
26747 /* Codes for all the SSE/MMX builtins. */
26750 IX86_BUILTIN_ADDPS
,
26751 IX86_BUILTIN_ADDSS
,
26752 IX86_BUILTIN_DIVPS
,
26753 IX86_BUILTIN_DIVSS
,
26754 IX86_BUILTIN_MULPS
,
26755 IX86_BUILTIN_MULSS
,
26756 IX86_BUILTIN_SUBPS
,
26757 IX86_BUILTIN_SUBSS
,
26759 IX86_BUILTIN_CMPEQPS
,
26760 IX86_BUILTIN_CMPLTPS
,
26761 IX86_BUILTIN_CMPLEPS
,
26762 IX86_BUILTIN_CMPGTPS
,
26763 IX86_BUILTIN_CMPGEPS
,
26764 IX86_BUILTIN_CMPNEQPS
,
26765 IX86_BUILTIN_CMPNLTPS
,
26766 IX86_BUILTIN_CMPNLEPS
,
26767 IX86_BUILTIN_CMPNGTPS
,
26768 IX86_BUILTIN_CMPNGEPS
,
26769 IX86_BUILTIN_CMPORDPS
,
26770 IX86_BUILTIN_CMPUNORDPS
,
26771 IX86_BUILTIN_CMPEQSS
,
26772 IX86_BUILTIN_CMPLTSS
,
26773 IX86_BUILTIN_CMPLESS
,
26774 IX86_BUILTIN_CMPNEQSS
,
26775 IX86_BUILTIN_CMPNLTSS
,
26776 IX86_BUILTIN_CMPNLESS
,
26777 IX86_BUILTIN_CMPORDSS
,
26778 IX86_BUILTIN_CMPUNORDSS
,
26780 IX86_BUILTIN_COMIEQSS
,
26781 IX86_BUILTIN_COMILTSS
,
26782 IX86_BUILTIN_COMILESS
,
26783 IX86_BUILTIN_COMIGTSS
,
26784 IX86_BUILTIN_COMIGESS
,
26785 IX86_BUILTIN_COMINEQSS
,
26786 IX86_BUILTIN_UCOMIEQSS
,
26787 IX86_BUILTIN_UCOMILTSS
,
26788 IX86_BUILTIN_UCOMILESS
,
26789 IX86_BUILTIN_UCOMIGTSS
,
26790 IX86_BUILTIN_UCOMIGESS
,
26791 IX86_BUILTIN_UCOMINEQSS
,
26793 IX86_BUILTIN_CVTPI2PS
,
26794 IX86_BUILTIN_CVTPS2PI
,
26795 IX86_BUILTIN_CVTSI2SS
,
26796 IX86_BUILTIN_CVTSI642SS
,
26797 IX86_BUILTIN_CVTSS2SI
,
26798 IX86_BUILTIN_CVTSS2SI64
,
26799 IX86_BUILTIN_CVTTPS2PI
,
26800 IX86_BUILTIN_CVTTSS2SI
,
26801 IX86_BUILTIN_CVTTSS2SI64
,
26803 IX86_BUILTIN_MAXPS
,
26804 IX86_BUILTIN_MAXSS
,
26805 IX86_BUILTIN_MINPS
,
26806 IX86_BUILTIN_MINSS
,
26808 IX86_BUILTIN_LOADUPS
,
26809 IX86_BUILTIN_STOREUPS
,
26810 IX86_BUILTIN_MOVSS
,
26812 IX86_BUILTIN_MOVHLPS
,
26813 IX86_BUILTIN_MOVLHPS
,
26814 IX86_BUILTIN_LOADHPS
,
26815 IX86_BUILTIN_LOADLPS
,
26816 IX86_BUILTIN_STOREHPS
,
26817 IX86_BUILTIN_STORELPS
,
26819 IX86_BUILTIN_MASKMOVQ
,
26820 IX86_BUILTIN_MOVMSKPS
,
26821 IX86_BUILTIN_PMOVMSKB
,
26823 IX86_BUILTIN_MOVNTPS
,
26824 IX86_BUILTIN_MOVNTQ
,
26826 IX86_BUILTIN_LOADDQU
,
26827 IX86_BUILTIN_STOREDQU
,
26829 IX86_BUILTIN_PACKSSWB
,
26830 IX86_BUILTIN_PACKSSDW
,
26831 IX86_BUILTIN_PACKUSWB
,
26833 IX86_BUILTIN_PADDB
,
26834 IX86_BUILTIN_PADDW
,
26835 IX86_BUILTIN_PADDD
,
26836 IX86_BUILTIN_PADDQ
,
26837 IX86_BUILTIN_PADDSB
,
26838 IX86_BUILTIN_PADDSW
,
26839 IX86_BUILTIN_PADDUSB
,
26840 IX86_BUILTIN_PADDUSW
,
26841 IX86_BUILTIN_PSUBB
,
26842 IX86_BUILTIN_PSUBW
,
26843 IX86_BUILTIN_PSUBD
,
26844 IX86_BUILTIN_PSUBQ
,
26845 IX86_BUILTIN_PSUBSB
,
26846 IX86_BUILTIN_PSUBSW
,
26847 IX86_BUILTIN_PSUBUSB
,
26848 IX86_BUILTIN_PSUBUSW
,
26851 IX86_BUILTIN_PANDN
,
26855 IX86_BUILTIN_PAVGB
,
26856 IX86_BUILTIN_PAVGW
,
26858 IX86_BUILTIN_PCMPEQB
,
26859 IX86_BUILTIN_PCMPEQW
,
26860 IX86_BUILTIN_PCMPEQD
,
26861 IX86_BUILTIN_PCMPGTB
,
26862 IX86_BUILTIN_PCMPGTW
,
26863 IX86_BUILTIN_PCMPGTD
,
26865 IX86_BUILTIN_PMADDWD
,
26867 IX86_BUILTIN_PMAXSW
,
26868 IX86_BUILTIN_PMAXUB
,
26869 IX86_BUILTIN_PMINSW
,
26870 IX86_BUILTIN_PMINUB
,
26872 IX86_BUILTIN_PMULHUW
,
26873 IX86_BUILTIN_PMULHW
,
26874 IX86_BUILTIN_PMULLW
,
26876 IX86_BUILTIN_PSADBW
,
26877 IX86_BUILTIN_PSHUFW
,
26879 IX86_BUILTIN_PSLLW
,
26880 IX86_BUILTIN_PSLLD
,
26881 IX86_BUILTIN_PSLLQ
,
26882 IX86_BUILTIN_PSRAW
,
26883 IX86_BUILTIN_PSRAD
,
26884 IX86_BUILTIN_PSRLW
,
26885 IX86_BUILTIN_PSRLD
,
26886 IX86_BUILTIN_PSRLQ
,
26887 IX86_BUILTIN_PSLLWI
,
26888 IX86_BUILTIN_PSLLDI
,
26889 IX86_BUILTIN_PSLLQI
,
26890 IX86_BUILTIN_PSRAWI
,
26891 IX86_BUILTIN_PSRADI
,
26892 IX86_BUILTIN_PSRLWI
,
26893 IX86_BUILTIN_PSRLDI
,
26894 IX86_BUILTIN_PSRLQI
,
26896 IX86_BUILTIN_PUNPCKHBW
,
26897 IX86_BUILTIN_PUNPCKHWD
,
26898 IX86_BUILTIN_PUNPCKHDQ
,
26899 IX86_BUILTIN_PUNPCKLBW
,
26900 IX86_BUILTIN_PUNPCKLWD
,
26901 IX86_BUILTIN_PUNPCKLDQ
,
26903 IX86_BUILTIN_SHUFPS
,
26905 IX86_BUILTIN_RCPPS
,
26906 IX86_BUILTIN_RCPSS
,
26907 IX86_BUILTIN_RSQRTPS
,
26908 IX86_BUILTIN_RSQRTPS_NR
,
26909 IX86_BUILTIN_RSQRTSS
,
26910 IX86_BUILTIN_RSQRTF
,
26911 IX86_BUILTIN_SQRTPS
,
26912 IX86_BUILTIN_SQRTPS_NR
,
26913 IX86_BUILTIN_SQRTSS
,
26915 IX86_BUILTIN_UNPCKHPS
,
26916 IX86_BUILTIN_UNPCKLPS
,
26918 IX86_BUILTIN_ANDPS
,
26919 IX86_BUILTIN_ANDNPS
,
26921 IX86_BUILTIN_XORPS
,
26924 IX86_BUILTIN_LDMXCSR
,
26925 IX86_BUILTIN_STMXCSR
,
26926 IX86_BUILTIN_SFENCE
,
26928 IX86_BUILTIN_FXSAVE
,
26929 IX86_BUILTIN_FXRSTOR
,
26930 IX86_BUILTIN_FXSAVE64
,
26931 IX86_BUILTIN_FXRSTOR64
,
26933 IX86_BUILTIN_XSAVE
,
26934 IX86_BUILTIN_XRSTOR
,
26935 IX86_BUILTIN_XSAVE64
,
26936 IX86_BUILTIN_XRSTOR64
,
26938 IX86_BUILTIN_XSAVEOPT
,
26939 IX86_BUILTIN_XSAVEOPT64
,
26941 /* 3DNow! Original */
26942 IX86_BUILTIN_FEMMS
,
26943 IX86_BUILTIN_PAVGUSB
,
26944 IX86_BUILTIN_PF2ID
,
26945 IX86_BUILTIN_PFACC
,
26946 IX86_BUILTIN_PFADD
,
26947 IX86_BUILTIN_PFCMPEQ
,
26948 IX86_BUILTIN_PFCMPGE
,
26949 IX86_BUILTIN_PFCMPGT
,
26950 IX86_BUILTIN_PFMAX
,
26951 IX86_BUILTIN_PFMIN
,
26952 IX86_BUILTIN_PFMUL
,
26953 IX86_BUILTIN_PFRCP
,
26954 IX86_BUILTIN_PFRCPIT1
,
26955 IX86_BUILTIN_PFRCPIT2
,
26956 IX86_BUILTIN_PFRSQIT1
,
26957 IX86_BUILTIN_PFRSQRT
,
26958 IX86_BUILTIN_PFSUB
,
26959 IX86_BUILTIN_PFSUBR
,
26960 IX86_BUILTIN_PI2FD
,
26961 IX86_BUILTIN_PMULHRW
,
26963 /* 3DNow! Athlon Extensions */
26964 IX86_BUILTIN_PF2IW
,
26965 IX86_BUILTIN_PFNACC
,
26966 IX86_BUILTIN_PFPNACC
,
26967 IX86_BUILTIN_PI2FW
,
26968 IX86_BUILTIN_PSWAPDSI
,
26969 IX86_BUILTIN_PSWAPDSF
,
26972 IX86_BUILTIN_ADDPD
,
26973 IX86_BUILTIN_ADDSD
,
26974 IX86_BUILTIN_DIVPD
,
26975 IX86_BUILTIN_DIVSD
,
26976 IX86_BUILTIN_MULPD
,
26977 IX86_BUILTIN_MULSD
,
26978 IX86_BUILTIN_SUBPD
,
26979 IX86_BUILTIN_SUBSD
,
26981 IX86_BUILTIN_CMPEQPD
,
26982 IX86_BUILTIN_CMPLTPD
,
26983 IX86_BUILTIN_CMPLEPD
,
26984 IX86_BUILTIN_CMPGTPD
,
26985 IX86_BUILTIN_CMPGEPD
,
26986 IX86_BUILTIN_CMPNEQPD
,
26987 IX86_BUILTIN_CMPNLTPD
,
26988 IX86_BUILTIN_CMPNLEPD
,
26989 IX86_BUILTIN_CMPNGTPD
,
26990 IX86_BUILTIN_CMPNGEPD
,
26991 IX86_BUILTIN_CMPORDPD
,
26992 IX86_BUILTIN_CMPUNORDPD
,
26993 IX86_BUILTIN_CMPEQSD
,
26994 IX86_BUILTIN_CMPLTSD
,
26995 IX86_BUILTIN_CMPLESD
,
26996 IX86_BUILTIN_CMPNEQSD
,
26997 IX86_BUILTIN_CMPNLTSD
,
26998 IX86_BUILTIN_CMPNLESD
,
26999 IX86_BUILTIN_CMPORDSD
,
27000 IX86_BUILTIN_CMPUNORDSD
,
27002 IX86_BUILTIN_COMIEQSD
,
27003 IX86_BUILTIN_COMILTSD
,
27004 IX86_BUILTIN_COMILESD
,
27005 IX86_BUILTIN_COMIGTSD
,
27006 IX86_BUILTIN_COMIGESD
,
27007 IX86_BUILTIN_COMINEQSD
,
27008 IX86_BUILTIN_UCOMIEQSD
,
27009 IX86_BUILTIN_UCOMILTSD
,
27010 IX86_BUILTIN_UCOMILESD
,
27011 IX86_BUILTIN_UCOMIGTSD
,
27012 IX86_BUILTIN_UCOMIGESD
,
27013 IX86_BUILTIN_UCOMINEQSD
,
27015 IX86_BUILTIN_MAXPD
,
27016 IX86_BUILTIN_MAXSD
,
27017 IX86_BUILTIN_MINPD
,
27018 IX86_BUILTIN_MINSD
,
27020 IX86_BUILTIN_ANDPD
,
27021 IX86_BUILTIN_ANDNPD
,
27023 IX86_BUILTIN_XORPD
,
27025 IX86_BUILTIN_SQRTPD
,
27026 IX86_BUILTIN_SQRTSD
,
27028 IX86_BUILTIN_UNPCKHPD
,
27029 IX86_BUILTIN_UNPCKLPD
,
27031 IX86_BUILTIN_SHUFPD
,
27033 IX86_BUILTIN_LOADUPD
,
27034 IX86_BUILTIN_STOREUPD
,
27035 IX86_BUILTIN_MOVSD
,
27037 IX86_BUILTIN_LOADHPD
,
27038 IX86_BUILTIN_LOADLPD
,
27040 IX86_BUILTIN_CVTDQ2PD
,
27041 IX86_BUILTIN_CVTDQ2PS
,
27043 IX86_BUILTIN_CVTPD2DQ
,
27044 IX86_BUILTIN_CVTPD2PI
,
27045 IX86_BUILTIN_CVTPD2PS
,
27046 IX86_BUILTIN_CVTTPD2DQ
,
27047 IX86_BUILTIN_CVTTPD2PI
,
27049 IX86_BUILTIN_CVTPI2PD
,
27050 IX86_BUILTIN_CVTSI2SD
,
27051 IX86_BUILTIN_CVTSI642SD
,
27053 IX86_BUILTIN_CVTSD2SI
,
27054 IX86_BUILTIN_CVTSD2SI64
,
27055 IX86_BUILTIN_CVTSD2SS
,
27056 IX86_BUILTIN_CVTSS2SD
,
27057 IX86_BUILTIN_CVTTSD2SI
,
27058 IX86_BUILTIN_CVTTSD2SI64
,
27060 IX86_BUILTIN_CVTPS2DQ
,
27061 IX86_BUILTIN_CVTPS2PD
,
27062 IX86_BUILTIN_CVTTPS2DQ
,
27064 IX86_BUILTIN_MOVNTI
,
27065 IX86_BUILTIN_MOVNTI64
,
27066 IX86_BUILTIN_MOVNTPD
,
27067 IX86_BUILTIN_MOVNTDQ
,
27069 IX86_BUILTIN_MOVQ128
,
27072 IX86_BUILTIN_MASKMOVDQU
,
27073 IX86_BUILTIN_MOVMSKPD
,
27074 IX86_BUILTIN_PMOVMSKB128
,
27076 IX86_BUILTIN_PACKSSWB128
,
27077 IX86_BUILTIN_PACKSSDW128
,
27078 IX86_BUILTIN_PACKUSWB128
,
27080 IX86_BUILTIN_PADDB128
,
27081 IX86_BUILTIN_PADDW128
,
27082 IX86_BUILTIN_PADDD128
,
27083 IX86_BUILTIN_PADDQ128
,
27084 IX86_BUILTIN_PADDSB128
,
27085 IX86_BUILTIN_PADDSW128
,
27086 IX86_BUILTIN_PADDUSB128
,
27087 IX86_BUILTIN_PADDUSW128
,
27088 IX86_BUILTIN_PSUBB128
,
27089 IX86_BUILTIN_PSUBW128
,
27090 IX86_BUILTIN_PSUBD128
,
27091 IX86_BUILTIN_PSUBQ128
,
27092 IX86_BUILTIN_PSUBSB128
,
27093 IX86_BUILTIN_PSUBSW128
,
27094 IX86_BUILTIN_PSUBUSB128
,
27095 IX86_BUILTIN_PSUBUSW128
,
27097 IX86_BUILTIN_PAND128
,
27098 IX86_BUILTIN_PANDN128
,
27099 IX86_BUILTIN_POR128
,
27100 IX86_BUILTIN_PXOR128
,
27102 IX86_BUILTIN_PAVGB128
,
27103 IX86_BUILTIN_PAVGW128
,
27105 IX86_BUILTIN_PCMPEQB128
,
27106 IX86_BUILTIN_PCMPEQW128
,
27107 IX86_BUILTIN_PCMPEQD128
,
27108 IX86_BUILTIN_PCMPGTB128
,
27109 IX86_BUILTIN_PCMPGTW128
,
27110 IX86_BUILTIN_PCMPGTD128
,
27112 IX86_BUILTIN_PMADDWD128
,
27114 IX86_BUILTIN_PMAXSW128
,
27115 IX86_BUILTIN_PMAXUB128
,
27116 IX86_BUILTIN_PMINSW128
,
27117 IX86_BUILTIN_PMINUB128
,
27119 IX86_BUILTIN_PMULUDQ
,
27120 IX86_BUILTIN_PMULUDQ128
,
27121 IX86_BUILTIN_PMULHUW128
,
27122 IX86_BUILTIN_PMULHW128
,
27123 IX86_BUILTIN_PMULLW128
,
27125 IX86_BUILTIN_PSADBW128
,
27126 IX86_BUILTIN_PSHUFHW
,
27127 IX86_BUILTIN_PSHUFLW
,
27128 IX86_BUILTIN_PSHUFD
,
27130 IX86_BUILTIN_PSLLDQI128
,
27131 IX86_BUILTIN_PSLLWI128
,
27132 IX86_BUILTIN_PSLLDI128
,
27133 IX86_BUILTIN_PSLLQI128
,
27134 IX86_BUILTIN_PSRAWI128
,
27135 IX86_BUILTIN_PSRADI128
,
27136 IX86_BUILTIN_PSRLDQI128
,
27137 IX86_BUILTIN_PSRLWI128
,
27138 IX86_BUILTIN_PSRLDI128
,
27139 IX86_BUILTIN_PSRLQI128
,
27141 IX86_BUILTIN_PSLLDQ128
,
27142 IX86_BUILTIN_PSLLW128
,
27143 IX86_BUILTIN_PSLLD128
,
27144 IX86_BUILTIN_PSLLQ128
,
27145 IX86_BUILTIN_PSRAW128
,
27146 IX86_BUILTIN_PSRAD128
,
27147 IX86_BUILTIN_PSRLW128
,
27148 IX86_BUILTIN_PSRLD128
,
27149 IX86_BUILTIN_PSRLQ128
,
27151 IX86_BUILTIN_PUNPCKHBW128
,
27152 IX86_BUILTIN_PUNPCKHWD128
,
27153 IX86_BUILTIN_PUNPCKHDQ128
,
27154 IX86_BUILTIN_PUNPCKHQDQ128
,
27155 IX86_BUILTIN_PUNPCKLBW128
,
27156 IX86_BUILTIN_PUNPCKLWD128
,
27157 IX86_BUILTIN_PUNPCKLDQ128
,
27158 IX86_BUILTIN_PUNPCKLQDQ128
,
27160 IX86_BUILTIN_CLFLUSH
,
27161 IX86_BUILTIN_MFENCE
,
27162 IX86_BUILTIN_LFENCE
,
27163 IX86_BUILTIN_PAUSE
,
27165 IX86_BUILTIN_FNSTENV
,
27166 IX86_BUILTIN_FLDENV
,
27167 IX86_BUILTIN_FNSTSW
,
27168 IX86_BUILTIN_FNCLEX
,
27170 IX86_BUILTIN_BSRSI
,
27171 IX86_BUILTIN_BSRDI
,
27172 IX86_BUILTIN_RDPMC
,
27173 IX86_BUILTIN_RDTSC
,
27174 IX86_BUILTIN_RDTSCP
,
27175 IX86_BUILTIN_ROLQI
,
27176 IX86_BUILTIN_ROLHI
,
27177 IX86_BUILTIN_RORQI
,
27178 IX86_BUILTIN_RORHI
,
27181 IX86_BUILTIN_ADDSUBPS
,
27182 IX86_BUILTIN_HADDPS
,
27183 IX86_BUILTIN_HSUBPS
,
27184 IX86_BUILTIN_MOVSHDUP
,
27185 IX86_BUILTIN_MOVSLDUP
,
27186 IX86_BUILTIN_ADDSUBPD
,
27187 IX86_BUILTIN_HADDPD
,
27188 IX86_BUILTIN_HSUBPD
,
27189 IX86_BUILTIN_LDDQU
,
27191 IX86_BUILTIN_MONITOR
,
27192 IX86_BUILTIN_MWAIT
,
27195 IX86_BUILTIN_PHADDW
,
27196 IX86_BUILTIN_PHADDD
,
27197 IX86_BUILTIN_PHADDSW
,
27198 IX86_BUILTIN_PHSUBW
,
27199 IX86_BUILTIN_PHSUBD
,
27200 IX86_BUILTIN_PHSUBSW
,
27201 IX86_BUILTIN_PMADDUBSW
,
27202 IX86_BUILTIN_PMULHRSW
,
27203 IX86_BUILTIN_PSHUFB
,
27204 IX86_BUILTIN_PSIGNB
,
27205 IX86_BUILTIN_PSIGNW
,
27206 IX86_BUILTIN_PSIGND
,
27207 IX86_BUILTIN_PALIGNR
,
27208 IX86_BUILTIN_PABSB
,
27209 IX86_BUILTIN_PABSW
,
27210 IX86_BUILTIN_PABSD
,
27212 IX86_BUILTIN_PHADDW128
,
27213 IX86_BUILTIN_PHADDD128
,
27214 IX86_BUILTIN_PHADDSW128
,
27215 IX86_BUILTIN_PHSUBW128
,
27216 IX86_BUILTIN_PHSUBD128
,
27217 IX86_BUILTIN_PHSUBSW128
,
27218 IX86_BUILTIN_PMADDUBSW128
,
27219 IX86_BUILTIN_PMULHRSW128
,
27220 IX86_BUILTIN_PSHUFB128
,
27221 IX86_BUILTIN_PSIGNB128
,
27222 IX86_BUILTIN_PSIGNW128
,
27223 IX86_BUILTIN_PSIGND128
,
27224 IX86_BUILTIN_PALIGNR128
,
27225 IX86_BUILTIN_PABSB128
,
27226 IX86_BUILTIN_PABSW128
,
27227 IX86_BUILTIN_PABSD128
,
27229 /* AMDFAM10 - SSE4A New Instructions. */
27230 IX86_BUILTIN_MOVNTSD
,
27231 IX86_BUILTIN_MOVNTSS
,
27232 IX86_BUILTIN_EXTRQI
,
27233 IX86_BUILTIN_EXTRQ
,
27234 IX86_BUILTIN_INSERTQI
,
27235 IX86_BUILTIN_INSERTQ
,
27238 IX86_BUILTIN_BLENDPD
,
27239 IX86_BUILTIN_BLENDPS
,
27240 IX86_BUILTIN_BLENDVPD
,
27241 IX86_BUILTIN_BLENDVPS
,
27242 IX86_BUILTIN_PBLENDVB128
,
27243 IX86_BUILTIN_PBLENDW128
,
27248 IX86_BUILTIN_INSERTPS128
,
27250 IX86_BUILTIN_MOVNTDQA
,
27251 IX86_BUILTIN_MPSADBW128
,
27252 IX86_BUILTIN_PACKUSDW128
,
27253 IX86_BUILTIN_PCMPEQQ
,
27254 IX86_BUILTIN_PHMINPOSUW128
,
27256 IX86_BUILTIN_PMAXSB128
,
27257 IX86_BUILTIN_PMAXSD128
,
27258 IX86_BUILTIN_PMAXUD128
,
27259 IX86_BUILTIN_PMAXUW128
,
27261 IX86_BUILTIN_PMINSB128
,
27262 IX86_BUILTIN_PMINSD128
,
27263 IX86_BUILTIN_PMINUD128
,
27264 IX86_BUILTIN_PMINUW128
,
27266 IX86_BUILTIN_PMOVSXBW128
,
27267 IX86_BUILTIN_PMOVSXBD128
,
27268 IX86_BUILTIN_PMOVSXBQ128
,
27269 IX86_BUILTIN_PMOVSXWD128
,
27270 IX86_BUILTIN_PMOVSXWQ128
,
27271 IX86_BUILTIN_PMOVSXDQ128
,
27273 IX86_BUILTIN_PMOVZXBW128
,
27274 IX86_BUILTIN_PMOVZXBD128
,
27275 IX86_BUILTIN_PMOVZXBQ128
,
27276 IX86_BUILTIN_PMOVZXWD128
,
27277 IX86_BUILTIN_PMOVZXWQ128
,
27278 IX86_BUILTIN_PMOVZXDQ128
,
27280 IX86_BUILTIN_PMULDQ128
,
27281 IX86_BUILTIN_PMULLD128
,
27283 IX86_BUILTIN_ROUNDSD
,
27284 IX86_BUILTIN_ROUNDSS
,
27286 IX86_BUILTIN_ROUNDPD
,
27287 IX86_BUILTIN_ROUNDPS
,
27289 IX86_BUILTIN_FLOORPD
,
27290 IX86_BUILTIN_CEILPD
,
27291 IX86_BUILTIN_TRUNCPD
,
27292 IX86_BUILTIN_RINTPD
,
27293 IX86_BUILTIN_ROUNDPD_AZ
,
27295 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
27296 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
27297 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
27299 IX86_BUILTIN_FLOORPS
,
27300 IX86_BUILTIN_CEILPS
,
27301 IX86_BUILTIN_TRUNCPS
,
27302 IX86_BUILTIN_RINTPS
,
27303 IX86_BUILTIN_ROUNDPS_AZ
,
27305 IX86_BUILTIN_FLOORPS_SFIX
,
27306 IX86_BUILTIN_CEILPS_SFIX
,
27307 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
27309 IX86_BUILTIN_PTESTZ
,
27310 IX86_BUILTIN_PTESTC
,
27311 IX86_BUILTIN_PTESTNZC
,
27313 IX86_BUILTIN_VEC_INIT_V2SI
,
27314 IX86_BUILTIN_VEC_INIT_V4HI
,
27315 IX86_BUILTIN_VEC_INIT_V8QI
,
27316 IX86_BUILTIN_VEC_EXT_V2DF
,
27317 IX86_BUILTIN_VEC_EXT_V2DI
,
27318 IX86_BUILTIN_VEC_EXT_V4SF
,
27319 IX86_BUILTIN_VEC_EXT_V4SI
,
27320 IX86_BUILTIN_VEC_EXT_V8HI
,
27321 IX86_BUILTIN_VEC_EXT_V2SI
,
27322 IX86_BUILTIN_VEC_EXT_V4HI
,
27323 IX86_BUILTIN_VEC_EXT_V16QI
,
27324 IX86_BUILTIN_VEC_SET_V2DI
,
27325 IX86_BUILTIN_VEC_SET_V4SF
,
27326 IX86_BUILTIN_VEC_SET_V4SI
,
27327 IX86_BUILTIN_VEC_SET_V8HI
,
27328 IX86_BUILTIN_VEC_SET_V4HI
,
27329 IX86_BUILTIN_VEC_SET_V16QI
,
27331 IX86_BUILTIN_VEC_PACK_SFIX
,
27332 IX86_BUILTIN_VEC_PACK_SFIX256
,
27335 IX86_BUILTIN_CRC32QI
,
27336 IX86_BUILTIN_CRC32HI
,
27337 IX86_BUILTIN_CRC32SI
,
27338 IX86_BUILTIN_CRC32DI
,
27340 IX86_BUILTIN_PCMPESTRI128
,
27341 IX86_BUILTIN_PCMPESTRM128
,
27342 IX86_BUILTIN_PCMPESTRA128
,
27343 IX86_BUILTIN_PCMPESTRC128
,
27344 IX86_BUILTIN_PCMPESTRO128
,
27345 IX86_BUILTIN_PCMPESTRS128
,
27346 IX86_BUILTIN_PCMPESTRZ128
,
27347 IX86_BUILTIN_PCMPISTRI128
,
27348 IX86_BUILTIN_PCMPISTRM128
,
27349 IX86_BUILTIN_PCMPISTRA128
,
27350 IX86_BUILTIN_PCMPISTRC128
,
27351 IX86_BUILTIN_PCMPISTRO128
,
27352 IX86_BUILTIN_PCMPISTRS128
,
27353 IX86_BUILTIN_PCMPISTRZ128
,
27355 IX86_BUILTIN_PCMPGTQ
,
27357 /* AES instructions */
27358 IX86_BUILTIN_AESENC128
,
27359 IX86_BUILTIN_AESENCLAST128
,
27360 IX86_BUILTIN_AESDEC128
,
27361 IX86_BUILTIN_AESDECLAST128
,
27362 IX86_BUILTIN_AESIMC128
,
27363 IX86_BUILTIN_AESKEYGENASSIST128
,
27365 /* PCLMUL instruction */
27366 IX86_BUILTIN_PCLMULQDQ128
,
27369 IX86_BUILTIN_ADDPD256
,
27370 IX86_BUILTIN_ADDPS256
,
27371 IX86_BUILTIN_ADDSUBPD256
,
27372 IX86_BUILTIN_ADDSUBPS256
,
27373 IX86_BUILTIN_ANDPD256
,
27374 IX86_BUILTIN_ANDPS256
,
27375 IX86_BUILTIN_ANDNPD256
,
27376 IX86_BUILTIN_ANDNPS256
,
27377 IX86_BUILTIN_BLENDPD256
,
27378 IX86_BUILTIN_BLENDPS256
,
27379 IX86_BUILTIN_BLENDVPD256
,
27380 IX86_BUILTIN_BLENDVPS256
,
27381 IX86_BUILTIN_DIVPD256
,
27382 IX86_BUILTIN_DIVPS256
,
27383 IX86_BUILTIN_DPPS256
,
27384 IX86_BUILTIN_HADDPD256
,
27385 IX86_BUILTIN_HADDPS256
,
27386 IX86_BUILTIN_HSUBPD256
,
27387 IX86_BUILTIN_HSUBPS256
,
27388 IX86_BUILTIN_MAXPD256
,
27389 IX86_BUILTIN_MAXPS256
,
27390 IX86_BUILTIN_MINPD256
,
27391 IX86_BUILTIN_MINPS256
,
27392 IX86_BUILTIN_MULPD256
,
27393 IX86_BUILTIN_MULPS256
,
27394 IX86_BUILTIN_ORPD256
,
27395 IX86_BUILTIN_ORPS256
,
27396 IX86_BUILTIN_SHUFPD256
,
27397 IX86_BUILTIN_SHUFPS256
,
27398 IX86_BUILTIN_SUBPD256
,
27399 IX86_BUILTIN_SUBPS256
,
27400 IX86_BUILTIN_XORPD256
,
27401 IX86_BUILTIN_XORPS256
,
27402 IX86_BUILTIN_CMPSD
,
27403 IX86_BUILTIN_CMPSS
,
27404 IX86_BUILTIN_CMPPD
,
27405 IX86_BUILTIN_CMPPS
,
27406 IX86_BUILTIN_CMPPD256
,
27407 IX86_BUILTIN_CMPPS256
,
27408 IX86_BUILTIN_CVTDQ2PD256
,
27409 IX86_BUILTIN_CVTDQ2PS256
,
27410 IX86_BUILTIN_CVTPD2PS256
,
27411 IX86_BUILTIN_CVTPS2DQ256
,
27412 IX86_BUILTIN_CVTPS2PD256
,
27413 IX86_BUILTIN_CVTTPD2DQ256
,
27414 IX86_BUILTIN_CVTPD2DQ256
,
27415 IX86_BUILTIN_CVTTPS2DQ256
,
27416 IX86_BUILTIN_EXTRACTF128PD256
,
27417 IX86_BUILTIN_EXTRACTF128PS256
,
27418 IX86_BUILTIN_EXTRACTF128SI256
,
27419 IX86_BUILTIN_VZEROALL
,
27420 IX86_BUILTIN_VZEROUPPER
,
27421 IX86_BUILTIN_VPERMILVARPD
,
27422 IX86_BUILTIN_VPERMILVARPS
,
27423 IX86_BUILTIN_VPERMILVARPD256
,
27424 IX86_BUILTIN_VPERMILVARPS256
,
27425 IX86_BUILTIN_VPERMILPD
,
27426 IX86_BUILTIN_VPERMILPS
,
27427 IX86_BUILTIN_VPERMILPD256
,
27428 IX86_BUILTIN_VPERMILPS256
,
27429 IX86_BUILTIN_VPERMIL2PD
,
27430 IX86_BUILTIN_VPERMIL2PS
,
27431 IX86_BUILTIN_VPERMIL2PD256
,
27432 IX86_BUILTIN_VPERMIL2PS256
,
27433 IX86_BUILTIN_VPERM2F128PD256
,
27434 IX86_BUILTIN_VPERM2F128PS256
,
27435 IX86_BUILTIN_VPERM2F128SI256
,
27436 IX86_BUILTIN_VBROADCASTSS
,
27437 IX86_BUILTIN_VBROADCASTSD256
,
27438 IX86_BUILTIN_VBROADCASTSS256
,
27439 IX86_BUILTIN_VBROADCASTPD256
,
27440 IX86_BUILTIN_VBROADCASTPS256
,
27441 IX86_BUILTIN_VINSERTF128PD256
,
27442 IX86_BUILTIN_VINSERTF128PS256
,
27443 IX86_BUILTIN_VINSERTF128SI256
,
27444 IX86_BUILTIN_LOADUPD256
,
27445 IX86_BUILTIN_LOADUPS256
,
27446 IX86_BUILTIN_STOREUPD256
,
27447 IX86_BUILTIN_STOREUPS256
,
27448 IX86_BUILTIN_LDDQU256
,
27449 IX86_BUILTIN_MOVNTDQ256
,
27450 IX86_BUILTIN_MOVNTPD256
,
27451 IX86_BUILTIN_MOVNTPS256
,
27452 IX86_BUILTIN_LOADDQU256
,
27453 IX86_BUILTIN_STOREDQU256
,
27454 IX86_BUILTIN_MASKLOADPD
,
27455 IX86_BUILTIN_MASKLOADPS
,
27456 IX86_BUILTIN_MASKSTOREPD
,
27457 IX86_BUILTIN_MASKSTOREPS
,
27458 IX86_BUILTIN_MASKLOADPD256
,
27459 IX86_BUILTIN_MASKLOADPS256
,
27460 IX86_BUILTIN_MASKSTOREPD256
,
27461 IX86_BUILTIN_MASKSTOREPS256
,
27462 IX86_BUILTIN_MOVSHDUP256
,
27463 IX86_BUILTIN_MOVSLDUP256
,
27464 IX86_BUILTIN_MOVDDUP256
,
27466 IX86_BUILTIN_SQRTPD256
,
27467 IX86_BUILTIN_SQRTPS256
,
27468 IX86_BUILTIN_SQRTPS_NR256
,
27469 IX86_BUILTIN_RSQRTPS256
,
27470 IX86_BUILTIN_RSQRTPS_NR256
,
27472 IX86_BUILTIN_RCPPS256
,
27474 IX86_BUILTIN_ROUNDPD256
,
27475 IX86_BUILTIN_ROUNDPS256
,
27477 IX86_BUILTIN_FLOORPD256
,
27478 IX86_BUILTIN_CEILPD256
,
27479 IX86_BUILTIN_TRUNCPD256
,
27480 IX86_BUILTIN_RINTPD256
,
27481 IX86_BUILTIN_ROUNDPD_AZ256
,
27483 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
27484 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
27485 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
27487 IX86_BUILTIN_FLOORPS256
,
27488 IX86_BUILTIN_CEILPS256
,
27489 IX86_BUILTIN_TRUNCPS256
,
27490 IX86_BUILTIN_RINTPS256
,
27491 IX86_BUILTIN_ROUNDPS_AZ256
,
27493 IX86_BUILTIN_FLOORPS_SFIX256
,
27494 IX86_BUILTIN_CEILPS_SFIX256
,
27495 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
27497 IX86_BUILTIN_UNPCKHPD256
,
27498 IX86_BUILTIN_UNPCKLPD256
,
27499 IX86_BUILTIN_UNPCKHPS256
,
27500 IX86_BUILTIN_UNPCKLPS256
,
27502 IX86_BUILTIN_SI256_SI
,
27503 IX86_BUILTIN_PS256_PS
,
27504 IX86_BUILTIN_PD256_PD
,
27505 IX86_BUILTIN_SI_SI256
,
27506 IX86_BUILTIN_PS_PS256
,
27507 IX86_BUILTIN_PD_PD256
,
27509 IX86_BUILTIN_VTESTZPD
,
27510 IX86_BUILTIN_VTESTCPD
,
27511 IX86_BUILTIN_VTESTNZCPD
,
27512 IX86_BUILTIN_VTESTZPS
,
27513 IX86_BUILTIN_VTESTCPS
,
27514 IX86_BUILTIN_VTESTNZCPS
,
27515 IX86_BUILTIN_VTESTZPD256
,
27516 IX86_BUILTIN_VTESTCPD256
,
27517 IX86_BUILTIN_VTESTNZCPD256
,
27518 IX86_BUILTIN_VTESTZPS256
,
27519 IX86_BUILTIN_VTESTCPS256
,
27520 IX86_BUILTIN_VTESTNZCPS256
,
27521 IX86_BUILTIN_PTESTZ256
,
27522 IX86_BUILTIN_PTESTC256
,
27523 IX86_BUILTIN_PTESTNZC256
,
27525 IX86_BUILTIN_MOVMSKPD256
,
27526 IX86_BUILTIN_MOVMSKPS256
,
27529 IX86_BUILTIN_MPSADBW256
,
27530 IX86_BUILTIN_PABSB256
,
27531 IX86_BUILTIN_PABSW256
,
27532 IX86_BUILTIN_PABSD256
,
27533 IX86_BUILTIN_PACKSSDW256
,
27534 IX86_BUILTIN_PACKSSWB256
,
27535 IX86_BUILTIN_PACKUSDW256
,
27536 IX86_BUILTIN_PACKUSWB256
,
27537 IX86_BUILTIN_PADDB256
,
27538 IX86_BUILTIN_PADDW256
,
27539 IX86_BUILTIN_PADDD256
,
27540 IX86_BUILTIN_PADDQ256
,
27541 IX86_BUILTIN_PADDSB256
,
27542 IX86_BUILTIN_PADDSW256
,
27543 IX86_BUILTIN_PADDUSB256
,
27544 IX86_BUILTIN_PADDUSW256
,
27545 IX86_BUILTIN_PALIGNR256
,
27546 IX86_BUILTIN_AND256I
,
27547 IX86_BUILTIN_ANDNOT256I
,
27548 IX86_BUILTIN_PAVGB256
,
27549 IX86_BUILTIN_PAVGW256
,
27550 IX86_BUILTIN_PBLENDVB256
,
27551 IX86_BUILTIN_PBLENDVW256
,
27552 IX86_BUILTIN_PCMPEQB256
,
27553 IX86_BUILTIN_PCMPEQW256
,
27554 IX86_BUILTIN_PCMPEQD256
,
27555 IX86_BUILTIN_PCMPEQQ256
,
27556 IX86_BUILTIN_PCMPGTB256
,
27557 IX86_BUILTIN_PCMPGTW256
,
27558 IX86_BUILTIN_PCMPGTD256
,
27559 IX86_BUILTIN_PCMPGTQ256
,
27560 IX86_BUILTIN_PHADDW256
,
27561 IX86_BUILTIN_PHADDD256
,
27562 IX86_BUILTIN_PHADDSW256
,
27563 IX86_BUILTIN_PHSUBW256
,
27564 IX86_BUILTIN_PHSUBD256
,
27565 IX86_BUILTIN_PHSUBSW256
,
27566 IX86_BUILTIN_PMADDUBSW256
,
27567 IX86_BUILTIN_PMADDWD256
,
27568 IX86_BUILTIN_PMAXSB256
,
27569 IX86_BUILTIN_PMAXSW256
,
27570 IX86_BUILTIN_PMAXSD256
,
27571 IX86_BUILTIN_PMAXUB256
,
27572 IX86_BUILTIN_PMAXUW256
,
27573 IX86_BUILTIN_PMAXUD256
,
27574 IX86_BUILTIN_PMINSB256
,
27575 IX86_BUILTIN_PMINSW256
,
27576 IX86_BUILTIN_PMINSD256
,
27577 IX86_BUILTIN_PMINUB256
,
27578 IX86_BUILTIN_PMINUW256
,
27579 IX86_BUILTIN_PMINUD256
,
27580 IX86_BUILTIN_PMOVMSKB256
,
27581 IX86_BUILTIN_PMOVSXBW256
,
27582 IX86_BUILTIN_PMOVSXBD256
,
27583 IX86_BUILTIN_PMOVSXBQ256
,
27584 IX86_BUILTIN_PMOVSXWD256
,
27585 IX86_BUILTIN_PMOVSXWQ256
,
27586 IX86_BUILTIN_PMOVSXDQ256
,
27587 IX86_BUILTIN_PMOVZXBW256
,
27588 IX86_BUILTIN_PMOVZXBD256
,
27589 IX86_BUILTIN_PMOVZXBQ256
,
27590 IX86_BUILTIN_PMOVZXWD256
,
27591 IX86_BUILTIN_PMOVZXWQ256
,
27592 IX86_BUILTIN_PMOVZXDQ256
,
27593 IX86_BUILTIN_PMULDQ256
,
27594 IX86_BUILTIN_PMULHRSW256
,
27595 IX86_BUILTIN_PMULHUW256
,
27596 IX86_BUILTIN_PMULHW256
,
27597 IX86_BUILTIN_PMULLW256
,
27598 IX86_BUILTIN_PMULLD256
,
27599 IX86_BUILTIN_PMULUDQ256
,
27600 IX86_BUILTIN_POR256
,
27601 IX86_BUILTIN_PSADBW256
,
27602 IX86_BUILTIN_PSHUFB256
,
27603 IX86_BUILTIN_PSHUFD256
,
27604 IX86_BUILTIN_PSHUFHW256
,
27605 IX86_BUILTIN_PSHUFLW256
,
27606 IX86_BUILTIN_PSIGNB256
,
27607 IX86_BUILTIN_PSIGNW256
,
27608 IX86_BUILTIN_PSIGND256
,
27609 IX86_BUILTIN_PSLLDQI256
,
27610 IX86_BUILTIN_PSLLWI256
,
27611 IX86_BUILTIN_PSLLW256
,
27612 IX86_BUILTIN_PSLLDI256
,
27613 IX86_BUILTIN_PSLLD256
,
27614 IX86_BUILTIN_PSLLQI256
,
27615 IX86_BUILTIN_PSLLQ256
,
27616 IX86_BUILTIN_PSRAWI256
,
27617 IX86_BUILTIN_PSRAW256
,
27618 IX86_BUILTIN_PSRADI256
,
27619 IX86_BUILTIN_PSRAD256
,
27620 IX86_BUILTIN_PSRLDQI256
,
27621 IX86_BUILTIN_PSRLWI256
,
27622 IX86_BUILTIN_PSRLW256
,
27623 IX86_BUILTIN_PSRLDI256
,
27624 IX86_BUILTIN_PSRLD256
,
27625 IX86_BUILTIN_PSRLQI256
,
27626 IX86_BUILTIN_PSRLQ256
,
27627 IX86_BUILTIN_PSUBB256
,
27628 IX86_BUILTIN_PSUBW256
,
27629 IX86_BUILTIN_PSUBD256
,
27630 IX86_BUILTIN_PSUBQ256
,
27631 IX86_BUILTIN_PSUBSB256
,
27632 IX86_BUILTIN_PSUBSW256
,
27633 IX86_BUILTIN_PSUBUSB256
,
27634 IX86_BUILTIN_PSUBUSW256
,
27635 IX86_BUILTIN_PUNPCKHBW256
,
27636 IX86_BUILTIN_PUNPCKHWD256
,
27637 IX86_BUILTIN_PUNPCKHDQ256
,
27638 IX86_BUILTIN_PUNPCKHQDQ256
,
27639 IX86_BUILTIN_PUNPCKLBW256
,
27640 IX86_BUILTIN_PUNPCKLWD256
,
27641 IX86_BUILTIN_PUNPCKLDQ256
,
27642 IX86_BUILTIN_PUNPCKLQDQ256
,
27643 IX86_BUILTIN_PXOR256
,
27644 IX86_BUILTIN_MOVNTDQA256
,
27645 IX86_BUILTIN_VBROADCASTSS_PS
,
27646 IX86_BUILTIN_VBROADCASTSS_PS256
,
27647 IX86_BUILTIN_VBROADCASTSD_PD256
,
27648 IX86_BUILTIN_VBROADCASTSI256
,
27649 IX86_BUILTIN_PBLENDD256
,
27650 IX86_BUILTIN_PBLENDD128
,
27651 IX86_BUILTIN_PBROADCASTB256
,
27652 IX86_BUILTIN_PBROADCASTW256
,
27653 IX86_BUILTIN_PBROADCASTD256
,
27654 IX86_BUILTIN_PBROADCASTQ256
,
27655 IX86_BUILTIN_PBROADCASTB128
,
27656 IX86_BUILTIN_PBROADCASTW128
,
27657 IX86_BUILTIN_PBROADCASTD128
,
27658 IX86_BUILTIN_PBROADCASTQ128
,
27659 IX86_BUILTIN_VPERMVARSI256
,
27660 IX86_BUILTIN_VPERMDF256
,
27661 IX86_BUILTIN_VPERMVARSF256
,
27662 IX86_BUILTIN_VPERMDI256
,
27663 IX86_BUILTIN_VPERMTI256
,
27664 IX86_BUILTIN_VEXTRACT128I256
,
27665 IX86_BUILTIN_VINSERT128I256
,
27666 IX86_BUILTIN_MASKLOADD
,
27667 IX86_BUILTIN_MASKLOADQ
,
27668 IX86_BUILTIN_MASKLOADD256
,
27669 IX86_BUILTIN_MASKLOADQ256
,
27670 IX86_BUILTIN_MASKSTORED
,
27671 IX86_BUILTIN_MASKSTOREQ
,
27672 IX86_BUILTIN_MASKSTORED256
,
27673 IX86_BUILTIN_MASKSTOREQ256
,
27674 IX86_BUILTIN_PSLLVV4DI
,
27675 IX86_BUILTIN_PSLLVV2DI
,
27676 IX86_BUILTIN_PSLLVV8SI
,
27677 IX86_BUILTIN_PSLLVV4SI
,
27678 IX86_BUILTIN_PSRAVV8SI
,
27679 IX86_BUILTIN_PSRAVV4SI
,
27680 IX86_BUILTIN_PSRLVV4DI
,
27681 IX86_BUILTIN_PSRLVV2DI
,
27682 IX86_BUILTIN_PSRLVV8SI
,
27683 IX86_BUILTIN_PSRLVV4SI
,
27685 IX86_BUILTIN_GATHERSIV2DF
,
27686 IX86_BUILTIN_GATHERSIV4DF
,
27687 IX86_BUILTIN_GATHERDIV2DF
,
27688 IX86_BUILTIN_GATHERDIV4DF
,
27689 IX86_BUILTIN_GATHERSIV4SF
,
27690 IX86_BUILTIN_GATHERSIV8SF
,
27691 IX86_BUILTIN_GATHERDIV4SF
,
27692 IX86_BUILTIN_GATHERDIV8SF
,
27693 IX86_BUILTIN_GATHERSIV2DI
,
27694 IX86_BUILTIN_GATHERSIV4DI
,
27695 IX86_BUILTIN_GATHERDIV2DI
,
27696 IX86_BUILTIN_GATHERDIV4DI
,
27697 IX86_BUILTIN_GATHERSIV4SI
,
27698 IX86_BUILTIN_GATHERSIV8SI
,
27699 IX86_BUILTIN_GATHERDIV4SI
,
27700 IX86_BUILTIN_GATHERDIV8SI
,
27702 /* Alternate 4 element gather for the vectorizer where
27703 all operands are 32-byte wide. */
27704 IX86_BUILTIN_GATHERALTSIV4DF
,
27705 IX86_BUILTIN_GATHERALTDIV8SF
,
27706 IX86_BUILTIN_GATHERALTSIV4DI
,
27707 IX86_BUILTIN_GATHERALTDIV8SI
,
27709 /* TFmode support builtins. */
27711 IX86_BUILTIN_HUGE_VALQ
,
27712 IX86_BUILTIN_FABSQ
,
27713 IX86_BUILTIN_COPYSIGNQ
,
27715 /* Vectorizer support builtins. */
27716 IX86_BUILTIN_CPYSGNPS
,
27717 IX86_BUILTIN_CPYSGNPD
,
27718 IX86_BUILTIN_CPYSGNPS256
,
27719 IX86_BUILTIN_CPYSGNPD256
,
27721 /* FMA4 instructions. */
27722 IX86_BUILTIN_VFMADDSS
,
27723 IX86_BUILTIN_VFMADDSD
,
27724 IX86_BUILTIN_VFMADDPS
,
27725 IX86_BUILTIN_VFMADDPD
,
27726 IX86_BUILTIN_VFMADDPS256
,
27727 IX86_BUILTIN_VFMADDPD256
,
27728 IX86_BUILTIN_VFMADDSUBPS
,
27729 IX86_BUILTIN_VFMADDSUBPD
,
27730 IX86_BUILTIN_VFMADDSUBPS256
,
27731 IX86_BUILTIN_VFMADDSUBPD256
,
27733 /* FMA3 instructions. */
27734 IX86_BUILTIN_VFMADDSS3
,
27735 IX86_BUILTIN_VFMADDSD3
,
27737 /* XOP instructions. */
27738 IX86_BUILTIN_VPCMOV
,
27739 IX86_BUILTIN_VPCMOV_V2DI
,
27740 IX86_BUILTIN_VPCMOV_V4SI
,
27741 IX86_BUILTIN_VPCMOV_V8HI
,
27742 IX86_BUILTIN_VPCMOV_V16QI
,
27743 IX86_BUILTIN_VPCMOV_V4SF
,
27744 IX86_BUILTIN_VPCMOV_V2DF
,
27745 IX86_BUILTIN_VPCMOV256
,
27746 IX86_BUILTIN_VPCMOV_V4DI256
,
27747 IX86_BUILTIN_VPCMOV_V8SI256
,
27748 IX86_BUILTIN_VPCMOV_V16HI256
,
27749 IX86_BUILTIN_VPCMOV_V32QI256
,
27750 IX86_BUILTIN_VPCMOV_V8SF256
,
27751 IX86_BUILTIN_VPCMOV_V4DF256
,
27753 IX86_BUILTIN_VPPERM
,
27755 IX86_BUILTIN_VPMACSSWW
,
27756 IX86_BUILTIN_VPMACSWW
,
27757 IX86_BUILTIN_VPMACSSWD
,
27758 IX86_BUILTIN_VPMACSWD
,
27759 IX86_BUILTIN_VPMACSSDD
,
27760 IX86_BUILTIN_VPMACSDD
,
27761 IX86_BUILTIN_VPMACSSDQL
,
27762 IX86_BUILTIN_VPMACSSDQH
,
27763 IX86_BUILTIN_VPMACSDQL
,
27764 IX86_BUILTIN_VPMACSDQH
,
27765 IX86_BUILTIN_VPMADCSSWD
,
27766 IX86_BUILTIN_VPMADCSWD
,
27768 IX86_BUILTIN_VPHADDBW
,
27769 IX86_BUILTIN_VPHADDBD
,
27770 IX86_BUILTIN_VPHADDBQ
,
27771 IX86_BUILTIN_VPHADDWD
,
27772 IX86_BUILTIN_VPHADDWQ
,
27773 IX86_BUILTIN_VPHADDDQ
,
27774 IX86_BUILTIN_VPHADDUBW
,
27775 IX86_BUILTIN_VPHADDUBD
,
27776 IX86_BUILTIN_VPHADDUBQ
,
27777 IX86_BUILTIN_VPHADDUWD
,
27778 IX86_BUILTIN_VPHADDUWQ
,
27779 IX86_BUILTIN_VPHADDUDQ
,
27780 IX86_BUILTIN_VPHSUBBW
,
27781 IX86_BUILTIN_VPHSUBWD
,
27782 IX86_BUILTIN_VPHSUBDQ
,
27784 IX86_BUILTIN_VPROTB
,
27785 IX86_BUILTIN_VPROTW
,
27786 IX86_BUILTIN_VPROTD
,
27787 IX86_BUILTIN_VPROTQ
,
27788 IX86_BUILTIN_VPROTB_IMM
,
27789 IX86_BUILTIN_VPROTW_IMM
,
27790 IX86_BUILTIN_VPROTD_IMM
,
27791 IX86_BUILTIN_VPROTQ_IMM
,
27793 IX86_BUILTIN_VPSHLB
,
27794 IX86_BUILTIN_VPSHLW
,
27795 IX86_BUILTIN_VPSHLD
,
27796 IX86_BUILTIN_VPSHLQ
,
27797 IX86_BUILTIN_VPSHAB
,
27798 IX86_BUILTIN_VPSHAW
,
27799 IX86_BUILTIN_VPSHAD
,
27800 IX86_BUILTIN_VPSHAQ
,
27802 IX86_BUILTIN_VFRCZSS
,
27803 IX86_BUILTIN_VFRCZSD
,
27804 IX86_BUILTIN_VFRCZPS
,
27805 IX86_BUILTIN_VFRCZPD
,
27806 IX86_BUILTIN_VFRCZPS256
,
27807 IX86_BUILTIN_VFRCZPD256
,
27809 IX86_BUILTIN_VPCOMEQUB
,
27810 IX86_BUILTIN_VPCOMNEUB
,
27811 IX86_BUILTIN_VPCOMLTUB
,
27812 IX86_BUILTIN_VPCOMLEUB
,
27813 IX86_BUILTIN_VPCOMGTUB
,
27814 IX86_BUILTIN_VPCOMGEUB
,
27815 IX86_BUILTIN_VPCOMFALSEUB
,
27816 IX86_BUILTIN_VPCOMTRUEUB
,
27818 IX86_BUILTIN_VPCOMEQUW
,
27819 IX86_BUILTIN_VPCOMNEUW
,
27820 IX86_BUILTIN_VPCOMLTUW
,
27821 IX86_BUILTIN_VPCOMLEUW
,
27822 IX86_BUILTIN_VPCOMGTUW
,
27823 IX86_BUILTIN_VPCOMGEUW
,
27824 IX86_BUILTIN_VPCOMFALSEUW
,
27825 IX86_BUILTIN_VPCOMTRUEUW
,
27827 IX86_BUILTIN_VPCOMEQUD
,
27828 IX86_BUILTIN_VPCOMNEUD
,
27829 IX86_BUILTIN_VPCOMLTUD
,
27830 IX86_BUILTIN_VPCOMLEUD
,
27831 IX86_BUILTIN_VPCOMGTUD
,
27832 IX86_BUILTIN_VPCOMGEUD
,
27833 IX86_BUILTIN_VPCOMFALSEUD
,
27834 IX86_BUILTIN_VPCOMTRUEUD
,
27836 IX86_BUILTIN_VPCOMEQUQ
,
27837 IX86_BUILTIN_VPCOMNEUQ
,
27838 IX86_BUILTIN_VPCOMLTUQ
,
27839 IX86_BUILTIN_VPCOMLEUQ
,
27840 IX86_BUILTIN_VPCOMGTUQ
,
27841 IX86_BUILTIN_VPCOMGEUQ
,
27842 IX86_BUILTIN_VPCOMFALSEUQ
,
27843 IX86_BUILTIN_VPCOMTRUEUQ
,
27845 IX86_BUILTIN_VPCOMEQB
,
27846 IX86_BUILTIN_VPCOMNEB
,
27847 IX86_BUILTIN_VPCOMLTB
,
27848 IX86_BUILTIN_VPCOMLEB
,
27849 IX86_BUILTIN_VPCOMGTB
,
27850 IX86_BUILTIN_VPCOMGEB
,
27851 IX86_BUILTIN_VPCOMFALSEB
,
27852 IX86_BUILTIN_VPCOMTRUEB
,
27854 IX86_BUILTIN_VPCOMEQW
,
27855 IX86_BUILTIN_VPCOMNEW
,
27856 IX86_BUILTIN_VPCOMLTW
,
27857 IX86_BUILTIN_VPCOMLEW
,
27858 IX86_BUILTIN_VPCOMGTW
,
27859 IX86_BUILTIN_VPCOMGEW
,
27860 IX86_BUILTIN_VPCOMFALSEW
,
27861 IX86_BUILTIN_VPCOMTRUEW
,
27863 IX86_BUILTIN_VPCOMEQD
,
27864 IX86_BUILTIN_VPCOMNED
,
27865 IX86_BUILTIN_VPCOMLTD
,
27866 IX86_BUILTIN_VPCOMLED
,
27867 IX86_BUILTIN_VPCOMGTD
,
27868 IX86_BUILTIN_VPCOMGED
,
27869 IX86_BUILTIN_VPCOMFALSED
,
27870 IX86_BUILTIN_VPCOMTRUED
,
27872 IX86_BUILTIN_VPCOMEQQ
,
27873 IX86_BUILTIN_VPCOMNEQ
,
27874 IX86_BUILTIN_VPCOMLTQ
,
27875 IX86_BUILTIN_VPCOMLEQ
,
27876 IX86_BUILTIN_VPCOMGTQ
,
27877 IX86_BUILTIN_VPCOMGEQ
,
27878 IX86_BUILTIN_VPCOMFALSEQ
,
27879 IX86_BUILTIN_VPCOMTRUEQ
,
27881 /* LWP instructions. */
27882 IX86_BUILTIN_LLWPCB
,
27883 IX86_BUILTIN_SLWPCB
,
27884 IX86_BUILTIN_LWPVAL32
,
27885 IX86_BUILTIN_LWPVAL64
,
27886 IX86_BUILTIN_LWPINS32
,
27887 IX86_BUILTIN_LWPINS64
,
27892 IX86_BUILTIN_XBEGIN
,
27894 IX86_BUILTIN_XABORT
,
27895 IX86_BUILTIN_XTEST
,
27897 /* BMI instructions. */
27898 IX86_BUILTIN_BEXTR32
,
27899 IX86_BUILTIN_BEXTR64
,
27902 /* TBM instructions. */
27903 IX86_BUILTIN_BEXTRI32
,
27904 IX86_BUILTIN_BEXTRI64
,
27906 /* BMI2 instructions. */
27907 IX86_BUILTIN_BZHI32
,
27908 IX86_BUILTIN_BZHI64
,
27909 IX86_BUILTIN_PDEP32
,
27910 IX86_BUILTIN_PDEP64
,
27911 IX86_BUILTIN_PEXT32
,
27912 IX86_BUILTIN_PEXT64
,
27914 /* ADX instructions. */
27915 IX86_BUILTIN_ADDCARRYX32
,
27916 IX86_BUILTIN_ADDCARRYX64
,
27918 /* FSGSBASE instructions. */
27919 IX86_BUILTIN_RDFSBASE32
,
27920 IX86_BUILTIN_RDFSBASE64
,
27921 IX86_BUILTIN_RDGSBASE32
,
27922 IX86_BUILTIN_RDGSBASE64
,
27923 IX86_BUILTIN_WRFSBASE32
,
27924 IX86_BUILTIN_WRFSBASE64
,
27925 IX86_BUILTIN_WRGSBASE32
,
27926 IX86_BUILTIN_WRGSBASE64
,
27928 /* RDRND instructions. */
27929 IX86_BUILTIN_RDRAND16_STEP
,
27930 IX86_BUILTIN_RDRAND32_STEP
,
27931 IX86_BUILTIN_RDRAND64_STEP
,
27933 /* RDSEED instructions. */
27934 IX86_BUILTIN_RDSEED16_STEP
,
27935 IX86_BUILTIN_RDSEED32_STEP
,
27936 IX86_BUILTIN_RDSEED64_STEP
,
27938 /* F16C instructions. */
27939 IX86_BUILTIN_CVTPH2PS
,
27940 IX86_BUILTIN_CVTPH2PS256
,
27941 IX86_BUILTIN_CVTPS2PH
,
27942 IX86_BUILTIN_CVTPS2PH256
,
27944 /* CFString built-in for darwin */
27945 IX86_BUILTIN_CFSTRING
,
27947 /* Builtins to get CPU type and supported features. */
27948 IX86_BUILTIN_CPU_INIT
,
27949 IX86_BUILTIN_CPU_IS
,
27950 IX86_BUILTIN_CPU_SUPPORTS
,
27952 /* Read/write FLAGS register built-ins. */
27953 IX86_BUILTIN_READ_FLAGS
,
27954 IX86_BUILTIN_WRITE_FLAGS
,
27959 /* Table for the ix86 builtin decls. */
27960 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
27962 /* Table of all of the builtin functions that are possible with different ISA's
27963 but are waiting to be built until a function is declared to use that
27965 struct builtin_isa
{
27966 const char *name
; /* function name */
27967 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
27968 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
27969 bool const_p
; /* true if the declaration is constant */
27970 bool set_and_not_built_p
;
27973 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
27976 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
27977 of which isa_flags to use in the ix86_builtins_isa array. Stores the
27978 function decl in the ix86_builtins array. Returns the function decl or
27979 NULL_TREE, if the builtin was not added.
27981 If the front end has a special hook for builtin functions, delay adding
27982 builtin functions that aren't in the current ISA until the ISA is changed
27983 with function specific optimization. Doing so, can save about 300K for the
27984 default compiler. When the builtin is expanded, check at that time whether
27987 If the front end doesn't have a special hook, record all builtins, even if
27988 it isn't an instruction set in the current ISA in case the user uses
27989 function specific options for a different ISA, so that we don't get scope
27990 errors if a builtin is added in the middle of a function scope. */
27993 def_builtin (HOST_WIDE_INT mask
, const char *name
,
27994 enum ix86_builtin_func_type tcode
,
27995 enum ix86_builtins code
)
27997 tree decl
= NULL_TREE
;
27999 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
28001 ix86_builtins_isa
[(int) code
].isa
= mask
;
28003 mask
&= ~OPTION_MASK_ISA_64BIT
;
28005 || (mask
& ix86_isa_flags
) != 0
28006 || (lang_hooks
.builtin_function
28007 == lang_hooks
.builtin_function_ext_scope
))
28010 tree type
= ix86_get_builtin_func_type (tcode
);
28011 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
28013 ix86_builtins
[(int) code
] = decl
;
28014 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
28018 ix86_builtins
[(int) code
] = NULL_TREE
;
28019 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
28020 ix86_builtins_isa
[(int) code
].name
= name
;
28021 ix86_builtins_isa
[(int) code
].const_p
= false;
28022 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
28029 /* Like def_builtin, but also marks the function decl "const". */
28032 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
28033 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
28035 tree decl
= def_builtin (mask
, name
, tcode
, code
);
28037 TREE_READONLY (decl
) = 1;
28039 ix86_builtins_isa
[(int) code
].const_p
= true;
28044 /* Add any new builtin functions for a given ISA that may not have been
28045 declared. This saves a bit of space compared to adding all of the
28046 declarations to the tree, even if we didn't use them. */
28049 ix86_add_new_builtins (HOST_WIDE_INT isa
)
28053 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
28055 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
28056 && ix86_builtins_isa
[i
].set_and_not_built_p
)
28060 /* Don't define the builtin again. */
28061 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
28063 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
28064 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
28065 type
, i
, BUILT_IN_MD
, NULL
,
28068 ix86_builtins
[i
] = decl
;
28069 if (ix86_builtins_isa
[i
].const_p
)
28070 TREE_READONLY (decl
) = 1;
28075 /* Bits for builtin_description.flag. */
28077 /* Set when we don't support the comparison natively, and should
28078 swap_comparison in order to support it. */
28079 #define BUILTIN_DESC_SWAP_OPERANDS 1
28081 struct builtin_description
28083 const HOST_WIDE_INT mask
;
28084 const enum insn_code icode
;
28085 const char *const name
;
28086 const enum ix86_builtins code
;
28087 const enum rtx_code comparison
;
28091 static const struct builtin_description bdesc_comi
[] =
28093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
28094 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
28095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
28096 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
28097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
28098 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
28099 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
28100 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
28101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
28102 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
28103 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
28104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
28105 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
28106 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
28107 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
28108 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
28109 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
28110 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
28111 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
28112 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
28113 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
28114 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
28115 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
28116 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
28119 static const struct builtin_description bdesc_pcmpestr
[] =
28122 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
28123 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
28124 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
28125 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
28126 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
28127 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
28128 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
28131 static const struct builtin_description bdesc_pcmpistr
[] =
28134 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
28135 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
28136 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
28137 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
28138 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
28139 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
28140 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
28143 /* Special builtins with variable number of arguments. */
28144 static const struct builtin_description bdesc_special_args
[] =
28146 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28147 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
28148 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28150 /* 80387 (for use internally for atomic compound assignment). */
28151 { 0, CODE_FOR_fnstenv
, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28152 { 0, CODE_FOR_fldenv
, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV
, UNKNOWN
, (int) VOID_FTYPE_PCVOID
},
28153 { 0, CODE_FOR_fnstsw
, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW
, UNKNOWN
, (int) VOID_FTYPE_PUSHORT
},
28154 { 0, CODE_FOR_fnclex
, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28157 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28160 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28162 /* FXSR, XSAVE and XSAVEOPT */
28163 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28164 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28165 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28166 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28167 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28169 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28170 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28171 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28172 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28173 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
28176 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28177 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28178 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28180 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28181 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
28182 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28183 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
28185 /* SSE or 3DNow!A */
28186 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28187 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
28190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedquv16qi
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
28194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
28196 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
28197 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
28198 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
28199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddquv16qi
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28201 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
28205 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
28208 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
28211 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
28212 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
28215 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28216 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28218 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
28219 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28220 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28221 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
28222 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
28224 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
28225 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
28226 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28227 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28228 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddquv32qi
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28229 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedquv32qi
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
28230 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
28232 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
28233 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
28234 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
28236 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
28237 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
28238 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
28239 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
28240 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
28241 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
28242 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
28243 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
28246 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
28247 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
28248 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
28249 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
28250 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
28251 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
28252 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
28253 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
28254 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
28256 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
28257 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
28258 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
28259 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
28260 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
28261 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
28264 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28265 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28266 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28267 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
28268 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28269 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28270 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
28271 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
28274 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
28275 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
28276 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
28279 /* Builtins with variable number of arguments. */
28280 static const struct builtin_description bdesc_args
[] =
28282 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
28283 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
28284 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
28285 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28286 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28287 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
28288 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
28291 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28292 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28293 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28294 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28295 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28296 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28298 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28299 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28300 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28301 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28302 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28303 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28304 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28305 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28307 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28308 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28310 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28311 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28312 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28313 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28315 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28316 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28317 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28318 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28319 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28320 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28322 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28323 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28324 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28325 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28326 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28327 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28329 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28330 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
28331 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
28333 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
28335 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28336 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28337 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28338 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28339 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28340 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28342 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28343 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28344 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
28345 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28346 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28347 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
28349 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
28350 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
28351 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
28352 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
28355 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28356 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28357 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28358 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28360 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28361 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28362 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28363 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28364 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28365 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
28366 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28367 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28368 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28369 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28370 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28371 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28372 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28373 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28374 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28377 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
28378 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
28379 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28380 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
28381 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28382 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
28385 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28386 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28387 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28388 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28389 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28390 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28391 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28392 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28393 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28394 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
28395 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
28396 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
28398 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28400 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28401 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28402 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28403 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28404 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28405 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28406 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28407 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28409 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28410 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28411 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28412 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28413 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28414 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28415 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28416 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28417 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28418 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28419 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
28420 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28421 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
28422 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28423 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28424 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28425 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28426 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
28427 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
28428 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
28430 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28431 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28432 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28433 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28435 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28436 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28437 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28438 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28440 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28442 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28443 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28444 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28445 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28446 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28448 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
28449 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
28450 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
28452 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
28454 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28455 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28456 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
28458 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
28459 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
28461 /* SSE MMX or 3DNow!A */
28462 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28463 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28464 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28466 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28467 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28468 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28469 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28471 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
28472 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
28474 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
28477 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28479 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28480 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
28481 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28482 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
28483 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
28485 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28486 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28487 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
28488 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
28489 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
28491 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
28493 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
28495 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28496 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
28498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_fix_notruncv4sfv4si
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
28500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28502 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28504 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28505 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28506 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28507 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28508 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28509 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28511 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28512 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28513 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28514 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28515 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28516 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28517 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28518 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28519 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28520 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28521 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
28522 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28523 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
28524 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28525 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28526 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28527 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28528 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
28529 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
28530 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
28532 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28533 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28534 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28535 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28537 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28538 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28539 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28540 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28542 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28544 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28545 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28546 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28548 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28550 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28551 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28552 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28553 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28554 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28555 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28556 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28557 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28559 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28560 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28561 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28562 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28563 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28564 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28565 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28566 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28568 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
/* pmulhw: signed 16-bit multiply, high half of each 32-bit product.  */
28569 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28571 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28572 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28573 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28574 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28576 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28577 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28579 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28580 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28581 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28582 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28583 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28584 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28586 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28587 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28588 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28589 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28591 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28592 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28593 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28594 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28595 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28596 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28597 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28598 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28600 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28601 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28602 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
28604 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28605 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
28607 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
28608 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28610 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
28612 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
28613 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
28614 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
28615 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
28617 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28618 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28619 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28620 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28621 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28622 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28623 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28625 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
28626 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28627 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28628 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
28629 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28630 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28631 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
28633 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
28634 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
28635 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
28636 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
28638 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
28639 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28640 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
28642 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
/* MOVQ (xmm->xmm zero-extending 64-bit move) is an SSE2 instruction,
   so gate the builtin on SSE2, not SSE; otherwise -msse -mno-sse2
   would expand it to an unavailable insn.  */
28644 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28647 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28648 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
28651 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28652 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28654 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28655 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28656 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28657 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28658 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
28659 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
28662 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
28663 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
28664 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28665 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
28666 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
28667 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
28669 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28670 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28671 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28672 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28673 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28674 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28675 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28676 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28677 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28678 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28679 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28680 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28681 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
28682 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
28683 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28684 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28685 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28686 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28687 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28688 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
28689 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28690 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
28691 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28692 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
28695 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
28696 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
28699 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28700 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28701 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
28702 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
28703 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28704 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28705 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28706 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
28707 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
28708 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
28710 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28711 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28712 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28713 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28714 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28715 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28716 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
28717 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
28718 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
28719 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
28720 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
28721 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
28722 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
28724 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
28725 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28726 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28727 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28728 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28729 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28730 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
28731 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28732 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28733 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
28734 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
28735 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
28738 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28739 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28740 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28741 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28743 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28744 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
28745 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
28746 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
28748 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28749 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
28751 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
28752 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
28754 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28755 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
28756 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
28757 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
28759 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
28760 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
28762 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
28763 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
28765 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28766 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28767 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
28770 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28771 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
28772 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
28773 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
28774 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
28777 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
28778 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
28779 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
28780 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
28784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
28786 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28787 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28788 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28789 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
28792 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
28795 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28796 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28797 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28798 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28799 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28800 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28801 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28802 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28803 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28804 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28805 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28806 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28807 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28808 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28809 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28810 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28811 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28812 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28813 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28814 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28815 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28816 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28817 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28818 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28819 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28820 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28822 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
28823 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
28824 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
28825 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
28827 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28828 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28829 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
28830 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
28831 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28832 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28833 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28834 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28835 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28836 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
28837 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
28838 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28839 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28840 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
28841 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
28842 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
28843 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
28844 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
28845 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
28846 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_fix_notruncv8sfv8si
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28847 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
28848 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28849 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
28850 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28851 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
28852 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
28853 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
28854 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
28855 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
28856 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28857 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28858 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
28859 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
28860 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
28862 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28863 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28864 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28866 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28867 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28868 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28869 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28870 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28872 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28874 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
28875 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
28877 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28878 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
28879 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
28880 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
28882 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
28883 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28885 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28886 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
28888 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28889 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
28890 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
28891 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
28893 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
28894 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
28896 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
28897 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
28899 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28900 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28901 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28902 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28904 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
28905 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
28906 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
28907 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
28908 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
28909 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
28911 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28912 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28913 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
28914 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28915 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28916 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
28917 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28918 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28919 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
28920 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28921 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28922 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
28923 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28924 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28925 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
28927 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
28928 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
28930 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
28931 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
28933 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
28936 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
28937 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
28938 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
28939 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
28940 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28941 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28942 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
28943 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
28944 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28950 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28951 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28952 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
28953 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28954 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28955 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28956 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28957 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
28958 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
28959 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28960 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28961 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28962 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28963 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28964 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28965 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28966 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
28967 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28968 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28969 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28970 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28971 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28972 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28973 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
28974 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
28975 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28976 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28977 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28978 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28979 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28980 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28981 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28982 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28983 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28984 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
28985 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
28986 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
28987 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
28988 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28989 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28990 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28991 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28992 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28993 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
28994 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
28995 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
28996 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
28997 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
28998 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
28999 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
29000 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
29001 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29002 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29003 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29004 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29005 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29006 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
29007 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29008 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
29009 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29010 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
29011 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
29012 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
29013 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29014 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29015 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29016 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
29017 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
29018 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
29019 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
29020 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
29021 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
29022 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
29023 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
29024 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
29025 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
29026 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
29027 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
29028 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
29029 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
29030 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
29031 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
29032 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
29033 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
29034 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29035 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29036 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29037 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29038 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29039 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29040 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29041 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29042 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29043 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29044 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29045 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29046 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
29047 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
29048 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29049 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29050 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29051 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
29052 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
29053 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
29054 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
29055 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
29056 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
29057 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
29058 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
29059 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
29060 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
29061 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
29062 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
29063 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
29064 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
29065 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29066 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
29067 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
29068 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
29069 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
29070 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
29071 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
29072 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29073 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
29074 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29075 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29076 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29077 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29078 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
29079 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
29080 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
29081 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
29083 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
29086 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29087 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29088 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
29091 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29092 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29095 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
29096 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
29097 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
29098 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
29101 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29102 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29103 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29104 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
29105 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
29106 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand aliases for the ix86 function-type enum codes used in the
   bdesc_multi_arg table below.  Naming scheme:
     MULTI_ARG_<nargs>_<element kind>[2 = 256-bit width variant][suffix]
   where the suffix encodes an extra operand class (e.g. _IMM for an
   immediate count, _CMP for a comparison code, _TF for a trailing
   condition operand, or a second element kind for widening forms).
   Each expands to one of the V*_FTYPE_* enumerators declared earlier
   in this file.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
29163 static const struct builtin_description bdesc_multi_arg
[] =
29165 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
29166 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
29167 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29168 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
29169 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
29170 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29172 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
29173 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
29174 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29175 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
29176 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
29177 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29179 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
29180 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
29181 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29182 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
29183 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
29184 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29185 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
29186 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
29187 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29188 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
29189 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
29190 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29192 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
29193 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
29194 UNKNOWN
, (int)MULTI_ARG_3_SF
},
29195 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
29196 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
29197 UNKNOWN
, (int)MULTI_ARG_3_DF
},
29198 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
29199 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
29200 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29201 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
29202 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
29203 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29205 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29206 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
29207 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29208 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29209 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
29210 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
29211 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
29213 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29214 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
29215 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
29216 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
29217 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
29218 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
29219 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
29221 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
29223 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29224 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
29225 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29226 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29227 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29228 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
29229 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29230 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29231 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29232 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
29233 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29234 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
29236 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29237 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29238 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29239 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29240 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
29241 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
29242 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
29243 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
29244 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29245 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29246 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29247 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29248 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
29249 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
29250 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
29251 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
29253 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
29254 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
29255 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
29256 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
29257 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
29258 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
29260 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29261 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29262 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29263 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29264 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29265 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29266 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29267 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
29268 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
29269 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29270 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
29271 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29272 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
29273 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
29274 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
29276 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29277 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29278 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29279 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
29280 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
29281 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
29282 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
29284 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29285 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29286 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29287 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
29288 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
29289 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
29290 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
29292 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29293 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29294 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29295 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
29296 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
29297 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
29298 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
29300 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29301 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29302 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29303 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
29304 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
29305 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
29306 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
29308 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
29309 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29310 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
29311 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
29312 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
29313 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
29314 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
29316 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
29317 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29318 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
29319 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
29320 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
29321 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
29322 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
29324 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
29325 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29326 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
29327 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
29328 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
29329 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
29330 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
29332 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
29333 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29334 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
29335 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
29336 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
29337 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
29338 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
29340 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29341 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29342 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29343 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29344 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
29345 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
29346 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
29347 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
29349 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29350 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29351 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29352 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29353 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
29354 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
29355 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
29356 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
29358 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
29359 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
29360 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
29361 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
29365 /* TM vector builtins. */
29367 /* Reuse the existing x86-specific `struct builtin_description' cause
29368 we're lazy. Add casts to make them fit. */
29369 static const struct builtin_description bdesc_tm
[] =
29371 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29372 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29373 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
29374 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29375 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29376 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29377 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
29379 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29380 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29381 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
29382 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29383 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29384 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29385 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
29387 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29388 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29389 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
29390 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29391 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29392 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29393 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
29395 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
29396 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
29397 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
29400 /* TM callbacks. */
29402 /* Return the builtin decl needed to load a vector of TYPE. */
29405 ix86_builtin_tm_load (tree type
)
29407 if (TREE_CODE (type
) == VECTOR_TYPE
)
29409 switch (tree_to_uhwi (TYPE_SIZE (type
)))
29412 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
29414 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
29416 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
29422 /* Return the builtin decl needed to store a vector of TYPE. */
29425 ix86_builtin_tm_store (tree type
)
29427 if (TREE_CODE (type
) == VECTOR_TYPE
)
29429 switch (tree_to_uhwi (TYPE_SIZE (type
)))
29432 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
29434 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
29436 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
29442 /* Initialize the transactional memory vector load/store builtins. */
29445 ix86_init_tm_builtins (void)
29447 enum ix86_builtin_func_type ftype
;
29448 const struct builtin_description
*d
;
29451 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
29452 tree attrs_log
, attrs_type_log
;
29457 /* If there are no builtins defined, we must be compiling in a
29458 language without trans-mem support. */
29459 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
29462 /* Use whatever attributes a normal TM load has. */
29463 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
29464 attrs_load
= DECL_ATTRIBUTES (decl
);
29465 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29466 /* Use whatever attributes a normal TM store has. */
29467 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
29468 attrs_store
= DECL_ATTRIBUTES (decl
);
29469 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29470 /* Use whatever attributes a normal TM log has. */
29471 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
29472 attrs_log
= DECL_ATTRIBUTES (decl
);
29473 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
29475 for (i
= 0, d
= bdesc_tm
;
29476 i
< ARRAY_SIZE (bdesc_tm
);
29479 if ((d
->mask
& ix86_isa_flags
) != 0
29480 || (lang_hooks
.builtin_function
29481 == lang_hooks
.builtin_function_ext_scope
))
29483 tree type
, attrs
, attrs_type
;
29484 enum built_in_function code
= (enum built_in_function
) d
->code
;
29486 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29487 type
= ix86_get_builtin_func_type (ftype
);
29489 if (BUILTIN_TM_LOAD_P (code
))
29491 attrs
= attrs_load
;
29492 attrs_type
= attrs_type_load
;
29494 else if (BUILTIN_TM_STORE_P (code
))
29496 attrs
= attrs_store
;
29497 attrs_type
= attrs_type_store
;
29502 attrs_type
= attrs_type_log
;
29504 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
29505 /* The builtin without the prefix for
29506 calling it directly. */
29507 d
->name
+ strlen ("__builtin_"),
29509 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
29510 set the TYPE_ATTRIBUTES. */
29511 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
29513 set_builtin_decl (code
, decl
, false);
29518 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
29519 in the current target ISA to allow the user to compile particular modules
29520 with different target specific options that differ from the command line
29523 ix86_init_mmx_sse_builtins (void)
29525 const struct builtin_description
* d
;
29526 enum ix86_builtin_func_type ftype
;
29529 /* Add all special builtins with variable number of operands. */
29530 for (i
= 0, d
= bdesc_special_args
;
29531 i
< ARRAY_SIZE (bdesc_special_args
);
29537 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29538 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
29541 /* Add all builtins with variable number of operands. */
29542 for (i
= 0, d
= bdesc_args
;
29543 i
< ARRAY_SIZE (bdesc_args
);
29549 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29550 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29553 /* pcmpestr[im] insns. */
29554 for (i
= 0, d
= bdesc_pcmpestr
;
29555 i
< ARRAY_SIZE (bdesc_pcmpestr
);
29558 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
29559 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
29561 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
29562 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29565 /* pcmpistr[im] insns. */
29566 for (i
= 0, d
= bdesc_pcmpistr
;
29567 i
< ARRAY_SIZE (bdesc_pcmpistr
);
29570 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29571 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
29573 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
29574 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29577 /* comi/ucomi insns. */
29578 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
29580 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
29581 ftype
= INT_FTYPE_V2DF_V2DF
;
29583 ftype
= INT_FTYPE_V4SF_V4SF
;
29584 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29588 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
29589 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
29590 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
29591 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
29593 /* SSE or 3DNow!A */
29594 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29595 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
29596 IX86_BUILTIN_MASKMOVQ
);
29599 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
29600 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
29602 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
29603 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
29604 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
29605 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
29608 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
29609 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
29610 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
29611 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
29614 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
29615 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
29616 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
29617 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
29618 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
29619 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
29620 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
29621 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
29622 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
29623 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
29624 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
29625 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
29628 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
29629 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
29632 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
29633 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
29634 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
29635 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
29636 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
29637 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
29638 IX86_BUILTIN_RDRAND64_STEP
);
29641 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
29642 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
29643 IX86_BUILTIN_GATHERSIV2DF
);
29645 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
29646 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
29647 IX86_BUILTIN_GATHERSIV4DF
);
29649 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
29650 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
29651 IX86_BUILTIN_GATHERDIV2DF
);
29653 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
29654 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
29655 IX86_BUILTIN_GATHERDIV4DF
);
29657 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
29658 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
29659 IX86_BUILTIN_GATHERSIV4SF
);
29661 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
29662 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
29663 IX86_BUILTIN_GATHERSIV8SF
);
29665 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
29666 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
29667 IX86_BUILTIN_GATHERDIV4SF
);
29669 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
29670 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
29671 IX86_BUILTIN_GATHERDIV8SF
);
29673 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
29674 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
29675 IX86_BUILTIN_GATHERSIV2DI
);
29677 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
29678 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
29679 IX86_BUILTIN_GATHERSIV4DI
);
29681 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
29682 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
29683 IX86_BUILTIN_GATHERDIV2DI
);
29685 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
29686 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
29687 IX86_BUILTIN_GATHERDIV4DI
);
29689 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
29690 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
29691 IX86_BUILTIN_GATHERSIV4SI
);
29693 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
29694 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
29695 IX86_BUILTIN_GATHERSIV8SI
);
29697 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
29698 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
29699 IX86_BUILTIN_GATHERDIV4SI
);
29701 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
29702 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
29703 IX86_BUILTIN_GATHERDIV8SI
);
29705 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
29706 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
29707 IX86_BUILTIN_GATHERALTSIV4DF
);
29709 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
29710 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
29711 IX86_BUILTIN_GATHERALTDIV8SF
);
29713 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
29714 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
29715 IX86_BUILTIN_GATHERALTSIV4DI
);
29717 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
29718 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
29719 IX86_BUILTIN_GATHERALTDIV8SI
);
29722 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
29723 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
29725 /* MMX access to the vec_init patterns. */
29726 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
29727 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
29729 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
29730 V4HI_FTYPE_HI_HI_HI_HI
,
29731 IX86_BUILTIN_VEC_INIT_V4HI
);
29733 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
29734 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
29735 IX86_BUILTIN_VEC_INIT_V8QI
);
29737 /* Access to the vec_extract patterns. */
29738 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
29739 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
29740 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
29741 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
29742 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
29743 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
29744 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
29745 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
29746 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
29747 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
29749 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29750 "__builtin_ia32_vec_ext_v4hi",
29751 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
29753 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
29754 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
29756 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
29757 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
29759 /* Access to the vec_set patterns. */
29760 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
29761 "__builtin_ia32_vec_set_v2di",
29762 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
29764 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
29765 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
29767 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
29768 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
29770 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
29771 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
29773 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
29774 "__builtin_ia32_vec_set_v4hi",
29775 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
29777 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
29778 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
29781 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
29782 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
29783 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
29784 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
29785 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
29786 "__builtin_ia32_rdseed_di_step",
29787 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
29790 def_builtin (0, "__builtin_ia32_addcarryx_u32",
29791 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
29792 def_builtin (OPTION_MASK_ISA_64BIT
,
29793 "__builtin_ia32_addcarryx_u64",
29794 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
29795 IX86_BUILTIN_ADDCARRYX64
);
29797 /* Read/write FLAGS. */
29798 def_builtin (~OPTION_MASK_ISA_64BIT
, "__builtin_ia32_readeflags_u32",
29799 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_READ_FLAGS
);
29800 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_ia32_readeflags_u64",
29801 UINT64_FTYPE_VOID
, IX86_BUILTIN_READ_FLAGS
);
29802 def_builtin (~OPTION_MASK_ISA_64BIT
, "__builtin_ia32_writeeflags_u32",
29803 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_WRITE_FLAGS
);
29804 def_builtin (OPTION_MASK_ISA_64BIT
, "__builtin_ia32_writeeflags_u64",
29805 VOID_FTYPE_UINT64
, IX86_BUILTIN_WRITE_FLAGS
);
29808 /* Add FMA4 multi-arg argument instructions */
29809 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
29814 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
29815 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
29819 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
29820 to return a pointer to VERSION_DECL if the outcome of the expression
29821 formed by PREDICATE_CHAIN is true. This function will be called during
/* NOTE(review): this region is a garbled extraction of GCC i386.c — statements
   are split mid-token and the fused original line numbers show gaps, meaning
   interior lines (braces, some declarations, the function's return) are
   missing from this view.  Text is preserved byte-for-byte; only review
   comments are added.  */
/* add_condition_to_bb: emits, into NEW_BB of the dispatcher FUNCTION_DECL,
   a guarded "return &VERSION_DECL" — unconditional when PREDICATE_CHAIN is
   NULL_TREE, otherwise guarded by calls to each predicate builtin in the
   chain, AND-combined via MIN_EXPR (any zero result means "not supported").
   Per the leading comment, it returns the basic block at the end, to which
   more conditions can be added (the return itself is outside this view).  */
29822 version dispatch to decide which function version to execute. It returns
29823 the basic block at the end, to which more conditions can be added. */
29826 add_condition_to_bb (tree function_decl
, tree version_decl
,
29827 tree predicate_chain
, basic_block new_bb
)
29829 gimple return_stmt
;
29830 tree convert_expr
, result_var
;
29831 gimple convert_stmt
;
29832 gimple call_cond_stmt
;
29833 gimple if_else_stmt
;
29835 basic_block bb1
, bb2
, bb3
;
/* NOTE(review): edge variables e12/e23 are used below but their declarations
   are among the missing lines.  */
29838 tree cond_var
, and_expr_var
= NULL_TREE
;
29841 tree predicate_decl
, predicate_arg
;
29843 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
29845 gcc_assert (new_bb
!= NULL
);
29846 gseq
= bb_seq (new_bb
);
/* Build "result_var = (void *) &version_decl; return result_var;".  */
29849 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
29850 build_fold_addr_expr (version_decl
));
29851 result_var
= create_tmp_var (ptr_type_node
, NULL
);
29852 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
29853 return_stmt
= gimple_build_return (result_var
);
/* No predicates: this version is returned unconditionally.  */
29855 if (predicate_chain
== NULL_TREE
)
29857 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29858 gimple_seq_add_stmt (&gseq
, return_stmt
);
29859 set_bb_seq (new_bb
, gseq
);
29860 gimple_set_bb (convert_stmt
, new_bb
);
29861 gimple_set_bb (return_stmt
, new_bb
);
/* Emit one predicate call per chain element; accumulate the conjunction
   of the integer results in and_expr_var using MIN_EXPR.  */
29866 while (predicate_chain
!= NULL
)
29868 cond_var
= create_tmp_var (integer_type_node
, NULL
);
29869 predicate_decl
= TREE_PURPOSE (predicate_chain
);
29870 predicate_arg
= TREE_VALUE (predicate_chain
);
29871 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
29872 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
29874 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
29875 gimple_set_bb (call_cond_stmt
, new_bb
);
29876 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
29878 predicate_chain
= TREE_CHAIN (predicate_chain
);
29880 if (and_expr_var
== NULL
)
29881 and_expr_var
= cond_var
;
29884 gimple assign_stmt
;
29885 /* Use MIN_EXPR to check if any integer is zero?.
29886 and_expr_var = min_expr <cond_var, and_expr_var> */
29887 assign_stmt
= gimple_build_assign (and_expr_var
,
29888 build2 (MIN_EXPR
, integer_type_node
,
29889 cond_var
, and_expr_var
));
29891 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
29892 gimple_set_bb (assign_stmt
, new_bb
);
29893 gimple_seq_add_stmt (&gseq
, assign_stmt
);
/* Branch on the accumulated condition; the comparison operand against
   and_expr_var is among the missing lines (presumably integer_zero_node
   — TODO confirm against upstream GCC).  */
29897 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
29899 NULL_TREE
, NULL_TREE
);
29900 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
29901 gimple_set_bb (if_else_stmt
, new_bb
);
29902 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
29904 gimple_seq_add_stmt (&gseq
, convert_stmt
);
29905 gimple_seq_add_stmt (&gseq
, return_stmt
);
29906 set_bb_seq (new_bb
, gseq
);
/* Split the block after the condition and after the return, then wire
   TRUE/FALSE edges; assignments of bb1/bb2/bb3 are partially missing.  */
29909 e12
= split_block (bb1
, if_else_stmt
);
29911 e12
->flags
&= ~EDGE_FALLTHRU
;
29912 e12
->flags
|= EDGE_TRUE_VALUE
;
29914 e23
= split_block (bb2
, return_stmt
);
29916 gimple_set_bb (convert_stmt
, bb2
);
29917 gimple_set_bb (return_stmt
, bb2
);
29920 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
29923 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
/* NOTE(review): garbled extraction — interior lines are missing (the
   feature_priority enumerator list, several feature_list entries, the
   switch-case `break;`s and some arg_str assignments are absent from this
   view).  Text preserved byte-for-byte; only review comments added.  */
29930 /* This parses the attribute arguments to target in DECL and determines
29931 the right builtin to use to match the platform specification.
29932 It returns the priority value for this version decl. If PREDICATE_LIST
29933 is not NULL, it stores the list of cpu features that need to be checked
29934 before dispatching this function. */
29936 static unsigned int
29937 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
29940 struct cl_target_option cur_target
;
29942 struct cl_target_option
*new_target
;
29943 const char *arg_str
= NULL
;
29944 const char *attrs_str
= NULL
;
29945 char *tok_str
= NULL
;
29948 /* Priority of i386 features, greater value is higher priority. This is
29949 used to decide the order in which function dispatch must happen. For
29950 instance, a version specialized for SSE4.2 should be checked for dispatch
29951 before a version for SSE3, as SSE4.2 implies SSE3. */
29952 enum feature_priority
/* NOTE(review): the enumerator list (P_ZERO, P_SSSE3, ... P_PROC_FMA) is
   among the missing lines.  */
29973 enum feature_priority priority
= P_ZERO
;
29975 /* These are the target attribute strings for which a dispatcher is
29976 available, from fold_builtin_cpu. */
29978 static struct _feature_list
29980 const char *const name
;
29981 const enum feature_priority priority
;
29983 const feature_list
[] =
/* NOTE(review): several table entries before "ssse3" are missing here.  */
29989 {"ssse3", P_SSSE3
},
29990 {"sse4.1", P_SSE4_1
},
29991 {"sse4.2", P_SSE4_2
},
29992 {"popcnt", P_POPCNT
},
29998 static unsigned int NUM_FEATURES
29999 = sizeof (feature_list
) / sizeof (struct _feature_list
);
30003 tree predicate_chain
= NULL_TREE
;
30004 tree predicate_decl
, predicate_arg
;
30006 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30007 gcc_assert (attrs
!= NULL
);
30009 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
30011 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
30012 attrs_str
= TREE_STRING_POINTER (attrs
);
30014 /* Return priority zero for default function. */
30015 if (strcmp (attrs_str
, "default") == 0)
30018 /* Handle arch= if specified. For priority, set it to be 1 more than
30019 the best instruction set the processor can handle. For instance, if
30020 there is a version for atom and a version for ssse3 (the highest ISA
30021 priority for atom), the atom version must be checked for dispatch
30022 before the ssse3 version. */
30023 if (strstr (attrs_str
, "arch=") != NULL
)
30025 cl_target_option_save (&cur_target
, &global_options
);
30026 target_node
= ix86_valid_target_attribute_tree (attrs
, &global_options
,
30027 &global_options_set
);
30029 gcc_assert (target_node
);
30030 new_target
= TREE_TARGET_OPTION (target_node
);
30031 gcc_assert (new_target
);
30033 if (new_target
->arch_specified
&& new_target
->arch
> 0)
/* Map the processor selected by arch= to a dispatcher string and its
   dispatch priority.  `break;` statements are among the missing lines.  */
30035 switch (new_target
->arch
)
30037 case PROCESSOR_CORE2
:
30039 priority
= P_PROC_SSSE3
;
30041 case PROCESSOR_COREI7
:
30042 arg_str
= "corei7";
30043 priority
= P_PROC_SSE4_2
;
30045 case PROCESSOR_COREI7_AVX
:
30046 arg_str
= "corei7-avx";
30047 priority
= P_PROC_SSE4_2
;
30049 case PROCESSOR_ATOM
:
30051 priority
= P_PROC_SSSE3
;
30053 case PROCESSOR_AMDFAM10
:
30054 arg_str
= "amdfam10h";
30055 priority
= P_PROC_SSE4_a
;
30057 case PROCESSOR_BDVER1
:
30058 arg_str
= "bdver1";
30059 priority
= P_PROC_FMA
;
30061 case PROCESSOR_BDVER2
:
30062 arg_str
= "bdver2";
30063 priority
= P_PROC_FMA
;
30068 cl_target_option_restore (&global_options
, &cur_target
);
30070 if (predicate_list
&& arg_str
== NULL
)
30072 error_at (DECL_SOURCE_LOCATION (decl
),
30073 "No dispatcher found for the versioning attributes");
/* Chain a __builtin_cpu_is ("<arch>") predicate for the arch= case.  */
30077 if (predicate_list
)
30079 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
30080 /* For a C string literal the length includes the trailing NULL. */
30081 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
30082 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
/* Tokenize the attribute string and chain a __builtin_cpu_supports
   predicate for each recognized ISA feature.  */
30087 /* Process feature name. */
30088 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
30089 strcpy (tok_str
, attrs_str
);
30090 token
= strtok (tok_str
, ",");
30091 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
30093 while (token
!= NULL
)
30095 /* Do not process "arch=" */
30096 if (strncmp (token
, "arch=", 5) == 0)
30098 token
= strtok (NULL
, ",");
30101 for (i
= 0; i
< NUM_FEATURES
; ++i
)
30103 if (strcmp (token
, feature_list
[i
].name
) == 0)
30105 if (predicate_list
)
30107 predicate_arg
= build_string_literal (
30108 strlen (feature_list
[i
].name
) + 1,
30109 feature_list
[i
].name
);
30110 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
30113 /* Find the maximum priority feature. */
30114 if (feature_list
[i
].priority
> priority
)
30115 priority
= feature_list
[i
].priority
;
30120 if (predicate_list
&& i
== NUM_FEATURES
)
30122 error_at (DECL_SOURCE_LOCATION (decl
),
30123 "No dispatcher found for %s", token
);
30126 token
= strtok (NULL
, ",");
30130 if (predicate_list
&& predicate_chain
== NULL_TREE
)
30132 error_at (DECL_SOURCE_LOCATION (decl
),
30133 "No dispatcher found for the versioning attributes : %s",
30137 else if (predicate_list
)
/* Predicates were chained in reverse; restore source order for caller.  */
30139 predicate_chain
= nreverse (predicate_chain
);
30140 *predicate_list
= predicate_chain
;
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  Compares dispatch priorities of two version decls by delegating to
   get_builtin_code_for_version with a NULL predicate list.  */
30146 /* This compares the priority of target features in function DECL1
30147 and DECL2. It returns positive value if DECL1 is higher priority,
30148 negative value if DECL2 is higher priority and 0 if they are the
30152 ix86_compare_version_priority (tree decl1
, tree decl2
)
30154 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
30155 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
/* Cast to int before subtracting so the comparator can go negative.  */
30157 return (int)priority1
- (int)priority2
;
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  qsort comparator over _function_version_info records: sorts by
   DESCENDING dispatch_priority (c2 - c1).  */
30160 /* V1 and V2 point to function versions with different priorities
30161 based on the target ISA. This function compares their priorities. */
30164 feature_compare (const void *v1
, const void *v2
)
30166 typedef struct _function_version_info
30169 tree predicate_chain
;
30170 unsigned int dispatch_priority
;
30171 } function_version_info
;
30173 const function_version_info c1
= *(const function_version_info
*)v1
;
30174 const function_version_info c2
= *(const function_version_info
*)v2
;
30175 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
/* NOTE(review): garbled extraction — interior lines (some declarations such
   as gseq/ix/ele/default_decl, brace lines, the predicate_chain argument to
   get_builtin_code_for_version, and the trailing return) are missing from
   this view.  Text preserved byte-for-byte; only review comments added.  */
30178 /* This function generates the dispatch function for
30179 multi-versioned functions. DISPATCH_DECL is the function which will
30180 contain the dispatch logic. FNDECLS are the function choices for
30181 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
30182 in DISPATCH_DECL in which the dispatch code is generated. */
30185 dispatch_function_versions (tree dispatch_decl
,
30187 basic_block
*empty_bb
)
30190 gimple ifunc_cpu_init_stmt
;
30194 vec
<tree
> *fndecls
;
30195 unsigned int num_versions
= 0;
30196 unsigned int actual_versions
= 0;
30199 struct _function_version_info
30202 tree predicate_chain
;
30203 unsigned int dispatch_priority
;
30204 }*function_version_info
;
30206 gcc_assert (dispatch_decl
!= NULL
30207 && fndecls_p
!= NULL
30208 && empty_bb
!= NULL
);
30210 /*fndecls_p is actually a vector. */
30211 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
30213 /* At least one more version other than the default. */
30214 num_versions
= fndecls
->length ();
30215 gcc_assert (num_versions
>= 2);
30217 function_version_info
= (struct _function_version_info
*)
30218 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
30220 /* The first version in the vector is the default decl. */
30221 default_decl
= (*fndecls
)[0];
30223 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
30225 gseq
= bb_seq (*empty_bb
);
30226 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
30227 constructors, so explicity call __builtin_cpu_init here. */
30228 ifunc_cpu_init_stmt
= gimple_build_call_vec (
30229 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
30230 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
30231 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
30232 set_bb_seq (*empty_bb
, gseq
);
/* Collect the non-default versions with their predicate chains and
   priorities; versions with no predicate chain are skipped (they are
   presumably handled as defaults — missing lines here, TODO confirm).  */
30237 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
30239 tree version_decl
= ele
;
30240 tree predicate_chain
= NULL_TREE
;
30241 unsigned int priority
;
30242 /* Get attribute string, parse it and find the right predicate decl.
30243 The predicate function could be a lengthy combination of many
30244 features, like arch-type and various isa-variants. */
30245 priority
= get_builtin_code_for_version (version_decl
,
30248 if (predicate_chain
== NULL_TREE
)
30251 function_version_info
[actual_versions
].version_decl
= version_decl
;
30252 function_version_info
[actual_versions
].predicate_chain
30254 function_version_info
[actual_versions
].dispatch_priority
= priority
;
30258 /* Sort the versions according to descending order of dispatch priority. The
30259 priority is based on the ISA. This is not a perfect solution. There
30260 could still be ambiguity. If more than one function version is suitable
30261 to execute, which one should be dispatched? In future, allow the user
30262 to specify a dispatch priority next to the version. */
30263 qsort (function_version_info
, actual_versions
,
30264 sizeof (struct _function_version_info
), feature_compare
);
/* Emit one guarded dispatch block per version, highest priority first.  */
30266 for (i
= 0; i
< actual_versions
; ++i
)
30267 *empty_bb
= add_condition_to_bb (dispatch_decl
,
30268 function_version_info
[i
].version_decl
,
30269 function_version_info
[i
].predicate_chain
,
30272 /* dispatch default version at the end. */
30273 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
30276 free (function_version_info
);
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  qsort comparator over an array of C strings (char **), used by
   sorted_attr_string below.  */
30280 /* Comparator function to be used in qsort routine to sort attribute
30281 specification strings to "target". */
30284 attr_strcmp (const void *v1
, const void *v2
)
30286 const char *c1
= *(char *const*)v1
;
30287 const char *c2
= *(char *const*)v2
;
30288 return strcmp (c1
, c2
);
/* NOTE(review): garbled extraction — interior lines (declarations of i/arg/
   attr, argnum counting, the '_' replacement assignment, args[] population,
   resets of str_len_sum, and the return of ret_str) are missing from this
   view.  Text preserved byte-for-byte; only review comments added.  */
30291 /* ARGLIST is the argument to target attribute. This function tokenizes
30292 the comma separated arguments, sorts them and returns a string which
30293 is a unique identifier for the comma separated arguments. It also
30294 replaces non-identifier characters "=,-" with "_". */
30297 sorted_attr_string (tree arglist
)
30300 size_t str_len_sum
= 0;
30301 char **args
= NULL
;
30302 char *attr_str
, *ret_str
;
30304 unsigned int argnum
= 1;
/* First pass: total length of all attribute strings plus separators.  */
30307 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30309 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30310 size_t len
= strlen (str
);
30311 str_len_sum
+= len
+ 1;
30312 if (arg
!= arglist
)
/* NOTE(review): the body counting commas into argnum is missing here.  */
30314 for (i
= 0; i
< strlen (str
); i
++)
30319 attr_str
= XNEWVEC (char, str_len_sum
);
/* Second pass: concatenate the strings, comma-separated.  NOTE(review):
   str_len_sum is reused as a write offset; its reset to 0 between passes
   is among the missing lines.  */
30321 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
30323 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
30324 size_t len
= strlen (str
);
30325 memcpy (attr_str
+ str_len_sum
, str
, len
);
30326 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
30327 str_len_sum
+= len
+ 1;
30330 /* Replace "=,-" with "_". */
30331 for (i
= 0; i
< strlen (attr_str
); i
++)
30332 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
30338 args
= XNEWVEC (char *, argnum
)
;
/* Tokenize on ',' and sort the tokens so equivalent attribute lists
   produce identical identifiers regardless of argument order.  */
30341 attr
= strtok (attr_str
, ",");
30342 while (attr
!= NULL
)
30346 attr
= strtok (NULL
, ",");
30349 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
30351 ret_str
= XNEWVEC (char, str_len_sum
);
/* Join the sorted tokens with '_' into the returned identifier.  */
30353 for (i
= 0; i
< argnum
; i
++)
30355 size_t len
= strlen (args
[i
]);
30356 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
30357 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
30358 str_len_sum
+= len
+ 1;
30362 XDELETEVEC (attr_str
);
/* NOTE(review): garbled extraction — some interior lines (declaration of
   version_attr, the early return for "default", the final return of ret)
   are missing from this view.  Text preserved byte-for-byte; comments only.  */
30366 /* This function changes the assembler name for functions that are
30367 versions. If DECL is a function version and has a "target"
30368 attribute, it appends the attribute string to its assembler name. */
30371 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
30374 const char *orig_name
, *version_string
;
30375 char *attr_str
, *assembler_name
;
/* gnu_inline versions have no out-of-line body to dispatch to — error.  */
30377 if (DECL_DECLARED_INLINE_P (decl
)
30378 && lookup_attribute ("gnu_inline",
30379 DECL_ATTRIBUTES (decl
)))
30380 error_at (DECL_SOURCE_LOCATION (decl
),
30381 "Function versions cannot be marked as gnu_inline,"
30382 " bodies have to be generated");
30384 if (DECL_VIRTUAL_P (decl
)
30385 || DECL_VINDEX (decl
))
30386 sorry ("Virtual function multiversioning not supported");
30388 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30390 /* target attribute string cannot be NULL. */
30391 gcc_assert (version_attr
!= NULL_TREE
);
30393 orig_name
= IDENTIFIER_POINTER (id
);
30395 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
/* The default version keeps its original assembler name.  */
30397 if (strcmp (version_string
, "default") == 0)
/* Append ".<sorted-attrs>" to the original assembler name.  */
30400 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
30401 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
30403 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
30405 /* Allow assembler name to be modified if already set. */
30406 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
30407 SET_DECL_RTL (decl
, NULL
);
30409 tree ret
= get_identifier (assembler_name
);
30410 XDELETEVEC (attr_str
);
30411 XDELETEVEC (assembler_name
);
/* NOTE(review): garbled extraction — some interior lines (declarations of
   attr1/attr2/result, `return false;`/`return result;` statements, the
   error branch choosing fn1 vs fn2, DECL_FUNCTION_VERSIONED setting) are
   missing from this view.  Text preserved byte-for-byte; comments only.  */
30415 /* This function returns true if FN1 and FN2 are versions of the same function,
30416 that is, the target strings of the function decls are different. This assumes
30417 that FN1 and FN2 have the same signature. */
30420 ix86_function_versions (tree fn1
, tree fn2
)
30423 char *target1
, *target2
;
30426 if (TREE_CODE (fn1
) != FUNCTION_DECL
30427 || TREE_CODE (fn2
) != FUNCTION_DECL
)
30430 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
30431 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
30433 /* At least one function decl should have the target attribute specified. */
30434 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
30437 /* Diagnose missing target attribute if one of the decls is already
30438 multi-versioned. */
30439 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
30441 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
30443 if (attr2
!= NULL_TREE
)
30450 error_at (DECL_SOURCE_LOCATION (fn2
),
30451 "missing %<target%> attribute for multi-versioned %D",
30453 inform (DECL_SOURCE_LOCATION (fn1
),
30454 "previous declaration of %D", fn1
);
30455 /* Prevent diagnosing of the same error multiple times. */
30456 DECL_ATTRIBUTES (fn2
)
30457 = tree_cons (get_identifier ("target"),
30458 copy_node (TREE_VALUE (attr1
)),
30459 DECL_ATTRIBUTES (fn2
));
/* Compare the canonicalized (sorted) attribute strings: same string
   means "same version", i.e. not distinct versions of each other.  */
30464 target1
= sorted_attr_string (TREE_VALUE (attr1
));
30465 target2
= sorted_attr_string (TREE_VALUE (attr2
));
30467 /* The sorted target strings must be different for fn1 and fn2
30469 if (strcmp (target1
, target2
) == 0)
30474 XDELETEVEC (target1
);
30475 XDELETEVEC (target2
);
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  Target hook: mangles the assembler name ID of DECL — appends the
   version suffix for versioned function decls, then applies any subtarget
   mangling.  The final `return id;` is among the missing lines.  */
30481 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
30483 /* For function version, add the target suffix to the assembler name. */
30484 if (TREE_CODE (decl
) == FUNCTION_DECL
30485 && DECL_FUNCTION_VERSIONED (decl
))
30486 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
30487 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
30488 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
/* NOTE(review): garbled extraction — interior lines (declarations of name/
   name_len, the `if (make_unique)` guards around the unique-name branches)
   are missing from this view.  Text preserved byte-for-byte; comments only.
   Caller owns the returned XNEWVEC buffer.  */
30494 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
30495 is true, append the full path name of the source file. */
30498 make_name (tree decl
, const char *suffix
, bool make_unique
)
30500 char *global_var_name
;
30503 const char *unique_name
= NULL
;
30505 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
30507 /* Get a unique name that can be used globally without any chances
30508 of collision at link time. */
30510 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* +2: one '.' separator and the terminating NUL.  */
30512 name_len
= strlen (name
) + strlen (suffix
) + 2;
30515 name_len
+= strlen (unique_name
) + 1;
30516 global_var_name
= XNEWVEC (char, name_len
);
30518 /* Use '.' to concatenate names as it is demangler friendly. */
30520 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
30523 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
30525 return global_var_name
;
/* NOTE(review): garbled extraction — interior lines (declarations of
   func_decl/func_name, `is_uniq = true;`, the return of func_decl) are
   missing from this view.  Text preserved byte-for-byte; comments only.  */
30528 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30530 /* Make a dispatcher declaration for the multi-versioned function DECL.
30531 Calls to DECL function will be replaced with calls to the dispatcher
30532 by the front-end. Return the decl created. */
30535 make_dispatcher_decl (const tree decl
)
30539 tree fn_type
, func_type
;
30540 bool is_uniq
= false;
/* Non-public default decl: the IFUNC name must be made unique (see
   make_name); the assignment inside this guard is among the missing lines.  */
30542 if (TREE_PUBLIC (decl
) == 0)
30545 func_name
= make_name (decl
, "ifunc", is_uniq
);
/* The dispatcher has the same signature as the versioned function.  */
30547 fn_type
= TREE_TYPE (decl
);
30548 func_type
= build_function_type (TREE_TYPE (fn_type
),
30549 TYPE_ARG_TYPES (fn_type
));
30551 func_decl
= build_fn_decl (func_name
, func_type
);
30552 XDELETEVEC (func_name
);
30553 TREE_USED (func_decl
) = 1;
30554 DECL_CONTEXT (func_decl
) = NULL_TREE
;
30555 DECL_INITIAL (func_decl
) = error_mark_node
;
30556 DECL_ARTIFICIAL (func_decl
) = 1;
30557 /* Mark this func as external, the resolver will flip it again if
30558 it gets generated. */
30559 DECL_EXTERNAL (func_decl
) = 1;
30560 /* This will be of type IFUNCs have to be externally visible. */
30561 TREE_PUBLIC (func_decl
) = 1;
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  True iff DECL is a versioned FUNCTION_DECL whose target attribute
   string is exactly "default".  The early `return false;` body is among the
   missing lines.  */
30568 /* Returns true if decl is multi-versioned and DECL is the default function,
30569 that is it is not tagged with target specific optimization. */
30572 is_function_default_version (const tree decl
)
30574 if (TREE_CODE (decl
) != FUNCTION_DECL
30575 || !DECL_FUNCTION_VERSIONED (decl
))
30577 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
30579 attr
= TREE_VALUE (TREE_VALUE (attr
));
30580 return (TREE_CODE (attr
) == STRING_CST
30581 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
/* NOTE(review): garbled extraction — interior lines (first_v initialization
   from node_v, `return NULL;`, `it_v = it_v->next;`, the #else diagnostic
   tail) are missing from this view.  Text preserved byte-for-byte; only
   review comments added.  */
30584 /* Make a dispatcher declaration for the multi-versioned function DECL.
30585 Calls to DECL function will be replaced with calls to the dispatcher
30586 by the front-end. Returns the decl of the dispatcher function. */
30589 ix86_get_function_versions_dispatcher (void *decl
)
30591 tree fn
= (tree
) decl
;
30592 struct cgraph_node
*node
= NULL
;
30593 struct cgraph_node
*default_node
= NULL
;
30594 struct cgraph_function_version_info
*node_v
= NULL
;
30595 struct cgraph_function_version_info
*first_v
= NULL
;
30597 tree dispatch_decl
= NULL
;
30599 struct cgraph_function_version_info
*default_version_info
= NULL
;
30601 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
30603 node
= cgraph_get_node (fn
);
30604 gcc_assert (node
!= NULL
);
30606 node_v
= get_cgraph_node_version (node
);
30607 gcc_assert (node_v
!= NULL
);
/* Already created the dispatcher for this version chain — reuse it.  */
30609 if (node_v
->dispatcher_resolver
!= NULL
)
30610 return node_v
->dispatcher_resolver
;
30612 /* Find the default version and make it the first node. */
30614 /* Go to the beginning of the chain. */
30615 while (first_v
->prev
!= NULL
)
30616 first_v
= first_v
->prev
;
30617 default_version_info
= first_v
;
/* Walk forward to the node holding the "default" version.  */
30618 while (default_version_info
!= NULL
)
30620 if (is_function_default_version
30621 (default_version_info
->this_node
->decl
))
30623 default_version_info
= default_version_info
->next
;
30626 /* If there is no default node, just return NULL. */
30627 if (default_version_info
== NULL
)
30630 /* Make default info the first node. */
/* Unlink default_version_info and splice it in front of first_v.  */
30631 if (first_v
!= default_version_info
)
30633 default_version_info
->prev
->next
= default_version_info
->next
;
30634 if (default_version_info
->next
)
30635 default_version_info
->next
->prev
= default_version_info
->prev
;
30636 first_v
->prev
= default_version_info
;
30637 default_version_info
->next
= first_v
;
30638 default_version_info
->prev
= NULL
;
30641 default_node
= default_version_info
->this_node
;
30643 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
30644 if (targetm
.has_ifunc_p ())
30646 struct cgraph_function_version_info
*it_v
= NULL
;
30647 struct cgraph_node
*dispatcher_node
= NULL
;
30648 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
30650 /* Right now, the dispatching is done via ifunc. */
30651 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
30653 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
30654 gcc_assert (dispatcher_node
!= NULL
);
30655 dispatcher_node
->dispatcher_function
= 1;
30656 dispatcher_version_info
30657 = insert_new_cgraph_node_version (dispatcher_node
);
30658 dispatcher_version_info
->next
= default_version_info
;
30659 dispatcher_node
->definition
= 1;
30661 /* Set the dispatcher for all the versions. */
30662 it_v
= default_version_info
;
30663 while (it_v
!= NULL
)
30665 it_v
->dispatcher_resolver
= dispatch_decl
;
/* Error path when ifunc is unavailable on this target.  */
30672 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
30673 "multiversioning needs ifunc which is not supported "
30677 return dispatch_decl
;
/* NOTE(review): garbled extraction; text preserved byte-for-byte, comments
   only.  Builds the attribute tree node NAME("ARG_NAME") chained onto CHAIN;
   declarations of attr_name/attr_args/attr and the `return attr;` are among
   the missing lines.  */
30680 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
30684 make_attribute (const char *name
, const char *arg_name
, tree chain
)
30687 tree attr_arg_name
;
30691 attr_name
= get_identifier (name
);
30692 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
30693 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
30694 attr
= tree_cons (attr_name
, attr_args
, chain
);
/* NOTE(review): garbled extraction — interior lines (`is_uniq = true;`,
   pop_cfun, the return of decl) are missing from this view.  Text preserved
   byte-for-byte; only review comments added.  */
30698 /* Make the resolver function decl to dispatch the versions of
30699 a multi-versioned function, DEFAULT_DECL. Create an
30700 empty basic block in the resolver and store the pointer in
30701 EMPTY_BB. Return the decl of the resolver function. */
30704 make_resolver_func (const tree default_decl
,
30705 const tree dispatch_decl
,
30706 basic_block
*empty_bb
)
30708 char *resolver_name
;
30709 tree decl
, type
, decl_name
, t
;
30710 bool is_uniq
= false;
30712 /* IFUNC's have to be globally visible. So, if the default_decl is
30713 not, then the name of the IFUNC should be made unique. */
30714 if (TREE_PUBLIC (default_decl
) == 0)
30717 /* Append the filename to the resolver function if the versions are
30718 not externally visible. This is because the resolver function has
30719 to be externally visible for the loader to find it. So, appending
30720 the filename will prevent conflicts with a resolver function from
30721 another module which is based on the same version name. */
30722 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
30724 /* The resolver function should return a (void *). */
30725 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
30727 decl
= build_fn_decl (resolver_name
, type
);
30728 decl_name
= get_identifier (resolver_name
);
30729 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
30731 DECL_NAME (decl
) = decl_name
;
30732 TREE_USED (decl
) = 1;
30733 DECL_ARTIFICIAL (decl
) = 1;
30734 DECL_IGNORED_P (decl
) = 0;
30735 /* IFUNC resolvers have to be externally visible. */
30736 TREE_PUBLIC (decl
) = 1;
30737 DECL_UNINLINABLE (decl
) = 1;
30739 /* Resolver is not external, body is generated. */
30740 DECL_EXTERNAL (decl
) = 0;
30741 DECL_EXTERNAL (dispatch_decl
) = 0;
30743 DECL_CONTEXT (decl
) = NULL_TREE
;
30744 DECL_INITIAL (decl
) = make_node (BLOCK
);
30745 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
30747 if (DECL_COMDAT_GROUP (default_decl
)
30748 || TREE_PUBLIC (default_decl
))
30750 /* In this case, each translation unit with a call to this
30751 versioned function will put out a resolver. Ensure it
30752 is comdat to keep just one copy. */
30753 DECL_COMDAT (decl
) = 1;
30754 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
30756 /* Build result decl and add to function_decl. */
30757 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
30758 DECL_ARTIFICIAL (t
) = 1;
30759 DECL_IGNORED_P (t
) = 1;
30760 DECL_RESULT (decl
) = t
;
/* Gimplify and lower to an empty function body; caller fills *empty_bb.  */
30762 gimplify_function_tree (decl
);
30763 push_cfun (DECL_STRUCT_FUNCTION (decl
));
30764 *empty_bb
= init_lowered_empty_function (decl
, false);
30766 cgraph_add_new_function (decl
, true);
30767 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
30771 gcc_assert (dispatch_decl
!= NULL
);
30772 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
30773 DECL_ATTRIBUTES (dispatch_decl
)
30774 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
30776 /* Create the alias for dispatch to resolver here. */
30777 /*cgraph_create_function_alias (dispatch_decl, decl);*/
30778 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
30779 XDELETEVEC (resolver_name
);
/* NOTE(review): garbled extraction — interior lines (brace lines, pop_cfun)
   are missing from this view.  Text preserved byte-for-byte; only review
   comments added.  */
30783 /* Generate the dispatching code body to dispatch multi-versioned function
30784 DECL. The target hook is called to process the "target" attributes and
30785 provide the code to dispatch the right function at run-time. NODE points
30786 to the dispatcher decl whose body will be created. */
30789 ix86_generate_version_dispatcher_body (void *node_p
)
30791 tree resolver_decl
;
30792 basic_block empty_bb
;
30793 tree default_ver_decl
;
30794 struct cgraph_node
*versn
;
30795 struct cgraph_node
*node
;
30797 struct cgraph_function_version_info
*node_version_info
= NULL
;
30798 struct cgraph_function_version_info
*versn_info
= NULL
;
30800 node
= (cgraph_node
*)node_p
;
30802 node_version_info
= get_cgraph_node_version (node
);
30803 gcc_assert (node
->dispatcher_function
30804 && node_version_info
!= NULL
);
/* Resolver already generated for this dispatcher — reuse.  */
30806 if (node_version_info
->dispatcher_resolver
)
30807 return node_version_info
->dispatcher_resolver
;
30809 /* The first version in the chain corresponds to the default version. */
30810 default_ver_decl
= node_version_info
->next
->this_node
->decl
;
30812 /* node is going to be an alias, so remove the finalized bit. */
30813 node
->definition
= false;
30815 resolver_decl
= make_resolver_func (default_ver_decl
,
30816 node
->decl
, &empty_bb
);
30818 node_version_info
->dispatcher_resolver
= resolver_decl
;
30820 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
30822 stack_vec
<tree
, 2> fn_ver_vec
;
/* Collect all version decls (default first) for dispatch generation.  */
30824 for (versn_info
= node_version_info
->next
; versn_info
;
30825 versn_info
= versn_info
->next
)
30827 versn
= versn_info
->this_node
;
30828 /* Check for virtual functions here again, as by this time it should
30829 have been determined if this function needs a vtable index or
30830 not. This happens for methods in derived classes that override
30831 virtual methods in base classes but are not explicitly marked as
30833 if (DECL_VINDEX (versn
->decl
))
30834 sorry ("Virtual function multiversioning not supported");
30836 fn_ver_vec
.safe_push (versn
->decl
);
30839 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
30840 rebuild_cgraph_edges ();
30842 return resolver_decl
;
/* NOTE(review): garbled extraction — interior lines (the fourth field-name
   initializer, declaration of i, `return type;`) are missing from this view.
   Text preserved byte-for-byte; comments only.  Builds a RECORD_TYPE that
   must stay layout-compatible with struct __processor_model in
   libgcc/config/i386/cpuinfo.c.  */
30844 /* This builds the processor_model struct type defined in
30845 libgcc/config/i386/cpuinfo.c */
30848 build_processor_model_struct (void)
30850 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
30852 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
30854 tree type
= make_node (RECORD_TYPE
);
30856 /* The first 3 fields are unsigned int. */
30857 for (i
= 0; i
< 3; ++i
)
30859 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30860 get_identifier (field_name
[i
]), unsigned_type_node
);
30861 if (field_chain
!= NULL_TREE
)
30862 DECL_CHAIN (field
) = field_chain
;
30863 field_chain
= field
;
30866 /* The last field is an array of unsigned integers of size one. */
30867 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
30868 get_identifier (field_name
[3]),
30869 build_array_type (unsigned_type_node
,
30870 build_index_type (size_one_node
)));
30871 if (field_chain
!= NULL_TREE
)
30872 DECL_CHAIN (field
) = field_chain
;
30873 field_chain
= field
;
30875 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
/* NOTE(review): garbled extraction — interior lines (the VAR_DECL code
   argument to build_decl, the type argument, `return new_decl;`) are missing
   from this view.  Text preserved byte-for-byte; comments only.  */
30879 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
30882 make_var_decl (tree type
, const char *name
)
30886 new_decl
= build_decl (UNKNOWN_LOCATION
,
30888 get_identifier(name
),
/* extern + static(storage) + public + no initializer: a reference to a
   variable defined in libgcc, kept alive via DECL_PRESERVE_P.  */
30891 DECL_EXTERNAL (new_decl
) = 1;
30892 TREE_STATIC (new_decl
) = 1;
30893 TREE_PUBLIC (new_decl
) = 1;
30894 DECL_INITIAL (new_decl
) = 0;
30895 DECL_ARTIFICIAL (new_decl
) = 0;
30896 DECL_PRESERVE_P (new_decl
) = 1;
30898 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
30899 assemble_variable (new_decl
, 0, 0, 0);
30904 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
30905 into an integer defined in libgcc/config/i386/cpuinfo.c */
30908 fold_builtin_cpu (tree fndecl
, tree
*args
)
30911 enum ix86_builtins fn_code
= (enum ix86_builtins
)
30912 DECL_FUNCTION_CODE (fndecl
);
30913 tree param_string_cst
= NULL
;
30915 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
30916 enum processor_features
30932 /* These are the values for vendor types and cpu types and subtypes
30933 in cpuinfo.c. Cpu types and subtypes should be subtracted by
30934 the corresponding start value. */
30935 enum processor_model
30946 M_CPU_SUBTYPE_START
,
30947 M_INTEL_COREI7_NEHALEM
,
30948 M_INTEL_COREI7_WESTMERE
,
30949 M_INTEL_COREI7_SANDYBRIDGE
,
30950 M_AMDFAM10H_BARCELONA
,
30951 M_AMDFAM10H_SHANGHAI
,
30952 M_AMDFAM10H_ISTANBUL
,
30953 M_AMDFAM15H_BDVER1
,
30954 M_AMDFAM15H_BDVER2
,
30955 M_AMDFAM15H_BDVER3
,
30959 static struct _arch_names_table
30961 const char *const name
;
30962 const enum processor_model model
;
30964 const arch_names_table
[] =
30967 {"intel", M_INTEL
},
30968 {"atom", M_INTEL_ATOM
},
30969 {"slm", M_INTEL_SLM
},
30970 {"core2", M_INTEL_CORE2
},
30971 {"corei7", M_INTEL_COREI7
},
30972 {"nehalem", M_INTEL_COREI7_NEHALEM
},
30973 {"westmere", M_INTEL_COREI7_WESTMERE
},
30974 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
30975 {"amdfam10h", M_AMDFAM10H
},
30976 {"barcelona", M_AMDFAM10H_BARCELONA
},
30977 {"shanghai", M_AMDFAM10H_SHANGHAI
},
30978 {"istanbul", M_AMDFAM10H_ISTANBUL
},
30979 {"amdfam15h", M_AMDFAM15H
},
30980 {"bdver1", M_AMDFAM15H_BDVER1
},
30981 {"bdver2", M_AMDFAM15H_BDVER2
},
30982 {"bdver3", M_AMDFAM15H_BDVER3
},
30983 {"bdver4", M_AMDFAM15H_BDVER4
},
30986 static struct _isa_names_table
30988 const char *const name
;
30989 const enum processor_features feature
;
30991 const isa_names_table
[] =
30995 {"popcnt", F_POPCNT
},
30999 {"ssse3", F_SSSE3
},
31000 {"sse4.1", F_SSE4_1
},
31001 {"sse4.2", F_SSE4_2
},
31006 tree __processor_model_type
= build_processor_model_struct ();
31007 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
31011 varpool_add_new_variable (__cpu_model_var
);
31013 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
31015 param_string_cst
= *args
;
31016 while (param_string_cst
31017 && TREE_CODE (param_string_cst
) != STRING_CST
)
31019 /* *args must be a expr that can contain other EXPRS leading to a
31021 if (!EXPR_P (param_string_cst
))
31023 error ("Parameter to builtin must be a string constant or literal");
31024 return integer_zero_node
;
31026 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
31029 gcc_assert (param_string_cst
);
31031 if (fn_code
== IX86_BUILTIN_CPU_IS
)
31037 unsigned int field_val
= 0;
31038 unsigned int NUM_ARCH_NAMES
31039 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
31041 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
31042 if (strcmp (arch_names_table
[i
].name
,
31043 TREE_STRING_POINTER (param_string_cst
)) == 0)
31046 if (i
== NUM_ARCH_NAMES
)
31048 error ("Parameter to builtin not valid: %s",
31049 TREE_STRING_POINTER (param_string_cst
));
31050 return integer_zero_node
;
31053 field
= TYPE_FIELDS (__processor_model_type
);
31054 field_val
= arch_names_table
[i
].model
;
31056 /* CPU types are stored in the next field. */
31057 if (field_val
> M_CPU_TYPE_START
31058 && field_val
< M_CPU_SUBTYPE_START
)
31060 field
= DECL_CHAIN (field
);
31061 field_val
-= M_CPU_TYPE_START
;
31064 /* CPU subtypes are stored in the next field. */
31065 if (field_val
> M_CPU_SUBTYPE_START
)
31067 field
= DECL_CHAIN ( DECL_CHAIN (field
));
31068 field_val
-= M_CPU_SUBTYPE_START
;
31071 /* Get the appropriate field in __cpu_model. */
31072 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
31075 /* Check the value. */
31076 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
31077 build_int_cstu (unsigned_type_node
, field_val
));
31078 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
31080 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
31087 unsigned int field_val
= 0;
31088 unsigned int NUM_ISA_NAMES
31089 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
31091 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
31092 if (strcmp (isa_names_table
[i
].name
,
31093 TREE_STRING_POINTER (param_string_cst
)) == 0)
31096 if (i
== NUM_ISA_NAMES
)
31098 error ("Parameter to builtin not valid: %s",
31099 TREE_STRING_POINTER (param_string_cst
));
31100 return integer_zero_node
;
31103 field
= TYPE_FIELDS (__processor_model_type
);
31104 /* Get the last field, which is __cpu_features. */
31105 while (DECL_CHAIN (field
))
31106 field
= DECL_CHAIN (field
);
31108 /* Get the appropriate field: __cpu_model.__cpu_features */
31109 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
31112 /* Access the 0th element of __cpu_features array. */
31113 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
31114 integer_zero_node
, NULL_TREE
, NULL_TREE
);
31116 field_val
= (1 << isa_names_table
[i
].feature
);
31117 /* Return __cpu_model.__cpu_features[0] & field_val */
31118 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
31119 build_int_cstu (unsigned_type_node
, field_val
));
31120 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
31122 gcc_unreachable ();
31126 ix86_fold_builtin (tree fndecl
, int n_args
,
31127 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
31129 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
31131 enum ix86_builtins fn_code
= (enum ix86_builtins
)
31132 DECL_FUNCTION_CODE (fndecl
);
31133 if (fn_code
== IX86_BUILTIN_CPU_IS
31134 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
31136 gcc_assert (n_args
== 1);
31137 return fold_builtin_cpu (fndecl
, args
);
31141 #ifdef SUBTARGET_FOLD_BUILTIN
31142 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
31148 /* Make builtins to detect cpu type and features supported. NAME is
31149 the builtin name, CODE is the builtin code, and FTYPE is the function
31150 type of the builtin. */
31153 make_cpu_type_builtin (const char* name
, int code
,
31154 enum ix86_builtin_func_type ftype
, bool is_const
)
31159 type
= ix86_get_builtin_func_type (ftype
);
31160 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
31162 gcc_assert (decl
!= NULL_TREE
);
31163 ix86_builtins
[(int) code
] = decl
;
31164 TREE_READONLY (decl
) = is_const
;
31167 /* Make builtins to get CPU type and features supported. The created
31170 __builtin_cpu_init (), to detect cpu type and features,
31171 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
31172 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
31176 ix86_init_platform_type_builtins (void)
31178 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
31179 INT_FTYPE_VOID
, false);
31180 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
31181 INT_FTYPE_PCCHAR
, true);
31182 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
31183 INT_FTYPE_PCCHAR
, true);
31186 /* Internal method for ix86_init_builtins. */
31189 ix86_init_builtins_va_builtins_abi (void)
31191 tree ms_va_ref
, sysv_va_ref
;
31192 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
31193 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
31194 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
31195 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
31199 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
31200 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
31201 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
31203 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
31206 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31207 fnvoid_va_start_ms
=
31208 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
31209 fnvoid_va_end_sysv
=
31210 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
31211 fnvoid_va_start_sysv
=
31212 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
31214 fnvoid_va_copy_ms
=
31215 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
31217 fnvoid_va_copy_sysv
=
31218 build_function_type_list (void_type_node
, sysv_va_ref
,
31219 sysv_va_ref
, NULL_TREE
);
31221 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
31222 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31223 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
31224 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31225 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
31226 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
31227 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
31228 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31229 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
31230 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31231 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
31232 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
31236 ix86_init_builtin_types (void)
31238 tree float128_type_node
, float80_type_node
;
31240 /* The __float80 type. */
31241 float80_type_node
= long_double_type_node
;
31242 if (TYPE_MODE (float80_type_node
) != XFmode
)
31244 /* The __float80 type. */
31245 float80_type_node
= make_node (REAL_TYPE
);
31247 TYPE_PRECISION (float80_type_node
) = 80;
31248 layout_type (float80_type_node
);
31250 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
31252 /* The __float128 type. */
31253 float128_type_node
= make_node (REAL_TYPE
);
31254 TYPE_PRECISION (float128_type_node
) = 128;
31255 layout_type (float128_type_node
);
31256 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
31258 /* This macro is built by i386-builtin-types.awk. */
31259 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
31263 ix86_init_builtins (void)
31267 ix86_init_builtin_types ();
31269 /* Builtins to get CPU type and features. */
31270 ix86_init_platform_type_builtins ();
31272 /* TFmode support builtins. */
31273 def_builtin_const (0, "__builtin_infq",
31274 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
31275 def_builtin_const (0, "__builtin_huge_valq",
31276 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
31278 /* We will expand them to normal call if SSE isn't available since
31279 they are used by libgcc. */
31280 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
31281 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
31282 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
31283 TREE_READONLY (t
) = 1;
31284 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
31286 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
31287 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
31288 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
31289 TREE_READONLY (t
) = 1;
31290 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
31292 ix86_init_tm_builtins ();
31293 ix86_init_mmx_sse_builtins ();
31296 ix86_init_builtins_va_builtins_abi ();
31298 #ifdef SUBTARGET_INIT_BUILTINS
31299 SUBTARGET_INIT_BUILTINS
;
31303 /* Return the ix86 builtin for CODE. */
31306 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
31308 if (code
>= IX86_BUILTIN_MAX
)
31309 return error_mark_node
;
31311 return ix86_builtins
[code
];
31314 /* Errors in the source file can cause expand_expr to return const0_rtx
31315 where we expect a vector. To avoid crashing, use one of the vector
31316 clear instructions. */
31318 safe_vector_operand (rtx x
, enum machine_mode mode
)
31320 if (x
== const0_rtx
)
31321 x
= CONST0_RTX (mode
);
31325 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
31328 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
31331 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31332 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31333 rtx op0
= expand_normal (arg0
);
31334 rtx op1
= expand_normal (arg1
);
31335 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31336 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31337 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
31339 if (VECTOR_MODE_P (mode0
))
31340 op0
= safe_vector_operand (op0
, mode0
);
31341 if (VECTOR_MODE_P (mode1
))
31342 op1
= safe_vector_operand (op1
, mode1
);
31344 if (optimize
|| !target
31345 || GET_MODE (target
) != tmode
31346 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31347 target
= gen_reg_rtx (tmode
);
31349 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
31351 rtx x
= gen_reg_rtx (V4SImode
);
31352 emit_insn (gen_sse2_loadd (x
, op1
));
31353 op1
= gen_lowpart (TImode
, x
);
31356 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31357 op0
= copy_to_mode_reg (mode0
, op0
);
31358 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
31359 op1
= copy_to_mode_reg (mode1
, op1
);
31361 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31370 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
31373 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
31374 enum ix86_builtin_func_type m_type
,
31375 enum rtx_code sub_code
)
31380 bool comparison_p
= false;
31382 bool last_arg_constant
= false;
31383 int num_memory
= 0;
31386 enum machine_mode mode
;
31389 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31393 case MULTI_ARG_4_DF2_DI_I
:
31394 case MULTI_ARG_4_DF2_DI_I1
:
31395 case MULTI_ARG_4_SF2_SI_I
:
31396 case MULTI_ARG_4_SF2_SI_I1
:
31398 last_arg_constant
= true;
31401 case MULTI_ARG_3_SF
:
31402 case MULTI_ARG_3_DF
:
31403 case MULTI_ARG_3_SF2
:
31404 case MULTI_ARG_3_DF2
:
31405 case MULTI_ARG_3_DI
:
31406 case MULTI_ARG_3_SI
:
31407 case MULTI_ARG_3_SI_DI
:
31408 case MULTI_ARG_3_HI
:
31409 case MULTI_ARG_3_HI_SI
:
31410 case MULTI_ARG_3_QI
:
31411 case MULTI_ARG_3_DI2
:
31412 case MULTI_ARG_3_SI2
:
31413 case MULTI_ARG_3_HI2
:
31414 case MULTI_ARG_3_QI2
:
31418 case MULTI_ARG_2_SF
:
31419 case MULTI_ARG_2_DF
:
31420 case MULTI_ARG_2_DI
:
31421 case MULTI_ARG_2_SI
:
31422 case MULTI_ARG_2_HI
:
31423 case MULTI_ARG_2_QI
:
31427 case MULTI_ARG_2_DI_IMM
:
31428 case MULTI_ARG_2_SI_IMM
:
31429 case MULTI_ARG_2_HI_IMM
:
31430 case MULTI_ARG_2_QI_IMM
:
31432 last_arg_constant
= true;
31435 case MULTI_ARG_1_SF
:
31436 case MULTI_ARG_1_DF
:
31437 case MULTI_ARG_1_SF2
:
31438 case MULTI_ARG_1_DF2
:
31439 case MULTI_ARG_1_DI
:
31440 case MULTI_ARG_1_SI
:
31441 case MULTI_ARG_1_HI
:
31442 case MULTI_ARG_1_QI
:
31443 case MULTI_ARG_1_SI_DI
:
31444 case MULTI_ARG_1_HI_DI
:
31445 case MULTI_ARG_1_HI_SI
:
31446 case MULTI_ARG_1_QI_DI
:
31447 case MULTI_ARG_1_QI_SI
:
31448 case MULTI_ARG_1_QI_HI
:
31452 case MULTI_ARG_2_DI_CMP
:
31453 case MULTI_ARG_2_SI_CMP
:
31454 case MULTI_ARG_2_HI_CMP
:
31455 case MULTI_ARG_2_QI_CMP
:
31457 comparison_p
= true;
31460 case MULTI_ARG_2_SF_TF
:
31461 case MULTI_ARG_2_DF_TF
:
31462 case MULTI_ARG_2_DI_TF
:
31463 case MULTI_ARG_2_SI_TF
:
31464 case MULTI_ARG_2_HI_TF
:
31465 case MULTI_ARG_2_QI_TF
:
31471 gcc_unreachable ();
31474 if (optimize
|| !target
31475 || GET_MODE (target
) != tmode
31476 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31477 target
= gen_reg_rtx (tmode
);
31479 gcc_assert (nargs
<= 4);
31481 for (i
= 0; i
< nargs
; i
++)
31483 tree arg
= CALL_EXPR_ARG (exp
, i
);
31484 rtx op
= expand_normal (arg
);
31485 int adjust
= (comparison_p
) ? 1 : 0;
31486 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
31488 if (last_arg_constant
&& i
== nargs
- 1)
31490 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
31492 enum insn_code new_icode
= icode
;
31495 case CODE_FOR_xop_vpermil2v2df3
:
31496 case CODE_FOR_xop_vpermil2v4sf3
:
31497 case CODE_FOR_xop_vpermil2v4df3
:
31498 case CODE_FOR_xop_vpermil2v8sf3
:
31499 error ("the last argument must be a 2-bit immediate");
31500 return gen_reg_rtx (tmode
);
31501 case CODE_FOR_xop_rotlv2di3
:
31502 new_icode
= CODE_FOR_rotlv2di3
;
31504 case CODE_FOR_xop_rotlv4si3
:
31505 new_icode
= CODE_FOR_rotlv4si3
;
31507 case CODE_FOR_xop_rotlv8hi3
:
31508 new_icode
= CODE_FOR_rotlv8hi3
;
31510 case CODE_FOR_xop_rotlv16qi3
:
31511 new_icode
= CODE_FOR_rotlv16qi3
;
31513 if (CONST_INT_P (op
))
31515 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
31516 op
= GEN_INT (INTVAL (op
) & mask
);
31517 gcc_checking_assert
31518 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
31522 gcc_checking_assert
31524 && insn_data
[new_icode
].operand
[0].mode
== tmode
31525 && insn_data
[new_icode
].operand
[1].mode
== tmode
31526 && insn_data
[new_icode
].operand
[2].mode
== mode
31527 && insn_data
[new_icode
].operand
[0].predicate
31528 == insn_data
[icode
].operand
[0].predicate
31529 && insn_data
[new_icode
].operand
[1].predicate
31530 == insn_data
[icode
].operand
[1].predicate
);
31536 gcc_unreachable ();
31543 if (VECTOR_MODE_P (mode
))
31544 op
= safe_vector_operand (op
, mode
);
31546 /* If we aren't optimizing, only allow one memory operand to be
31548 if (memory_operand (op
, mode
))
31551 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
31554 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
31556 op
= force_reg (mode
, op
);
31560 args
[i
].mode
= mode
;
31566 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31571 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
31572 GEN_INT ((int)sub_code
));
31573 else if (! comparison_p
)
31574 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31577 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
31581 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
31586 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31590 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
31594 gcc_unreachable ();
31604 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
31605 insns with vec_merge. */
31608 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
31612 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31613 rtx op1
, op0
= expand_normal (arg0
);
31614 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
31615 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
31617 if (optimize
|| !target
31618 || GET_MODE (target
) != tmode
31619 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
31620 target
= gen_reg_rtx (tmode
);
31622 if (VECTOR_MODE_P (mode0
))
31623 op0
= safe_vector_operand (op0
, mode0
);
31625 if ((optimize
&& !register_operand (op0
, mode0
))
31626 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31627 op0
= copy_to_mode_reg (mode0
, op0
);
31630 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
31631 op1
= copy_to_mode_reg (mode0
, op1
);
31633 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
31640 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
31643 ix86_expand_sse_compare (const struct builtin_description
*d
,
31644 tree exp
, rtx target
, bool swap
)
31647 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31648 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31649 rtx op0
= expand_normal (arg0
);
31650 rtx op1
= expand_normal (arg1
);
31652 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31653 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31654 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31655 enum rtx_code comparison
= d
->comparison
;
31657 if (VECTOR_MODE_P (mode0
))
31658 op0
= safe_vector_operand (op0
, mode0
);
31659 if (VECTOR_MODE_P (mode1
))
31660 op1
= safe_vector_operand (op1
, mode1
);
31662 /* Swap operands if we have a comparison that isn't available in
31666 rtx tmp
= gen_reg_rtx (mode1
);
31667 emit_move_insn (tmp
, op1
);
31672 if (optimize
|| !target
31673 || GET_MODE (target
) != tmode
31674 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31675 target
= gen_reg_rtx (tmode
);
31677 if ((optimize
&& !register_operand (op0
, mode0
))
31678 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
31679 op0
= copy_to_mode_reg (mode0
, op0
);
31680 if ((optimize
&& !register_operand (op1
, mode1
))
31681 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
31682 op1
= copy_to_mode_reg (mode1
, op1
);
31684 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
31685 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31692 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
31695 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
31699 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31700 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31701 rtx op0
= expand_normal (arg0
);
31702 rtx op1
= expand_normal (arg1
);
31703 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31704 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31705 enum rtx_code comparison
= d
->comparison
;
31707 if (VECTOR_MODE_P (mode0
))
31708 op0
= safe_vector_operand (op0
, mode0
);
31709 if (VECTOR_MODE_P (mode1
))
31710 op1
= safe_vector_operand (op1
, mode1
);
31712 /* Swap operands if we have a comparison that isn't available in
31714 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
31721 target
= gen_reg_rtx (SImode
);
31722 emit_move_insn (target
, const0_rtx
);
31723 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31725 if ((optimize
&& !register_operand (op0
, mode0
))
31726 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31727 op0
= copy_to_mode_reg (mode0
, op0
);
31728 if ((optimize
&& !register_operand (op1
, mode1
))
31729 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31730 op1
= copy_to_mode_reg (mode1
, op1
);
31732 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31736 emit_insn (gen_rtx_SET (VOIDmode
,
31737 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31738 gen_rtx_fmt_ee (comparison
, QImode
,
31742 return SUBREG_REG (target
);
31745 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
31748 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
31752 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31753 rtx op1
, op0
= expand_normal (arg0
);
31754 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31755 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31757 if (optimize
|| target
== 0
31758 || GET_MODE (target
) != tmode
31759 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31760 target
= gen_reg_rtx (tmode
);
31762 if (VECTOR_MODE_P (mode0
))
31763 op0
= safe_vector_operand (op0
, mode0
);
31765 if ((optimize
&& !register_operand (op0
, mode0
))
31766 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31767 op0
= copy_to_mode_reg (mode0
, op0
);
31769 op1
= GEN_INT (d
->comparison
);
31771 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
31779 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
31780 tree exp
, rtx target
)
31783 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31784 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31785 rtx op0
= expand_normal (arg0
);
31786 rtx op1
= expand_normal (arg1
);
31788 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
31789 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
31790 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
31792 if (optimize
|| target
== 0
31793 || GET_MODE (target
) != tmode
31794 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
31795 target
= gen_reg_rtx (tmode
);
31797 op0
= safe_vector_operand (op0
, mode0
);
31798 op1
= safe_vector_operand (op1
, mode1
);
31800 if ((optimize
&& !register_operand (op0
, mode0
))
31801 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31802 op0
= copy_to_mode_reg (mode0
, op0
);
31803 if ((optimize
&& !register_operand (op1
, mode1
))
31804 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31805 op1
= copy_to_mode_reg (mode1
, op1
);
31807 op2
= GEN_INT (d
->comparison
);
31809 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
31816 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
31819 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
31823 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31824 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31825 rtx op0
= expand_normal (arg0
);
31826 rtx op1
= expand_normal (arg1
);
31827 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
31828 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
31829 enum rtx_code comparison
= d
->comparison
;
31831 if (VECTOR_MODE_P (mode0
))
31832 op0
= safe_vector_operand (op0
, mode0
);
31833 if (VECTOR_MODE_P (mode1
))
31834 op1
= safe_vector_operand (op1
, mode1
);
31836 target
= gen_reg_rtx (SImode
);
31837 emit_move_insn (target
, const0_rtx
);
31838 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31840 if ((optimize
&& !register_operand (op0
, mode0
))
31841 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
31842 op0
= copy_to_mode_reg (mode0
, op0
);
31843 if ((optimize
&& !register_operand (op1
, mode1
))
31844 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
31845 op1
= copy_to_mode_reg (mode1
, op1
);
31847 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
31851 emit_insn (gen_rtx_SET (VOIDmode
,
31852 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31853 gen_rtx_fmt_ee (comparison
, QImode
,
31857 return SUBREG_REG (target
);
31860 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
31863 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
31864 tree exp
, rtx target
)
31867 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31868 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31869 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31870 tree arg3
= CALL_EXPR_ARG (exp
, 3);
31871 tree arg4
= CALL_EXPR_ARG (exp
, 4);
31872 rtx scratch0
, scratch1
;
31873 rtx op0
= expand_normal (arg0
);
31874 rtx op1
= expand_normal (arg1
);
31875 rtx op2
= expand_normal (arg2
);
31876 rtx op3
= expand_normal (arg3
);
31877 rtx op4
= expand_normal (arg4
);
31878 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
31880 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31881 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31882 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31883 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
31884 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
31885 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
31886 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
31888 if (VECTOR_MODE_P (modev2
))
31889 op0
= safe_vector_operand (op0
, modev2
);
31890 if (VECTOR_MODE_P (modev4
))
31891 op2
= safe_vector_operand (op2
, modev4
);
31893 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31894 op0
= copy_to_mode_reg (modev2
, op0
);
31895 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
31896 op1
= copy_to_mode_reg (modei3
, op1
);
31897 if ((optimize
&& !register_operand (op2
, modev4
))
31898 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
31899 op2
= copy_to_mode_reg (modev4
, op2
);
31900 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
31901 op3
= copy_to_mode_reg (modei5
, op3
);
31903 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
31905 error ("the fifth argument must be an 8-bit immediate");
31909 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
31911 if (optimize
|| !target
31912 || GET_MODE (target
) != tmode0
31913 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
31914 target
= gen_reg_rtx (tmode0
);
31916 scratch1
= gen_reg_rtx (tmode1
);
31918 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31920 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
31922 if (optimize
|| !target
31923 || GET_MODE (target
) != tmode1
31924 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
31925 target
= gen_reg_rtx (tmode1
);
31927 scratch0
= gen_reg_rtx (tmode0
);
31929 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
31933 gcc_assert (d
->flag
);
31935 scratch0
= gen_reg_rtx (tmode0
);
31936 scratch1
= gen_reg_rtx (tmode1
);
31938 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
31948 target
= gen_reg_rtx (SImode
);
31949 emit_move_insn (target
, const0_rtx
);
31950 target
= gen_rtx_SUBREG (QImode
, target
, 0);
31953 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
31954 gen_rtx_fmt_ee (EQ
, QImode
,
31955 gen_rtx_REG ((enum machine_mode
) d
->flag
,
31958 return SUBREG_REG (target
);
31965 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
31968 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
31969 tree exp
, rtx target
)
31972 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31973 tree arg1
= CALL_EXPR_ARG (exp
, 1);
31974 tree arg2
= CALL_EXPR_ARG (exp
, 2);
31975 rtx scratch0
, scratch1
;
31976 rtx op0
= expand_normal (arg0
);
31977 rtx op1
= expand_normal (arg1
);
31978 rtx op2
= expand_normal (arg2
);
31979 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
31981 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
31982 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
31983 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
31984 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
31985 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
31987 if (VECTOR_MODE_P (modev2
))
31988 op0
= safe_vector_operand (op0
, modev2
);
31989 if (VECTOR_MODE_P (modev3
))
31990 op1
= safe_vector_operand (op1
, modev3
);
31992 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
31993 op0
= copy_to_mode_reg (modev2
, op0
);
31994 if ((optimize
&& !register_operand (op1
, modev3
))
31995 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
31996 op1
= copy_to_mode_reg (modev3
, op1
);
31998 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
32000 error ("the third argument must be an 8-bit immediate");
32004 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
32006 if (optimize
|| !target
32007 || GET_MODE (target
) != tmode0
32008 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
32009 target
= gen_reg_rtx (tmode0
);
32011 scratch1
= gen_reg_rtx (tmode1
);
32013 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
32015 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
32017 if (optimize
|| !target
32018 || GET_MODE (target
) != tmode1
32019 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
32020 target
= gen_reg_rtx (tmode1
);
32022 scratch0
= gen_reg_rtx (tmode0
);
32024 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
32028 gcc_assert (d
->flag
);
32030 scratch0
= gen_reg_rtx (tmode0
);
32031 scratch1
= gen_reg_rtx (tmode1
);
32033 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
32043 target
= gen_reg_rtx (SImode
);
32044 emit_move_insn (target
, const0_rtx
);
32045 target
= gen_rtx_SUBREG (QImode
, target
, 0);
32048 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
32049 gen_rtx_fmt_ee (EQ
, QImode
,
32050 gen_rtx_REG ((enum machine_mode
) d
->flag
,
32053 return SUBREG_REG (target
);
32059 /* Subroutine of ix86_expand_builtin to take care of insns with
32060 variable number of operands. */
32063 ix86_expand_args_builtin (const struct builtin_description
*d
,
32064 tree exp
, rtx target
)
32066 rtx pat
, real_target
;
32067 unsigned int i
, nargs
;
32068 unsigned int nargs_constant
= 0;
32069 int num_memory
= 0;
32073 enum machine_mode mode
;
32075 bool last_arg_count
= false;
32076 enum insn_code icode
= d
->icode
;
32077 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
32078 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
32079 enum machine_mode rmode
= VOIDmode
;
32081 enum rtx_code comparison
= d
->comparison
;
32083 switch ((enum ix86_builtin_func_type
) d
->flag
)
32085 case V2DF_FTYPE_V2DF_ROUND
:
32086 case V4DF_FTYPE_V4DF_ROUND
:
32087 case V4SF_FTYPE_V4SF_ROUND
:
32088 case V8SF_FTYPE_V8SF_ROUND
:
32089 case V4SI_FTYPE_V4SF_ROUND
:
32090 case V8SI_FTYPE_V8SF_ROUND
:
32091 return ix86_expand_sse_round (d
, exp
, target
);
32092 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
32093 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
32094 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
32095 case INT_FTYPE_V8SF_V8SF_PTEST
:
32096 case INT_FTYPE_V4DI_V4DI_PTEST
:
32097 case INT_FTYPE_V4DF_V4DF_PTEST
:
32098 case INT_FTYPE_V4SF_V4SF_PTEST
:
32099 case INT_FTYPE_V2DI_V2DI_PTEST
:
32100 case INT_FTYPE_V2DF_V2DF_PTEST
:
32101 return ix86_expand_sse_ptest (d
, exp
, target
);
32102 case FLOAT128_FTYPE_FLOAT128
:
32103 case FLOAT_FTYPE_FLOAT
:
32104 case INT_FTYPE_INT
:
32105 case UINT64_FTYPE_INT
:
32106 case UINT16_FTYPE_UINT16
:
32107 case INT64_FTYPE_INT64
:
32108 case INT64_FTYPE_V4SF
:
32109 case INT64_FTYPE_V2DF
:
32110 case INT_FTYPE_V16QI
:
32111 case INT_FTYPE_V8QI
:
32112 case INT_FTYPE_V8SF
:
32113 case INT_FTYPE_V4DF
:
32114 case INT_FTYPE_V4SF
:
32115 case INT_FTYPE_V2DF
:
32116 case INT_FTYPE_V32QI
:
32117 case V16QI_FTYPE_V16QI
:
32118 case V8SI_FTYPE_V8SF
:
32119 case V8SI_FTYPE_V4SI
:
32120 case V8HI_FTYPE_V8HI
:
32121 case V8HI_FTYPE_V16QI
:
32122 case V8QI_FTYPE_V8QI
:
32123 case V8SF_FTYPE_V8SF
:
32124 case V8SF_FTYPE_V8SI
:
32125 case V8SF_FTYPE_V4SF
:
32126 case V8SF_FTYPE_V8HI
:
32127 case V4SI_FTYPE_V4SI
:
32128 case V4SI_FTYPE_V16QI
:
32129 case V4SI_FTYPE_V4SF
:
32130 case V4SI_FTYPE_V8SI
:
32131 case V4SI_FTYPE_V8HI
:
32132 case V4SI_FTYPE_V4DF
:
32133 case V4SI_FTYPE_V2DF
:
32134 case V4HI_FTYPE_V4HI
:
32135 case V4DF_FTYPE_V4DF
:
32136 case V4DF_FTYPE_V4SI
:
32137 case V4DF_FTYPE_V4SF
:
32138 case V4DF_FTYPE_V2DF
:
32139 case V4SF_FTYPE_V4SF
:
32140 case V4SF_FTYPE_V4SI
:
32141 case V4SF_FTYPE_V8SF
:
32142 case V4SF_FTYPE_V4DF
:
32143 case V4SF_FTYPE_V8HI
:
32144 case V4SF_FTYPE_V2DF
:
32145 case V2DI_FTYPE_V2DI
:
32146 case V2DI_FTYPE_V16QI
:
32147 case V2DI_FTYPE_V8HI
:
32148 case V2DI_FTYPE_V4SI
:
32149 case V2DF_FTYPE_V2DF
:
32150 case V2DF_FTYPE_V4SI
:
32151 case V2DF_FTYPE_V4DF
:
32152 case V2DF_FTYPE_V4SF
:
32153 case V2DF_FTYPE_V2SI
:
32154 case V2SI_FTYPE_V2SI
:
32155 case V2SI_FTYPE_V4SF
:
32156 case V2SI_FTYPE_V2SF
:
32157 case V2SI_FTYPE_V2DF
:
32158 case V2SF_FTYPE_V2SF
:
32159 case V2SF_FTYPE_V2SI
:
32160 case V32QI_FTYPE_V32QI
:
32161 case V32QI_FTYPE_V16QI
:
32162 case V16HI_FTYPE_V16HI
:
32163 case V16HI_FTYPE_V8HI
:
32164 case V8SI_FTYPE_V8SI
:
32165 case V16HI_FTYPE_V16QI
:
32166 case V8SI_FTYPE_V16QI
:
32167 case V4DI_FTYPE_V16QI
:
32168 case V8SI_FTYPE_V8HI
:
32169 case V4DI_FTYPE_V8HI
:
32170 case V4DI_FTYPE_V4SI
:
32171 case V4DI_FTYPE_V2DI
:
32174 case V4SF_FTYPE_V4SF_VEC_MERGE
:
32175 case V2DF_FTYPE_V2DF_VEC_MERGE
:
32176 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
32177 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
32178 case V16QI_FTYPE_V16QI_V16QI
:
32179 case V16QI_FTYPE_V8HI_V8HI
:
32180 case V8QI_FTYPE_V8QI_V8QI
:
32181 case V8QI_FTYPE_V4HI_V4HI
:
32182 case V8HI_FTYPE_V8HI_V8HI
:
32183 case V8HI_FTYPE_V16QI_V16QI
:
32184 case V8HI_FTYPE_V4SI_V4SI
:
32185 case V8SF_FTYPE_V8SF_V8SF
:
32186 case V8SF_FTYPE_V8SF_V8SI
:
32187 case V4SI_FTYPE_V4SI_V4SI
:
32188 case V4SI_FTYPE_V8HI_V8HI
:
32189 case V4SI_FTYPE_V4SF_V4SF
:
32190 case V4SI_FTYPE_V2DF_V2DF
:
32191 case V4HI_FTYPE_V4HI_V4HI
:
32192 case V4HI_FTYPE_V8QI_V8QI
:
32193 case V4HI_FTYPE_V2SI_V2SI
:
32194 case V4DF_FTYPE_V4DF_V4DF
:
32195 case V4DF_FTYPE_V4DF_V4DI
:
32196 case V4SF_FTYPE_V4SF_V4SF
:
32197 case V4SF_FTYPE_V4SF_V4SI
:
32198 case V4SF_FTYPE_V4SF_V2SI
:
32199 case V4SF_FTYPE_V4SF_V2DF
:
32200 case V4SF_FTYPE_V4SF_DI
:
32201 case V4SF_FTYPE_V4SF_SI
:
32202 case V2DI_FTYPE_V2DI_V2DI
:
32203 case V2DI_FTYPE_V16QI_V16QI
:
32204 case V2DI_FTYPE_V4SI_V4SI
:
32205 case V2UDI_FTYPE_V4USI_V4USI
:
32206 case V2DI_FTYPE_V2DI_V16QI
:
32207 case V2DI_FTYPE_V2DF_V2DF
:
32208 case V2SI_FTYPE_V2SI_V2SI
:
32209 case V2SI_FTYPE_V4HI_V4HI
:
32210 case V2SI_FTYPE_V2SF_V2SF
:
32211 case V2DF_FTYPE_V2DF_V2DF
:
32212 case V2DF_FTYPE_V2DF_V4SF
:
32213 case V2DF_FTYPE_V2DF_V2DI
:
32214 case V2DF_FTYPE_V2DF_DI
:
32215 case V2DF_FTYPE_V2DF_SI
:
32216 case V2SF_FTYPE_V2SF_V2SF
:
32217 case V1DI_FTYPE_V1DI_V1DI
:
32218 case V1DI_FTYPE_V8QI_V8QI
:
32219 case V1DI_FTYPE_V2SI_V2SI
:
32220 case V32QI_FTYPE_V16HI_V16HI
:
32221 case V16HI_FTYPE_V8SI_V8SI
:
32222 case V32QI_FTYPE_V32QI_V32QI
:
32223 case V16HI_FTYPE_V32QI_V32QI
:
32224 case V16HI_FTYPE_V16HI_V16HI
:
32225 case V8SI_FTYPE_V4DF_V4DF
:
32226 case V8SI_FTYPE_V8SI_V8SI
:
32227 case V8SI_FTYPE_V16HI_V16HI
:
32228 case V4DI_FTYPE_V4DI_V4DI
:
32229 case V4DI_FTYPE_V8SI_V8SI
:
32230 case V4UDI_FTYPE_V8USI_V8USI
:
32231 if (comparison
== UNKNOWN
)
32232 return ix86_expand_binop_builtin (icode
, exp
, target
);
32235 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
32236 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
32237 gcc_assert (comparison
!= UNKNOWN
);
32241 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
32242 case V16HI_FTYPE_V16HI_SI_COUNT
:
32243 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
32244 case V8SI_FTYPE_V8SI_SI_COUNT
:
32245 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
32246 case V4DI_FTYPE_V4DI_INT_COUNT
:
32247 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
32248 case V8HI_FTYPE_V8HI_SI_COUNT
:
32249 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
32250 case V4SI_FTYPE_V4SI_SI_COUNT
:
32251 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
32252 case V4HI_FTYPE_V4HI_SI_COUNT
:
32253 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
32254 case V2DI_FTYPE_V2DI_SI_COUNT
:
32255 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
32256 case V2SI_FTYPE_V2SI_SI_COUNT
:
32257 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
32258 case V1DI_FTYPE_V1DI_SI_COUNT
:
32260 last_arg_count
= true;
32262 case UINT64_FTYPE_UINT64_UINT64
:
32263 case UINT_FTYPE_UINT_UINT
:
32264 case UINT_FTYPE_UINT_USHORT
:
32265 case UINT_FTYPE_UINT_UCHAR
:
32266 case UINT16_FTYPE_UINT16_INT
:
32267 case UINT8_FTYPE_UINT8_INT
:
32270 case V2DI_FTYPE_V2DI_INT_CONVERT
:
32273 nargs_constant
= 1;
32275 case V4DI_FTYPE_V4DI_INT_CONVERT
:
32278 nargs_constant
= 1;
32280 case V8HI_FTYPE_V8HI_INT
:
32281 case V8HI_FTYPE_V8SF_INT
:
32282 case V8HI_FTYPE_V4SF_INT
:
32283 case V8SF_FTYPE_V8SF_INT
:
32284 case V4SI_FTYPE_V4SI_INT
:
32285 case V4SI_FTYPE_V8SI_INT
:
32286 case V4HI_FTYPE_V4HI_INT
:
32287 case V4DF_FTYPE_V4DF_INT
:
32288 case V4SF_FTYPE_V4SF_INT
:
32289 case V4SF_FTYPE_V8SF_INT
:
32290 case V2DI_FTYPE_V2DI_INT
:
32291 case V2DF_FTYPE_V2DF_INT
:
32292 case V2DF_FTYPE_V4DF_INT
:
32293 case V16HI_FTYPE_V16HI_INT
:
32294 case V8SI_FTYPE_V8SI_INT
:
32295 case V4DI_FTYPE_V4DI_INT
:
32296 case V2DI_FTYPE_V4DI_INT
:
32298 nargs_constant
= 1;
32300 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
32301 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
32302 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
32303 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
32304 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
32305 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
32308 case V32QI_FTYPE_V32QI_V32QI_INT
:
32309 case V16HI_FTYPE_V16HI_V16HI_INT
:
32310 case V16QI_FTYPE_V16QI_V16QI_INT
:
32311 case V4DI_FTYPE_V4DI_V4DI_INT
:
32312 case V8HI_FTYPE_V8HI_V8HI_INT
:
32313 case V8SI_FTYPE_V8SI_V8SI_INT
:
32314 case V8SI_FTYPE_V8SI_V4SI_INT
:
32315 case V8SF_FTYPE_V8SF_V8SF_INT
:
32316 case V8SF_FTYPE_V8SF_V4SF_INT
:
32317 case V4SI_FTYPE_V4SI_V4SI_INT
:
32318 case V4DF_FTYPE_V4DF_V4DF_INT
:
32319 case V4DF_FTYPE_V4DF_V2DF_INT
:
32320 case V4SF_FTYPE_V4SF_V4SF_INT
:
32321 case V2DI_FTYPE_V2DI_V2DI_INT
:
32322 case V4DI_FTYPE_V4DI_V2DI_INT
:
32323 case V2DF_FTYPE_V2DF_V2DF_INT
:
32325 nargs_constant
= 1;
32327 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
32330 nargs_constant
= 1;
32332 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
32335 nargs_constant
= 1;
32337 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
32340 nargs_constant
= 1;
32342 case V2DI_FTYPE_V2DI_UINT_UINT
:
32344 nargs_constant
= 2;
32346 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
32347 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
32348 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
32349 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
32351 nargs_constant
= 1;
32353 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
32355 nargs_constant
= 2;
32357 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
32358 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
32362 gcc_unreachable ();
32365 gcc_assert (nargs
<= ARRAY_SIZE (args
));
32367 if (comparison
!= UNKNOWN
)
32369 gcc_assert (nargs
== 2);
32370 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
32373 if (rmode
== VOIDmode
|| rmode
== tmode
)
32377 || GET_MODE (target
) != tmode
32378 || !insn_p
->operand
[0].predicate (target
, tmode
))
32379 target
= gen_reg_rtx (tmode
);
32380 real_target
= target
;
32384 real_target
= gen_reg_rtx (tmode
);
32385 target
= simplify_gen_subreg (rmode
, real_target
, tmode
, 0);
32388 for (i
= 0; i
< nargs
; i
++)
32390 tree arg
= CALL_EXPR_ARG (exp
, i
);
32391 rtx op
= expand_normal (arg
);
32392 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32393 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
32395 if (last_arg_count
&& (i
+ 1) == nargs
)
32397 /* SIMD shift insns take either an 8-bit immediate or
32398 register as count. But builtin functions take int as
32399 count. If count doesn't match, we put it in register. */
32402 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
32403 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
32404 op
= copy_to_reg (op
);
32407 else if ((nargs
- i
) <= nargs_constant
)
32412 case CODE_FOR_avx2_inserti128
:
32413 case CODE_FOR_avx2_extracti128
:
32414 error ("the last argument must be an 1-bit immediate");
32417 case CODE_FOR_sse4_1_roundsd
:
32418 case CODE_FOR_sse4_1_roundss
:
32420 case CODE_FOR_sse4_1_roundpd
:
32421 case CODE_FOR_sse4_1_roundps
:
32422 case CODE_FOR_avx_roundpd256
:
32423 case CODE_FOR_avx_roundps256
:
32425 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
32426 case CODE_FOR_sse4_1_roundps_sfix
:
32427 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
32428 case CODE_FOR_avx_roundps_sfix256
:
32430 case CODE_FOR_sse4_1_blendps
:
32431 case CODE_FOR_avx_blendpd256
:
32432 case CODE_FOR_avx_vpermilv4df
:
32433 error ("the last argument must be a 4-bit immediate");
32436 case CODE_FOR_sse4_1_blendpd
:
32437 case CODE_FOR_avx_vpermilv2df
:
32438 case CODE_FOR_xop_vpermil2v2df3
:
32439 case CODE_FOR_xop_vpermil2v4sf3
:
32440 case CODE_FOR_xop_vpermil2v4df3
:
32441 case CODE_FOR_xop_vpermil2v8sf3
:
32442 error ("the last argument must be a 2-bit immediate");
32445 case CODE_FOR_avx_vextractf128v4df
:
32446 case CODE_FOR_avx_vextractf128v8sf
:
32447 case CODE_FOR_avx_vextractf128v8si
:
32448 case CODE_FOR_avx_vinsertf128v4df
:
32449 case CODE_FOR_avx_vinsertf128v8sf
:
32450 case CODE_FOR_avx_vinsertf128v8si
:
32451 error ("the last argument must be a 1-bit immediate");
32454 case CODE_FOR_avx_vmcmpv2df3
:
32455 case CODE_FOR_avx_vmcmpv4sf3
:
32456 case CODE_FOR_avx_cmpv2df3
:
32457 case CODE_FOR_avx_cmpv4sf3
:
32458 case CODE_FOR_avx_cmpv4df3
:
32459 case CODE_FOR_avx_cmpv8sf3
:
32460 error ("the last argument must be a 5-bit immediate");
32464 switch (nargs_constant
)
32467 if ((nargs
- i
) == nargs_constant
)
32469 error ("the next to last argument must be an 8-bit immediate");
32473 error ("the last argument must be an 8-bit immediate");
32476 gcc_unreachable ();
32483 if (VECTOR_MODE_P (mode
))
32484 op
= safe_vector_operand (op
, mode
);
32486 /* If we aren't optimizing, only allow one memory operand to
32488 if (memory_operand (op
, mode
))
32491 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
32493 if (optimize
|| !match
|| num_memory
> 1)
32494 op
= copy_to_mode_reg (mode
, op
);
32498 op
= copy_to_reg (op
);
32499 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
32504 args
[i
].mode
= mode
;
32510 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
32513 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
32516 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32520 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
32521 args
[2].op
, args
[3].op
);
32524 gcc_unreachable ();
32534 /* Subroutine of ix86_expand_builtin to take care of special insns
32535 with variable number of operands. */
/* NOTE(review): this chunk is a lossy extraction — gaps in the embedded
   original line numbers (e.g. 32556 -> 32558, 32675 -> 32678) show that
   braces, break statements and some case labels are missing here.
   Comments below describe only what the visible text establishes.  */
32538 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
32539 tree exp
, rtx target
)
/* Locals: loop index, argument count, call-arg offset, and the index of
   the (at most one) memory operand.  */
32543 unsigned int i
, nargs
, arg_adjust
, memory
;
/* True when the builtin's memory operand must carry strict
   GET_MODE_ALIGNMENT (set for the movnt*/movntdqa cases below).  */
32544 bool aligned_mem
= false;
32548 enum machine_mode mode
;
/* Insn code and its operand metadata come from the builtin table entry D.  */
32550 enum insn_code icode
= d
->icode
;
32551 bool last_arg_constant
= false;
32552 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
32553 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
/* KLASS distinguishes load-like builtins (result in TARGET) from
   store-like ones (operand 0 is the destination memory).  */
32554 enum { load
, store
} klass
;
/* Classify the builtin by its function-type signature; each case is
   expected to set nargs/klass/memory (assignments partly lost in the
   extraction gaps).  */
32556 switch ((enum ix86_builtin_func_type
) d
->flag
)
32558 case VOID_FTYPE_VOID
:
32559 emit_insn (GEN_FCN (icode
) (target
));
32561 case VOID_FTYPE_UINT64
:
32562 case VOID_FTYPE_UNSIGNED
:
32568 case INT_FTYPE_VOID
:
32569 case UINT64_FTYPE_VOID
:
32570 case UNSIGNED_FTYPE_VOID
:
/* Load-like signatures: first argument is a pointer to the source.  */
32575 case UINT64_FTYPE_PUNSIGNED
:
32576 case V2DI_FTYPE_PV2DI
:
32577 case V4DI_FTYPE_PV4DI
:
32578 case V32QI_FTYPE_PCCHAR
:
32579 case V16QI_FTYPE_PCCHAR
:
32580 case V8SF_FTYPE_PCV4SF
:
32581 case V8SF_FTYPE_PCFLOAT
:
32582 case V4SF_FTYPE_PCFLOAT
:
32583 case V4DF_FTYPE_PCV2DF
:
32584 case V4DF_FTYPE_PCDOUBLE
:
32585 case V2DF_FTYPE_PCDOUBLE
:
32586 case VOID_FTYPE_PVOID
:
/* movntdqa requires an aligned memory source.  */
32592 case CODE_FOR_sse4_1_movntdqa
:
32593 case CODE_FOR_avx2_movntdqa
:
32594 aligned_mem
= true;
/* Store-like signatures: first argument is the destination pointer.  */
32600 case VOID_FTYPE_PV2SF_V4SF
:
32601 case VOID_FTYPE_PV4DI_V4DI
:
32602 case VOID_FTYPE_PV2DI_V2DI
:
32603 case VOID_FTYPE_PCHAR_V32QI
:
32604 case VOID_FTYPE_PCHAR_V16QI
:
32605 case VOID_FTYPE_PFLOAT_V8SF
:
32606 case VOID_FTYPE_PFLOAT_V4SF
:
32607 case VOID_FTYPE_PDOUBLE_V4DF
:
32608 case VOID_FTYPE_PDOUBLE_V2DF
:
32609 case VOID_FTYPE_PLONGLONG_LONGLONG
:
32610 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
32611 case VOID_FTYPE_PINT_INT
:
32614 /* Reserve memory operand for target. */
32615 memory
= ARRAY_SIZE (args
);
32618 /* These builtins and instructions require the memory
32619 to be properly aligned. */
32620 case CODE_FOR_avx_movntv4di
:
32621 case CODE_FOR_sse2_movntv2di
:
32622 case CODE_FOR_avx_movntv8sf
:
32623 case CODE_FOR_sse_movntv4sf
:
32624 case CODE_FOR_sse4a_vmmovntv4sf
:
32625 case CODE_FOR_avx_movntv4df
:
32626 case CODE_FOR_sse2_movntv2df
:
32627 case CODE_FOR_sse4a_vmmovntv2df
:
32628 case CODE_FOR_sse2_movntidi
:
32629 case CODE_FOR_sse_movntq
:
32630 case CODE_FOR_sse2_movntisi
:
32631 aligned_mem
= true;
/* Two-operand loads: vector arg plus a memory source.  */
32637 case V4SF_FTYPE_V4SF_PCV2SF
:
32638 case V2DF_FTYPE_V2DF_PCDOUBLE
:
/* Gather/masked-load style signatures: pointer source plus index/mask.  */
32643 case V8SF_FTYPE_PCV8SF_V8SI
:
32644 case V4DF_FTYPE_PCV4DF_V4DI
:
32645 case V4SF_FTYPE_PCV4SF_V4SI
:
32646 case V2DF_FTYPE_PCV2DF_V2DI
:
32647 case V8SI_FTYPE_PCV8SI_V8SI
:
32648 case V4DI_FTYPE_PCV4DI_V4DI
:
32649 case V4SI_FTYPE_PCV4SI_V4SI
:
32650 case V2DI_FTYPE_PCV2DI_V2DI
:
/* Masked-store style signatures.  */
32655 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
32656 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
32657 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
32658 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
32659 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
32660 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
32661 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
32662 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
32665 /* Reserve memory operand for target. */
32666 memory
= ARRAY_SIZE (args
);
/* LWP-style builtins: all-register operands, last one an immediate.  */
32668 case VOID_FTYPE_UINT_UINT_UINT
:
32669 case VOID_FTYPE_UINT64_UINT_UINT
:
32670 case UCHAR_FTYPE_UINT_UINT_UINT
:
32671 case UCHAR_FTYPE_UINT64_UINT_UINT
:
32674 memory
= ARRAY_SIZE (args
);
32675 last_arg_constant
= true;
32678 gcc_unreachable ();
32681 gcc_assert (nargs
<= ARRAY_SIZE (args
));
/* Store path: the first call argument is the destination address;
   TARGET must not have been supplied by the caller.  */
32683 if (klass
== store
)
32685 arg
= CALL_EXPR_ARG (exp
, 0);
32686 op
= expand_normal (arg
);
32687 gcc_assert (target
== 0);
32690 op
= ix86_zero_extend_to_Pmode (op
);
32691 target
= gen_rtx_MEM (tmode
, op
);
32692 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
32693 on it. Try to improve it using get_pointer_alignment,
32694 and if the special builtin is one that requires strict
32695 mode alignment, also from it's GET_MODE_ALIGNMENT.
32696 Failure to do so could lead to ix86_legitimate_combined_insn
32697 rejecting all changes to such insns. */
32698 unsigned int align
= get_pointer_alignment (arg
);
32699 if (aligned_mem
&& align
< GET_MODE_ALIGNMENT (tmode
))
32700 align
= GET_MODE_ALIGNMENT (tmode
);
32701 if (MEM_ALIGN (target
) < align
)
32702 set_mem_align (target
, align
);
32705 target
= force_reg (tmode
, op
);
/* Load path (condition head lost in extraction): ensure TARGET is a
   fresh register of the insn's result mode when unusable as-is.  */
32713 || !register_operand (target
, tmode
)
32714 || GET_MODE (target
) != tmode
)
32715 target
= gen_reg_rtx (tmode
);
/* Expand and legitimize each remaining call argument into args[].  */
32718 for (i
= 0; i
< nargs
; i
++)
32720 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
32723 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
32724 op
= expand_normal (arg
);
32725 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
/* Last operand of an LWP builtin must be a compile-time constant.  */
32727 if (last_arg_constant
&& (i
+ 1) == nargs
)
32731 if (icode
== CODE_FOR_lwp_lwpvalsi3
32732 || icode
== CODE_FOR_lwp_lwpinssi3
32733 || icode
== CODE_FOR_lwp_lwpvaldi3
32734 || icode
== CODE_FOR_lwp_lwpinsdi3
)
32735 error ("the last argument must be a 32-bit immediate");
32737 error ("the last argument must be an 8-bit immediate");
32745 /* This must be the memory operand. */
32746 op
= ix86_zero_extend_to_Pmode (op
);
32747 op
= gen_rtx_MEM (mode
, op
);
32748 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
32749 on it. Try to improve it using get_pointer_alignment,
32750 and if the special builtin is one that requires strict
32751 mode alignment, also from it's GET_MODE_ALIGNMENT.
32752 Failure to do so could lead to ix86_legitimate_combined_insn
32753 rejecting all changes to such insns. */
32754 unsigned int align
= get_pointer_alignment (arg
);
32755 if (aligned_mem
&& align
< GET_MODE_ALIGNMENT (mode
))
32756 align
= GET_MODE_ALIGNMENT (mode
);
32757 if (MEM_ALIGN (op
) < align
)
32758 set_mem_align (op
, align
);
32762 /* This must be register. */
32763 if (VECTOR_MODE_P (mode
))
32764 op
= safe_vector_operand (op
, mode
);
32766 gcc_assert (GET_MODE (op
) == mode
32767 || GET_MODE (op
) == VOIDmode
);
32768 op
= copy_to_mode_reg (mode
, op
);
32773 args
[i
].mode
= mode
;
/* Build the insn pattern for 0..3 operands (switch head lost in the
   extraction gaps), then emit it below.  */
32779 pat
= GEN_FCN (icode
) (target
);
32782 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
32785 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
32788 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
32791 gcc_unreachable ();
/* Stores produce no value; loads return the result register.  */
32797 return klass
== store
? 0 : target
;
32800 /* Return the integer constant in ARG. Constrain it to be in the range
32801 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): extraction gaps hide the return statements of this
   function; only the range check and diagnostic are visible here.  */
32804 get_element_number (tree vec_type
, tree arg
)
/* MAX is the highest valid lane index: number of vector subparts - 1.  */
32806 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
/* Reject ARG unless it is a constant that fits an unsigned HWI and lies
   within [0, max]; the comma expression also extracts the value.  */
32808 if (!tree_fits_uhwi_p (arg
)
32809 || (elt
= tree_to_uhwi (arg
), elt
> max
))
32811 error ("selector must be an integer constant in the range 0..%wi", max
);
32818 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32819 ix86_expand_vector_init. We DO have language-level syntax for this, in
32820 the form of (type){ init-list }. Except that since we can't place emms
32821 instructions from inside the compiler, we can't allow the use of MMX
32822 registers unless the user explicitly asks for it. So we do *not* define
32823 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
32824 we have builtins invoked by mmintrin.h that gives us license to emit
32825 these sorts of instructions. */
/* NOTE(review): lossy extraction — the trailing "return target;" of this
   function is not visible in this chunk.  */
32828 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
/* TMODE is the vector mode of TYPE; INNER_MODE its element mode;
   N_ELT the lane count.  */
32830 enum machine_mode tmode
= TYPE_MODE (type
);
32831 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
32832 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
32833 rtvec v
= rtvec_alloc (n_elt
);
/* The builtin must supply exactly one scalar argument per lane.  */
32835 gcc_assert (VECTOR_MODE_P (tmode
));
32836 gcc_assert (call_expr_nargs (exp
) == n_elt
);
/* Expand each call argument and narrow it to the element mode.  */
32838 for (i
= 0; i
< n_elt
; ++i
)
32840 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
32841 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
/* Make sure TARGET is a register of the vector mode.  */
32844 if (!target
|| !register_operand (target
, tmode
))
32845 target
= gen_reg_rtx (tmode
);
/* Delegate the actual lane-assembly to ix86_expand_vector_init.  */
32847 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
32851 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32852 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
32853 had a language-level syntax for referencing vector elements. */
/* NOTE(review): lossy extraction — local declarations for arg0/arg1/op0/elt
   and the trailing "return target;" are not visible in this chunk.  */
32856 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
32858 enum machine_mode tmode
, mode0
;
/* arg0 is the vector, arg1 the constant lane selector.  */
32863 arg0
= CALL_EXPR_ARG (exp
, 0);
32864 arg1
= CALL_EXPR_ARG (exp
, 1);
32866 op0
= expand_normal (arg0
);
/* Validate the selector against the vector's lane count (diagnoses
   out-of-range constants).  */
32867 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
/* TMODE = element mode, MODE0 = vector mode of the first argument.  */
32869 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32870 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
32871 gcc_assert (VECTOR_MODE_P (mode0
));
32873 op0
= force_reg (mode0
, op0
);
/* Use a fresh result register when optimizing or TARGET is unusable.  */
32875 if (optimize
|| !target
|| !register_operand (target
, tmode
))
32876 target
= gen_reg_rtx (tmode
);
/* Delegate the lane extraction to ix86_expand_vector_extract.  */
32878 ix86_expand_vector_extract (true, target
, op0
, elt
);
32883 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
32884 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
32885 a language-level syntax for referencing vector elements. */
/* NOTE(review): lossy extraction — the declaration of `elt' and the
   trailing "return target;" are not visible in this chunk.  */
32888 ix86_expand_vec_set_builtin (tree exp
)
32890 enum machine_mode tmode
, mode1
;
32891 tree arg0
, arg1
, arg2
;
32893 rtx op0
, op1
, target
;
/* arg0 = source vector, arg1 = new element value, arg2 = lane selector.  */
32895 arg0
= CALL_EXPR_ARG (exp
, 0);
32896 arg1
= CALL_EXPR_ARG (exp
, 1);
32897 arg2
= CALL_EXPR_ARG (exp
, 2);
/* TMODE = vector mode, MODE1 = element mode.  */
32899 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
32900 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
32901 gcc_assert (VECTOR_MODE_P (tmode
));
32903 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
32904 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
/* Validate the selector against the vector's lane count.  */
32905 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
/* Coerce the element value to the element mode if it came out in some
   other (non-VOID) mode.  */
32907 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
32908 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
32910 op0
= force_reg (tmode
, op0
);
32911 op1
= force_reg (mode1
, op1
);
32913 /* OP0 is the source of these builtin functions and shouldn't be
32914 modified. Create a copy, use it and return it as target. */
32915 target
= gen_reg_rtx (tmode
);
32916 emit_move_insn (target
, op0
);
/* Overwrite the selected lane of the copy in place.  */
32917 ix86_expand_vector_set (true, target
, op1
, elt
);
32922 /* Expand an expression EXP that calls a built-in function,
32923 with result going to TARGET if that's convenient
32924 (and in mode MODE if that's convenient).
32925 SUBTARGET may be used as the target for computing one of EXP's operands.
32926 IGNORE is nonzero if the value is to be ignored. */
32929 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget
,
32930 enum machine_mode mode
, int ignore
)
32932 const struct builtin_description
*d
;
32934 enum insn_code icode
;
32935 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
32936 tree arg0
, arg1
, arg2
, arg3
, arg4
;
32937 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
32938 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
32939 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
32941 /* For CPU builtins that can be folded, fold first and expand the fold. */
32944 case IX86_BUILTIN_CPU_INIT
:
32946 /* Make it call __cpu_indicator_init in libgcc. */
32947 tree call_expr
, fndecl
, type
;
32948 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
32949 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
32950 call_expr
= build_call_expr (fndecl
, 0);
32951 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
32953 case IX86_BUILTIN_CPU_IS
:
32954 case IX86_BUILTIN_CPU_SUPPORTS
:
32956 tree arg0
= CALL_EXPR_ARG (exp
, 0);
32957 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
32958 gcc_assert (fold_expr
!= NULL_TREE
);
32959 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
32963 /* Determine whether the builtin function is available under the current ISA.
32964 Originally the builtin was not created if it wasn't applicable to the
32965 current ISA based on the command line switches. With function specific
32966 options, we need to check in the context of the function making the call
32967 whether it is supported. */
32968 if (ix86_builtins_isa
[fcode
].isa
32969 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
32971 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
32972 NULL
, (enum fpmath_unit
) 0, false);
32975 error ("%qE needs unknown isa option", fndecl
);
32978 gcc_assert (opts
!= NULL
);
32979 error ("%qE needs isa option %s", fndecl
, opts
);
32987 case IX86_BUILTIN_MASKMOVQ
:
32988 case IX86_BUILTIN_MASKMOVDQU
:
32989 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
32990 ? CODE_FOR_mmx_maskmovq
32991 : CODE_FOR_sse2_maskmovdqu
);
32992 /* Note the arg order is different from the operand order. */
32993 arg1
= CALL_EXPR_ARG (exp
, 0);
32994 arg2
= CALL_EXPR_ARG (exp
, 1);
32995 arg0
= CALL_EXPR_ARG (exp
, 2);
32996 op0
= expand_normal (arg0
);
32997 op1
= expand_normal (arg1
);
32998 op2
= expand_normal (arg2
);
32999 mode0
= insn_data
[icode
].operand
[0].mode
;
33000 mode1
= insn_data
[icode
].operand
[1].mode
;
33001 mode2
= insn_data
[icode
].operand
[2].mode
;
33003 op0
= ix86_zero_extend_to_Pmode (op0
);
33004 op0
= gen_rtx_MEM (mode1
, op0
);
33006 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
33007 op0
= copy_to_mode_reg (mode0
, op0
);
33008 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
33009 op1
= copy_to_mode_reg (mode1
, op1
);
33010 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
33011 op2
= copy_to_mode_reg (mode2
, op2
);
33012 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
33018 case IX86_BUILTIN_LDMXCSR
:
33019 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
33020 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
33021 emit_move_insn (target
, op0
);
33022 emit_insn (gen_sse_ldmxcsr (target
));
33025 case IX86_BUILTIN_STMXCSR
:
33026 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
33027 emit_insn (gen_sse_stmxcsr (target
));
33028 return copy_to_mode_reg (SImode
, target
);
33030 case IX86_BUILTIN_CLFLUSH
:
33031 arg0
= CALL_EXPR_ARG (exp
, 0);
33032 op0
= expand_normal (arg0
);
33033 icode
= CODE_FOR_sse2_clflush
;
33034 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
33035 op0
= ix86_zero_extend_to_Pmode (op0
);
33037 emit_insn (gen_sse2_clflush (op0
));
33040 case IX86_BUILTIN_MONITOR
:
33041 arg0
= CALL_EXPR_ARG (exp
, 0);
33042 arg1
= CALL_EXPR_ARG (exp
, 1);
33043 arg2
= CALL_EXPR_ARG (exp
, 2);
33044 op0
= expand_normal (arg0
);
33045 op1
= expand_normal (arg1
);
33046 op2
= expand_normal (arg2
);
33048 op0
= ix86_zero_extend_to_Pmode (op0
);
33050 op1
= copy_to_mode_reg (SImode
, op1
);
33052 op2
= copy_to_mode_reg (SImode
, op2
);
33053 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
33056 case IX86_BUILTIN_MWAIT
:
33057 arg0
= CALL_EXPR_ARG (exp
, 0);
33058 arg1
= CALL_EXPR_ARG (exp
, 1);
33059 op0
= expand_normal (arg0
);
33060 op1
= expand_normal (arg1
);
33062 op0
= copy_to_mode_reg (SImode
, op0
);
33064 op1
= copy_to_mode_reg (SImode
, op1
);
33065 emit_insn (gen_sse3_mwait (op0
, op1
));
33068 case IX86_BUILTIN_VEC_INIT_V2SI
:
33069 case IX86_BUILTIN_VEC_INIT_V4HI
:
33070 case IX86_BUILTIN_VEC_INIT_V8QI
:
33071 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
33073 case IX86_BUILTIN_VEC_EXT_V2DF
:
33074 case IX86_BUILTIN_VEC_EXT_V2DI
:
33075 case IX86_BUILTIN_VEC_EXT_V4SF
:
33076 case IX86_BUILTIN_VEC_EXT_V4SI
:
33077 case IX86_BUILTIN_VEC_EXT_V8HI
:
33078 case IX86_BUILTIN_VEC_EXT_V2SI
:
33079 case IX86_BUILTIN_VEC_EXT_V4HI
:
33080 case IX86_BUILTIN_VEC_EXT_V16QI
:
33081 return ix86_expand_vec_ext_builtin (exp
, target
);
33083 case IX86_BUILTIN_VEC_SET_V2DI
:
33084 case IX86_BUILTIN_VEC_SET_V4SF
:
33085 case IX86_BUILTIN_VEC_SET_V4SI
:
33086 case IX86_BUILTIN_VEC_SET_V8HI
:
33087 case IX86_BUILTIN_VEC_SET_V4HI
:
33088 case IX86_BUILTIN_VEC_SET_V16QI
:
33089 return ix86_expand_vec_set_builtin (exp
);
33091 case IX86_BUILTIN_INFQ
:
33092 case IX86_BUILTIN_HUGE_VALQ
:
33094 REAL_VALUE_TYPE inf
;
33098 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
33100 tmp
= validize_mem (force_const_mem (mode
, tmp
));
33103 target
= gen_reg_rtx (mode
);
33105 emit_move_insn (target
, tmp
);
33109 case IX86_BUILTIN_RDPMC
:
33110 case IX86_BUILTIN_RDTSC
:
33111 case IX86_BUILTIN_RDTSCP
:
33113 op0
= gen_reg_rtx (DImode
);
33114 op1
= gen_reg_rtx (DImode
);
33116 if (fcode
== IX86_BUILTIN_RDPMC
)
33118 arg0
= CALL_EXPR_ARG (exp
, 0);
33119 op2
= expand_normal (arg0
);
33120 if (!register_operand (op2
, SImode
))
33121 op2
= copy_to_mode_reg (SImode
, op2
);
33123 insn
= (TARGET_64BIT
33124 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
33125 : gen_rdpmc (op0
, op2
));
33128 else if (fcode
== IX86_BUILTIN_RDTSC
)
33130 insn
= (TARGET_64BIT
33131 ? gen_rdtsc_rex64 (op0
, op1
)
33132 : gen_rdtsc (op0
));
33137 op2
= gen_reg_rtx (SImode
);
33139 insn
= (TARGET_64BIT
33140 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
33141 : gen_rdtscp (op0
, op2
));
33144 arg0
= CALL_EXPR_ARG (exp
, 0);
33145 op4
= expand_normal (arg0
);
33146 if (!address_operand (op4
, VOIDmode
))
33148 op4
= convert_memory_address (Pmode
, op4
);
33149 op4
= copy_addr_to_reg (op4
);
33151 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
33156 /* mode is VOIDmode if __builtin_rd* has been called
33158 if (mode
== VOIDmode
)
33160 target
= gen_reg_rtx (mode
);
33165 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
33166 op1
, 1, OPTAB_DIRECT
);
33167 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
33168 op0
, 1, OPTAB_DIRECT
);
33171 emit_move_insn (target
, op0
);
33174 case IX86_BUILTIN_FXSAVE
:
33175 case IX86_BUILTIN_FXRSTOR
:
33176 case IX86_BUILTIN_FXSAVE64
:
33177 case IX86_BUILTIN_FXRSTOR64
:
33178 case IX86_BUILTIN_FNSTENV
:
33179 case IX86_BUILTIN_FLDENV
:
33180 case IX86_BUILTIN_FNSTSW
:
33184 case IX86_BUILTIN_FXSAVE
:
33185 icode
= CODE_FOR_fxsave
;
33187 case IX86_BUILTIN_FXRSTOR
:
33188 icode
= CODE_FOR_fxrstor
;
33190 case IX86_BUILTIN_FXSAVE64
:
33191 icode
= CODE_FOR_fxsave64
;
33193 case IX86_BUILTIN_FXRSTOR64
:
33194 icode
= CODE_FOR_fxrstor64
;
33196 case IX86_BUILTIN_FNSTENV
:
33197 icode
= CODE_FOR_fnstenv
;
33199 case IX86_BUILTIN_FLDENV
:
33200 icode
= CODE_FOR_fldenv
;
33202 case IX86_BUILTIN_FNSTSW
:
33203 icode
= CODE_FOR_fnstsw
;
33207 gcc_unreachable ();
33210 arg0
= CALL_EXPR_ARG (exp
, 0);
33211 op0
= expand_normal (arg0
);
33213 if (!address_operand (op0
, VOIDmode
))
33215 op0
= convert_memory_address (Pmode
, op0
);
33216 op0
= copy_addr_to_reg (op0
);
33218 op0
= gen_rtx_MEM (mode0
, op0
);
33220 pat
= GEN_FCN (icode
) (op0
);
33225 case IX86_BUILTIN_XSAVE
:
33226 case IX86_BUILTIN_XRSTOR
:
33227 case IX86_BUILTIN_XSAVE64
:
33228 case IX86_BUILTIN_XRSTOR64
:
33229 case IX86_BUILTIN_XSAVEOPT
:
33230 case IX86_BUILTIN_XSAVEOPT64
:
33231 arg0
= CALL_EXPR_ARG (exp
, 0);
33232 arg1
= CALL_EXPR_ARG (exp
, 1);
33233 op0
= expand_normal (arg0
);
33234 op1
= expand_normal (arg1
);
33236 if (!address_operand (op0
, VOIDmode
))
33238 op0
= convert_memory_address (Pmode
, op0
);
33239 op0
= copy_addr_to_reg (op0
);
33241 op0
= gen_rtx_MEM (BLKmode
, op0
);
33243 op1
= force_reg (DImode
, op1
);
33247 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
33248 NULL
, 1, OPTAB_DIRECT
);
33251 case IX86_BUILTIN_XSAVE
:
33252 icode
= CODE_FOR_xsave_rex64
;
33254 case IX86_BUILTIN_XRSTOR
:
33255 icode
= CODE_FOR_xrstor_rex64
;
33257 case IX86_BUILTIN_XSAVE64
:
33258 icode
= CODE_FOR_xsave64
;
33260 case IX86_BUILTIN_XRSTOR64
:
33261 icode
= CODE_FOR_xrstor64
;
33263 case IX86_BUILTIN_XSAVEOPT
:
33264 icode
= CODE_FOR_xsaveopt_rex64
;
33266 case IX86_BUILTIN_XSAVEOPT64
:
33267 icode
= CODE_FOR_xsaveopt64
;
33270 gcc_unreachable ();
33273 op2
= gen_lowpart (SImode
, op2
);
33274 op1
= gen_lowpart (SImode
, op1
);
33275 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
33281 case IX86_BUILTIN_XSAVE
:
33282 icode
= CODE_FOR_xsave
;
33284 case IX86_BUILTIN_XRSTOR
:
33285 icode
= CODE_FOR_xrstor
;
33287 case IX86_BUILTIN_XSAVEOPT
:
33288 icode
= CODE_FOR_xsaveopt
;
33291 gcc_unreachable ();
33293 pat
= GEN_FCN (icode
) (op0
, op1
);
33300 case IX86_BUILTIN_LLWPCB
:
33301 arg0
= CALL_EXPR_ARG (exp
, 0);
33302 op0
= expand_normal (arg0
);
33303 icode
= CODE_FOR_lwp_llwpcb
;
33304 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
33305 op0
= ix86_zero_extend_to_Pmode (op0
);
33306 emit_insn (gen_lwp_llwpcb (op0
));
33309 case IX86_BUILTIN_SLWPCB
:
33310 icode
= CODE_FOR_lwp_slwpcb
;
33312 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
33313 target
= gen_reg_rtx (Pmode
);
33314 emit_insn (gen_lwp_slwpcb (target
));
33317 case IX86_BUILTIN_BEXTRI32
:
33318 case IX86_BUILTIN_BEXTRI64
:
33319 arg0
= CALL_EXPR_ARG (exp
, 0);
33320 arg1
= CALL_EXPR_ARG (exp
, 1);
33321 op0
= expand_normal (arg0
);
33322 op1
= expand_normal (arg1
);
33323 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
33324 ? CODE_FOR_tbm_bextri_si
33325 : CODE_FOR_tbm_bextri_di
);
33326 if (!CONST_INT_P (op1
))
33328 error ("last argument must be an immediate");
33333 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
33334 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
33335 op1
= GEN_INT (length
);
33336 op2
= GEN_INT (lsb_index
);
33337 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
33343 case IX86_BUILTIN_RDRAND16_STEP
:
33344 icode
= CODE_FOR_rdrandhi_1
;
33348 case IX86_BUILTIN_RDRAND32_STEP
:
33349 icode
= CODE_FOR_rdrandsi_1
;
33353 case IX86_BUILTIN_RDRAND64_STEP
:
33354 icode
= CODE_FOR_rdranddi_1
;
33358 op0
= gen_reg_rtx (mode0
);
33359 emit_insn (GEN_FCN (icode
) (op0
));
33361 arg0
= CALL_EXPR_ARG (exp
, 0);
33362 op1
= expand_normal (arg0
);
33363 if (!address_operand (op1
, VOIDmode
))
33365 op1
= convert_memory_address (Pmode
, op1
);
33366 op1
= copy_addr_to_reg (op1
);
33368 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33370 op1
= gen_reg_rtx (SImode
);
33371 emit_move_insn (op1
, CONST1_RTX (SImode
));
33373 /* Emit SImode conditional move. */
33374 if (mode0
== HImode
)
33376 op2
= gen_reg_rtx (SImode
);
33377 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
33379 else if (mode0
== SImode
)
33382 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
33385 target
= gen_reg_rtx (SImode
);
33387 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33389 emit_insn (gen_rtx_SET (VOIDmode
, target
,
33390 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
33393 case IX86_BUILTIN_RDSEED16_STEP
:
33394 icode
= CODE_FOR_rdseedhi_1
;
33398 case IX86_BUILTIN_RDSEED32_STEP
:
33399 icode
= CODE_FOR_rdseedsi_1
;
33403 case IX86_BUILTIN_RDSEED64_STEP
:
33404 icode
= CODE_FOR_rdseeddi_1
;
33408 op0
= gen_reg_rtx (mode0
);
33409 emit_insn (GEN_FCN (icode
) (op0
));
33411 arg0
= CALL_EXPR_ARG (exp
, 0);
33412 op1
= expand_normal (arg0
);
33413 if (!address_operand (op1
, VOIDmode
))
33415 op1
= convert_memory_address (Pmode
, op1
);
33416 op1
= copy_addr_to_reg (op1
);
33418 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
33420 op2
= gen_reg_rtx (QImode
);
33422 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
33424 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
33427 target
= gen_reg_rtx (SImode
);
33429 emit_insn (gen_zero_extendqisi2 (target
, op2
));
33432 case IX86_BUILTIN_ADDCARRYX32
:
33433 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
33437 case IX86_BUILTIN_ADDCARRYX64
:
33438 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
33442 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
33443 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
33444 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
33445 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
33447 op0
= gen_reg_rtx (QImode
);
33449 /* Generate CF from input operand. */
33450 op1
= expand_normal (arg0
);
33451 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
33452 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
33454 /* Gen ADCX instruction to compute X+Y+CF. */
33455 op2
= expand_normal (arg1
);
33456 op3
= expand_normal (arg2
);
33459 op2
= copy_to_mode_reg (mode0
, op2
);
33461 op3
= copy_to_mode_reg (mode0
, op3
);
33463 op0
= gen_reg_rtx (mode0
);
33465 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
33466 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
33467 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
33469 /* Store the result. */
33470 op4
= expand_normal (arg3
);
33471 if (!address_operand (op4
, VOIDmode
))
33473 op4
= convert_memory_address (Pmode
, op4
);
33474 op4
= copy_addr_to_reg (op4
);
33476 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
33478 /* Return current CF value. */
33480 target
= gen_reg_rtx (QImode
);
33482 PUT_MODE (pat
, QImode
);
33483 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
33486 case IX86_BUILTIN_READ_FLAGS
:
33487 emit_insn (gen_push (gen_rtx_REG (word_mode
, FLAGS_REG
)));
33490 || target
== NULL_RTX
33491 || !nonimmediate_operand (target
, word_mode
)
33492 || GET_MODE (target
) != word_mode
)
33493 target
= gen_reg_rtx (word_mode
);
33495 emit_insn (gen_pop (target
));
33498 case IX86_BUILTIN_WRITE_FLAGS
:
33500 arg0
= CALL_EXPR_ARG (exp
, 0);
33501 op0
= expand_normal (arg0
);
33502 if (!general_no_elim_operand (op0
, word_mode
))
33503 op0
= copy_to_mode_reg (word_mode
, op0
);
33505 emit_insn (gen_push (op0
));
33506 emit_insn (gen_pop (gen_rtx_REG (word_mode
, FLAGS_REG
)));
33509 case IX86_BUILTIN_GATHERSIV2DF
:
33510 icode
= CODE_FOR_avx2_gathersiv2df
;
33512 case IX86_BUILTIN_GATHERSIV4DF
:
33513 icode
= CODE_FOR_avx2_gathersiv4df
;
33515 case IX86_BUILTIN_GATHERDIV2DF
:
33516 icode
= CODE_FOR_avx2_gatherdiv2df
;
33518 case IX86_BUILTIN_GATHERDIV4DF
:
33519 icode
= CODE_FOR_avx2_gatherdiv4df
;
33521 case IX86_BUILTIN_GATHERSIV4SF
:
33522 icode
= CODE_FOR_avx2_gathersiv4sf
;
33524 case IX86_BUILTIN_GATHERSIV8SF
:
33525 icode
= CODE_FOR_avx2_gathersiv8sf
;
33527 case IX86_BUILTIN_GATHERDIV4SF
:
33528 icode
= CODE_FOR_avx2_gatherdiv4sf
;
33530 case IX86_BUILTIN_GATHERDIV8SF
:
33531 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33533 case IX86_BUILTIN_GATHERSIV2DI
:
33534 icode
= CODE_FOR_avx2_gathersiv2di
;
33536 case IX86_BUILTIN_GATHERSIV4DI
:
33537 icode
= CODE_FOR_avx2_gathersiv4di
;
33539 case IX86_BUILTIN_GATHERDIV2DI
:
33540 icode
= CODE_FOR_avx2_gatherdiv2di
;
33542 case IX86_BUILTIN_GATHERDIV4DI
:
33543 icode
= CODE_FOR_avx2_gatherdiv4di
;
33545 case IX86_BUILTIN_GATHERSIV4SI
:
33546 icode
= CODE_FOR_avx2_gathersiv4si
;
33548 case IX86_BUILTIN_GATHERSIV8SI
:
33549 icode
= CODE_FOR_avx2_gathersiv8si
;
33551 case IX86_BUILTIN_GATHERDIV4SI
:
33552 icode
= CODE_FOR_avx2_gatherdiv4si
;
33554 case IX86_BUILTIN_GATHERDIV8SI
:
33555 icode
= CODE_FOR_avx2_gatherdiv8si
;
33557 case IX86_BUILTIN_GATHERALTSIV4DF
:
33558 icode
= CODE_FOR_avx2_gathersiv4df
;
33560 case IX86_BUILTIN_GATHERALTDIV8SF
:
33561 icode
= CODE_FOR_avx2_gatherdiv8sf
;
33563 case IX86_BUILTIN_GATHERALTSIV4DI
:
33564 icode
= CODE_FOR_avx2_gathersiv4di
;
33566 case IX86_BUILTIN_GATHERALTDIV8SI
:
33567 icode
= CODE_FOR_avx2_gatherdiv8si
;
33571 arg0
= CALL_EXPR_ARG (exp
, 0);
33572 arg1
= CALL_EXPR_ARG (exp
, 1);
33573 arg2
= CALL_EXPR_ARG (exp
, 2);
33574 arg3
= CALL_EXPR_ARG (exp
, 3);
33575 arg4
= CALL_EXPR_ARG (exp
, 4);
33576 op0
= expand_normal (arg0
);
33577 op1
= expand_normal (arg1
);
33578 op2
= expand_normal (arg2
);
33579 op3
= expand_normal (arg3
);
33580 op4
= expand_normal (arg4
);
33581 /* Note the arg order is different from the operand order. */
33582 mode0
= insn_data
[icode
].operand
[1].mode
;
33583 mode2
= insn_data
[icode
].operand
[3].mode
;
33584 mode3
= insn_data
[icode
].operand
[4].mode
;
33585 mode4
= insn_data
[icode
].operand
[5].mode
;
33587 if (target
== NULL_RTX
33588 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
33589 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
33591 subtarget
= target
;
33593 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
33594 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
33596 rtx half
= gen_reg_rtx (V4SImode
);
33597 if (!nonimmediate_operand (op2
, V8SImode
))
33598 op2
= copy_to_mode_reg (V8SImode
, op2
);
33599 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
33602 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
33603 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
33605 rtx (*gen
) (rtx
, rtx
);
33606 rtx half
= gen_reg_rtx (mode0
);
33607 if (mode0
== V4SFmode
)
33608 gen
= gen_vec_extract_lo_v8sf
;
33610 gen
= gen_vec_extract_lo_v8si
;
33611 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
33612 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
33613 emit_insn (gen (half
, op0
));
33615 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
33616 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
33617 emit_insn (gen (half
, op3
));
33621 /* Force memory operand only with base register here. But we
33622 don't want to do it on memory operand for other builtin
33624 op1
= ix86_zero_extend_to_Pmode (op1
);
33626 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
33627 op0
= copy_to_mode_reg (mode0
, op0
);
33628 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
33629 op1
= copy_to_mode_reg (Pmode
, op1
);
33630 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
33631 op2
= copy_to_mode_reg (mode2
, op2
);
33632 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
33633 op3
= copy_to_mode_reg (mode3
, op3
);
33634 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
33636 error ("last argument must be scale 1, 2, 4, 8");
33640 /* Optimize. If mask is known to have all high bits set,
33641 replace op0 with pc_rtx to signal that the instruction
33642 overwrites the whole destination and doesn't use its
33643 previous contents. */
33646 if (TREE_CODE (arg3
) == VECTOR_CST
)
33648 unsigned int negative
= 0;
33649 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
33651 tree cst
= VECTOR_CST_ELT (arg3
, i
);
33652 if (TREE_CODE (cst
) == INTEGER_CST
33653 && tree_int_cst_sign_bit (cst
))
33655 else if (TREE_CODE (cst
) == REAL_CST
33656 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
33659 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
33662 else if (TREE_CODE (arg3
) == SSA_NAME
)
33664 /* Recognize also when mask is like:
33665 __v2df src = _mm_setzero_pd ();
33666 __v2df mask = _mm_cmpeq_pd (src, src);
33668 __v8sf src = _mm256_setzero_ps ();
33669 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
33670 as that is a cheaper way to load all ones into
33671 a register than having to load a constant from
33673 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
33674 if (is_gimple_call (def_stmt
))
33676 tree fndecl
= gimple_call_fndecl (def_stmt
);
33678 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
33679 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
33681 case IX86_BUILTIN_CMPPD
:
33682 case IX86_BUILTIN_CMPPS
:
33683 case IX86_BUILTIN_CMPPD256
:
33684 case IX86_BUILTIN_CMPPS256
:
33685 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
33688 case IX86_BUILTIN_CMPEQPD
:
33689 case IX86_BUILTIN_CMPEQPS
:
33690 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
33691 && initializer_zerop (gimple_call_arg (def_stmt
,
33702 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
33707 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
33708 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
33710 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
33711 ? V4SFmode
: V4SImode
;
33712 if (target
== NULL_RTX
)
33713 target
= gen_reg_rtx (tmode
);
33714 if (tmode
== V4SFmode
)
33715 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
33717 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
33720 target
= subtarget
;
33724 case IX86_BUILTIN_XABORT
:
33725 icode
= CODE_FOR_xabort
;
33726 arg0
= CALL_EXPR_ARG (exp
, 0);
33727 op0
= expand_normal (arg0
);
33728 mode0
= insn_data
[icode
].operand
[0].mode
;
33729 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
33731 error ("the xabort's argument must be an 8-bit immediate");
33734 emit_insn (gen_xabort (op0
));
33741 for (i
= 0, d
= bdesc_special_args
;
33742 i
< ARRAY_SIZE (bdesc_special_args
);
33744 if (d
->code
== fcode
)
33745 return ix86_expand_special_args_builtin (d
, exp
, target
);
33747 for (i
= 0, d
= bdesc_args
;
33748 i
< ARRAY_SIZE (bdesc_args
);
33750 if (d
->code
== fcode
)
33753 case IX86_BUILTIN_FABSQ
:
33754 case IX86_BUILTIN_COPYSIGNQ
:
33756 /* Emit a normal call if SSE isn't available. */
33757 return expand_call (exp
, target
, ignore
);
33759 return ix86_expand_args_builtin (d
, exp
, target
);
33762 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
33763 if (d
->code
== fcode
)
33764 return ix86_expand_sse_comi (d
, exp
, target
);
33766 for (i
= 0, d
= bdesc_pcmpestr
;
33767 i
< ARRAY_SIZE (bdesc_pcmpestr
);
33769 if (d
->code
== fcode
)
33770 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
33772 for (i
= 0, d
= bdesc_pcmpistr
;
33773 i
< ARRAY_SIZE (bdesc_pcmpistr
);
33775 if (d
->code
== fcode
)
33776 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
33778 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
33779 if (d
->code
== fcode
)
33780 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
33781 (enum ix86_builtin_func_type
)
33782 d
->flag
, d
->comparison
);
33784 gcc_unreachable ();
33787 /* This returns the target-specific builtin with code CODE if
33788 current_function_decl has visibility on this builtin, which is checked
33789 using isa flags. Returns NULL_TREE otherwise. */
33791 static tree
ix86_get_builtin (enum ix86_builtins code
)
33793 struct cl_target_option
*opts
;
33794 tree target_tree
= NULL_TREE
;
33796 /* Determine the isa flags of current_function_decl. */
33798 if (current_function_decl
)
33799 target_tree
= DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
);
33801 if (target_tree
== NULL
)
33802 target_tree
= target_option_default_node
;
33804 opts
= TREE_TARGET_OPTION (target_tree
);
33806 if (ix86_builtins_isa
[(int) code
].isa
& opts
->x_ix86_isa_flags
)
33807 return ix86_builtin_decl (code
, true);
33812 /* Returns a function decl for a vectorized version of the builtin function
33813 with builtin function code FN and the result vector type TYPE, or NULL_TREE
33814 if it is not available. */
33817 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
33820 enum machine_mode in_mode
, out_mode
;
33822 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
33824 if (TREE_CODE (type_out
) != VECTOR_TYPE
33825 || TREE_CODE (type_in
) != VECTOR_TYPE
33826 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
33829 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
33830 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
33831 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
33832 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
33836 case BUILT_IN_SQRT
:
33837 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33839 if (out_n
== 2 && in_n
== 2)
33840 return ix86_get_builtin (IX86_BUILTIN_SQRTPD
);
33841 else if (out_n
== 4 && in_n
== 4)
33842 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256
);
33846 case BUILT_IN_SQRTF
:
33847 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33849 if (out_n
== 4 && in_n
== 4)
33850 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR
);
33851 else if (out_n
== 8 && in_n
== 8)
33852 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256
);
33856 case BUILT_IN_IFLOOR
:
33857 case BUILT_IN_LFLOOR
:
33858 case BUILT_IN_LLFLOOR
:
33859 /* The round insn does not trap on denormals. */
33860 if (flag_trapping_math
|| !TARGET_ROUND
)
33863 if (out_mode
== SImode
&& in_mode
== DFmode
)
33865 if (out_n
== 4 && in_n
== 2)
33866 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
);
33867 else if (out_n
== 8 && in_n
== 4)
33868 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
);
33872 case BUILT_IN_IFLOORF
:
33873 case BUILT_IN_LFLOORF
:
33874 case BUILT_IN_LLFLOORF
:
33875 /* The round insn does not trap on denormals. */
33876 if (flag_trapping_math
|| !TARGET_ROUND
)
33879 if (out_mode
== SImode
&& in_mode
== SFmode
)
33881 if (out_n
== 4 && in_n
== 4)
33882 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX
);
33883 else if (out_n
== 8 && in_n
== 8)
33884 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256
);
33888 case BUILT_IN_ICEIL
:
33889 case BUILT_IN_LCEIL
:
33890 case BUILT_IN_LLCEIL
:
33891 /* The round insn does not trap on denormals. */
33892 if (flag_trapping_math
|| !TARGET_ROUND
)
33895 if (out_mode
== SImode
&& in_mode
== DFmode
)
33897 if (out_n
== 4 && in_n
== 2)
33898 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
);
33899 else if (out_n
== 8 && in_n
== 4)
33900 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
);
33904 case BUILT_IN_ICEILF
:
33905 case BUILT_IN_LCEILF
:
33906 case BUILT_IN_LLCEILF
:
33907 /* The round insn does not trap on denormals. */
33908 if (flag_trapping_math
|| !TARGET_ROUND
)
33911 if (out_mode
== SImode
&& in_mode
== SFmode
)
33913 if (out_n
== 4 && in_n
== 4)
33914 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX
);
33915 else if (out_n
== 8 && in_n
== 8)
33916 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256
);
33920 case BUILT_IN_IRINT
:
33921 case BUILT_IN_LRINT
:
33922 case BUILT_IN_LLRINT
:
33923 if (out_mode
== SImode
&& in_mode
== DFmode
)
33925 if (out_n
== 4 && in_n
== 2)
33926 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX
);
33927 else if (out_n
== 8 && in_n
== 4)
33928 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256
);
33932 case BUILT_IN_IRINTF
:
33933 case BUILT_IN_LRINTF
:
33934 case BUILT_IN_LLRINTF
:
33935 if (out_mode
== SImode
&& in_mode
== SFmode
)
33937 if (out_n
== 4 && in_n
== 4)
33938 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ
);
33939 else if (out_n
== 8 && in_n
== 8)
33940 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256
);
33944 case BUILT_IN_IROUND
:
33945 case BUILT_IN_LROUND
:
33946 case BUILT_IN_LLROUND
:
33947 /* The round insn does not trap on denormals. */
33948 if (flag_trapping_math
|| !TARGET_ROUND
)
33951 if (out_mode
== SImode
&& in_mode
== DFmode
)
33953 if (out_n
== 4 && in_n
== 2)
33954 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
);
33955 else if (out_n
== 8 && in_n
== 4)
33956 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
);
33960 case BUILT_IN_IROUNDF
:
33961 case BUILT_IN_LROUNDF
:
33962 case BUILT_IN_LLROUNDF
:
33963 /* The round insn does not trap on denormals. */
33964 if (flag_trapping_math
|| !TARGET_ROUND
)
33967 if (out_mode
== SImode
&& in_mode
== SFmode
)
33969 if (out_n
== 4 && in_n
== 4)
33970 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX
);
33971 else if (out_n
== 8 && in_n
== 8)
33972 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256
);
33976 case BUILT_IN_COPYSIGN
:
33977 if (out_mode
== DFmode
&& in_mode
== DFmode
)
33979 if (out_n
== 2 && in_n
== 2)
33980 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD
);
33981 else if (out_n
== 4 && in_n
== 4)
33982 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256
);
33986 case BUILT_IN_COPYSIGNF
:
33987 if (out_mode
== SFmode
&& in_mode
== SFmode
)
33989 if (out_n
== 4 && in_n
== 4)
33990 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS
);
33991 else if (out_n
== 8 && in_n
== 8)
33992 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256
);
33996 case BUILT_IN_FLOOR
:
33997 /* The round insn does not trap on denormals. */
33998 if (flag_trapping_math
|| !TARGET_ROUND
)
34001 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34003 if (out_n
== 2 && in_n
== 2)
34004 return ix86_get_builtin (IX86_BUILTIN_FLOORPD
);
34005 else if (out_n
== 4 && in_n
== 4)
34006 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256
);
34010 case BUILT_IN_FLOORF
:
34011 /* The round insn does not trap on denormals. */
34012 if (flag_trapping_math
|| !TARGET_ROUND
)
34015 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34017 if (out_n
== 4 && in_n
== 4)
34018 return ix86_get_builtin (IX86_BUILTIN_FLOORPS
);
34019 else if (out_n
== 8 && in_n
== 8)
34020 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256
);
34024 case BUILT_IN_CEIL
:
34025 /* The round insn does not trap on denormals. */
34026 if (flag_trapping_math
|| !TARGET_ROUND
)
34029 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34031 if (out_n
== 2 && in_n
== 2)
34032 return ix86_get_builtin (IX86_BUILTIN_CEILPD
);
34033 else if (out_n
== 4 && in_n
== 4)
34034 return ix86_get_builtin (IX86_BUILTIN_CEILPD256
);
34038 case BUILT_IN_CEILF
:
34039 /* The round insn does not trap on denormals. */
34040 if (flag_trapping_math
|| !TARGET_ROUND
)
34043 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34045 if (out_n
== 4 && in_n
== 4)
34046 return ix86_get_builtin (IX86_BUILTIN_CEILPS
);
34047 else if (out_n
== 8 && in_n
== 8)
34048 return ix86_get_builtin (IX86_BUILTIN_CEILPS256
);
34052 case BUILT_IN_TRUNC
:
34053 /* The round insn does not trap on denormals. */
34054 if (flag_trapping_math
|| !TARGET_ROUND
)
34057 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34059 if (out_n
== 2 && in_n
== 2)
34060 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD
);
34061 else if (out_n
== 4 && in_n
== 4)
34062 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256
);
34066 case BUILT_IN_TRUNCF
:
34067 /* The round insn does not trap on denormals. */
34068 if (flag_trapping_math
|| !TARGET_ROUND
)
34071 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34073 if (out_n
== 4 && in_n
== 4)
34074 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS
);
34075 else if (out_n
== 8 && in_n
== 8)
34076 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256
);
34080 case BUILT_IN_RINT
:
34081 /* The round insn does not trap on denormals. */
34082 if (flag_trapping_math
|| !TARGET_ROUND
)
34085 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34087 if (out_n
== 2 && in_n
== 2)
34088 return ix86_get_builtin (IX86_BUILTIN_RINTPD
);
34089 else if (out_n
== 4 && in_n
== 4)
34090 return ix86_get_builtin (IX86_BUILTIN_RINTPD256
);
34094 case BUILT_IN_RINTF
:
34095 /* The round insn does not trap on denormals. */
34096 if (flag_trapping_math
|| !TARGET_ROUND
)
34099 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34101 if (out_n
== 4 && in_n
== 4)
34102 return ix86_get_builtin (IX86_BUILTIN_RINTPS
);
34103 else if (out_n
== 8 && in_n
== 8)
34104 return ix86_get_builtin (IX86_BUILTIN_RINTPS256
);
34108 case BUILT_IN_ROUND
:
34109 /* The round insn does not trap on denormals. */
34110 if (flag_trapping_math
|| !TARGET_ROUND
)
34113 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34115 if (out_n
== 2 && in_n
== 2)
34116 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ
);
34117 else if (out_n
== 4 && in_n
== 4)
34118 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256
);
34122 case BUILT_IN_ROUNDF
:
34123 /* The round insn does not trap on denormals. */
34124 if (flag_trapping_math
|| !TARGET_ROUND
)
34127 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34129 if (out_n
== 4 && in_n
== 4)
34130 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ
);
34131 else if (out_n
== 8 && in_n
== 8)
34132 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256
);
34137 if (out_mode
== DFmode
&& in_mode
== DFmode
)
34139 if (out_n
== 2 && in_n
== 2)
34140 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD
);
34141 if (out_n
== 4 && in_n
== 4)
34142 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256
);
34146 case BUILT_IN_FMAF
:
34147 if (out_mode
== SFmode
&& in_mode
== SFmode
)
34149 if (out_n
== 4 && in_n
== 4)
34150 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS
);
34151 if (out_n
== 8 && in_n
== 8)
34152 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256
);
34160 /* Dispatch to a handler for a vectorization library. */
34161 if (ix86_veclib_handler
)
34162 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
34168 /* Handler for an SVML-style interface to
34169 a library with vectorized intrinsics. */
34172 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
34175 tree fntype
, new_fndecl
, args
;
34178 enum machine_mode el_mode
, in_mode
;
34181 /* The SVML is suitable for unsafe math only. */
34182 if (!flag_unsafe_math_optimizations
)
34185 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
34186 n
= TYPE_VECTOR_SUBPARTS (type_out
);
34187 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
34188 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
34189 if (el_mode
!= in_mode
34197 case BUILT_IN_LOG10
:
34199 case BUILT_IN_TANH
:
34201 case BUILT_IN_ATAN
:
34202 case BUILT_IN_ATAN2
:
34203 case BUILT_IN_ATANH
:
34204 case BUILT_IN_CBRT
:
34205 case BUILT_IN_SINH
:
34207 case BUILT_IN_ASINH
:
34208 case BUILT_IN_ASIN
:
34209 case BUILT_IN_COSH
:
34211 case BUILT_IN_ACOSH
:
34212 case BUILT_IN_ACOS
:
34213 if (el_mode
!= DFmode
|| n
!= 2)
34217 case BUILT_IN_EXPF
:
34218 case BUILT_IN_LOGF
:
34219 case BUILT_IN_LOG10F
:
34220 case BUILT_IN_POWF
:
34221 case BUILT_IN_TANHF
:
34222 case BUILT_IN_TANF
:
34223 case BUILT_IN_ATANF
:
34224 case BUILT_IN_ATAN2F
:
34225 case BUILT_IN_ATANHF
:
34226 case BUILT_IN_CBRTF
:
34227 case BUILT_IN_SINHF
:
34228 case BUILT_IN_SINF
:
34229 case BUILT_IN_ASINHF
:
34230 case BUILT_IN_ASINF
:
34231 case BUILT_IN_COSHF
:
34232 case BUILT_IN_COSF
:
34233 case BUILT_IN_ACOSHF
:
34234 case BUILT_IN_ACOSF
:
34235 if (el_mode
!= SFmode
|| n
!= 4)
34243 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34245 if (fn
== BUILT_IN_LOGF
)
34246 strcpy (name
, "vmlsLn4");
34247 else if (fn
== BUILT_IN_LOG
)
34248 strcpy (name
, "vmldLn2");
34251 sprintf (name
, "vmls%s", bname
+10);
34252 name
[strlen (name
)-1] = '4';
34255 sprintf (name
, "vmld%s2", bname
+10);
34257 /* Convert to uppercase. */
34261 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34263 args
= TREE_CHAIN (args
))
34267 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34269 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34271 /* Build a function declaration for the vectorized function. */
34272 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34273 FUNCTION_DECL
, get_identifier (name
), fntype
);
34274 TREE_PUBLIC (new_fndecl
) = 1;
34275 DECL_EXTERNAL (new_fndecl
) = 1;
34276 DECL_IS_NOVOPS (new_fndecl
) = 1;
34277 TREE_READONLY (new_fndecl
) = 1;
34282 /* Handler for an ACML-style interface to
34283 a library with vectorized intrinsics. */
34286 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
34288 char name
[20] = "__vr.._";
34289 tree fntype
, new_fndecl
, args
;
34292 enum machine_mode el_mode
, in_mode
;
34295 /* The ACML is 64bits only and suitable for unsafe math only as
34296 it does not correctly support parts of IEEE with the required
34297 precision such as denormals. */
34299 || !flag_unsafe_math_optimizations
)
34302 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
34303 n
= TYPE_VECTOR_SUBPARTS (type_out
);
34304 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
34305 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
34306 if (el_mode
!= in_mode
34316 case BUILT_IN_LOG2
:
34317 case BUILT_IN_LOG10
:
34320 if (el_mode
!= DFmode
34325 case BUILT_IN_SINF
:
34326 case BUILT_IN_COSF
:
34327 case BUILT_IN_EXPF
:
34328 case BUILT_IN_POWF
:
34329 case BUILT_IN_LOGF
:
34330 case BUILT_IN_LOG2F
:
34331 case BUILT_IN_LOG10F
:
34334 if (el_mode
!= SFmode
34343 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
34344 sprintf (name
+ 7, "%s", bname
+10);
34347 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
34349 args
= TREE_CHAIN (args
))
34353 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
34355 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
34357 /* Build a function declaration for the vectorized function. */
34358 new_fndecl
= build_decl (BUILTINS_LOCATION
,
34359 FUNCTION_DECL
, get_identifier (name
), fntype
);
34360 TREE_PUBLIC (new_fndecl
) = 1;
34361 DECL_EXTERNAL (new_fndecl
) = 1;
34362 DECL_IS_NOVOPS (new_fndecl
) = 1;
34363 TREE_READONLY (new_fndecl
) = 1;
34368 /* Returns a decl of a function that implements gather load with
34369 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
34370 Return NULL_TREE if it is not available. */
34373 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
34374 const_tree index_type
, int scale
)
34377 enum ix86_builtins code
;
34382 if ((TREE_CODE (index_type
) != INTEGER_TYPE
34383 && !POINTER_TYPE_P (index_type
))
34384 || (TYPE_MODE (index_type
) != SImode
34385 && TYPE_MODE (index_type
) != DImode
))
34388 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
34391 /* v*gather* insn sign extends index to pointer mode. */
34392 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
34393 && TYPE_UNSIGNED (index_type
))
34398 || (scale
& (scale
- 1)) != 0)
34401 si
= TYPE_MODE (index_type
) == SImode
;
34402 switch (TYPE_MODE (mem_vectype
))
34405 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
34408 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
34411 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
34414 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
34417 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
34420 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
34423 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
34426 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
34432 return ix86_get_builtin (code
);
34435 /* Returns a code for a target-specific builtin that implements
34436 reciprocal of the function, or NULL_TREE if not available. */
34439 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
34440 bool sqrt ATTRIBUTE_UNUSED
)
34442 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
34443 && flag_finite_math_only
&& !flag_trapping_math
34444 && flag_unsafe_math_optimizations
))
34448 /* Machine dependent builtins. */
34451 /* Vectorized version of sqrt to rsqrt conversion. */
34452 case IX86_BUILTIN_SQRTPS_NR
:
34453 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR
);
34455 case IX86_BUILTIN_SQRTPS_NR256
:
34456 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256
);
34462 /* Normal builtins. */
34465 /* Sqrt to rsqrt conversion. */
34466 case BUILT_IN_SQRTF
:
34467 return ix86_get_builtin (IX86_BUILTIN_RSQRTF
);
34474 /* Helper for avx_vpermilps256_operand et al. This is also used by
34475 the expansion functions to turn the parallel back into a mask.
34476 The return value is 0 for no match and the imm8+1 for a match. */
34479 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
34481 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
34483 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34485 if (XVECLEN (par
, 0) != (int) nelt
)
34488 /* Validate that all of the elements are constants, and not totally
34489 out of range. Copy the data into an integral array to make the
34490 subsequent checks easier. */
34491 for (i
= 0; i
< nelt
; ++i
)
34493 rtx er
= XVECEXP (par
, 0, i
);
34494 unsigned HOST_WIDE_INT ei
;
34496 if (!CONST_INT_P (er
))
34507 /* In the 256-bit DFmode case, we can only move elements within
34509 for (i
= 0; i
< 2; ++i
)
34513 mask
|= ipar
[i
] << i
;
34515 for (i
= 2; i
< 4; ++i
)
34519 mask
|= (ipar
[i
] - 2) << i
;
34524 /* In the 256-bit SFmode case, we have full freedom of movement
34525 within the low 128-bit lane, but the high 128-bit lane must
34526 mirror the exact same pattern. */
34527 for (i
= 0; i
< 4; ++i
)
34528 if (ipar
[i
] + 4 != ipar
[i
+ 4])
34535 /* In the 128-bit case, we've full freedom in the placement of
34536 the elements from the source operand. */
34537 for (i
= 0; i
< nelt
; ++i
)
34538 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
34542 gcc_unreachable ();
34545 /* Make sure success has a non-zero value by adding one. */
34549 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
34550 the expansion functions to turn the parallel back into a mask.
34551 The return value is 0 for no match and the imm8+1 for a match. */
34554 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
34556 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
34558 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
34560 if (XVECLEN (par
, 0) != (int) nelt
)
34563 /* Validate that all of the elements are constants, and not totally
34564 out of range. Copy the data into an integral array to make the
34565 subsequent checks easier. */
34566 for (i
= 0; i
< nelt
; ++i
)
34568 rtx er
= XVECEXP (par
, 0, i
);
34569 unsigned HOST_WIDE_INT ei
;
34571 if (!CONST_INT_P (er
))
34574 if (ei
>= 2 * nelt
)
34579 /* Validate that the halves of the permute are halves. */
34580 for (i
= 0; i
< nelt2
- 1; ++i
)
34581 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34583 for (i
= nelt2
; i
< nelt
- 1; ++i
)
34584 if (ipar
[i
] + 1 != ipar
[i
+ 1])
34587 /* Reconstruct the mask. */
34588 for (i
= 0; i
< 2; ++i
)
34590 unsigned e
= ipar
[i
* nelt2
];
34594 mask
|= e
<< (i
* 4);
34597 /* Make sure success has a non-zero value by adding one. */
34601 /* Store OPERAND to the memory after reload is completed. This means
34602 that we can't easily use assign_stack_local. */
34604 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
34608 gcc_assert (reload_completed
);
34609 if (ix86_using_red_zone ())
34611 result
= gen_rtx_MEM (mode
,
34612 gen_rtx_PLUS (Pmode
,
34614 GEN_INT (-RED_ZONE_SIZE
)));
34615 emit_move_insn (result
, operand
);
34617 else if (TARGET_64BIT
)
34623 operand
= gen_lowpart (DImode
, operand
);
34627 gen_rtx_SET (VOIDmode
,
34628 gen_rtx_MEM (DImode
,
34629 gen_rtx_PRE_DEC (DImode
,
34630 stack_pointer_rtx
)),
34634 gcc_unreachable ();
34636 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34645 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
34647 gen_rtx_SET (VOIDmode
,
34648 gen_rtx_MEM (SImode
,
34649 gen_rtx_PRE_DEC (Pmode
,
34650 stack_pointer_rtx
)),
34653 gen_rtx_SET (VOIDmode
,
34654 gen_rtx_MEM (SImode
,
34655 gen_rtx_PRE_DEC (Pmode
,
34656 stack_pointer_rtx
)),
34661 /* Store HImodes as SImodes. */
34662 operand
= gen_lowpart (SImode
, operand
);
34666 gen_rtx_SET (VOIDmode
,
34667 gen_rtx_MEM (GET_MODE (operand
),
34668 gen_rtx_PRE_DEC (SImode
,
34669 stack_pointer_rtx
)),
34673 gcc_unreachable ();
34675 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
34680 /* Free operand from the memory. */
34682 ix86_free_from_memory (enum machine_mode mode
)
34684 if (!ix86_using_red_zone ())
34688 if (mode
== DImode
|| TARGET_64BIT
)
34692 /* Use LEA to deallocate stack space. In peephole2 it will be converted
34693 to pop or add instruction if registers are available. */
34694 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
34695 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
34700 /* Return a register priority for hard reg REGNO. */
/* NOTE(review): the return values for each branch are missing from this
   garbled extract (numbers jump 34707 -> 34709 -> 34711); only the
   conditions survive.  Kept byte-identical.  */
34702 ix86_register_priority (int hard_regno
)
34704 /* ebp and r13 as the base always wants a displacement, r12 as the
34705 base always wants an index. So discourage their usage in an
34707 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
34709 if (hard_regno
== BP_REG
)
34711 /* New x86-64 int registers result in bigger code size. Discourage
34713 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
34715 /* New x86-64 SSE registers result in bigger code size. Discourage
34717 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
34719 /* Usage of AX register results in smaller code. Prefer it. */
34720 if (hard_regno
== 0)
34725 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
34727 Put float CONST_DOUBLE in the constant pool instead of fp regs.
34728 QImode must go into class Q_REGS.
34729 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
34730 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): garbled extract -- several return statements and the
   function's return type line are missing (embedded numbers jump
   34739 -> 34742, 34743 -> 34746, ...).  Kept byte-identical.  */
34733 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
34735 enum machine_mode mode
= GET_MODE (x
);
34737 /* We're only allowed to return a subclass of CLASS. Many of the
34738 following checks fail for NO_REGS, so eliminate that early. */
34739 if (regclass
== NO_REGS
)
34742 /* All classes can load zeros. */
34743 if (x
== CONST0_RTX (mode
))
34746 /* Force constants into memory if we are loading a (nonzero) constant into
34747 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
34748 instructions to load from a constant. */
34750 && (MAYBE_MMX_CLASS_P (regclass
)
34751 || MAYBE_SSE_CLASS_P (regclass
)
34752 || MAYBE_MASK_CLASS_P (regclass
)))
34755 /* Prefer SSE regs only, if we can use them for math. */
34756 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
34757 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
34759 /* Floating-point constants need more complex checks. */
34760 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
34762 /* General regs can load everything. */
34763 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
34766 /* Floats can load 0 and 1 plus some others. Note that we eliminated
34767 zero above. We only want to wind up preferring 80387 registers if
34768 we plan on doing computation with them. */
34770 && standard_80387_constant_p (x
) > 0)
34772 /* Limit class to non-sse. */
34773 if (regclass
== FLOAT_SSE_REGS
)
34775 if (regclass
== FP_TOP_SSE_REGS
)
34777 if (regclass
== FP_SECOND_SSE_REGS
)
34778 return FP_SECOND_REG
;
34779 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
34786 /* Generally when we see PLUS here, it's the function invariant
34787 (plus soft-fp const_int). Which can only be computed into general
34789 if (GET_CODE (x
) == PLUS
)
34790 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
34792 /* QImode constants are easy to load, but non-constant QImode data
34793 must go into Q_REGS. */
34794 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
34796 if (reg_class_subset_p (regclass
, Q_REGS
))
34798 if (reg_class_subset_p (Q_REGS
, regclass
))
34806 /* Discourage putting floating-point values in SSE registers unless
34807 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): TARGET_PREFERRED_OUTPUT_RELOAD_CLASS implementation;
   extract is garbled (missing braces/returns).  Kept byte-identical.
   The second "mode = GET_MODE (x);" is redundant with the initializer
   above it -- harmless, present in upstream too; verify.  */
34809 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
34811 enum machine_mode mode
= GET_MODE (x
);
34813 /* Restrict the output reload class to the register bank that we are doing
34814 math on. If we would like not to return a subset of CLASS, reject this
34815 alternative: if reload cannot do this, it will still use its choice. */
34816 mode
= GET_MODE (x
);
34817 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
34818 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
34820 if (X87_FLOAT_MODE_P (mode
))
34822 if (regclass
== FP_TOP_SSE_REGS
)
34824 else if (regclass
== FP_SECOND_SSE_REGS
)
34825 return FP_SECOND_REG
;
34827 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
/* NOTE(review): TARGET_SECONDARY_RELOAD hook.  Extract is garbled; the
   leading conditions of several if-statements and the sri->icode
   assignment context are missing (numbers jump 34838 -> 34841,
   34849 -> 34854).  Kept byte-identical.  */
34834 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
34835 enum machine_mode mode
, secondary_reload_info
*sri
)
34837 /* Double-word spills from general registers to non-offsettable memory
34838 references (zero-extended addresses) require special handling. */
34841 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
34842 && INTEGER_CLASS_P (rclass
)
34843 && !offsettable_memref_p (x
))
/* Pick the load or store variant of the special reload pattern.  */
34846 ? CODE_FOR_reload_noff_load
34847 : CODE_FOR_reload_noff_store
);
34848 /* Add the cost of moving address to a temporary. */
34849 sri
->extra_cost
= 1;
34854 /* QImode spills from non-QI registers require
34855 intermediate register on 32bit targets. */
34857 && (MAYBE_MASK_CLASS_P (rclass
)
34858 || (!TARGET_64BIT
&& !in_p
34859 && INTEGER_CLASS_P (rclass
)
34860 && MAYBE_NON_Q_CLASS_P (rclass
))))
34869 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
34870 regno
= true_regnum (x
);
34872 /* Return Q_REGS if the operand is in memory. */
34877 /* This condition handles corner case where an expression involving
34878 pointers gets vectorized. We're trying to use the address of a
34879 stack slot as a vector initializer.
34881 (set (reg:V2DI 74 [ vect_cst_.2 ])
34882 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
34884 Eventually frame gets turned into sp+offset like this:
34886 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34887 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34888 (const_int 392 [0x188]))))
34890 That later gets turned into:
34892 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34893 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
34894 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
34896 We'll have the following reload recorded:
34898 Reload 0: reload_in (DI) =
34899 (plus:DI (reg/f:DI 7 sp)
34900 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
34901 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34902 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
34903 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
34904 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
34905 reload_reg_rtx: (reg:V2DI 22 xmm1)
34907 Which isn't going to work since SSE instructions can't handle scalar
34908 additions. Returning GENERAL_REGS forces the addition into integer
34909 register and reload can handle subsequent reloads without problems. */
34911 if (in_p
&& GET_CODE (x
) == PLUS
34912 && SSE_CLASS_P (rclass
)
34913 && SCALAR_INT_MODE_P (mode
))
34914 return GENERAL_REGS
;
34919 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
/* NOTE(review): only two case labels of the switch survive in this
   garbled extract (body, other cases and returns are missing).  */
34922 ix86_class_likely_spilled_p (reg_class_t rclass
)
34933 case SSE_FIRST_REG
:
34935 case FP_SECOND_REG
:
34945 /* If we are copying between general and FP registers, we need a memory
34946 location. The same is true for SSE and MMX registers.
34948 To optimize register_move_cost performance, allow inline variant.
34950 The macro can't work reliably when one of the CLASSES is class containing
34951 registers from multiple units (SSE, MMX, integer). We avoid this by never
34952 combining those units in single alternative in the machine description.
34953 Ensure that this constraint holds to avoid unexpected surprises.
34955 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
34956 enforce these sanity checks. */
/* NOTE(review): garbled extract -- the return statements of each branch
   are missing.  Kept byte-identical.  */
34959 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
34960 enum machine_mode mode
, int strict
)
34962 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
/* Sanity: mixed-unit classes must never reach here (see comment above).  */
34964 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
34965 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
34966 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
34967 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
34968 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
34969 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
34971 gcc_assert (!strict
|| lra_in_progress
);
34975 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
34978 /* ??? This is a lie. We do have moves between mmx/general, and for
34979 mmx/sse2. But by saying we need secondary memory we discourage the
34980 register allocator from using the mmx registers unless needed. */
34981 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
34984 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
34986 /* SSE1 doesn't have any direct moves from other classes. */
34990 /* If the target says that inter-unit moves are more expensive
34991 than moving through memory, then don't generate them. */
34992 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
34993 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
34996 /* Between SSE and general, we have moves no larger than word size. */
34997 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
/* Out-of-line wrapper around inline_secondary_memory_needed (above);
   this is the SECONDARY_MEMORY_NEEDED entry point.  Return type line
   is missing from this garbled extract.  */
35005 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
35006 enum machine_mode mode
, int strict
)
35008 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
35011 /* Implement the TARGET_CLASS_MAX_NREGS hook.
35013 On the 80386, this is the size of MODE in words,
35014 except in the FP regs, where a single reg is always enough. */
/* NOTE(review): garbled extract -- the FP/complex branches after the
   integer-class case are truncated.  Kept byte-identical.  */
35016 static unsigned char
35017 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
35019 if (MAYBE_INTEGER_CLASS_P (rclass
))
/* XFmode/XCmode occupy 2/4 words on 64-bit, 3/6 on 32-bit.  */
35021 if (mode
== XFmode
)
35022 return (TARGET_64BIT
? 2 : 3);
35023 else if (mode
== XCmode
)
35024 return (TARGET_64BIT
? 4 : 6);
/* Generic case: round size up to whole words.  */
35026 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
35030 if (COMPLEX_MODE_P (mode
))
35037 /* Return true if the registers in CLASS cannot represent the change from
35038 modes FROM to TO. */
/* NOTE(review): garbled extract -- return statements and the size-equality
   early-out are missing.  Kept byte-identical.  */
35041 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
35042 enum reg_class regclass
)
35047 /* x87 registers can't do subreg at all, as all values are reformatted
35048 to extended precision. */
35049 if (MAYBE_FLOAT_CLASS_P (regclass
))
35052 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
35054 /* Vector registers do not support QI or HImode loads. If we don't
35055 disallow a change to these modes, reload will assume it's ok to
35056 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
35057 the vec_dupv4hi pattern. */
35058 if (GET_MODE_SIZE (from
) < 4)
35061 /* Vector registers do not support subreg with nonzero offsets, which
35062 are otherwise valid for integer registers. Since we can't see
35063 whether we have a nonzero offset from here, prohibit all
35064 nonparadoxical subregs changing size. */
35065 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
35072 /* Return the cost of moving data of mode M between a
35073 register and memory. A value of 2 is the default; this cost is
35074 relative to those in `REGISTER_MOVE_COST'.
35076 This function is used extensively by register_move_cost that is used to
35077 build tables at startup. Make it inline in this case.
35078 When IN is 2, return maximum of in and out move cost.
35080 If moving between registers and memory is more expensive than
35081 between two registers, you should define this macro to express the
35084 Model also increased moving costs of QImode registers in non
/* NOTE(review): garbled extract -- the "int in" parameter, the `index'
   computation for each class, the switch case labels and several
   braces are missing (numbers jump 35092 -> 35110, 35116 -> 35131, ...).
   Kept byte-identical.  `index' presumably selects the per-size entry
   of the cost tables -- verify against upstream.  */
35088 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
/* x87 class: use fp_load/fp_store tables.  */
35092 if (FLOAT_CLASS_P (regclass
))
35110 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
35111 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
/* SSE class: use sse_load/sse_store tables, indexed by mode size.  */
35113 if (SSE_CLASS_P (regclass
))
35116 switch (GET_MODE_SIZE (mode
))
35131 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
35132 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
/* MMX class: use mmx_load/mmx_store tables.  */
35134 if (MMX_CLASS_P (regclass
))
35137 switch (GET_MODE_SIZE (mode
))
35149 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
35150 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
/* Integer classes, dispatched on mode size (case labels missing here).  */
35152 switch (GET_MODE_SIZE (mode
))
35155 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
35158 return ix86_cost
->int_store
[0];
/* QImode loads into non-Q regs model the partial-register penalty
   by charging movzbl instead of a plain byte load.  */
35159 if (TARGET_PARTIAL_REG_DEPENDENCY
35160 && optimize_function_for_speed_p (cfun
))
35161 cost
= ix86_cost
->movzbl_load
;
35163 cost
= ix86_cost
->int_load
[0];
35165 return MAX (cost
, ix86_cost
->int_store
[0]);
35171 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
35173 return ix86_cost
->movzbl_load
;
35175 return ix86_cost
->int_store
[0] + 4;
35180 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
35181 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
35183 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
35184 if (mode
== TFmode
)
35187 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
35189 cost
= ix86_cost
->int_load
[2];
35191 cost
= ix86_cost
->int_store
[2];
/* Scale the per-word cost by the number of words moved.  */
35192 return (cost
* (((int) GET_MODE_SIZE (mode
)
35193 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
/* TARGET_MEMORY_MOVE_COST hook: thin wrapper over inline_memory_move_cost,
   converting the bool `in' (declaration line missing in this extract)
   to the 0/1 encoding the worker expects.  */
35198 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
35201 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
35205 /* Return the cost of moving data from a register in class CLASS1 to
35206 one in class CLASS2.
35208 It is not required that the cost always equal 2 when FROM is the same as TO;
35209 on some machines it is expensive to move between registers if they are not
35210 general registers. */
/* NOTE(review): garbled extract -- `cost' declaration, several returns and
   braces are missing (numbers jump 35223 -> 35227, 35234 -> 35237).
   Kept byte-identical.  */
35213 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
35214 reg_class_t class2_i
)
35216 enum reg_class class1
= (enum reg_class
) class1_i
;
35217 enum reg_class class2
= (enum reg_class
) class2_i
;
35219 /* In case we require secondary memory, compute cost of the store followed
35220 by load. In order to avoid bad register allocation choices, we need
35221 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
35223 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
/* in==2 requests MAX(load, store) from the worker.  */
35227 cost
+= inline_memory_move_cost (mode
, class1
, 2);
35228 cost
+= inline_memory_move_cost (mode
, class2
, 2);
35230 /* In case of copying from general_purpose_register we may emit multiple
35231 stores followed by single load causing memory size mismatch stall.
35232 Count this as arbitrarily high cost of 20. */
35233 if (targetm
.class_max_nregs (class1
, mode
)
35234 > targetm
.class_max_nregs (class2
, mode
))
35237 /* In the case of FP/MMX moves, the registers actually overlap, and we
35238 have to switch modes in order to treat them differently. */
35239 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
35240 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
35246 /* Moves between SSE/MMX and integer unit are expensive. */
35247 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
35248 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
35250 /* ??? By keeping returned value relatively high, we limit the number
35251 of moves between integer and MMX/SSE registers for all targets.
35252 Additionally, high value prevents problem with x86_modes_tieable_p(),
35253 where integer modes in MMX/SSE registers are not tieable
35254 because of missing QImode and HImode moves to, from or between
35255 MMX/SSE registers. */
35256 return MAX (8, ix86_cost
->mmxsse_to_integer
);
35258 if (MAYBE_FLOAT_CLASS_P (class1
))
35259 return ix86_cost
->fp_move
;
35260 if (MAYBE_SSE_CLASS_P (class1
))
35261 return ix86_cost
->sse_move
;
35262 if (MAYBE_MMX_CLASS_P (class1
))
35263 return ix86_cost
->mmx_move
;
35267 /* Return TRUE if hard register REGNO can hold a value of machine-mode
/* NOTE(review): garbled extract -- the closing words of the comment, many
   returns and the TARGET_AVX512F condition head are missing.  Kept
   byte-identical.  */
35271 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
35273 /* Flags and only flags can only hold CCmode values. */
35274 if (CC_REGNO_P (regno
))
35275 return GET_MODE_CLASS (mode
) == MODE_CC
;
35276 if (GET_MODE_CLASS (mode
) == MODE_CC
35277 || GET_MODE_CLASS (mode
) == MODE_RANDOM
35278 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
35280 if (STACK_REGNO_P (regno
))
35281 return VALID_FP_MODE_P (mode
);
35282 if (MASK_REGNO_P (regno
))
35283 return VALID_MASK_REG_MODE (mode
);
35284 if (SSE_REGNO_P (regno
))
35286 /* We implement the move patterns for all vector modes into and
35287 out of SSE registers, even when no operation instructions
35290 /* For AVX-512 we allow, regardless of regno:
35292 - any of 512-bit wide vector mode
35293 - any scalar mode. */
35296 || VALID_AVX512F_REG_MODE (mode
)
35297 || VALID_AVX512F_SCALAR_MODE (mode
)))
35300 /* xmm16-xmm31 are only available for AVX-512. */
35301 if (EXT_REX_SSE_REGNO_P (regno
))
35304 /* OImode move is available only when AVX is enabled. */
35305 return ((TARGET_AVX
&& mode
== OImode
)
35306 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35307 || VALID_SSE_REG_MODE (mode
)
35308 || VALID_SSE2_REG_MODE (mode
)
35309 || VALID_MMX_REG_MODE (mode
)
35310 || VALID_MMX_REG_MODE_3DNOW (mode
));
35312 if (MMX_REGNO_P (regno
))
35314 /* We implement the move patterns for 3DNOW modes even in MMX mode,
35315 so if the register is available at all, then we can move data of
35316 the given mode into or out of it. */
35317 return (VALID_MMX_REG_MODE (mode
)
35318 || VALID_MMX_REG_MODE_3DNOW (mode
));
35321 if (mode
== QImode
)
35323 /* Take care for QImode values - they can be in non-QI regs,
35324 but then they do cause partial register stalls. */
35325 if (ANY_QI_REGNO_P (regno
))
35327 if (!TARGET_PARTIAL_REG_STALL
)
35329 /* LRA checks if the hard register is OK for the given mode.
35330 QImode values can live in non-QI regs, so we allow all
35332 if (lra_in_progress
)
35334 return !can_create_pseudo_p ();
35336 /* We handle both integer and floats in the general purpose registers. */
35337 else if (VALID_INT_MODE_P (mode
))
35339 else if (VALID_FP_MODE_P (mode
))
35341 else if (VALID_DFP_MODE_P (mode
))
35343 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
35344 on to use that value in smaller contexts, this can easily force a
35345 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
35346 supporting DImode, allow it. */
35347 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
35353 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
35354 tieable integer mode. */
/* NOTE(review): the switch over modes is missing from this garbled
   extract; only two of its return statements survive.  */
35357 ix86_tieable_integer_mode_p (enum machine_mode mode
)
35366 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
35369 return TARGET_64BIT
;
35376 /* Return true if MODE1 is accessible in a register that can hold MODE2
35377 without copying. That is, all register classes that can hold MODE2
35378 can also hold MODE1. */
/* NOTE(review): garbled extract -- the early `return true' bodies are
   missing.  Kept byte-identical.  */
35381 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
35383 if (mode1
== mode2
)
35386 if (ix86_tieable_integer_mode_p (mode1
)
35387 && ix86_tieable_integer_mode_p (mode2
))
35390 /* MODE2 being XFmode implies fp stack or general regs, which means we
35391 can tie any smaller floating point modes to it. Note that we do not
35392 tie this with TFmode. */
35393 if (mode2
== XFmode
)
35394 return mode1
== SFmode
|| mode1
== DFmode
;
35396 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
35397 that we can tie it with SFmode. */
35398 if (mode2
== DFmode
)
35399 return mode1
== SFmode
;
35401 /* If MODE2 is only appropriate for an SSE register, then tie with
35402 any other mode acceptable to SSE registers. */
35403 if (GET_MODE_SIZE (mode2
) == 32
35404 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35405 return (GET_MODE_SIZE (mode1
) == 32
35406 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35407 if (GET_MODE_SIZE (mode2
) == 16
35408 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
35409 return (GET_MODE_SIZE (mode1
) == 16
35410 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
35412 /* If MODE2 is appropriate for an MMX register, then tie
35413 with any other mode acceptable to MMX registers. */
35414 if (GET_MODE_SIZE (mode2
) == 8
35415 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
35416 return (GET_MODE_SIZE (mode1
) == 8
35417 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
35422 /* Return the cost of moving between two registers of mode MODE. */
/* NOTE(review): garbled extract -- the switch's first case labels
   (presumably MODE_CC / MODE_FLOAT) and `break' statements are missing.
   Kept byte-identical.  Each branch shrinks `units' to the size movable
   in one instruction; the final formula charges one insn per piece.  */
35425 ix86_set_reg_reg_cost (enum machine_mode mode
)
35427 unsigned int units
= UNITS_PER_WORD
;
35429 switch (GET_MODE_CLASS (mode
))
35435 units
= GET_MODE_SIZE (CCmode
);
35439 if ((TARGET_SSE
&& mode
== TFmode
)
35440 || (TARGET_80387
&& mode
== XFmode
)
35441 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
35442 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
35443 units
= GET_MODE_SIZE (mode
);
35446 case MODE_COMPLEX_FLOAT
:
35447 if ((TARGET_SSE
&& mode
== TCmode
)
35448 || (TARGET_80387
&& mode
== XCmode
)
35449 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
35450 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
35451 units
= GET_MODE_SIZE (mode
);
35454 case MODE_VECTOR_INT
:
35455 case MODE_VECTOR_FLOAT
:
35456 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
35457 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
35458 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
35459 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
35460 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
35461 units
= GET_MODE_SIZE (mode
);
35464 /* Return the cost of moving between two registers of mode MODE,
35465 assuming that the move will be in pieces of at most UNITS bytes. */
35466 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
35469 /* Compute a (partial) cost for rtx X. Return true if the complete
35470 cost has been computed, and false if subexpressions should be
35471 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): TARGET_RTX_COSTS hook.  This extract is heavily garbled:
   the `case' labels of the big switch (SET, CONST_INT, MULT, DIV, PLUS,
   NEG, COMPARE, ABS, SQRT, UNSPEC, ...), most braces and many returns
   are missing -- only conditions, assignments to *total and comments
   survive.  Kept byte-identical throughout.  */
35474 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
35477 enum rtx_code code
= (enum rtx_code
) code_i
;
35478 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
35479 enum machine_mode mode
= GET_MODE (x
);
/* Use size-oriented cost tables when not optimizing for speed.  */
35480 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
/* (presumably case SET:) reg-reg or reg-zero sets get the dedicated
   reg-reg move cost.  */
35485 if (register_operand (SET_DEST (x
), VOIDmode
)
35486 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
35488 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
/* Constant costs: wider-than-imm32 constants and PIC symbols cost more.  */
35497 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
35499 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" below double-negates a
   comparison (equivalent to "GET_CODE (x) == LABEL_REF" only when the
   result is compared with the enum, which it is not) -- this looks
   unintended; verify against upstream GCC, where this construct was
   later corrected.  */
35501 else if (flag_pic
&& SYMBOLIC_CONST (x
)
35503 || (!GET_CODE (x
) != LABEL_REF
35504 && (GET_CODE (x
) != SYMBOL_REF
35505 || !SYMBOL_REF_LOCAL_P (x
)))))
35512 if (mode
== VOIDmode
)
/* x87 constants loadable via fld1/fldz etc. are cheap.  */
35517 switch (standard_80387_constant_p (x
))
35522 default: /* Other constants */
35529 if (SSE_FLOAT_MODE_P (mode
))
35532 switch (standard_sse_constant_p (x
))
35536 case 1: /* 0: xor eliminates false dependency */
35539 default: /* -1: cmp contains false dependency */
35544 /* Fall back to (MEM (SYMBOL_REF)), since that's where
35545 it'll probably end up. Add a penalty for size. */
35546 *total
= (COSTS_N_INSNS (1)
35547 + (flag_pic
!= 0 && !TARGET_64BIT
)
35548 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
35552 /* The zero extensions is often completely free on x86_64, so make
35553 it as cheap as possible. */
35554 if (TARGET_64BIT
&& mode
== DImode
35555 && GET_MODE (XEXP (x
, 0)) == SImode
)
35557 else if (TARGET_ZERO_EXTEND_WITH_AND
)
35558 *total
= cost
->add
;
35560 *total
= cost
->movzx
;
35564 *total
= cost
->movsx
;
/* (presumably case ASHIFT:) small left shifts may become LEA.  */
35568 if (SCALAR_INT_MODE_P (mode
)
35569 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
35570 && CONST_INT_P (XEXP (x
, 1)))
35572 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35575 *total
= cost
->add
;
35578 if ((value
== 2 || value
== 3)
35579 && cost
->lea
<= cost
->shift_const
)
35581 *total
= cost
->lea
;
/* Vector shifts, emulated with varying insn counts per element mode.  */
35591 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35593 /* ??? Should be SSE vector operation cost. */
35594 /* At least for published AMD latencies, this really is the same
35595 as the latency for a simple fpu operation like fabs. */
35596 /* V*QImode is emulated with 1-11 insns. */
35597 if (mode
== V16QImode
|| mode
== V32QImode
)
35600 if (TARGET_XOP
&& mode
== V16QImode
)
35602 /* For XOP we use vpshab, which requires a broadcast of the
35603 value to the variable shift insn. For constants this
35604 means a V16Q const in mem; even when we can perform the
35605 shift with one insn set the cost to prefer paddb. */
35606 if (CONSTANT_P (XEXP (x
, 1)))
35608 *total
= (cost
->fabs
35609 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
35610 + (speed
? 2 : COSTS_N_BYTES (16)));
35615 else if (TARGET_SSSE3
)
35617 *total
= cost
->fabs
* count
;
35620 *total
= cost
->fabs
;
/* Doubleword scalar shifts: two insns, plus fixups for large counts.  */
35622 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35624 if (CONST_INT_P (XEXP (x
, 1)))
35626 if (INTVAL (XEXP (x
, 1)) > 32)
35627 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
35629 *total
= cost
->shift_const
* 2;
35633 if (GET_CODE (XEXP (x
, 1)) == AND
)
35634 *total
= cost
->shift_var
* 2;
35636 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
35641 if (CONST_INT_P (XEXP (x
, 1)))
35642 *total
= cost
->shift_const
;
35643 else if (GET_CODE (XEXP (x
, 1)) == SUBREG
35644 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
35646 /* Return the cost after shift-and truncation. */
35647 *total
= cost
->shift_var
;
35651 *total
= cost
->shift_var
;
/* (presumably case FMA:) fused multiply-add, costed as one fmul.  */
35659 gcc_assert (FLOAT_MODE_P (mode
));
35660 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
35662 /* ??? SSE scalar/vector cost should be used here. */
35663 /* ??? Bald assumption that fma has the same cost as fmul. */
35664 *total
= cost
->fmul
;
35665 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
35667 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
35669 if (GET_CODE (sub
) == NEG
)
35670 sub
= XEXP (sub
, 0);
35671 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
35674 if (GET_CODE (sub
) == NEG
)
35675 sub
= XEXP (sub
, 0);
35676 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
/* (presumably case MULT:) floating multiply by unit.  */
35681 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35683 /* ??? SSE scalar cost should be used here. */
35684 *total
= cost
->fmul
;
35687 else if (X87_FLOAT_MODE_P (mode
))
35689 *total
= cost
->fmul
;
35692 else if (FLOAT_MODE_P (mode
))
35694 /* ??? SSE vector cost should be used here. */
35695 *total
= cost
->fmul
;
35698 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35700 /* V*QImode is emulated with 7-13 insns. */
35701 if (mode
== V16QImode
|| mode
== V32QImode
)
35704 if (TARGET_XOP
&& mode
== V16QImode
)
35706 else if (TARGET_SSSE3
)
35708 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
35710 /* V*DImode is emulated with 5-8 insns. */
35711 else if (mode
== V2DImode
|| mode
== V4DImode
)
35713 if (TARGET_XOP
&& mode
== V2DImode
)
35714 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
35716 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
35718 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
35719 insns, including two PMULUDQ. */
35720 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
35721 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
35723 *total
= cost
->fmul
;
/* Integer multiply: cost depends on popcount of a constant operand
   and on widening-multiply recognition.  */
35728 rtx op0
= XEXP (x
, 0);
35729 rtx op1
= XEXP (x
, 1);
35731 if (CONST_INT_P (XEXP (x
, 1)))
35733 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
35734 for (nbits
= 0; value
!= 0; value
&= value
- 1)
35738 /* This is arbitrary. */
35741 /* Compute costs correctly for widening multiplication. */
35742 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
35743 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
35744 == GET_MODE_SIZE (mode
))
35746 int is_mulwiden
= 0;
35747 enum machine_mode inner_mode
= GET_MODE (op0
);
35749 if (GET_CODE (op0
) == GET_CODE (op1
))
35750 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
35751 else if (CONST_INT_P (op1
))
35753 if (GET_CODE (op0
) == SIGN_EXTEND
)
35754 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
35757 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
35761 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
35764 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
35765 + nbits
* cost
->mult_bit
35766 + rtx_cost (op0
, outer_code
, opno
, speed
)
35767 + rtx_cost (op1
, outer_code
, opno
, speed
));
/* (presumably case DIV/MOD:) division costs.  */
35776 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35777 /* ??? SSE cost should be used here. */
35778 *total
= cost
->fdiv
;
35779 else if (X87_FLOAT_MODE_P (mode
))
35780 *total
= cost
->fdiv
;
35781 else if (FLOAT_MODE_P (mode
))
35782 /* ??? SSE vector cost should be used here. */
35783 *total
= cost
->fdiv
;
35785 *total
= cost
->divide
[MODE_INDEX (mode
)];
/* (presumably case PLUS:) LEA-shaped additions are one lea.  */
35789 if (GET_MODE_CLASS (mode
) == MODE_INT
35790 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
35792 if (GET_CODE (XEXP (x
, 0)) == PLUS
35793 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
35794 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
35795 && CONSTANT_P (XEXP (x
, 1)))
35797 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
35798 if (val
== 2 || val
== 4 || val
== 8)
35800 *total
= cost
->lea
;
35801 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35802 outer_code
, opno
, speed
);
35803 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
35804 outer_code
, opno
, speed
);
35805 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35809 else if (GET_CODE (XEXP (x
, 0)) == MULT
35810 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
35812 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
35813 if (val
== 2 || val
== 4 || val
== 8)
35815 *total
= cost
->lea
;
35816 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35817 outer_code
, opno
, speed
);
35818 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
35822 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
35824 *total
= cost
->lea
;
35825 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
35826 outer_code
, opno
, speed
);
35827 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
35828 outer_code
, opno
, speed
);
35829 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
/* (presumably fallthrough to MINUS:) float add/sub.  */
35836 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35838 /* ??? SSE cost should be used here. */
35839 *total
= cost
->fadd
;
35842 else if (X87_FLOAT_MODE_P (mode
))
35844 *total
= cost
->fadd
;
35847 else if (FLOAT_MODE_P (mode
))
35849 /* ??? SSE vector cost should be used here. */
35850 *total
= cost
->fadd
;
/* (presumably AND/IOR/XOR:) doubleword logical ops cost two adds, with
   DImode halves weighted by a shift of the subexpression cost.  */
35858 if (GET_MODE_CLASS (mode
) == MODE_INT
35859 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35861 *total
= (cost
->add
* 2
35862 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
35863 << (GET_MODE (XEXP (x
, 0)) != DImode
))
35864 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
35865 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* (presumably case NEG:) floating negate.  */
35871 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35873 /* ??? SSE cost should be used here. */
35874 *total
= cost
->fchs
;
35877 else if (X87_FLOAT_MODE_P (mode
))
35879 *total
= cost
->fchs
;
35882 else if (FLOAT_MODE_P (mode
))
35884 /* ??? SSE vector cost should be used here. */
35885 *total
= cost
->fchs
;
/* (presumably case NOT:) bitwise not.  */
35891 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
35893 /* ??? Should be SSE vector operation cost. */
35894 /* At least for published AMD latencies, this really is the same
35895 as the latency for a simple fpu operation like fabs. */
35896 *total
= cost
->fabs
;
35898 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
35899 *total
= cost
->add
* 2;
35901 *total
= cost
->add
;
/* (presumably case COMPARE:) single-bit test compiles to test[bwl].  */
35905 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
35906 && XEXP (XEXP (x
, 0), 1) == const1_rtx
35907 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
35908 && XEXP (x
, 1) == const0_rtx
)
35910 /* This kind of construct is implemented using test[bwl].
35911 Treat it as if we had an AND. */
35912 *total
= (cost
->add
35913 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
35914 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
/* (presumably FLOAT_EXTEND / ABS:) */
35920 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
35925 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35926 /* ??? SSE cost should be used here. */
35927 *total
= cost
->fabs
;
35928 else if (X87_FLOAT_MODE_P (mode
))
35929 *total
= cost
->fabs
;
35930 else if (FLOAT_MODE_P (mode
))
35931 /* ??? SSE vector cost should be used here. */
35932 *total
= cost
->fabs
;
/* (presumably case SQRT:) */
35936 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
35937 /* ??? SSE cost should be used here. */
35938 *total
= cost
->fsqrt
;
35939 else if (X87_FLOAT_MODE_P (mode
))
35940 *total
= cost
->fsqrt
;
35941 else if (FLOAT_MODE_P (mode
))
35942 /* ??? SSE vector cost should be used here. */
35943 *total
= cost
->fsqrt
;
/* (presumably case UNSPEC:) thread-pointer access is free.  */
35947 if (XINT (x
, 1) == UNSPEC_TP
)
35954 case VEC_DUPLICATE
:
35955 /* ??? Assume all of these vector manipulation patterns are
35956 recognizable. In which case they all pretty much have the
35958 *total
= cost
->fabs
;
35968 static int current_machopic_label_num
;
35970 /* Given a symbol name and its associated stub, write out the
35971 definition of the stub. */
35974 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
35976 unsigned int length
;
35977 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
35978 int label
= ++current_machopic_label_num
;
35980 /* For 64-bit we shouldn't get here. */
35981 gcc_assert (!TARGET_64BIT
);
35983 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
35984 symb
= targetm
.strip_name_encoding (symb
);
35986 length
= strlen (stub
);
35987 binder_name
= XALLOCAVEC (char, length
+ 32);
35988 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
35990 length
= strlen (symb
);
35991 symbol_name
= XALLOCAVEC (char, length
+ 32);
35992 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
35994 sprintf (lazy_ptr_name
, "L%d$lz", label
);
35996 if (MACHOPIC_ATT_STUB
)
35997 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
35998 else if (MACHOPIC_PURE
)
35999 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
36001 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
36003 fprintf (file
, "%s:\n", stub
);
36004 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36006 if (MACHOPIC_ATT_STUB
)
36008 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
36010 else if (MACHOPIC_PURE
)
36013 /* 25-byte PIC stub using "CALL get_pc_thunk". */
36014 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
36015 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
36016 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
36017 label
, lazy_ptr_name
, label
);
36018 fprintf (file
, "\tjmp\t*%%ecx\n");
36021 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
36023 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
36024 it needs no stub-binding-helper. */
36025 if (MACHOPIC_ATT_STUB
)
36028 fprintf (file
, "%s:\n", binder_name
);
36032 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
36033 fprintf (file
, "\tpushl\t%%ecx\n");
36036 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
36038 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
36040 /* N.B. Keep the correspondence of these
36041 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
36042 old-pic/new-pic/non-pic stubs; altering this will break
36043 compatibility with existing dylibs. */
36046 /* 25-byte PIC stub using "CALL get_pc_thunk". */
36047 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
36050 /* 16-byte -mdynamic-no-pic stub. */
36051 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
36053 fprintf (file
, "%s:\n", lazy_ptr_name
);
36054 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
36055 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
36057 #endif /* TARGET_MACHO */
36059 /* Order the registers for register allocator. */
36062 x86_order_regs_for_local_alloc (void)
36067 /* First allocate the local general purpose registers. */
36068 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
36069 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
36070 reg_alloc_order
[pos
++] = i
;
36072 /* Global general purpose registers. */
36073 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
36074 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
36075 reg_alloc_order
[pos
++] = i
;
36077 /* x87 registers come first in case we are doing FP math
36079 if (!TARGET_SSE_MATH
)
36080 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
36081 reg_alloc_order
[pos
++] = i
;
36083 /* SSE registers. */
36084 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
36085 reg_alloc_order
[pos
++] = i
;
36086 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
36087 reg_alloc_order
[pos
++] = i
;
36089 /* Extended REX SSE registers. */
36090 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
36091 reg_alloc_order
[pos
++] = i
;
36093 /* Mask register. */
36094 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
36095 reg_alloc_order
[pos
++] = i
;
36097 /* x87 registers. */
36098 if (TARGET_SSE_MATH
)
36099 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
36100 reg_alloc_order
[pos
++] = i
;
36102 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
36103 reg_alloc_order
[pos
++] = i
;
36105 /* Initialize the rest of array as we do not allocate some registers
36107 while (pos
< FIRST_PSEUDO_REGISTER
)
36108 reg_alloc_order
[pos
++] = 0;
36111 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
36112 in struct attribute_spec handler. */
36114 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
36116 int flags ATTRIBUTE_UNUSED
,
36117 bool *no_add_attrs
)
36119 if (TREE_CODE (*node
) != FUNCTION_TYPE
36120 && TREE_CODE (*node
) != METHOD_TYPE
36121 && TREE_CODE (*node
) != FIELD_DECL
36122 && TREE_CODE (*node
) != TYPE_DECL
)
36124 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36126 *no_add_attrs
= true;
36131 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
36133 *no_add_attrs
= true;
36136 if (is_attribute_p ("callee_pop_aggregate_return", name
))
36140 cst
= TREE_VALUE (args
);
36141 if (TREE_CODE (cst
) != INTEGER_CST
)
36143 warning (OPT_Wattributes
,
36144 "%qE attribute requires an integer constant argument",
36146 *no_add_attrs
= true;
36148 else if (compare_tree_int (cst
, 0) != 0
36149 && compare_tree_int (cst
, 1) != 0)
36151 warning (OPT_Wattributes
,
36152 "argument to %qE attribute is neither zero, nor one",
36154 *no_add_attrs
= true;
36163 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
36164 struct attribute_spec.handler. */
36166 ix86_handle_abi_attribute (tree
*node
, tree name
,
36167 tree args ATTRIBUTE_UNUSED
,
36168 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36170 if (TREE_CODE (*node
) != FUNCTION_TYPE
36171 && TREE_CODE (*node
) != METHOD_TYPE
36172 && TREE_CODE (*node
) != FIELD_DECL
36173 && TREE_CODE (*node
) != TYPE_DECL
)
36175 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36177 *no_add_attrs
= true;
36181 /* Can combine regparm with all attributes but fastcall. */
36182 if (is_attribute_p ("ms_abi", name
))
36184 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
36186 error ("ms_abi and sysv_abi attributes are not compatible");
36191 else if (is_attribute_p ("sysv_abi", name
))
36193 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
36195 error ("ms_abi and sysv_abi attributes are not compatible");
36204 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
36205 struct attribute_spec.handler. */
36207 ix86_handle_struct_attribute (tree
*node
, tree name
,
36208 tree args ATTRIBUTE_UNUSED
,
36209 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36212 if (DECL_P (*node
))
36214 if (TREE_CODE (*node
) == TYPE_DECL
)
36215 type
= &TREE_TYPE (*node
);
36220 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
36222 warning (OPT_Wattributes
, "%qE attribute ignored",
36224 *no_add_attrs
= true;
36227 else if ((is_attribute_p ("ms_struct", name
)
36228 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
36229 || ((is_attribute_p ("gcc_struct", name
)
36230 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
36232 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
36234 *no_add_attrs
= true;
36241 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
36242 tree args ATTRIBUTE_UNUSED
,
36243 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
36245 if (TREE_CODE (*node
) != FUNCTION_DECL
)
36247 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
36249 *no_add_attrs
= true;
36255 ix86_ms_bitfield_layout_p (const_tree record_type
)
36257 return ((TARGET_MS_BITFIELD_LAYOUT
36258 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
36259 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
36262 /* Returns an expression indicating where the this parameter is
36263 located on entry to the FUNCTION. */
36266 x86_this_parameter (tree function
)
36268 tree type
= TREE_TYPE (function
);
36269 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
36274 const int *parm_regs
;
36276 if (ix86_function_type_abi (type
) == MS_ABI
)
36277 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
36279 parm_regs
= x86_64_int_parameter_registers
;
36280 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
36283 nregs
= ix86_function_regparm (type
, function
);
36285 if (nregs
> 0 && !stdarg_p (type
))
36288 unsigned int ccvt
= ix86_get_callcvt (type
);
36290 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36291 regno
= aggr
? DX_REG
: CX_REG
;
36292 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36296 return gen_rtx_MEM (SImode
,
36297 plus_constant (Pmode
, stack_pointer_rtx
, 4));
36306 return gen_rtx_MEM (SImode
,
36307 plus_constant (Pmode
,
36308 stack_pointer_rtx
, 4));
36311 return gen_rtx_REG (SImode
, regno
);
36314 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
36318 /* Determine whether x86_output_mi_thunk can succeed. */
36321 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
36322 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
36323 HOST_WIDE_INT vcall_offset
, const_tree function
)
36325 /* 64-bit can handle anything. */
36329 /* For 32-bit, everything's fine if we have one free register. */
36330 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
36333 /* Need a free register for vcall_offset. */
36337 /* Need a free register for GOT references. */
36338 if (flag_pic
&& !targetm
.binds_local_p (function
))
36341 /* Otherwise ok. */
36345 /* Output the assembler code for a thunk function. THUNK_DECL is the
36346 declaration for the thunk function itself, FUNCTION is the decl for
36347 the target function. DELTA is an immediate constant offset to be
36348 added to THIS. If VCALL_OFFSET is nonzero, the word at
36349 *(*this + vcall_offset) should be added to THIS. */
36352 x86_output_mi_thunk (FILE *file
,
36353 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
36354 HOST_WIDE_INT vcall_offset
, tree function
)
36356 rtx this_param
= x86_this_parameter (function
);
36357 rtx this_reg
, tmp
, fnaddr
;
36358 unsigned int tmp_regno
;
36361 tmp_regno
= R10_REG
;
36364 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
36365 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
36366 tmp_regno
= AX_REG
;
36367 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
36368 tmp_regno
= DX_REG
;
36370 tmp_regno
= CX_REG
;
36373 emit_note (NOTE_INSN_PROLOGUE_END
);
36375 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
36376 pull it in now and let DELTA benefit. */
36377 if (REG_P (this_param
))
36378 this_reg
= this_param
;
36379 else if (vcall_offset
)
36381 /* Put the this parameter into %eax. */
36382 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
36383 emit_move_insn (this_reg
, this_param
);
36386 this_reg
= NULL_RTX
;
36388 /* Adjust the this parameter by a fixed constant. */
36391 rtx delta_rtx
= GEN_INT (delta
);
36392 rtx delta_dst
= this_reg
? this_reg
: this_param
;
36396 if (!x86_64_general_operand (delta_rtx
, Pmode
))
36398 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36399 emit_move_insn (tmp
, delta_rtx
);
36404 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
36407 /* Adjust the this parameter by a value stored in the vtable. */
36410 rtx vcall_addr
, vcall_mem
, this_mem
;
36412 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
36414 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
36415 if (Pmode
!= ptr_mode
)
36416 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
36417 emit_move_insn (tmp
, this_mem
);
36419 /* Adjust the this parameter. */
36420 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
36422 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
36424 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
36425 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
36426 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
36429 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
36430 if (Pmode
!= ptr_mode
)
36431 emit_insn (gen_addsi_1_zext (this_reg
,
36432 gen_rtx_REG (ptr_mode
,
36436 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
36439 /* If necessary, drop THIS back to its stack slot. */
36440 if (this_reg
&& this_reg
!= this_param
)
36441 emit_move_insn (this_param
, this_reg
);
36443 fnaddr
= XEXP (DECL_RTL (function
), 0);
36446 if (!flag_pic
|| targetm
.binds_local_p (function
)
36451 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
36452 tmp
= gen_rtx_CONST (Pmode
, tmp
);
36453 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
36458 if (!flag_pic
|| targetm
.binds_local_p (function
))
36461 else if (TARGET_MACHO
)
36463 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
36464 fnaddr
= XEXP (fnaddr
, 0);
36466 #endif /* TARGET_MACHO */
36469 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
36470 output_set_got (tmp
, NULL_RTX
);
36472 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
36473 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
36474 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
36478 /* Our sibling call patterns do not allow memories, because we have no
36479 predicate that can distinguish between frame and non-frame memory.
36480 For our purposes here, we can get away with (ab)using a jump pattern,
36481 because we're going to do no optimization. */
36482 if (MEM_P (fnaddr
))
36483 emit_jump_insn (gen_indirect_jump (fnaddr
));
36486 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
36487 fnaddr
= legitimize_pic_address (fnaddr
,
36488 gen_rtx_REG (Pmode
, tmp_regno
));
36490 if (!sibcall_insn_operand (fnaddr
, word_mode
))
36492 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
36493 if (GET_MODE (fnaddr
) != word_mode
)
36494 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
36495 emit_move_insn (tmp
, fnaddr
);
36499 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
36500 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
36501 tmp
= emit_call_insn (tmp
);
36502 SIBLING_CALL_P (tmp
) = 1;
36506 /* Emit just enough of rest_of_compilation to get the insns emitted.
36507 Note that use_thunk calls assemble_start_function et al. */
36508 tmp
= get_insns ();
36509 shorten_branches (tmp
);
36510 final_start_function (tmp
, file
, 1);
36511 final (tmp
, file
, 1);
36512 final_end_function ();
36516 x86_file_start (void)
36518 default_file_start ();
36520 darwin_file_start ();
36522 if (X86_FILE_START_VERSION_DIRECTIVE
)
36523 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
36524 if (X86_FILE_START_FLTUSED
)
36525 fputs ("\t.global\t__fltused\n", asm_out_file
);
36526 if (ix86_asm_dialect
== ASM_INTEL
)
36527 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
36531 x86_field_alignment (tree field
, int computed
)
36533 enum machine_mode mode
;
36534 tree type
= TREE_TYPE (field
);
36536 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
36538 mode
= TYPE_MODE (strip_array_types (type
));
36539 if (mode
== DFmode
|| mode
== DCmode
36540 || GET_MODE_CLASS (mode
) == MODE_INT
36541 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
36542 return MIN (32, computed
);
36546 /* Output assembler code to FILE to increment profiler label # LABELNO
36547 for profiling a function entry. */
36549 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
36551 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
36556 #ifndef NO_PROFILE_COUNTERS
36557 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
36560 if (!TARGET_PECOFF
&& flag_pic
)
36561 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
36563 fprintf (file
, "\tcall\t%s\n", mcount_name
);
36567 #ifndef NO_PROFILE_COUNTERS
36568 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
36571 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
36575 #ifndef NO_PROFILE_COUNTERS
36576 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
36579 fprintf (file
, "\tcall\t%s\n", mcount_name
);
36583 /* We don't have exact information about the insn sizes, but we may assume
36584 quite safely that we are informed about all 1 byte insns and memory
36585 address sizes. This is enough to eliminate unnecessary padding in
36589 min_insn_size (rtx insn
)
36593 if (!INSN_P (insn
) || !active_insn_p (insn
))
36596 /* Discard alignments we've emit and jump instructions. */
36597 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
36598 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
36601 /* Important case - calls are always 5 bytes.
36602 It is common to have many calls in the row. */
36604 && symbolic_reference_mentioned_p (PATTERN (insn
))
36605 && !SIBLING_CALL_P (insn
))
36607 len
= get_attr_length (insn
);
36611 /* For normal instructions we rely on get_attr_length being exact,
36612 with a few exceptions. */
36613 if (!JUMP_P (insn
))
36615 enum attr_type type
= get_attr_type (insn
);
36620 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
36621 || asm_noperands (PATTERN (insn
)) >= 0)
36628 /* Otherwise trust get_attr_length. */
36632 l
= get_attr_length_address (insn
);
36633 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
36642 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36644 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
36648 ix86_avoid_jump_mispredicts (void)
36650 rtx insn
, start
= get_insns ();
36651 int nbytes
= 0, njumps
= 0;
36654 /* Look for all minimal intervals of instructions containing 4 jumps.
36655 The intervals are bounded by START and INSN. NBYTES is the total
36656 size of instructions in the interval including INSN and not including
36657 START. When the NBYTES is smaller than 16 bytes, it is possible
36658 that the end of START and INSN ends up in the same 16byte page.
36660 The smallest offset in the page INSN can start is the case where START
36661 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
36662 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
36664 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
36668 if (LABEL_P (insn
))
36670 int align
= label_to_alignment (insn
);
36671 int max_skip
= label_to_max_skip (insn
);
36675 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
36676 already in the current 16 byte page, because otherwise
36677 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
36678 bytes to reach 16 byte boundary. */
36680 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
36683 fprintf (dump_file
, "Label %i with max_skip %i\n",
36684 INSN_UID (insn
), max_skip
);
36687 while (nbytes
+ max_skip
>= 16)
36689 start
= NEXT_INSN (start
);
36690 if (JUMP_P (start
) || CALL_P (start
))
36691 njumps
--, isjump
= 1;
36694 nbytes
-= min_insn_size (start
);
36700 min_size
= min_insn_size (insn
);
36701 nbytes
+= min_size
;
36703 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
36704 INSN_UID (insn
), min_size
);
36705 if (JUMP_P (insn
) || CALL_P (insn
))
36712 start
= NEXT_INSN (start
);
36713 if (JUMP_P (start
) || CALL_P (start
))
36714 njumps
--, isjump
= 1;
36717 nbytes
-= min_insn_size (start
);
36719 gcc_assert (njumps
>= 0);
36721 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
36722 INSN_UID (start
), INSN_UID (insn
), nbytes
);
36724 if (njumps
== 3 && isjump
&& nbytes
< 16)
36726 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
36729 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
36730 INSN_UID (insn
), padsize
);
36731 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
36737 /* AMD Athlon works faster
36738 when RET is not destination of conditional jump or directly preceded
36739 by other jump instruction. We avoid the penalty by inserting NOP just
36740 before the RET instructions in such cases. */
36742 ix86_pad_returns (void)
36747 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36749 basic_block bb
= e
->src
;
36750 rtx ret
= BB_END (bb
);
36752 bool replace
= false;
36754 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
36755 || optimize_bb_for_size_p (bb
))
36757 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
36758 if (active_insn_p (prev
) || LABEL_P (prev
))
36760 if (prev
&& LABEL_P (prev
))
36765 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36766 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
36767 && !(e
->flags
& EDGE_FALLTHRU
))
36775 prev
= prev_active_insn (ret
);
36777 && ((JUMP_P (prev
) && any_condjump_p (prev
))
36780 /* Empty functions get branch mispredict even when
36781 the jump destination is not visible to us. */
36782 if (!prev
&& !optimize_function_for_size_p (cfun
))
36787 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
36793 /* Count the minimum number of instructions in BB. Return 4 if the
36794 number of instructions >= 4. */
36797 ix86_count_insn_bb (basic_block bb
)
36800 int insn_count
= 0;
36802 /* Count number of instructions in this block. Return 4 if the number
36803 of instructions >= 4. */
36804 FOR_BB_INSNS (bb
, insn
)
36806 /* Only happen in exit blocks. */
36808 && ANY_RETURN_P (PATTERN (insn
)))
36811 if (NONDEBUG_INSN_P (insn
)
36812 && GET_CODE (PATTERN (insn
)) != USE
36813 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
36816 if (insn_count
>= 4)
36825 /* Count the minimum number of instructions in code path in BB.
36826 Return 4 if the number of instructions >= 4. */
36829 ix86_count_insn (basic_block bb
)
36833 int min_prev_count
;
36835 /* Only bother counting instructions along paths with no
36836 more than 2 basic blocks between entry and exit. Given
36837 that BB has an edge to exit, determine if a predecessor
36838 of BB has an edge from entry. If so, compute the number
36839 of instructions in the predecessor block. If there
36840 happen to be multiple such blocks, compute the minimum. */
36841 min_prev_count
= 4;
36842 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
36845 edge_iterator prev_ei
;
36847 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
36849 min_prev_count
= 0;
36852 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
36854 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
36856 int count
= ix86_count_insn_bb (e
->src
);
36857 if (count
< min_prev_count
)
36858 min_prev_count
= count
;
36864 if (min_prev_count
< 4)
36865 min_prev_count
+= ix86_count_insn_bb (bb
);
36867 return min_prev_count
;
36870 /* Pad short function to 4 instructions. */
36873 ix86_pad_short_function (void)
36878 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36880 rtx ret
= BB_END (e
->src
);
36881 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
36883 int insn_count
= ix86_count_insn (e
->src
);
36885 /* Pad short function. */
36886 if (insn_count
< 4)
36890 /* Find epilogue. */
36893 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
36894 insn
= PREV_INSN (insn
);
36899 /* Two NOPs count as one instruction. */
36900 insn_count
= 2 * (4 - insn_count
);
36901 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
36907 /* Fix up a Windows system unwinder issue. If an EH region falls through into
36908 the epilogue, the Windows system unwinder will apply epilogue logic and
36909 produce incorrect offsets. This can be avoided by adding a nop between
36910 the last insn that can throw and the first insn of the epilogue. */
36913 ix86_seh_fixup_eh_fallthru (void)
36918 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
36922 /* Find the beginning of the epilogue. */
36923 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
36924 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
36929 /* We only care about preceding insns that can throw. */
36930 insn
= prev_active_insn (insn
);
36931 if (insn
== NULL
|| !can_throw_internal (insn
))
36934 /* Do not separate calls from their debug information. */
36935 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
36937 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
36938 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
36943 emit_insn_after (gen_nops (const1_rtx
), insn
);
36947 /* Implement machine specific optimizations. We implement padding of returns
36948 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
36952 /* We are freeing block_for_insn in the toplev to keep compatibility
36953 with old MDEP_REORGS that are not CFG based. Recompute it now. */
36954 compute_bb_for_insn ();
36956 if (TARGET_SEH
&& current_function_has_exception_handlers ())
36957 ix86_seh_fixup_eh_fallthru ();
36959 if (optimize
&& optimize_function_for_speed_p (cfun
))
36961 if (TARGET_PAD_SHORT_FUNCTION
)
36962 ix86_pad_short_function ();
36963 else if (TARGET_PAD_RETURNS
)
36964 ix86_pad_returns ();
36965 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
36966 if (TARGET_FOUR_JUMP_LIMIT
)
36967 ix86_avoid_jump_mispredicts ();
36972 /* Return nonzero when QImode register that must be represented via REX prefix
36975 x86_extended_QIreg_mentioned_p (rtx insn
)
36978 extract_insn_cached (insn
);
36979 for (i
= 0; i
< recog_data
.n_operands
; i
++)
36980 if (GENERAL_REG_P (recog_data
.operand
[i
])
36981 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
36986 /* Return nonzero when P points to register encoded via REX prefix.
36987 Called via for_each_rtx. */
36989 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
36991 unsigned int regno
;
36994 regno
= REGNO (*p
);
36995 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
36998 /* Return true when INSN mentions register that must be encoded using REX
37001 x86_extended_reg_mentioned_p (rtx insn
)
37003 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
37004 extended_reg_mentioned_1
, NULL
);
37007 /* If profitable, negate (without causing overflow) integer constant
37008 of mode MODE at location LOC. Return true in this case. */
37010 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
37014 if (!CONST_INT_P (*loc
))
37020 /* DImode x86_64 constants must fit in 32 bits. */
37021 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
37032 gcc_unreachable ();
37035 /* Avoid overflows. */
37036 if (mode_signbit_p (mode
, *loc
))
37039 val
= INTVAL (*loc
);
37041 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
37042 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
37043 if ((val
< 0 && val
!= -128)
37046 *loc
= GEN_INT (-val
);
37053 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
37054 optabs would emit if we didn't have TFmode patterns. */
37057 x86_emit_floatuns (rtx operands
[2])
37059 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
37060 enum machine_mode mode
, inmode
;
37062 inmode
= GET_MODE (operands
[1]);
37063 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
37066 in
= force_reg (inmode
, operands
[1]);
37067 mode
= GET_MODE (out
);
37068 neglab
= gen_label_rtx ();
37069 donelab
= gen_label_rtx ();
37070 f0
= gen_reg_rtx (mode
);
37072 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
37074 expand_float (out
, in
, 0);
37076 emit_jump_insn (gen_jump (donelab
));
37079 emit_label (neglab
);
37081 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
37083 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
37085 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
37087 expand_float (f0
, i0
, 0);
37089 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
37091 emit_label (donelab
);
37094 /* AVX512F does support 64-byte integer vector operations,
37095 thus the longest vector we are faced with is V64QImode. */
37096 #define MAX_VECT_LEN 64
37098 struct expand_vec_perm_d
37100 rtx target
, op0
, op1
;
37101 unsigned char perm
[MAX_VECT_LEN
];
37102 enum machine_mode vmode
;
37103 unsigned char nelt
;
37104 bool one_operand_p
;
37108 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
37109 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
37110 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
37112 /* Get a vector mode of the same size as the original but with elements
37113 twice as wide. This is only guaranteed to apply to integral vectors. */
37115 static inline enum machine_mode
37116 get_mode_wider_vector (enum machine_mode o
)
37118 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
37119 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
37120 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
37121 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
37125 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37126 with all elements equal to VAR. Return true if successful. */
37129 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
37130 rtx target
, rtx val
)
37153 /* First attempt to recognize VAL as-is. */
37154 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
37155 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
37156 if (recog_memoized (insn
) < 0)
37159 /* If that fails, force VAL into a register. */
37162 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
37163 seq
= get_insns ();
37166 emit_insn_before (seq
, insn
);
37168 ok
= recog_memoized (insn
) >= 0;
37177 if (TARGET_SSE
|| TARGET_3DNOW_A
)
37181 val
= gen_lowpart (SImode
, val
);
37182 x
= gen_rtx_TRUNCATE (HImode
, val
);
37183 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
37184 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37197 struct expand_vec_perm_d dperm
;
37201 memset (&dperm
, 0, sizeof (dperm
));
37202 dperm
.target
= target
;
37203 dperm
.vmode
= mode
;
37204 dperm
.nelt
= GET_MODE_NUNITS (mode
);
37205 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
37206 dperm
.one_operand_p
= true;
37208 /* Extend to SImode using a paradoxical SUBREG. */
37209 tmp1
= gen_reg_rtx (SImode
);
37210 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
37212 /* Insert the SImode value as low element of a V4SImode vector. */
37213 tmp2
= gen_reg_rtx (V4SImode
);
37214 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
37215 emit_move_insn (dperm
.op0
, gen_lowpart (mode
, tmp2
));
37217 ok
= (expand_vec_perm_1 (&dperm
)
37218 || expand_vec_perm_broadcast_1 (&dperm
));
37230 /* Replicate the value once into the next wider mode and recurse. */
37232 enum machine_mode smode
, wsmode
, wvmode
;
37235 smode
= GET_MODE_INNER (mode
);
37236 wvmode
= get_mode_wider_vector (mode
);
37237 wsmode
= GET_MODE_INNER (wvmode
);
37239 val
= convert_modes (wsmode
, smode
, val
, true);
37240 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
37241 GEN_INT (GET_MODE_BITSIZE (smode
)),
37242 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37243 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
37245 x
= gen_reg_rtx (wvmode
);
37246 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
37248 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), x
));
37255 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
37256 rtx x
= gen_reg_rtx (hvmode
);
37258 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
37261 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
37262 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37271 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37272 whose ONE_VAR element is VAR, and other elements are zero. Return true
37276 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
37277 rtx target
, rtx var
, int one_var
)
37279 enum machine_mode vsimode
;
37282 bool use_vector_set
= false;
37287 /* For SSE4.1, we normally use vector set. But if the second
37288 element is zero and inter-unit moves are OK, we use movq
37290 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
37291 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
37297 use_vector_set
= TARGET_SSE4_1
;
37300 use_vector_set
= TARGET_SSE2
;
37303 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
37310 use_vector_set
= TARGET_AVX
;
37313 /* Use ix86_expand_vector_set in 64bit mode only. */
37314 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
37320 if (use_vector_set
)
37322 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
37323 var
= force_reg (GET_MODE_INNER (mode
), var
);
37324 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37340 var
= force_reg (GET_MODE_INNER (mode
), var
);
37341 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
37342 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
37347 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
37348 new_target
= gen_reg_rtx (mode
);
37350 new_target
= target
;
37351 var
= force_reg (GET_MODE_INNER (mode
), var
);
37352 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
37353 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
37354 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
37357 /* We need to shuffle the value to the correct position, so
37358 create a new pseudo to store the intermediate result. */
37360 /* With SSE2, we can use the integer shuffle insns. */
37361 if (mode
!= V4SFmode
&& TARGET_SSE2
)
37363 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
37365 GEN_INT (one_var
== 1 ? 0 : 1),
37366 GEN_INT (one_var
== 2 ? 0 : 1),
37367 GEN_INT (one_var
== 3 ? 0 : 1)));
37368 if (target
!= new_target
)
37369 emit_move_insn (target
, new_target
);
37373 /* Otherwise convert the intermediate result to V4SFmode and
37374 use the SSE1 shuffle instructions. */
37375 if (mode
!= V4SFmode
)
37377 tmp
= gen_reg_rtx (V4SFmode
);
37378 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
37383 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
37385 GEN_INT (one_var
== 1 ? 0 : 1),
37386 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
37387 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
37389 if (mode
!= V4SFmode
)
37390 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
37391 else if (tmp
!= target
)
37392 emit_move_insn (target
, tmp
);
37394 else if (target
!= new_target
)
37395 emit_move_insn (target
, new_target
);
37400 vsimode
= V4SImode
;
37406 vsimode
= V2SImode
;
37412 /* Zero extend the variable element to SImode and recurse. */
37413 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
37415 x
= gen_reg_rtx (vsimode
);
37416 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
37418 gcc_unreachable ();
37420 emit_move_insn (target
, gen_lowpart (mode
, x
));
37428 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
37429 consisting of the values in VALS. It is known that all elements
37430 except ONE_VAR are constants. Return true if successful. */
37433 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
37434 rtx target
, rtx vals
, int one_var
)
37436 rtx var
= XVECEXP (vals
, 0, one_var
);
37437 enum machine_mode wmode
;
37440 const_vec
= copy_rtx (vals
);
37441 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
37442 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
37450 /* For the two element vectors, it's just as easy to use
37451 the general case. */
37455 /* Use ix86_expand_vector_set in 64bit mode only. */
37478 /* There's no way to set one QImode entry easily. Combine
37479 the variable value with its adjacent constant value, and
37480 promote to an HImode set. */
37481 x
= XVECEXP (vals
, 0, one_var
^ 1);
37484 var
= convert_modes (HImode
, QImode
, var
, true);
37485 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
37486 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
37487 x
= GEN_INT (INTVAL (x
) & 0xff);
37491 var
= convert_modes (HImode
, QImode
, var
, true);
37492 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
37494 if (x
!= const0_rtx
)
37495 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
37496 1, OPTAB_LIB_WIDEN
);
37498 x
= gen_reg_rtx (wmode
);
37499 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
37500 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
37502 emit_move_insn (target
, gen_lowpart (mode
, x
));
37509 emit_move_insn (target
, const_vec
);
37510 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
37514 /* A subroutine of ix86_expand_vector_init_general. Use vector
37515 concatenate to handle the most general case: all values variable,
37516 and none identical. */
37519 ix86_expand_vector_init_concat (enum machine_mode mode
,
37520 rtx target
, rtx
*ops
, int n
)
37522 enum machine_mode cmode
, hmode
= VOIDmode
;
37523 rtx first
[8], second
[4];
37563 gcc_unreachable ();
37566 if (!register_operand (ops
[1], cmode
))
37567 ops
[1] = force_reg (cmode
, ops
[1]);
37568 if (!register_operand (ops
[0], cmode
))
37569 ops
[0] = force_reg (cmode
, ops
[0]);
37570 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37571 gen_rtx_VEC_CONCAT (mode
, ops
[0],
37591 gcc_unreachable ();
37607 gcc_unreachable ();
37612 /* FIXME: We process inputs backward to help RA. PR 36222. */
37615 for (; i
> 0; i
-= 2, j
--)
37617 first
[j
] = gen_reg_rtx (cmode
);
37618 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
37619 ix86_expand_vector_init (false, first
[j
],
37620 gen_rtx_PARALLEL (cmode
, v
));
37626 gcc_assert (hmode
!= VOIDmode
);
37627 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37629 second
[j
] = gen_reg_rtx (hmode
);
37630 ix86_expand_vector_init_concat (hmode
, second
[j
],
37634 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
37637 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
37641 gcc_unreachable ();
37645 /* A subroutine of ix86_expand_vector_init_general. Use vector
37646 interleave to handle the most general case: all values variable,
37647 and none identical. */
37650 ix86_expand_vector_init_interleave (enum machine_mode mode
,
37651 rtx target
, rtx
*ops
, int n
)
37653 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
37656 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
37657 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
37658 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
37663 gen_load_even
= gen_vec_setv8hi
;
37664 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
37665 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37666 inner_mode
= HImode
;
37667 first_imode
= V4SImode
;
37668 second_imode
= V2DImode
;
37669 third_imode
= VOIDmode
;
37672 gen_load_even
= gen_vec_setv16qi
;
37673 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
37674 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
37675 inner_mode
= QImode
;
37676 first_imode
= V8HImode
;
37677 second_imode
= V4SImode
;
37678 third_imode
= V2DImode
;
37681 gcc_unreachable ();
37684 for (i
= 0; i
< n
; i
++)
37686 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
37687 op0
= gen_reg_rtx (SImode
);
37688 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
37690 /* Insert the SImode value as low element of V4SImode vector. */
37691 op1
= gen_reg_rtx (V4SImode
);
37692 op0
= gen_rtx_VEC_MERGE (V4SImode
,
37693 gen_rtx_VEC_DUPLICATE (V4SImode
,
37695 CONST0_RTX (V4SImode
),
37697 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
37699 /* Cast the V4SImode vector back to a vector in orignal mode. */
37700 op0
= gen_reg_rtx (mode
);
37701 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
37703 /* Load even elements into the second position. */
37704 emit_insn (gen_load_even (op0
,
37705 force_reg (inner_mode
,
37709 /* Cast vector to FIRST_IMODE vector. */
37710 ops
[i
] = gen_reg_rtx (first_imode
);
37711 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
37714 /* Interleave low FIRST_IMODE vectors. */
37715 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
37717 op0
= gen_reg_rtx (first_imode
);
37718 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
37720 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
37721 ops
[j
] = gen_reg_rtx (second_imode
);
37722 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
37725 /* Interleave low SECOND_IMODE vectors. */
37726 switch (second_imode
)
37729 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
37731 op0
= gen_reg_rtx (second_imode
);
37732 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
37735 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
37737 ops
[j
] = gen_reg_rtx (third_imode
);
37738 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
37740 second_imode
= V2DImode
;
37741 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
37745 op0
= gen_reg_rtx (second_imode
);
37746 emit_insn (gen_interleave_second_low (op0
, ops
[0],
37749 /* Cast the SECOND_IMODE vector back to a vector on original
37751 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37752 gen_lowpart (mode
, op0
)));
37756 gcc_unreachable ();
37760 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
37761 all values variable, and none identical. */
37764 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
37765 rtx target
, rtx vals
)
37767 rtx ops
[32], op0
, op1
;
37768 enum machine_mode half_mode
= VOIDmode
;
37775 if (!mmx_ok
&& !TARGET_SSE
)
37787 n
= GET_MODE_NUNITS (mode
);
37788 for (i
= 0; i
< n
; i
++)
37789 ops
[i
] = XVECEXP (vals
, 0, i
);
37790 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
37794 half_mode
= V16QImode
;
37798 half_mode
= V8HImode
;
37802 n
= GET_MODE_NUNITS (mode
);
37803 for (i
= 0; i
< n
; i
++)
37804 ops
[i
] = XVECEXP (vals
, 0, i
);
37805 op0
= gen_reg_rtx (half_mode
);
37806 op1
= gen_reg_rtx (half_mode
);
37807 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
37809 ix86_expand_vector_init_interleave (half_mode
, op1
,
37810 &ops
[n
>> 1], n
>> 2);
37811 emit_insn (gen_rtx_SET (VOIDmode
, target
,
37812 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
37816 if (!TARGET_SSE4_1
)
37824 /* Don't use ix86_expand_vector_init_interleave if we can't
37825 move from GPR to SSE register directly. */
37826 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
37829 n
= GET_MODE_NUNITS (mode
);
37830 for (i
= 0; i
< n
; i
++)
37831 ops
[i
] = XVECEXP (vals
, 0, i
);
37832 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
37840 gcc_unreachable ();
37844 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
37845 enum machine_mode inner_mode
;
37846 rtx words
[4], shift
;
37848 inner_mode
= GET_MODE_INNER (mode
);
37849 n_elts
= GET_MODE_NUNITS (mode
);
37850 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
37851 n_elt_per_word
= n_elts
/ n_words
;
37852 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
37854 for (i
= 0; i
< n_words
; ++i
)
37856 rtx word
= NULL_RTX
;
37858 for (j
= 0; j
< n_elt_per_word
; ++j
)
37860 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
37861 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
37867 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
37868 word
, 1, OPTAB_LIB_WIDEN
);
37869 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
37870 word
, 1, OPTAB_LIB_WIDEN
);
37878 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
37879 else if (n_words
== 2)
37881 rtx tmp
= gen_reg_rtx (mode
);
37882 emit_clobber (tmp
);
37883 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
37884 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
37885 emit_move_insn (target
, tmp
);
37887 else if (n_words
== 4)
37889 rtx tmp
= gen_reg_rtx (V4SImode
);
37890 gcc_assert (word_mode
== SImode
);
37891 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
37892 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
37893 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
37896 gcc_unreachable ();
37900 /* Initialize vector TARGET via VALS. Suppress the use of MMX
37901 instructions unless MMX_OK is true. */
37904 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
37906 enum machine_mode mode
= GET_MODE (target
);
37907 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37908 int n_elts
= GET_MODE_NUNITS (mode
);
37909 int n_var
= 0, one_var
= -1;
37910 bool all_same
= true, all_const_zero
= true;
37914 for (i
= 0; i
< n_elts
; ++i
)
37916 x
= XVECEXP (vals
, 0, i
);
37917 if (!(CONST_INT_P (x
)
37918 || GET_CODE (x
) == CONST_DOUBLE
37919 || GET_CODE (x
) == CONST_FIXED
))
37920 n_var
++, one_var
= i
;
37921 else if (x
!= CONST0_RTX (inner_mode
))
37922 all_const_zero
= false;
37923 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
37927 /* Constants are best loaded from the constant pool. */
37930 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
37934 /* If all values are identical, broadcast the value. */
37936 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
37937 XVECEXP (vals
, 0, 0)))
37940 /* Values where only one field is non-constant are best loaded from
37941 the pool and overwritten via move later. */
37945 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
37946 XVECEXP (vals
, 0, one_var
),
37950 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
37954 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
37958 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
37960 enum machine_mode mode
= GET_MODE (target
);
37961 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
37962 enum machine_mode half_mode
;
37963 bool use_vec_merge
= false;
37965 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
37967 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
37968 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
37969 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
37970 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
37971 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
37972 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
37974 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
37976 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
37977 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
37978 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
37979 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
37980 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
37981 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
37991 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
37992 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
37994 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
37996 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
37997 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38003 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
38007 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
38008 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
38010 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
38012 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
38013 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38020 /* For the two element vectors, we implement a VEC_CONCAT with
38021 the extraction of the other element. */
38023 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
38024 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
38027 op0
= val
, op1
= tmp
;
38029 op0
= tmp
, op1
= val
;
38031 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
38032 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38037 use_vec_merge
= TARGET_SSE4_1
;
38044 use_vec_merge
= true;
38048 /* tmp = target = A B C D */
38049 tmp
= copy_to_reg (target
);
38050 /* target = A A B B */
38051 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
38052 /* target = X A B B */
38053 ix86_expand_vector_set (false, target
, val
, 0);
38054 /* target = A X C D */
38055 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
38056 const1_rtx
, const0_rtx
,
38057 GEN_INT (2+4), GEN_INT (3+4)));
38061 /* tmp = target = A B C D */
38062 tmp
= copy_to_reg (target
);
38063 /* tmp = X B C D */
38064 ix86_expand_vector_set (false, tmp
, val
, 0);
38065 /* target = A B X D */
38066 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
38067 const0_rtx
, const1_rtx
,
38068 GEN_INT (0+4), GEN_INT (3+4)));
38072 /* tmp = target = A B C D */
38073 tmp
= copy_to_reg (target
);
38074 /* tmp = X B C D */
38075 ix86_expand_vector_set (false, tmp
, val
, 0);
38076 /* target = A B X D */
38077 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
38078 const0_rtx
, const1_rtx
,
38079 GEN_INT (2+4), GEN_INT (0+4)));
38083 gcc_unreachable ();
38088 use_vec_merge
= TARGET_SSE4_1
;
38092 /* Element 0 handled by vec_merge below. */
38095 use_vec_merge
= true;
38101 /* With SSE2, use integer shuffles to swap element 0 and ELT,
38102 store into element 0, then shuffle them back. */
38106 order
[0] = GEN_INT (elt
);
38107 order
[1] = const1_rtx
;
38108 order
[2] = const2_rtx
;
38109 order
[3] = GEN_INT (3);
38110 order
[elt
] = const0_rtx
;
38112 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
38113 order
[1], order
[2], order
[3]));
38115 ix86_expand_vector_set (false, target
, val
, 0);
38117 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
38118 order
[1], order
[2], order
[3]));
38122 /* For SSE1, we have to reuse the V4SF code. */
38123 rtx t
= gen_reg_rtx (V4SFmode
);
38124 ix86_expand_vector_set (false, t
, gen_lowpart (SFmode
, val
), elt
);
38125 emit_move_insn (target
, gen_lowpart (mode
, t
));
38130 use_vec_merge
= TARGET_SSE2
;
38133 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
38137 use_vec_merge
= TARGET_SSE4_1
;
38144 half_mode
= V16QImode
;
38150 half_mode
= V8HImode
;
38156 half_mode
= V4SImode
;
38162 half_mode
= V2DImode
;
38168 half_mode
= V4SFmode
;
38174 half_mode
= V2DFmode
;
38180 /* Compute offset. */
38184 gcc_assert (i
<= 1);
38186 /* Extract the half. */
38187 tmp
= gen_reg_rtx (half_mode
);
38188 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
38190 /* Put val in tmp at elt. */
38191 ix86_expand_vector_set (false, tmp
, val
, elt
);
38194 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
38203 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
38204 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
38205 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38209 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38211 emit_move_insn (mem
, target
);
38213 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38214 emit_move_insn (tmp
, val
);
38216 emit_move_insn (target
, mem
);
38221 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
38223 enum machine_mode mode
= GET_MODE (vec
);
38224 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
38225 bool use_vec_extr
= false;
38238 use_vec_extr
= true;
38242 use_vec_extr
= TARGET_SSE4_1
;
38254 tmp
= gen_reg_rtx (mode
);
38255 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
38256 GEN_INT (elt
), GEN_INT (elt
),
38257 GEN_INT (elt
+4), GEN_INT (elt
+4)));
38261 tmp
= gen_reg_rtx (mode
);
38262 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
38266 gcc_unreachable ();
38269 use_vec_extr
= true;
38274 use_vec_extr
= TARGET_SSE4_1
;
38288 tmp
= gen_reg_rtx (mode
);
38289 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
38290 GEN_INT (elt
), GEN_INT (elt
),
38291 GEN_INT (elt
), GEN_INT (elt
)));
38295 tmp
= gen_reg_rtx (mode
);
38296 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
38300 gcc_unreachable ();
38303 use_vec_extr
= true;
38308 /* For SSE1, we have to reuse the V4SF code. */
38309 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
38310 gen_lowpart (V4SFmode
, vec
), elt
);
38316 use_vec_extr
= TARGET_SSE2
;
38319 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
38323 use_vec_extr
= TARGET_SSE4_1
;
38329 tmp
= gen_reg_rtx (V4SFmode
);
38331 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
38333 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
38334 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38342 tmp
= gen_reg_rtx (V2DFmode
);
38344 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
38346 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
38347 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38355 tmp
= gen_reg_rtx (V16QImode
);
38357 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
38359 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
38360 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
38368 tmp
= gen_reg_rtx (V8HImode
);
38370 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
38372 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
38373 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
38381 tmp
= gen_reg_rtx (V4SImode
);
38383 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
38385 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
38386 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
38394 tmp
= gen_reg_rtx (V2DImode
);
38396 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
38398 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
38399 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
38405 /* ??? Could extract the appropriate HImode element and shift. */
38412 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
38413 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
38415 /* Let the rtl optimizers know about the zero extension performed. */
38416 if (inner_mode
== QImode
|| inner_mode
== HImode
)
38418 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
38419 target
= gen_lowpart (SImode
, target
);
38422 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
38426 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
38428 emit_move_insn (mem
, vec
);
38430 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
38431 emit_move_insn (target
, tmp
);
38435 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
38436 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
38437 The upper bits of DEST are undefined, though they shouldn't cause
38438 exceptions (some bits from src or all zeros are ok). */
38441 emit_reduc_half (rtx dest
, rtx src
, int i
)
38444 switch (GET_MODE (src
))
38448 tem
= gen_sse_movhlps (dest
, src
, src
);
38450 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
38451 GEN_INT (1 + 4), GEN_INT (1 + 4));
38454 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
38460 d
= gen_reg_rtx (V1TImode
);
38461 tem
= gen_sse2_lshrv1ti3 (d
, gen_lowpart (V1TImode
, src
),
38466 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
38468 tem
= gen_avx_shufps256 (dest
, src
, src
,
38469 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
38473 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
38475 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
38483 if (GET_MODE (dest
) != V4DImode
)
38484 d
= gen_reg_rtx (V4DImode
);
38485 tem
= gen_avx2_permv2ti (d
, gen_lowpart (V4DImode
, src
),
38486 gen_lowpart (V4DImode
, src
),
38491 d
= gen_reg_rtx (V2TImode
);
38492 tem
= gen_avx2_lshrv2ti3 (d
, gen_lowpart (V2TImode
, src
),
38497 gcc_unreachable ();
38501 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
38504 /* Expand a vector reduction. FN is the binary pattern to reduce;
38505 DEST is the destination; IN is the input vector. */
38508 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
38510 rtx half
, dst
, vec
= in
;
38511 enum machine_mode mode
= GET_MODE (in
);
38514 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
38516 && mode
== V8HImode
38517 && fn
== gen_uminv8hi3
)
38519 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
38523 for (i
= GET_MODE_BITSIZE (mode
);
38524 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
38527 half
= gen_reg_rtx (mode
);
38528 emit_reduc_half (half
, vec
, i
);
38529 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
38532 dst
= gen_reg_rtx (mode
);
38533 emit_insn (fn (dst
, half
, vec
));
38538 /* Target hook for scalar_mode_supported_p. */
38540 ix86_scalar_mode_supported_p (enum machine_mode mode
)
38542 if (DECIMAL_FLOAT_MODE_P (mode
))
38543 return default_decimal_float_supported_p ();
38544 else if (mode
== TFmode
)
38547 return default_scalar_mode_supported_p (mode
);
38550 /* Implements target hook vector_mode_supported_p. */
38552 ix86_vector_mode_supported_p (enum machine_mode mode
)
38554 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
38556 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
38558 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
38560 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
38562 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
38567 /* Target hook for c_mode_for_suffix. */
38568 static enum machine_mode
38569 ix86_c_mode_for_suffix (char suffix
)
38579 /* Worker function for TARGET_MD_ASM_CLOBBERS.
38581 We do this in the new i386 backend to maintain source compatibility
38582 with the old cc0-based compiler. */
38585 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
38586 tree inputs ATTRIBUTE_UNUSED
,
38589 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
38591 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
38596 /* Implements target vector targetm.asm.encode_section_info. */
38598 static void ATTRIBUTE_UNUSED
38599 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
38601 default_encode_section_info (decl
, rtl
, first
);
38603 if (TREE_CODE (decl
) == VAR_DECL
38604 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
38605 && ix86_in_large_data_p (decl
))
38606 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
38609 /* Worker function for REVERSE_CONDITION. */
38612 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
38614 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
38615 ? reverse_condition (code
)
38616 : reverse_condition_maybe_unordered (code
));
38619 /* Output code to perform an x87 FP register move, from OPERANDS[1]
38623 output_387_reg_move (rtx insn
, rtx
*operands
)
38625 if (REG_P (operands
[0]))
38627 if (REG_P (operands
[1])
38628 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38630 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
38631 return output_387_ffreep (operands
, 0);
38632 return "fstp\t%y0";
38634 if (STACK_TOP_P (operands
[0]))
38635 return "fld%Z1\t%y1";
38638 else if (MEM_P (operands
[0]))
38640 gcc_assert (REG_P (operands
[1]));
38641 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
38642 return "fstp%Z0\t%y0";
38645 /* There is no non-popping store to memory for XFmode.
38646 So if we need one, follow the store with a load. */
38647 if (GET_MODE (operands
[0]) == XFmode
)
38648 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
38650 return "fst%Z0\t%y0";
38657 /* Output code to perform a conditional jump to LABEL, if C2 flag in
38658 FP status register is set. */
38661 ix86_emit_fp_unordered_jump (rtx label
)
38663 rtx reg
= gen_reg_rtx (HImode
);
38666 emit_insn (gen_x86_fnstsw_1 (reg
));
38668 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
38670 emit_insn (gen_x86_sahf_1 (reg
));
38672 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
38673 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
38677 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
38679 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38680 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
38683 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
38684 gen_rtx_LABEL_REF (VOIDmode
, label
),
38686 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
38688 emit_jump_insn (temp
);
38689 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
38692 /* Output code to perform a log1p XFmode calculation. */
38694 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
38696 rtx label1
= gen_label_rtx ();
38697 rtx label2
= gen_label_rtx ();
38699 rtx tmp
= gen_reg_rtx (XFmode
);
38700 rtx tmp2
= gen_reg_rtx (XFmode
);
38703 emit_insn (gen_absxf2 (tmp
, op1
));
38704 test
= gen_rtx_GE (VOIDmode
, tmp
,
38705 CONST_DOUBLE_FROM_REAL_VALUE (
38706 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
38708 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
38710 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38711 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
38712 emit_jump (label2
);
38714 emit_label (label1
);
38715 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
38716 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
38717 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
38718 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
38720 emit_label (label2
);
38723 /* Emit code for round calculation. */
38724 void ix86_emit_i387_round (rtx op0
, rtx op1
)
38726 enum machine_mode inmode
= GET_MODE (op1
);
38727 enum machine_mode outmode
= GET_MODE (op0
);
38728 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
38729 rtx scratch
= gen_reg_rtx (HImode
);
38730 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
38731 rtx jump_label
= gen_label_rtx ();
38733 rtx (*gen_abs
) (rtx
, rtx
);
38734 rtx (*gen_neg
) (rtx
, rtx
);
38739 gen_abs
= gen_abssf2
;
38742 gen_abs
= gen_absdf2
;
38745 gen_abs
= gen_absxf2
;
38748 gcc_unreachable ();
38754 gen_neg
= gen_negsf2
;
38757 gen_neg
= gen_negdf2
;
38760 gen_neg
= gen_negxf2
;
38763 gen_neg
= gen_neghi2
;
38766 gen_neg
= gen_negsi2
;
38769 gen_neg
= gen_negdi2
;
38772 gcc_unreachable ();
38775 e1
= gen_reg_rtx (inmode
);
38776 e2
= gen_reg_rtx (inmode
);
38777 res
= gen_reg_rtx (outmode
);
38779 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
38781 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
38783 /* scratch = fxam(op1) */
38784 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
38785 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
38787 /* e1 = fabs(op1) */
38788 emit_insn (gen_abs (e1
, op1
));
38790 /* e2 = e1 + 0.5 */
38791 half
= force_reg (inmode
, half
);
38792 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38793 gen_rtx_PLUS (inmode
, e1
, half
)));
38795 /* res = floor(e2) */
38796 if (inmode
!= XFmode
)
38798 tmp1
= gen_reg_rtx (XFmode
);
38800 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
38801 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
38811 rtx tmp0
= gen_reg_rtx (XFmode
);
38813 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
38815 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38816 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
38817 UNSPEC_TRUNC_NOOP
)));
38821 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
38824 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
38827 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
38830 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
38833 gcc_unreachable ();
38836 /* flags = signbit(a) */
38837 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
38839 /* if (flags) then res = -res */
38840 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
38841 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
38842 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
38844 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
38845 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
38846 JUMP_LABEL (insn
) = jump_label
;
38848 emit_insn (gen_neg (res
, res
));
38850 emit_label (jump_label
);
38851 LABEL_NUSES (jump_label
) = 1;
38853 emit_move_insn (op0
, res
);
38856 /* Output code to perform a Newton-Rhapson approximation of a single precision
38857 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
38859 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
38861 rtx x0
, x1
, e0
, e1
;
38863 x0
= gen_reg_rtx (mode
);
38864 e0
= gen_reg_rtx (mode
);
38865 e1
= gen_reg_rtx (mode
);
38866 x1
= gen_reg_rtx (mode
);
38868 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
38870 b
= force_reg (mode
, b
);
38872 /* x0 = rcp(b) estimate */
38873 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38874 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
38877 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38878 gen_rtx_MULT (mode
, x0
, b
)));
38881 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38882 gen_rtx_MULT (mode
, x0
, e0
)));
38885 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38886 gen_rtx_PLUS (mode
, x0
, x0
)));
38889 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
38890 gen_rtx_MINUS (mode
, e1
, e0
)));
38893 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38894 gen_rtx_MULT (mode
, a
, x1
)));
38897 /* Output code to perform a Newton-Rhapson approximation of a
38898 single precision floating point [reciprocal] square root. */
38900 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
38903 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
38906 x0
= gen_reg_rtx (mode
);
38907 e0
= gen_reg_rtx (mode
);
38908 e1
= gen_reg_rtx (mode
);
38909 e2
= gen_reg_rtx (mode
);
38910 e3
= gen_reg_rtx (mode
);
38912 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
38913 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38915 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
38916 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
38918 if (VECTOR_MODE_P (mode
))
38920 mthree
= ix86_build_const_vector (mode
, true, mthree
);
38921 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
38924 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
38925 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
38927 a
= force_reg (mode
, a
);
38929 /* x0 = rsqrt(a) estimate */
38930 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38931 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
38934 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
38939 zero
= gen_reg_rtx (mode
);
38940 mask
= gen_reg_rtx (mode
);
38942 zero
= force_reg (mode
, CONST0_RTX(mode
));
38943 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38944 gen_rtx_NE (mode
, zero
, a
)));
38946 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
38947 gen_rtx_AND (mode
, x0
, mask
)));
38951 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
38952 gen_rtx_MULT (mode
, x0
, a
)));
38954 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
38955 gen_rtx_MULT (mode
, e0
, x0
)));
38958 mthree
= force_reg (mode
, mthree
);
38959 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
38960 gen_rtx_PLUS (mode
, e1
, mthree
)));
38962 mhalf
= force_reg (mode
, mhalf
);
38964 /* e3 = -.5 * x0 */
38965 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38966 gen_rtx_MULT (mode
, x0
, mhalf
)));
38968 /* e3 = -.5 * e0 */
38969 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
38970 gen_rtx_MULT (mode
, e0
, mhalf
)));
38971 /* ret = e2 * e3 */
38972 emit_insn (gen_rtx_SET (VOIDmode
, res
,
38973 gen_rtx_MULT (mode
, e2
, e3
)));
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.

   Emits a section directive for NAME with FLAGS; DECL is the associated
   declaration, if any.  NOTE(review): the "#ifndef USE_GAS" guard around the
   COMDAT branch is reconstructed from upstream GCC — confirm against the
   original file.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
39006 /* Return the mangling of TYPE if it is an extended fundamental type. */
39008 static const char *
39009 ix86_mangle_type (const_tree type
)
39011 type
= TYPE_MAIN_VARIANT (type
);
39013 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
39014 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
39017 switch (TYPE_MODE (type
))
39020 /* __float128 is "g". */
39023 /* "long double" or __float80 is "e". */
39030 /* For 32-bit code we can save PIC register setup by using
39031 __stack_chk_fail_local hidden function instead of calling
39032 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
39033 register, so it is better to call __stack_chk_fail directly. */
39035 static tree ATTRIBUTE_UNUSED
39036 ix86_stack_protect_fail (void)
39038 return TARGET_64BIT
39039 ? default_external_stack_protect_fail ()
39040 : default_hidden_stack_protect_fail ();
39043 /* Select a format to encode pointers in exception handling data. CODE
39044 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
39045 true if the symbol may be affected by dynamic relocations.
39047 ??? All x86 object file formats are capable of representing this.
39048 After all, the relocation needed is the same as for the call insn.
39049 Whether or not a particular assembler allows us to enter such, I
39050 guess we'll have to see. */
39052 asm_preferred_eh_data_format (int code
, int global
)
39056 int type
= DW_EH_PE_sdata8
;
39058 || ix86_cmodel
== CM_SMALL_PIC
39059 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
39060 type
= DW_EH_PE_sdata4
;
39061 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
39063 if (ix86_cmodel
== CM_SMALL
39064 || (ix86_cmodel
== CM_MEDIUM
&& code
))
39065 return DW_EH_PE_udata4
;
39066 return DW_EH_PE_absptr
;
39069 /* Expand copysign from SIGN to the positive value ABS_VALUE
39070 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
39073 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
39075 enum machine_mode mode
= GET_MODE (sign
);
39076 rtx sgn
= gen_reg_rtx (mode
);
39077 if (mask
== NULL_RTX
)
39079 enum machine_mode vmode
;
39081 if (mode
== SFmode
)
39083 else if (mode
== DFmode
)
39088 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
39089 if (!VECTOR_MODE_P (mode
))
39091 /* We need to generate a scalar mode mask in this case. */
39092 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
39093 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
39094 mask
= gen_reg_rtx (mode
);
39095 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
39099 mask
= gen_rtx_NOT (mode
, mask
);
39100 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
39101 gen_rtx_AND (mode
, mask
, sign
)));
39102 emit_insn (gen_rtx_SET (VOIDmode
, result
,
39103 gen_rtx_IOR (mode
, abs_value
, sgn
)));
39106 /* Expand fabs (OP0) and return a new rtx that holds the result. The
39107 mask for masking out the sign-bit is stored in *SMASK, if that is
39110 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
39112 enum machine_mode vmode
, mode
= GET_MODE (op0
);
39115 xa
= gen_reg_rtx (mode
);
39116 if (mode
== SFmode
)
39118 else if (mode
== DFmode
)
39122 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
39123 if (!VECTOR_MODE_P (mode
))
39125 /* We need to generate a scalar mode mask in this case. */
39126 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
39127 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
39128 mask
= gen_reg_rtx (mode
);
39129 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
39131 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
39132 gen_rtx_AND (mode
, op0
, mask
)));
39140 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
39141 swapping the operands if SWAP_OPERANDS is true. The expanded
39142 code is a forward jump to a newly created label in case the
39143 comparison is true. The generated label rtx is returned. */
39145 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
39146 bool swap_operands
)
39148 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
39158 label
= gen_label_rtx ();
39159 tmp
= gen_rtx_REG (fpcmp_mode
, FLAGS_REG
);
39160 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39161 gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
)));
39162 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
39163 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
39164 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
39165 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
39166 JUMP_LABEL (tmp
) = label
;
39171 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
39172 using comparison code CODE. Operands are swapped for the comparison if
39173 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
39175 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
39176 bool swap_operands
)
39178 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
39179 enum machine_mode mode
= GET_MODE (op0
);
39180 rtx mask
= gen_reg_rtx (mode
);
39189 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
39191 emit_insn (insn (mask
, op0
, op1
,
39192 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
39196 /* Generate and return a rtx of mode MODE for 2**n where n is the number
39197 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
39199 ix86_gen_TWO52 (enum machine_mode mode
)
39201 REAL_VALUE_TYPE TWO52r
;
39204 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
39205 TWO52
= const_double_from_real_value (TWO52r
, mode
);
39206 TWO52
= force_reg (mode
, TWO52
);
39211 /* Expand SSE sequence for computing lround from OP1 storing
39214 ix86_expand_lround (rtx op0
, rtx op1
)
39216 /* C code for the stuff we're doing below:
39217 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
39220 enum machine_mode mode
= GET_MODE (op1
);
39221 const struct real_format
*fmt
;
39222 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39225 /* load nextafter (0.5, 0.0) */
39226 fmt
= REAL_MODE_FORMAT (mode
);
39227 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39228 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39230 /* adj = copysign (0.5, op1) */
39231 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39232 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
39234 /* adj = op1 + adj */
39235 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39237 /* op0 = (imode)adj */
39238 expand_fix (op0
, adj
, 0);
39241 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
39244 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
39246 /* C code for the stuff we're doing below (for do_floor):
39248 xi -= (double)xi > op1 ? 1 : 0;
39251 enum machine_mode fmode
= GET_MODE (op1
);
39252 enum machine_mode imode
= GET_MODE (op0
);
39253 rtx ireg
, freg
, label
, tmp
;
39255 /* reg = (long)op1 */
39256 ireg
= gen_reg_rtx (imode
);
39257 expand_fix (ireg
, op1
, 0);
39259 /* freg = (double)reg */
39260 freg
= gen_reg_rtx (fmode
);
39261 expand_float (freg
, ireg
, 0);
39263 /* ireg = (freg > op1) ? ireg - 1 : ireg */
39264 label
= ix86_expand_sse_compare_and_jump (UNLE
,
39265 freg
, op1
, !do_floor
);
39266 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
39267 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
39268 emit_move_insn (ireg
, tmp
);
39270 emit_label (label
);
39271 LABEL_NUSES (label
) = 1;
39273 emit_move_insn (op0
, ireg
);
39276 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
39277 result in OPERAND0. */
39279 ix86_expand_rint (rtx operand0
, rtx operand1
)
39281 /* C code for the stuff we're doing below:
39282 xa = fabs (operand1);
39283 if (!isless (xa, 2**52))
39285 xa = xa + 2**52 - 2**52;
39286 return copysign (xa, operand1);
39288 enum machine_mode mode
= GET_MODE (operand0
);
39289 rtx res
, xa
, label
, TWO52
, mask
;
39291 res
= gen_reg_rtx (mode
);
39292 emit_move_insn (res
, operand1
);
39294 /* xa = abs (operand1) */
39295 xa
= ix86_expand_sse_fabs (res
, &mask
);
39297 /* if (!isless (xa, TWO52)) goto label; */
39298 TWO52
= ix86_gen_TWO52 (mode
);
39299 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39301 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39302 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39304 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
39306 emit_label (label
);
39307 LABEL_NUSES (label
) = 1;
39309 emit_move_insn (operand0
, res
);
39312 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39315 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
39317 /* C code for the stuff we expand below.
39318 double xa = fabs (x), x2;
39319 if (!isless (xa, TWO52))
39321 xa = xa + TWO52 - TWO52;
39322 x2 = copysign (xa, x);
39331 enum machine_mode mode
= GET_MODE (operand0
);
39332 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
39334 TWO52
= ix86_gen_TWO52 (mode
);
39336 /* Temporary for holding the result, initialized to the input
39337 operand to ease control flow. */
39338 res
= gen_reg_rtx (mode
);
39339 emit_move_insn (res
, operand1
);
39341 /* xa = abs (operand1) */
39342 xa
= ix86_expand_sse_fabs (res
, &mask
);
39344 /* if (!isless (xa, TWO52)) goto label; */
39345 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39347 /* xa = xa + TWO52 - TWO52; */
39348 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39349 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
39351 /* xa = copysign (xa, operand1) */
39352 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
39354 /* generate 1.0 or -1.0 */
39355 one
= force_reg (mode
,
39356 const_double_from_real_value (do_floor
39357 ? dconst1
: dconstm1
, mode
));
39359 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39360 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39361 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39362 gen_rtx_AND (mode
, one
, tmp
)));
39363 /* We always need to subtract here to preserve signed zero. */
39364 tmp
= expand_simple_binop (mode
, MINUS
,
39365 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39366 emit_move_insn (res
, tmp
);
39368 emit_label (label
);
39369 LABEL_NUSES (label
) = 1;
39371 emit_move_insn (operand0
, res
);
39374 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
39377 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
39379 /* C code for the stuff we expand below.
39380 double xa = fabs (x), x2;
39381 if (!isless (xa, TWO52))
39383 x2 = (double)(long)x;
39390 if (HONOR_SIGNED_ZEROS (mode))
39391 return copysign (x2, x);
39394 enum machine_mode mode
= GET_MODE (operand0
);
39395 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
39397 TWO52
= ix86_gen_TWO52 (mode
);
39399 /* Temporary for holding the result, initialized to the input
39400 operand to ease control flow. */
39401 res
= gen_reg_rtx (mode
);
39402 emit_move_insn (res
, operand1
);
39404 /* xa = abs (operand1) */
39405 xa
= ix86_expand_sse_fabs (res
, &mask
);
39407 /* if (!isless (xa, TWO52)) goto label; */
39408 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39410 /* xa = (double)(long)x */
39411 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39412 expand_fix (xi
, res
, 0);
39413 expand_float (xa
, xi
, 0);
39416 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39418 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
39419 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
39420 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39421 gen_rtx_AND (mode
, one
, tmp
)));
39422 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
39423 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39424 emit_move_insn (res
, tmp
);
39426 if (HONOR_SIGNED_ZEROS (mode
))
39427 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39429 emit_label (label
);
39430 LABEL_NUSES (label
) = 1;
39432 emit_move_insn (operand0
, res
);
39435 /* Expand SSE sequence for computing round from OPERAND1 storing
39436 into OPERAND0. Sequence that works without relying on DImode truncation
39437 via cvttsd2siq that is only available on 64bit targets. */
39439 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
39441 /* C code for the stuff we expand below.
39442 double xa = fabs (x), xa2, x2;
39443 if (!isless (xa, TWO52))
39445 Using the absolute value and copying back sign makes
39446 -0.0 -> -0.0 correct.
39447 xa2 = xa + TWO52 - TWO52;
39452 else if (dxa > 0.5)
39454 x2 = copysign (xa2, x);
39457 enum machine_mode mode
= GET_MODE (operand0
);
39458 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
39460 TWO52
= ix86_gen_TWO52 (mode
);
39462 /* Temporary for holding the result, initialized to the input
39463 operand to ease control flow. */
39464 res
= gen_reg_rtx (mode
);
39465 emit_move_insn (res
, operand1
);
39467 /* xa = abs (operand1) */
39468 xa
= ix86_expand_sse_fabs (res
, &mask
);
39470 /* if (!isless (xa, TWO52)) goto label; */
39471 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39473 /* xa2 = xa + TWO52 - TWO52; */
39474 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39475 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
39477 /* dxa = xa2 - xa; */
39478 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
39480 /* generate 0.5, 1.0 and -0.5 */
39481 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
39482 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39483 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
39487 tmp
= gen_reg_rtx (mode
);
39488 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
39489 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
39490 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39491 gen_rtx_AND (mode
, one
, tmp
)));
39492 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39493 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
39494 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
39495 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
39496 gen_rtx_AND (mode
, one
, tmp
)));
39497 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
39499 /* res = copysign (xa2, operand1) */
39500 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
39502 emit_label (label
);
39503 LABEL_NUSES (label
) = 1;
39505 emit_move_insn (operand0
, res
);
39508 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39511 ix86_expand_trunc (rtx operand0
, rtx operand1
)
39513 /* C code for SSE variant we expand below.
39514 double xa = fabs (x), x2;
39515 if (!isless (xa, TWO52))
39517 x2 = (double)(long)x;
39518 if (HONOR_SIGNED_ZEROS (mode))
39519 return copysign (x2, x);
39522 enum machine_mode mode
= GET_MODE (operand0
);
39523 rtx xa
, xi
, TWO52
, label
, res
, mask
;
39525 TWO52
= ix86_gen_TWO52 (mode
);
39527 /* Temporary for holding the result, initialized to the input
39528 operand to ease control flow. */
39529 res
= gen_reg_rtx (mode
);
39530 emit_move_insn (res
, operand1
);
39532 /* xa = abs (operand1) */
39533 xa
= ix86_expand_sse_fabs (res
, &mask
);
39535 /* if (!isless (xa, TWO52)) goto label; */
39536 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39538 /* x = (double)(long)x */
39539 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39540 expand_fix (xi
, res
, 0);
39541 expand_float (res
, xi
, 0);
39543 if (HONOR_SIGNED_ZEROS (mode
))
39544 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
39546 emit_label (label
);
39547 LABEL_NUSES (label
) = 1;
39549 emit_move_insn (operand0
, res
);
39552 /* Expand SSE sequence for computing trunc from OPERAND1 storing
39555 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
39557 enum machine_mode mode
= GET_MODE (operand0
);
39558 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
39560 /* C code for SSE variant we expand below.
39561 double xa = fabs (x), x2;
39562 if (!isless (xa, TWO52))
39564 xa2 = xa + TWO52 - TWO52;
39568 x2 = copysign (xa2, x);
39572 TWO52
= ix86_gen_TWO52 (mode
);
39574 /* Temporary for holding the result, initialized to the input
39575 operand to ease control flow. */
39576 res
= gen_reg_rtx (mode
);
39577 emit_move_insn (res
, operand1
);
39579 /* xa = abs (operand1) */
39580 xa
= ix86_expand_sse_fabs (res
, &smask
);
39582 /* if (!isless (xa, TWO52)) goto label; */
39583 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39585 /* res = xa + TWO52 - TWO52; */
39586 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
39587 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
39588 emit_move_insn (res
, tmp
);
39591 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
39593 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
39594 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
39595 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
39596 gen_rtx_AND (mode
, mask
, one
)));
39597 tmp
= expand_simple_binop (mode
, MINUS
,
39598 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
39599 emit_move_insn (res
, tmp
);
39601 /* res = copysign (res, operand1) */
39602 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
39604 emit_label (label
);
39605 LABEL_NUSES (label
) = 1;
39607 emit_move_insn (operand0
, res
);
39610 /* Expand SSE sequence for computing round from OPERAND1 storing
39613 ix86_expand_round (rtx operand0
, rtx operand1
)
39615 /* C code for the stuff we're doing below:
39616 double xa = fabs (x);
39617 if (!isless (xa, TWO52))
39619 xa = (double)(long)(xa + nextafter (0.5, 0.0));
39620 return copysign (xa, x);
39622 enum machine_mode mode
= GET_MODE (operand0
);
39623 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
39624 const struct real_format
*fmt
;
39625 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39627 /* Temporary for holding the result, initialized to the input
39628 operand to ease control flow. */
39629 res
= gen_reg_rtx (mode
);
39630 emit_move_insn (res
, operand1
);
39632 TWO52
= ix86_gen_TWO52 (mode
);
39633 xa
= ix86_expand_sse_fabs (res
, &mask
);
39634 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
39636 /* load nextafter (0.5, 0.0) */
39637 fmt
= REAL_MODE_FORMAT (mode
);
39638 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39639 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39641 /* xa = xa + 0.5 */
39642 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
39643 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
39645 /* xa = (double)(int64_t)xa */
39646 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
39647 expand_fix (xi
, xa
, 0);
39648 expand_float (xa
, xi
, 0);
39650 /* res = copysign (xa, operand1) */
39651 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
39653 emit_label (label
);
39654 LABEL_NUSES (label
) = 1;
39656 emit_move_insn (operand0
, res
);
39659 /* Expand SSE sequence for computing round
39660 from OP1 storing into OP0 using sse4 round insn. */
39662 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
39664 enum machine_mode mode
= GET_MODE (op0
);
39665 rtx e1
, e2
, res
, half
;
39666 const struct real_format
*fmt
;
39667 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
39668 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
39669 rtx (*gen_round
) (rtx
, rtx
, rtx
);
39674 gen_copysign
= gen_copysignsf3
;
39675 gen_round
= gen_sse4_1_roundsf2
;
39678 gen_copysign
= gen_copysigndf3
;
39679 gen_round
= gen_sse4_1_rounddf2
;
39682 gcc_unreachable ();
39685 /* round (a) = trunc (a + copysign (0.5, a)) */
39687 /* load nextafter (0.5, 0.0) */
39688 fmt
= REAL_MODE_FORMAT (mode
);
39689 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
39690 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
39691 half
= const_double_from_real_value (pred_half
, mode
);
39693 /* e1 = copysign (0.5, op1) */
39694 e1
= gen_reg_rtx (mode
);
39695 emit_insn (gen_copysign (e1
, half
, op1
));
39697 /* e2 = op1 + e1 */
39698 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
39700 /* res = trunc (e2) */
39701 res
= gen_reg_rtx (mode
);
39702 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
39704 emit_move_insn (op0
, res
);
39708 /* Table of valid machine attributes. */
39709 static const struct attribute_spec ix86_attribute_table
[] =
39711 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
39712 affects_type_identity } */
39713 /* Stdcall attribute says callee is responsible for popping arguments
39714 if they are not variable. */
39715 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39717 /* Fastcall attribute says callee is responsible for popping arguments
39718 if they are not variable. */
39719 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39721 /* Thiscall attribute says callee is responsible for popping arguments
39722 if they are not variable. */
39723 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39725 /* Cdecl attribute says the callee is a normal C declaration */
39726 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39728 /* Regparm attribute specifies how many integer arguments are to be
39729 passed in registers. */
39730 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
39732 /* Sseregparm attribute says we are using x86_64 calling conventions
39733 for FP arguments. */
39734 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
39736 /* The transactional memory builtins are implicitly regparm or fastcall
39737 depending on the ABI. Override the generic do-nothing attribute that
39738 these builtins were declared with. */
39739 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
39741 /* force_align_arg_pointer says this function realigns the stack at entry. */
39742 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
39743 false, true, true, ix86_handle_cconv_attribute
, false },
39744 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
39745 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
39746 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
39747 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
39750 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39752 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
39754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
39755 SUBTARGET_ATTRIBUTE_TABLE
,
39757 /* ms_abi and sysv_abi calling convention function attributes. */
39758 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39759 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
39760 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
39762 { "callee_pop_aggregate_return", 1, 1, false, true, true,
39763 ix86_handle_callee_pop_aggregate_return
, true },
39765 { NULL
, 0, 0, false, false, false, NULL
, false }
39768 /* Implement targetm.vectorize.builtin_vectorization_cost. */
39770 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
39772 int misalign ATTRIBUTE_UNUSED
)
39776 switch (type_of_cost
)
39779 return ix86_cost
->scalar_stmt_cost
;
39782 return ix86_cost
->scalar_load_cost
;
39785 return ix86_cost
->scalar_store_cost
;
39788 return ix86_cost
->vec_stmt_cost
;
39791 return ix86_cost
->vec_align_load_cost
;
39794 return ix86_cost
->vec_store_cost
;
39796 case vec_to_scalar
:
39797 return ix86_cost
->vec_to_scalar_cost
;
39799 case scalar_to_vec
:
39800 return ix86_cost
->scalar_to_vec_cost
;
39802 case unaligned_load
:
39803 case unaligned_store
:
39804 return ix86_cost
->vec_unalign_load_cost
;
39806 case cond_branch_taken
:
39807 return ix86_cost
->cond_taken_branch_cost
;
39809 case cond_branch_not_taken
:
39810 return ix86_cost
->cond_not_taken_branch_cost
;
39813 case vec_promote_demote
:
39814 return ix86_cost
->vec_stmt_cost
;
39816 case vec_construct
:
39817 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
39818 return elements
/ 2 + 1;
39821 gcc_unreachable ();
39825 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
39826 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
39827 insn every time. */
39829 static GTY(()) rtx vselect_insn
;
39831 /* Initialize vselect_insn. */
39834 init_vselect_insn (void)
39839 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
39840 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
39841 XVECEXP (x
, 0, i
) = const0_rtx
;
39842 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
39844 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
39846 vselect_insn
= emit_insn (x
);
39850 /* Construct (set target (vec_select op0 (parallel perm))) and
39851 return true if that's a valid instruction in the active ISA. */
39854 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
39855 unsigned nelt
, bool testing_p
)
39858 rtx x
, save_vconcat
;
39861 if (vselect_insn
== NULL_RTX
)
39862 init_vselect_insn ();
39864 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
39865 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
39866 for (i
= 0; i
< nelt
; ++i
)
39867 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
39868 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39869 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
39870 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
39871 SET_DEST (PATTERN (vselect_insn
)) = target
;
39872 icode
= recog_memoized (vselect_insn
);
39874 if (icode
>= 0 && !testing_p
)
39875 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
39877 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
39878 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
39879 INSN_CODE (vselect_insn
) = -1;
39884 /* Similar, but generate a vec_concat from op0 and op1 as well. */
39887 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
39888 const unsigned char *perm
, unsigned nelt
,
39891 enum machine_mode v2mode
;
39895 if (vselect_insn
== NULL_RTX
)
39896 init_vselect_insn ();
39898 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
39899 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
39900 PUT_MODE (x
, v2mode
);
39903 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
39904 XEXP (x
, 0) = const0_rtx
;
39905 XEXP (x
, 1) = const0_rtx
;
39909 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39910 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
39913 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
39915 enum machine_mode vmode
= d
->vmode
;
39916 unsigned i
, mask
, nelt
= d
->nelt
;
39917 rtx target
, op0
, op1
, x
;
39918 rtx rperm
[32], vperm
;
39920 if (d
->one_operand_p
)
39922 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
39924 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
39926 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
39931 /* This is a blend, not a permute. Elements must stay in their
39932 respective lanes. */
39933 for (i
= 0; i
< nelt
; ++i
)
39935 unsigned e
= d
->perm
[i
];
39936 if (!(e
== i
|| e
== i
+ nelt
))
39943 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
39944 decision should be extracted elsewhere, so that we only try that
39945 sequence once all budget==3 options have been tried. */
39946 target
= d
->target
;
39959 for (i
= 0; i
< nelt
; ++i
)
39960 mask
|= (d
->perm
[i
] >= nelt
) << i
;
39964 for (i
= 0; i
< 2; ++i
)
39965 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
39970 for (i
= 0; i
< 4; ++i
)
39971 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
39976 /* See if bytes move in pairs so we can use pblendw with
39977 an immediate argument, rather than pblendvb with a vector
39979 for (i
= 0; i
< 16; i
+= 2)
39980 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
39983 for (i
= 0; i
< nelt
; ++i
)
39984 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
39987 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
39988 vperm
= force_reg (vmode
, vperm
);
39990 if (GET_MODE_SIZE (vmode
) == 16)
39991 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
39993 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
39994 if (target
!= d
->target
)
39995 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
39999 for (i
= 0; i
< 8; ++i
)
40000 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
40005 target
= gen_reg_rtx (vmode
);
40006 op0
= gen_lowpart (vmode
, op0
);
40007 op1
= gen_lowpart (vmode
, op1
);
40011 /* See if bytes move in pairs. If not, vpblendvb must be used. */
40012 for (i
= 0; i
< 32; i
+= 2)
40013 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
40015 /* See if bytes move in quadruplets. If yes, vpblendd
40016 with immediate can be used. */
40017 for (i
= 0; i
< 32; i
+= 4)
40018 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
40022 /* See if bytes move the same in both lanes. If yes,
40023 vpblendw with immediate can be used. */
40024 for (i
= 0; i
< 16; i
+= 2)
40025 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
40028 /* Use vpblendw. */
40029 for (i
= 0; i
< 16; ++i
)
40030 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
40035 /* Use vpblendd. */
40036 for (i
= 0; i
< 8; ++i
)
40037 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
40042 /* See if words move in pairs. If yes, vpblendd can be used. */
40043 for (i
= 0; i
< 16; i
+= 2)
40044 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
40048 /* See if words move the same in both lanes. If not,
40049 vpblendvb must be used. */
40050 for (i
= 0; i
< 8; i
++)
40051 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
40053 /* Use vpblendvb. */
40054 for (i
= 0; i
< 32; ++i
)
40055 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
40059 target
= gen_reg_rtx (vmode
);
40060 op0
= gen_lowpart (vmode
, op0
);
40061 op1
= gen_lowpart (vmode
, op1
);
40062 goto finish_pblendvb
;
40065 /* Use vpblendw. */
40066 for (i
= 0; i
< 16; ++i
)
40067 mask
|= (d
->perm
[i
] >= 16) << i
;
40071 /* Use vpblendd. */
40072 for (i
= 0; i
< 8; ++i
)
40073 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
40078 /* Use vpblendd. */
40079 for (i
= 0; i
< 4; ++i
)
40080 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
40085 gcc_unreachable ();
40088 /* This matches five different patterns with the different modes. */
40089 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
40090 x
= gen_rtx_SET (VOIDmode
, target
, x
);
40092 if (target
!= d
->target
)
40093 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40098 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40099 in terms of the variable form of vpermilps.
40101 Note that we will have already failed the immediate input vpermilps,
40102 which requires that the high and low part shuffle be identical; the
40103 variable form doesn't require that. */
40106 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
40108 rtx rperm
[8], vperm
;
40111 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
40114 /* We can only permute within the 128-bit lane. */
40115 for (i
= 0; i
< 8; ++i
)
40117 unsigned e
= d
->perm
[i
];
40118 if (i
< 4 ? e
>= 4 : e
< 4)
40125 for (i
= 0; i
< 8; ++i
)
40127 unsigned e
= d
->perm
[i
];
40129 /* Within each 128-bit lane, the elements of op0 are numbered
40130 from 0 and the elements of op1 are numbered from 4. */
40136 rperm
[i
] = GEN_INT (e
);
40139 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
40140 vperm
= force_reg (V8SImode
, vperm
);
40141 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
40146 /* Return true if permutation D can be performed as VMODE permutation
40150 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
40152 unsigned int i
, j
, chunk
;
40154 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
40155 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
40156 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
40159 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
40162 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
40163 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
40164 if (d
->perm
[i
] & (chunk
- 1))
40167 for (j
= 1; j
< chunk
; ++j
)
40168 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
40174 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40175 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
40178 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
40180 unsigned i
, nelt
, eltsz
, mask
;
40181 unsigned char perm
[32];
40182 enum machine_mode vmode
= V16QImode
;
40183 rtx rperm
[32], vperm
, target
, op0
, op1
;
40187 if (!d
->one_operand_p
)
40189 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
40192 && valid_perm_using_mode_p (V2TImode
, d
))
40197 /* Use vperm2i128 insn. The pattern uses
40198 V4DImode instead of V2TImode. */
40199 target
= d
->target
;
40200 if (d
->vmode
!= V4DImode
)
40201 target
= gen_reg_rtx (V4DImode
);
40202 op0
= gen_lowpart (V4DImode
, d
->op0
);
40203 op1
= gen_lowpart (V4DImode
, d
->op1
);
40205 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
40206 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
40207 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
40208 if (target
!= d
->target
)
40209 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40217 if (GET_MODE_SIZE (d
->vmode
) == 16)
40222 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40227 /* V4DImode should be already handled through
40228 expand_vselect by vpermq instruction. */
40229 gcc_assert (d
->vmode
!= V4DImode
);
40232 if (d
->vmode
== V8SImode
40233 || d
->vmode
== V16HImode
40234 || d
->vmode
== V32QImode
)
40236 /* First see if vpermq can be used for
40237 V8SImode/V16HImode/V32QImode. */
40238 if (valid_perm_using_mode_p (V4DImode
, d
))
40240 for (i
= 0; i
< 4; i
++)
40241 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
40244 target
= gen_reg_rtx (V4DImode
);
40245 if (expand_vselect (target
, gen_lowpart (V4DImode
, d
->op0
),
40248 emit_move_insn (d
->target
,
40249 gen_lowpart (d
->vmode
, target
));
40255 /* Next see if vpermd can be used. */
40256 if (valid_perm_using_mode_p (V8SImode
, d
))
40259 /* Or if vpermps can be used. */
40260 else if (d
->vmode
== V8SFmode
)
40263 if (vmode
== V32QImode
)
40265 /* vpshufb only works intra lanes, it is not
40266 possible to shuffle bytes in between the lanes. */
40267 for (i
= 0; i
< nelt
; ++i
)
40268 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
40279 if (vmode
== V8SImode
)
40280 for (i
= 0; i
< 8; ++i
)
40281 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
40284 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40285 if (!d
->one_operand_p
)
40286 mask
= 2 * nelt
- 1;
40287 else if (vmode
== V16QImode
)
40290 mask
= nelt
/ 2 - 1;
40292 for (i
= 0; i
< nelt
; ++i
)
40294 unsigned j
, e
= d
->perm
[i
] & mask
;
40295 for (j
= 0; j
< eltsz
; ++j
)
40296 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
40300 vperm
= gen_rtx_CONST_VECTOR (vmode
,
40301 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
40302 vperm
= force_reg (vmode
, vperm
);
40304 target
= d
->target
;
40305 if (d
->vmode
!= vmode
)
40306 target
= gen_reg_rtx (vmode
);
40307 op0
= gen_lowpart (vmode
, d
->op0
);
40308 if (d
->one_operand_p
)
40310 if (vmode
== V16QImode
)
40311 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
40312 else if (vmode
== V32QImode
)
40313 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
40314 else if (vmode
== V8SFmode
)
40315 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
40317 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
40321 op1
= gen_lowpart (vmode
, d
->op1
);
40322 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
40324 if (target
!= d
->target
)
40325 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, target
));
40330 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
40331 in a single instruction. */
40334 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
40336 unsigned i
, nelt
= d
->nelt
;
40337 unsigned char perm2
[MAX_VECT_LEN
];
40339 /* Check plain VEC_SELECT first, because AVX has instructions that could
40340 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
40341 input where SEL+CONCAT may not. */
40342 if (d
->one_operand_p
)
40344 int mask
= nelt
- 1;
40345 bool identity_perm
= true;
40346 bool broadcast_perm
= true;
40348 for (i
= 0; i
< nelt
; i
++)
40350 perm2
[i
] = d
->perm
[i
] & mask
;
40352 identity_perm
= false;
40354 broadcast_perm
= false;
40360 emit_move_insn (d
->target
, d
->op0
);
40363 else if (broadcast_perm
&& TARGET_AVX2
)
40365 /* Use vpbroadcast{b,w,d}. */
40366 rtx (*gen
) (rtx
, rtx
) = NULL
;
40370 gen
= gen_avx2_pbroadcastv32qi_1
;
40373 gen
= gen_avx2_pbroadcastv16hi_1
;
40376 gen
= gen_avx2_pbroadcastv8si_1
;
40379 gen
= gen_avx2_pbroadcastv16qi
;
40382 gen
= gen_avx2_pbroadcastv8hi
;
40385 gen
= gen_avx2_vec_dupv8sf_1
;
40387 /* For other modes prefer other shuffles this function creates. */
40393 emit_insn (gen (d
->target
, d
->op0
));
40398 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
40401 /* There are plenty of patterns in sse.md that are written for
40402 SEL+CONCAT and are not replicated for a single op. Perhaps
40403 that should be changed, to avoid the nastiness here. */
40405 /* Recognize interleave style patterns, which means incrementing
40406 every other permutation operand. */
40407 for (i
= 0; i
< nelt
; i
+= 2)
40409 perm2
[i
] = d
->perm
[i
] & mask
;
40410 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
40412 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40416 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
40419 for (i
= 0; i
< nelt
; i
+= 4)
40421 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
40422 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
40423 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
40424 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
40427 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
40433 /* Finally, try the fully general two operand permute. */
40434 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
40438 /* Recognize interleave style patterns with reversed operands. */
40439 if (!d
->one_operand_p
)
40441 for (i
= 0; i
< nelt
; ++i
)
40443 unsigned e
= d
->perm
[i
];
40451 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
40456 /* Try the SSE4.1 blend variable merge instructions. */
40457 if (expand_vec_perm_blend (d
))
40460 /* Try one of the AVX vpermil variable permutations. */
40461 if (expand_vec_perm_vpermil (d
))
40464 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
40465 vpshufb, vpermd, vpermps or vpermq variable permutation. */
40466 if (expand_vec_perm_pshufb (d
))
40472 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
40473 in terms of a pair of pshuflw + pshufhw instructions. */
40476 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
40478 unsigned char perm2
[MAX_VECT_LEN
];
40482 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
40485 /* The two permutations only operate in 64-bit lanes. */
40486 for (i
= 0; i
< 4; ++i
)
40487 if (d
->perm
[i
] >= 4)
40489 for (i
= 4; i
< 8; ++i
)
40490 if (d
->perm
[i
] < 4)
40496 /* Emit the pshuflw. */
40497 memcpy (perm2
, d
->perm
, 4);
40498 for (i
= 4; i
< 8; ++i
)
40500 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
40503 /* Emit the pshufhw. */
40504 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
40505 for (i
= 0; i
< 4; ++i
)
40507 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
40513 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40514 the permutation using the SSSE3 palignr instruction. This succeeds
40515 when all of the elements in PERM fit within one vector and we merely
40516 need to shift them down so that a single vector permutation has a
40517 chance to succeed. */
40520 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
40522 unsigned i
, nelt
= d
->nelt
;
40526 struct expand_vec_perm_d dcopy
;
40528 /* Even with AVX, palignr only operates on 128-bit vectors. */
40529 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
40532 min
= nelt
, max
= 0;
40533 for (i
= 0; i
< nelt
; ++i
)
40535 unsigned e
= d
->perm
[i
];
40541 if (min
== 0 || max
- min
>= nelt
)
40544 /* Given that we have SSSE3, we know we'll be able to implement the
40545 single operand permutation after the palignr with pshufb. */
40550 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
40551 target
= gen_reg_rtx (TImode
);
40552 emit_insn (gen_ssse3_palignrti (target
, gen_lowpart (TImode
, d
->op1
),
40553 gen_lowpart (TImode
, d
->op0
), shift
));
40555 dcopy
.op0
= dcopy
.op1
= gen_lowpart (d
->vmode
, target
);
40556 dcopy
.one_operand_p
= true;
40559 for (i
= 0; i
< nelt
; ++i
)
40561 unsigned e
= dcopy
.perm
[i
] - min
;
40567 /* Test for the degenerate case where the alignment by itself
40568 produces the desired permutation. */
40571 emit_move_insn (d
->target
, dcopy
.op0
);
40575 ok
= expand_vec_perm_1 (&dcopy
);
40581 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
40583 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40584 a two vector permutation into a single vector permutation by using
40585 an interleave operation to merge the vectors. */
40588 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
40590 struct expand_vec_perm_d dremap
, dfinal
;
40591 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
40592 unsigned HOST_WIDE_INT contents
;
40593 unsigned char remap
[2 * MAX_VECT_LEN
];
40595 bool ok
, same_halves
= false;
40597 if (GET_MODE_SIZE (d
->vmode
) == 16)
40599 if (d
->one_operand_p
)
40602 else if (GET_MODE_SIZE (d
->vmode
) == 32)
40606 /* For 32-byte modes allow even d->one_operand_p.
40607 The lack of cross-lane shuffling in some instructions
40608 might prevent a single insn shuffle. */
40610 dfinal
.testing_p
= true;
40611 /* If expand_vec_perm_interleave3 can expand this into
40612 a 3 insn sequence, give up and let it be expanded as
40613 3 insn sequence. While that is one insn longer,
40614 it doesn't need a memory operand and in the common
40615 case that both interleave low and high permutations
40616 with the same operands are adjacent needs 4 insns
40617 for both after CSE. */
40618 if (expand_vec_perm_interleave3 (&dfinal
))
40624 /* Examine from whence the elements come. */
40626 for (i
= 0; i
< nelt
; ++i
)
40627 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
40629 memset (remap
, 0xff, sizeof (remap
));
40632 if (GET_MODE_SIZE (d
->vmode
) == 16)
40634 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
40636 /* Split the two input vectors into 4 halves. */
40637 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
40642 /* If the elements from the low halves use interleave low, and similarly
40643 for interleave high. If the elements are from mis-matched halves, we
40644 can use shufps for V4SF/V4SI or do a DImode shuffle. */
40645 if ((contents
& (h1
| h3
)) == contents
)
40648 for (i
= 0; i
< nelt2
; ++i
)
40651 remap
[i
+ nelt
] = i
* 2 + 1;
40652 dremap
.perm
[i
* 2] = i
;
40653 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40655 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40656 dremap
.vmode
= V4SFmode
;
40658 else if ((contents
& (h2
| h4
)) == contents
)
40661 for (i
= 0; i
< nelt2
; ++i
)
40663 remap
[i
+ nelt2
] = i
* 2;
40664 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
40665 dremap
.perm
[i
* 2] = i
+ nelt2
;
40666 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
40668 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
40669 dremap
.vmode
= V4SFmode
;
40671 else if ((contents
& (h1
| h4
)) == contents
)
40674 for (i
= 0; i
< nelt2
; ++i
)
40677 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
40678 dremap
.perm
[i
] = i
;
40679 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
40684 dremap
.vmode
= V2DImode
;
40686 dremap
.perm
[0] = 0;
40687 dremap
.perm
[1] = 3;
40690 else if ((contents
& (h2
| h3
)) == contents
)
40693 for (i
= 0; i
< nelt2
; ++i
)
40695 remap
[i
+ nelt2
] = i
;
40696 remap
[i
+ nelt
] = i
+ nelt2
;
40697 dremap
.perm
[i
] = i
+ nelt2
;
40698 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
40703 dremap
.vmode
= V2DImode
;
40705 dremap
.perm
[0] = 1;
40706 dremap
.perm
[1] = 2;
40714 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
40715 unsigned HOST_WIDE_INT q
[8];
40716 unsigned int nonzero_halves
[4];
40718 /* Split the two input vectors into 8 quarters. */
40719 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
40720 for (i
= 1; i
< 8; ++i
)
40721 q
[i
] = q
[0] << (nelt4
* i
);
40722 for (i
= 0; i
< 4; ++i
)
40723 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
40725 nonzero_halves
[nzcnt
] = i
;
40731 gcc_assert (d
->one_operand_p
);
40732 nonzero_halves
[1] = nonzero_halves
[0];
40733 same_halves
= true;
40735 else if (d
->one_operand_p
)
40737 gcc_assert (nonzero_halves
[0] == 0);
40738 gcc_assert (nonzero_halves
[1] == 1);
40743 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
40745 /* Attempt to increase the likelihood that dfinal
40746 shuffle will be intra-lane. */
40747 char tmph
= nonzero_halves
[0];
40748 nonzero_halves
[0] = nonzero_halves
[1];
40749 nonzero_halves
[1] = tmph
;
40752 /* vperm2f128 or vperm2i128. */
40753 for (i
= 0; i
< nelt2
; ++i
)
40755 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
40756 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
40757 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
40758 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
40761 if (d
->vmode
!= V8SFmode
40762 && d
->vmode
!= V4DFmode
40763 && d
->vmode
!= V8SImode
)
40765 dremap
.vmode
= V8SImode
;
40767 for (i
= 0; i
< 4; ++i
)
40769 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
40770 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
40774 else if (d
->one_operand_p
)
40776 else if (TARGET_AVX2
40777 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
40780 for (i
= 0; i
< nelt4
; ++i
)
40783 remap
[i
+ nelt
] = i
* 2 + 1;
40784 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
40785 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
40786 dremap
.perm
[i
* 2] = i
;
40787 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
40788 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
40789 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
40792 else if (TARGET_AVX2
40793 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
40796 for (i
= 0; i
< nelt4
; ++i
)
40798 remap
[i
+ nelt4
] = i
* 2;
40799 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
40800 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
40801 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
40802 dremap
.perm
[i
* 2] = i
+ nelt4
;
40803 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
40804 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
40805 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
40812 /* Use the remapping array set up above to move the elements from their
40813 swizzled locations into their final destinations. */
40815 for (i
= 0; i
< nelt
; ++i
)
40817 unsigned e
= remap
[d
->perm
[i
]];
40818 gcc_assert (e
< nelt
);
40819 /* If same_halves is true, both halves of the remapped vector are the
40820 same. Avoid cross-lane accesses if possible. */
40821 if (same_halves
&& i
>= nelt2
)
40823 gcc_assert (e
< nelt2
);
40824 dfinal
.perm
[i
] = e
+ nelt2
;
40827 dfinal
.perm
[i
] = e
;
40829 dremap
.target
= gen_reg_rtx (dremap
.vmode
);
40830 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40831 dfinal
.op1
= dfinal
.op0
;
40832 dfinal
.one_operand_p
= true;
40834 /* Test if the final remap can be done with a single insn. For V4SFmode or
40835 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
40837 ok
= expand_vec_perm_1 (&dfinal
);
40838 seq
= get_insns ();
40847 if (dremap
.vmode
!= dfinal
.vmode
)
40849 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
40850 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
40853 ok
= expand_vec_perm_1 (&dremap
);
40860 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
40861 a single vector cross-lane permutation into vpermq followed
40862 by any of the single insn permutations. */
40865 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
40867 struct expand_vec_perm_d dremap
, dfinal
;
40868 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
40869 unsigned contents
[2];
40873 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
40874 && d
->one_operand_p
))
40879 for (i
= 0; i
< nelt2
; ++i
)
40881 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
40882 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
40885 for (i
= 0; i
< 2; ++i
)
40887 unsigned int cnt
= 0;
40888 for (j
= 0; j
< 4; ++j
)
40889 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
40897 dremap
.vmode
= V4DImode
;
40899 dremap
.target
= gen_reg_rtx (V4DImode
);
40900 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
40901 dremap
.op1
= dremap
.op0
;
40902 dremap
.one_operand_p
= true;
40903 for (i
= 0; i
< 2; ++i
)
40905 unsigned int cnt
= 0;
40906 for (j
= 0; j
< 4; ++j
)
40907 if ((contents
[i
] & (1u << j
)) != 0)
40908 dremap
.perm
[2 * i
+ cnt
++] = j
;
40909 for (; cnt
< 2; ++cnt
)
40910 dremap
.perm
[2 * i
+ cnt
] = 0;
40914 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
40915 dfinal
.op1
= dfinal
.op0
;
40916 dfinal
.one_operand_p
= true;
40917 for (i
= 0, j
= 0; i
< nelt
; ++i
)
40921 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
40922 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
40924 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
40925 dfinal
.perm
[i
] |= nelt4
;
40927 gcc_unreachable ();
40930 ok
= expand_vec_perm_1 (&dremap
);
40933 ok
= expand_vec_perm_1 (&dfinal
);
40939 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
40940 a vector permutation using two instructions, vperm2f128 resp.
40941 vperm2i128 followed by any single in-lane permutation. */
40944 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
40946 struct expand_vec_perm_d dfirst
, dsecond
;
40947 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
40951 || GET_MODE_SIZE (d
->vmode
) != 32
40952 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
40956 dsecond
.one_operand_p
= false;
40957 dsecond
.testing_p
= true;
40959 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
40960 immediate. For perm < 16 the second permutation uses
40961 d->op0 as first operand, for perm >= 16 it uses d->op1
40962 as first operand. The second operand is the result of
40964 for (perm
= 0; perm
< 32; perm
++)
40966 /* Ignore permutations which do not move anything cross-lane. */
40969 /* The second shuffle for e.g. V4DFmode has
40970 0123 and ABCD operands.
40971 Ignore AB23, as 23 is already in the second lane
40972 of the first operand. */
40973 if ((perm
& 0xc) == (1 << 2)) continue;
40974 /* And 01CD, as 01 is in the first lane of the first
40976 if ((perm
& 3) == 0) continue;
40977 /* And 4567, as then the vperm2[fi]128 doesn't change
40978 anything on the original 4567 second operand. */
40979 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
40983 /* The second shuffle for e.g. V4DFmode has
40984 4567 and ABCD operands.
40985 Ignore AB67, as 67 is already in the second lane
40986 of the first operand. */
40987 if ((perm
& 0xc) == (3 << 2)) continue;
40988 /* And 45CD, as 45 is in the first lane of the first
40990 if ((perm
& 3) == 2) continue;
40991 /* And 0123, as then the vperm2[fi]128 doesn't change
40992 anything on the original 0123 first operand. */
40993 if ((perm
& 0xf) == (1 << 2)) continue;
40996 for (i
= 0; i
< nelt
; i
++)
40998 j
= d
->perm
[i
] / nelt2
;
40999 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
41000 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
41001 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
41002 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
41010 ok
= expand_vec_perm_1 (&dsecond
);
41021 /* Found a usable second shuffle. dfirst will be
41022 vperm2f128 on d->op0 and d->op1. */
41023 dsecond
.testing_p
= false;
41025 dfirst
.target
= gen_reg_rtx (d
->vmode
);
41026 for (i
= 0; i
< nelt
; i
++)
41027 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
41028 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
41030 ok
= expand_vec_perm_1 (&dfirst
);
41033 /* And dsecond is some single insn shuffle, taking
41034 d->op0 and result of vperm2f128 (if perm < 16) or
41035 d->op1 and result of vperm2f128 (otherwise). */
41036 dsecond
.op1
= dfirst
.target
;
41038 dsecond
.op0
= dfirst
.op1
;
41040 ok
= expand_vec_perm_1 (&dsecond
);
41046 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
41047 if (d
->one_operand_p
)
41054 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
41055 a two vector permutation using 2 intra-lane interleave insns
41056 and cross-lane shuffle for 32-byte vectors. */
41059 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
41062 rtx (*gen
) (rtx
, rtx
, rtx
);
41064 if (d
->one_operand_p
)
41066 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
41068 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
41074 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
41076 for (i
= 0; i
< nelt
; i
+= 2)
41077 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
41078 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
41088 gen
= gen_vec_interleave_highv32qi
;
41090 gen
= gen_vec_interleave_lowv32qi
;
41094 gen
= gen_vec_interleave_highv16hi
;
41096 gen
= gen_vec_interleave_lowv16hi
;
41100 gen
= gen_vec_interleave_highv8si
;
41102 gen
= gen_vec_interleave_lowv8si
;
41106 gen
= gen_vec_interleave_highv4di
;
41108 gen
= gen_vec_interleave_lowv4di
;
41112 gen
= gen_vec_interleave_highv8sf
;
41114 gen
= gen_vec_interleave_lowv8sf
;
41118 gen
= gen_vec_interleave_highv4df
;
41120 gen
= gen_vec_interleave_lowv4df
;
41123 gcc_unreachable ();
41126 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
41130 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
41131 a single vector permutation using a single intra-lane vector
41132 permutation, vperm2f128 swapping the lanes and vblend* insn blending
41133 the non-swapped and swapped vectors together. */
41136 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
41138 struct expand_vec_perm_d dfirst
, dsecond
;
41139 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
41142 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
41146 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
41147 || !d
->one_operand_p
)
41151 for (i
= 0; i
< nelt
; i
++)
41152 dfirst
.perm
[i
] = 0xff;
41153 for (i
= 0, msk
= 0; i
< nelt
; i
++)
41155 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
41156 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
41158 dfirst
.perm
[j
] = d
->perm
[i
];
41162 for (i
= 0; i
< nelt
; i
++)
41163 if (dfirst
.perm
[i
] == 0xff)
41164 dfirst
.perm
[i
] = i
;
41167 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
41170 ok
= expand_vec_perm_1 (&dfirst
);
41171 seq
= get_insns ();
41183 dsecond
.op0
= dfirst
.target
;
41184 dsecond
.op1
= dfirst
.target
;
41185 dsecond
.one_operand_p
= true;
41186 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
41187 for (i
= 0; i
< nelt
; i
++)
41188 dsecond
.perm
[i
] = i
^ nelt2
;
41190 ok
= expand_vec_perm_1 (&dsecond
);
41193 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
41194 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
41198 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
41199 permutation using two vperm2f128, followed by a vshufpd insn blending
41200 the two vectors together. */
41203 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
41205 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
41208 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
41218 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
41219 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
41220 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
41221 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
41222 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
41223 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
41224 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
41225 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
41226 dthird
.perm
[0] = (d
->perm
[0] % 2);
41227 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
41228 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
41229 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
41231 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
41232 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
41233 dthird
.op0
= dfirst
.target
;
41234 dthird
.op1
= dsecond
.target
;
41235 dthird
.one_operand_p
= false;
41237 canonicalize_perm (&dfirst
);
41238 canonicalize_perm (&dsecond
);
41240 ok
= expand_vec_perm_1 (&dfirst
)
41241 && expand_vec_perm_1 (&dsecond
)
41242 && expand_vec_perm_1 (&dthird
);
41249 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
41250 permutation with two pshufb insns and an ior. We should have already
41251 failed all two instruction sequences. */
41254 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
41256 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
41257 unsigned int i
, nelt
, eltsz
;
41259 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
41261 gcc_assert (!d
->one_operand_p
);
41264 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41266 /* Generate two permutation masks. If the required element is within
41267 the given vector it is shuffled into the proper lane. If the required
41268 element is in the other vector, force a zero into the lane by setting
41269 bit 7 in the permutation mask. */
41270 m128
= GEN_INT (-128);
41271 for (i
= 0; i
< nelt
; ++i
)
41273 unsigned j
, e
= d
->perm
[i
];
41274 unsigned which
= (e
>= nelt
);
41278 for (j
= 0; j
< eltsz
; ++j
)
41280 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
41281 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
41285 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
41286 vperm
= force_reg (V16QImode
, vperm
);
41288 l
= gen_reg_rtx (V16QImode
);
41289 op
= gen_lowpart (V16QImode
, d
->op0
);
41290 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
41292 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
41293 vperm
= force_reg (V16QImode
, vperm
);
41295 h
= gen_reg_rtx (V16QImode
);
41296 op
= gen_lowpart (V16QImode
, d
->op1
);
41297 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
41300 if (d
->vmode
!= V16QImode
)
41301 op
= gen_reg_rtx (V16QImode
);
41302 emit_insn (gen_iorv16qi3 (op
, l
, h
));
41303 if (op
!= d
->target
)
41304 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41309 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
41310 with two vpshufb insns, vpermq and vpor. We should have already failed
41311 all two or three instruction sequences. */
41314 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
41316 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
41317 unsigned int i
, nelt
, eltsz
;
41320 || !d
->one_operand_p
41321 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41328 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41330 /* Generate two permutation masks. If the required element is within
41331 the same lane, it is shuffled in. If the required element from the
41332 other lane, force a zero by setting bit 7 in the permutation mask.
41333 In the other mask the mask has non-negative elements if element
41334 is requested from the other lane, but also moved to the other lane,
41335 so that the result of vpshufb can have the two V2TImode halves
41337 m128
= GEN_INT (-128);
41338 for (i
= 0; i
< nelt
; ++i
)
41340 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41341 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41343 for (j
= 0; j
< eltsz
; ++j
)
41345 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
41346 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
41350 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41351 vperm
= force_reg (V32QImode
, vperm
);
41353 h
= gen_reg_rtx (V32QImode
);
41354 op
= gen_lowpart (V32QImode
, d
->op0
);
41355 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41357 /* Swap the 128-byte lanes of h into hp. */
41358 hp
= gen_reg_rtx (V4DImode
);
41359 op
= gen_lowpart (V4DImode
, h
);
41360 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
41363 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41364 vperm
= force_reg (V32QImode
, vperm
);
41366 l
= gen_reg_rtx (V32QImode
);
41367 op
= gen_lowpart (V32QImode
, d
->op0
);
41368 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41371 if (d
->vmode
!= V32QImode
)
41372 op
= gen_reg_rtx (V32QImode
);
41373 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
41374 if (op
!= d
->target
)
41375 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41380 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
41381 and extract-odd permutations of two V32QImode and V16QImode operand
41382 with two vpshufb insns, vpor and vpermq. We should have already
41383 failed all two or three instruction sequences. */
41386 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
41388 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
41389 unsigned int i
, nelt
, eltsz
;
41392 || d
->one_operand_p
41393 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41396 for (i
= 0; i
< d
->nelt
; ++i
)
41397 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
41404 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41406 /* Generate two permutation masks. In the first permutation mask
41407 the first quarter will contain indexes for the first half
41408 of the op0, the second quarter will contain bit 7 set, third quarter
41409 will contain indexes for the second half of the op0 and the
41410 last quarter bit 7 set. In the second permutation mask
41411 the first quarter will contain bit 7 set, the second quarter
41412 indexes for the first half of the op1, the third quarter bit 7 set
41413 and last quarter indexes for the second half of the op1.
41414 I.e. the first mask e.g. for V32QImode extract even will be:
41415 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
41416 (all values masked with 0xf except for -128) and second mask
41417 for extract even will be
41418 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
41419 m128
= GEN_INT (-128);
41420 for (i
= 0; i
< nelt
; ++i
)
41422 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41423 unsigned which
= d
->perm
[i
] >= nelt
;
41424 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
41426 for (j
= 0; j
< eltsz
; ++j
)
41428 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
41429 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
41433 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
41434 vperm
= force_reg (V32QImode
, vperm
);
41436 l
= gen_reg_rtx (V32QImode
);
41437 op
= gen_lowpart (V32QImode
, d
->op0
);
41438 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
41440 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
41441 vperm
= force_reg (V32QImode
, vperm
);
41443 h
= gen_reg_rtx (V32QImode
);
41444 op
= gen_lowpart (V32QImode
, d
->op1
);
41445 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
41447 ior
= gen_reg_rtx (V32QImode
);
41448 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
41450 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
41451 op
= gen_reg_rtx (V4DImode
);
41452 ior
= gen_lowpart (V4DImode
, ior
);
41453 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
41454 const1_rtx
, GEN_INT (3)));
41455 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41460 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
41461 and extract-odd permutations. */
41464 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
41466 rtx t1
, t2
, t3
, t4
, t5
;
41471 t1
= gen_reg_rtx (V4DFmode
);
41472 t2
= gen_reg_rtx (V4DFmode
);
41474 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41475 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41476 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41478 /* Now an unpck[lh]pd will produce the result required. */
41480 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
41482 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
41488 int mask
= odd
? 0xdd : 0x88;
41490 t1
= gen_reg_rtx (V8SFmode
);
41491 t2
= gen_reg_rtx (V8SFmode
);
41492 t3
= gen_reg_rtx (V8SFmode
);
41494 /* Shuffle within the 128-bit lanes to produce:
41495 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
41496 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
41499 /* Shuffle the lanes around to produce:
41500 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
41501 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
41504 /* Shuffle within the 128-bit lanes to produce:
41505 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
41506 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
41508 /* Shuffle within the 128-bit lanes to produce:
41509 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
41510 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
41512 /* Shuffle the lanes around to produce:
41513 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
41514 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
41523 /* These are always directly implementable by expand_vec_perm_1. */
41524 gcc_unreachable ();
41528 return expand_vec_perm_pshufb2 (d
);
41531 /* We need 2*log2(N)-1 operations to achieve odd/even
41532 with interleave. */
41533 t1
= gen_reg_rtx (V8HImode
);
41534 t2
= gen_reg_rtx (V8HImode
);
41535 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
41536 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
41537 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
41538 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
41540 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
41542 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
41549 return expand_vec_perm_pshufb2 (d
);
41552 t1
= gen_reg_rtx (V16QImode
);
41553 t2
= gen_reg_rtx (V16QImode
);
41554 t3
= gen_reg_rtx (V16QImode
);
41555 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
41556 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
41557 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
41558 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
41559 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
41560 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
41562 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
41564 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
41571 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
41576 struct expand_vec_perm_d d_copy
= *d
;
41577 d_copy
.vmode
= V4DFmode
;
41578 d_copy
.target
= gen_reg_rtx (V4DFmode
);
41579 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
41580 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
41581 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41584 emit_move_insn (d
->target
,
41585 gen_lowpart (V4DImode
, d_copy
.target
));
41591 t1
= gen_reg_rtx (V4DImode
);
41592 t2
= gen_reg_rtx (V4DImode
);
41594 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
41595 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
41596 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
41598 /* Now an vpunpck[lh]qdq will produce the result required. */
41600 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
41602 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
41609 struct expand_vec_perm_d d_copy
= *d
;
41610 d_copy
.vmode
= V8SFmode
;
41611 d_copy
.target
= gen_reg_rtx (V8SFmode
);
41612 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
41613 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
41614 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
41617 emit_move_insn (d
->target
,
41618 gen_lowpart (V8SImode
, d_copy
.target
));
41624 t1
= gen_reg_rtx (V8SImode
);
41625 t2
= gen_reg_rtx (V8SImode
);
41626 t3
= gen_reg_rtx (V4DImode
);
41627 t4
= gen_reg_rtx (V4DImode
);
41628 t5
= gen_reg_rtx (V4DImode
);
41630 /* Shuffle the lanes around into
41631 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
41632 emit_insn (gen_avx2_permv2ti (t3
, gen_lowpart (V4DImode
, d
->op0
),
41633 gen_lowpart (V4DImode
, d
->op1
),
41635 emit_insn (gen_avx2_permv2ti (t4
, gen_lowpart (V4DImode
, d
->op0
),
41636 gen_lowpart (V4DImode
, d
->op1
),
41639 /* Swap the 2nd and 3rd position in each lane into
41640 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
41641 emit_insn (gen_avx2_pshufdv3 (t1
, gen_lowpart (V8SImode
, t3
),
41642 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41643 emit_insn (gen_avx2_pshufdv3 (t2
, gen_lowpart (V8SImode
, t4
),
41644 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
41646 /* Now an vpunpck[lh]qdq will produce
41647 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
41649 t3
= gen_avx2_interleave_highv4di (t5
, gen_lowpart (V4DImode
, t1
),
41650 gen_lowpart (V4DImode
, t2
));
41652 t3
= gen_avx2_interleave_lowv4di (t5
, gen_lowpart (V4DImode
, t1
),
41653 gen_lowpart (V4DImode
, t2
));
41655 emit_move_insn (d
->target
, gen_lowpart (V8SImode
, t5
));
41659 gcc_unreachable ();
41665 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41666 extract-even and extract-odd permutations. */
41669 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
41671 unsigned i
, odd
, nelt
= d
->nelt
;
41674 if (odd
!= 0 && odd
!= 1)
41677 for (i
= 1; i
< nelt
; ++i
)
41678 if (d
->perm
[i
] != 2 * i
+ odd
)
41681 return expand_vec_perm_even_odd_1 (d
, odd
);
41684 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
41685 permutations. We assume that expand_vec_perm_1 has already failed. */
41688 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
41690 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
41691 enum machine_mode vmode
= d
->vmode
;
41692 unsigned char perm2
[4];
41693 rtx op0
= d
->op0
, dest
;
41700 /* These are special-cased in sse.md so that we can optionally
41701 use the vbroadcast instruction. They expand to two insns
41702 if the input happens to be in a register. */
41703 gcc_unreachable ();
41709 /* These are always implementable using standard shuffle patterns. */
41710 gcc_unreachable ();
41714 /* These can be implemented via interleave. We save one insn by
41715 stopping once we have promoted to V4SImode and then use pshufd. */
41719 rtx (*gen
) (rtx
, rtx
, rtx
)
41720 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
41721 : gen_vec_interleave_lowv8hi
;
41725 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
41726 : gen_vec_interleave_highv8hi
;
41731 dest
= gen_reg_rtx (vmode
);
41732 emit_insn (gen (dest
, op0
, op0
));
41733 vmode
= get_mode_wider_vector (vmode
);
41734 op0
= gen_lowpart (vmode
, dest
);
41736 while (vmode
!= V4SImode
);
41738 memset (perm2
, elt
, 4);
41739 dest
= gen_reg_rtx (V4SImode
);
41740 ok
= expand_vselect (dest
, op0
, perm2
, 4, d
->testing_p
);
41743 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, dest
));
41750 /* For AVX2 broadcasts of the first element vpbroadcast* or
41751 vpermq should be used by expand_vec_perm_1. */
41752 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
41756 gcc_unreachable ();
41760 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
41761 broadcast permutations. */
41764 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
41766 unsigned i
, elt
, nelt
= d
->nelt
;
41768 if (!d
->one_operand_p
)
41772 for (i
= 1; i
< nelt
; ++i
)
41773 if (d
->perm
[i
] != elt
)
41776 return expand_vec_perm_broadcast_1 (d
);
41779 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
41780 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
41781 all the shorter instruction sequences. */
41784 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
41786 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
41787 unsigned int i
, nelt
, eltsz
;
41791 || d
->one_operand_p
41792 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
41799 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
41801 /* Generate 4 permutation masks. If the required element is within
41802 the same lane, it is shuffled in. If the required element from the
41803 other lane, force a zero by setting bit 7 in the permutation mask.
41804 In the other mask the mask has non-negative elements if element
41805 is requested from the other lane, but also moved to the other lane,
41806 so that the result of vpshufb can have the two V2TImode halves
41808 m128
= GEN_INT (-128);
41809 for (i
= 0; i
< 32; ++i
)
41811 rperm
[0][i
] = m128
;
41812 rperm
[1][i
] = m128
;
41813 rperm
[2][i
] = m128
;
41814 rperm
[3][i
] = m128
;
41820 for (i
= 0; i
< nelt
; ++i
)
41822 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
41823 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
41824 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
41826 for (j
= 0; j
< eltsz
; ++j
)
41827 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
41828 used
[which
] = true;
41831 for (i
= 0; i
< 2; ++i
)
41833 if (!used
[2 * i
+ 1])
41838 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
41839 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
41840 vperm
= force_reg (V32QImode
, vperm
);
41841 h
[i
] = gen_reg_rtx (V32QImode
);
41842 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41843 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
41846 /* Swap the 128-byte lanes of h[X]. */
41847 for (i
= 0; i
< 2; ++i
)
41849 if (h
[i
] == NULL_RTX
)
41851 op
= gen_reg_rtx (V4DImode
);
41852 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
41853 const2_rtx
, GEN_INT (3), const0_rtx
,
41855 h
[i
] = gen_lowpart (V32QImode
, op
);
41858 for (i
= 0; i
< 2; ++i
)
41865 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
41866 vperm
= force_reg (V32QImode
, vperm
);
41867 l
[i
] = gen_reg_rtx (V32QImode
);
41868 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
41869 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
41872 for (i
= 0; i
< 2; ++i
)
41876 op
= gen_reg_rtx (V32QImode
);
41877 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
41884 gcc_assert (l
[0] && l
[1]);
41886 if (d
->vmode
!= V32QImode
)
41887 op
= gen_reg_rtx (V32QImode
);
41888 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
41889 if (op
!= d
->target
)
41890 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
41894 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
41895 With all of the interface bits taken care of, perform the expansion
41896 in D and return true on success. */
41899 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
41901 /* Try a single instruction expansion. */
41902 if (expand_vec_perm_1 (d
))
41905 /* Try sequences of two instructions. */
41907 if (expand_vec_perm_pshuflw_pshufhw (d
))
41910 if (expand_vec_perm_palignr (d
))
41913 if (expand_vec_perm_interleave2 (d
))
41916 if (expand_vec_perm_broadcast (d
))
41919 if (expand_vec_perm_vpermq_perm_1 (d
))
41922 if (expand_vec_perm_vperm2f128 (d
))
41925 /* Try sequences of three instructions. */
41927 if (expand_vec_perm_2vperm2f128_vshuf (d
))
41930 if (expand_vec_perm_pshufb2 (d
))
41933 if (expand_vec_perm_interleave3 (d
))
41936 if (expand_vec_perm_vperm2f128_vblend (d
))
41939 /* Try sequences of four instructions. */
41941 if (expand_vec_perm_vpshufb2_vpermq (d
))
41944 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
41947 /* ??? Look for narrow permutations whose element orderings would
41948 allow the promotion to a wider mode. */
41950 /* ??? Look for sequences of interleave or a wider permute that place
41951 the data into the correct lanes for a half-vector shuffle like
41952 pshuf[lh]w or vpermilps. */
41954 /* ??? Look for sequences of interleave that produce the desired results.
41955 The combinatorics of punpck[lh] get pretty ugly... */
41957 if (expand_vec_perm_even_odd (d
))
41960 /* Even longer sequences. */
41961 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
41967 /* If a permutation only uses one operand, make it clear. Returns true
41968 if the permutation references both operands. */
41971 canonicalize_perm (struct expand_vec_perm_d
*d
)
41973 int i
, which
, nelt
= d
->nelt
;
41975 for (i
= which
= 0; i
< nelt
; ++i
)
41976 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
41978 d
->one_operand_p
= true;
41985 if (!rtx_equal_p (d
->op0
, d
->op1
))
41987 d
->one_operand_p
= false;
41990 /* The elements of PERM do not suggest that only the first operand
41991 is used, but both operands are identical. Allow easier matching
41992 of the permutation by folding the permutation into the single
41997 for (i
= 0; i
< nelt
; ++i
)
41998 d
->perm
[i
] &= nelt
- 1;
42007 return (which
== 3);
42011 ix86_expand_vec_perm_const (rtx operands
[4])
42013 struct expand_vec_perm_d d
;
42014 unsigned char perm
[MAX_VECT_LEN
];
42019 d
.target
= operands
[0];
42020 d
.op0
= operands
[1];
42021 d
.op1
= operands
[2];
42024 d
.vmode
= GET_MODE (d
.target
);
42025 gcc_assert (VECTOR_MODE_P (d
.vmode
));
42026 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
42027 d
.testing_p
= false;
42029 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
42030 gcc_assert (XVECLEN (sel
, 0) == nelt
);
42031 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
42033 for (i
= 0; i
< nelt
; ++i
)
42035 rtx e
= XVECEXP (sel
, 0, i
);
42036 int ei
= INTVAL (e
) & (2 * nelt
- 1);
42041 two_args
= canonicalize_perm (&d
);
42043 if (ix86_expand_vec_perm_const_1 (&d
))
42046 /* If the selector says both arguments are needed, but the operands are the
42047 same, the above tried to expand with one_operand_p and flattened selector.
42048 If that didn't work, retry without one_operand_p; we succeeded with that
42050 if (two_args
&& d
.one_operand_p
)
42052 d
.one_operand_p
= false;
42053 memcpy (d
.perm
, perm
, sizeof (perm
));
42054 return ix86_expand_vec_perm_const_1 (&d
);
42060 /* Implement targetm.vectorize.vec_perm_const_ok. */
42063 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
42064 const unsigned char *sel
)
42066 struct expand_vec_perm_d d
;
42067 unsigned int i
, nelt
, which
;
42071 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
42072 d
.testing_p
= true;
42074 /* Given sufficient ISA support we can just return true here
42075 for selected vector modes. */
42076 if (GET_MODE_SIZE (d
.vmode
) == 16)
42078 /* All implementable with a single vpperm insn. */
42081 /* All implementable with 2 pshufb + 1 ior. */
42084 /* All implementable with shufpd or unpck[lh]pd. */
42089 /* Extract the values from the vector CST into the permutation
42091 memcpy (d
.perm
, sel
, nelt
);
42092 for (i
= which
= 0; i
< nelt
; ++i
)
42094 unsigned char e
= d
.perm
[i
];
42095 gcc_assert (e
< 2 * nelt
);
42096 which
|= (e
< nelt
? 1 : 2);
42099 /* For all elements from second vector, fold the elements to first. */
42101 for (i
= 0; i
< nelt
; ++i
)
42104 /* Check whether the mask can be applied to the vector type. */
42105 d
.one_operand_p
= (which
!= 3);
42107 /* Implementable with shufps or pshufd. */
42108 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
42111 /* Otherwise we have to go through the motions and see if we can
42112 figure out how to generate the requested permutation. */
42113 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
42114 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
42115 if (!d
.one_operand_p
)
42116 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
42119 ret
= ix86_expand_vec_perm_const_1 (&d
);
42126 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
42128 struct expand_vec_perm_d d
;
42134 d
.vmode
= GET_MODE (targ
);
42135 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
42136 d
.one_operand_p
= false;
42137 d
.testing_p
= false;
42139 for (i
= 0; i
< nelt
; ++i
)
42140 d
.perm
[i
] = i
* 2 + odd
;
42142 /* We'll either be able to implement the permutation directly... */
42143 if (expand_vec_perm_1 (&d
))
42146 /* ... or we use the special-case patterns. */
42147 expand_vec_perm_even_odd_1 (&d
, odd
);
42151 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
42153 struct expand_vec_perm_d d
;
42154 unsigned i
, nelt
, base
;
42160 d
.vmode
= GET_MODE (targ
);
42161 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
42162 d
.one_operand_p
= false;
42163 d
.testing_p
= false;
42165 base
= high_p
? nelt
/ 2 : 0;
42166 for (i
= 0; i
< nelt
/ 2; ++i
)
42168 d
.perm
[i
* 2] = i
+ base
;
42169 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
42172 /* Note that for AVX this isn't one instruction. */
42173 ok
= ix86_expand_vec_perm_const_1 (&d
);
42178 /* Expand a vector operation CODE for a V*QImode in terms of the
42179 same operation on V*HImode. */
42182 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
42184 enum machine_mode qimode
= GET_MODE (dest
);
42185 enum machine_mode himode
;
42186 rtx (*gen_il
) (rtx
, rtx
, rtx
);
42187 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
42188 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
42189 struct expand_vec_perm_d d
;
42190 bool ok
, full_interleave
;
42191 bool uns_p
= false;
42198 gen_il
= gen_vec_interleave_lowv16qi
;
42199 gen_ih
= gen_vec_interleave_highv16qi
;
42202 himode
= V16HImode
;
42203 gen_il
= gen_avx2_interleave_lowv32qi
;
42204 gen_ih
= gen_avx2_interleave_highv32qi
;
42207 gcc_unreachable ();
42210 op2_l
= op2_h
= op2
;
42214 /* Unpack data such that we've got a source byte in each low byte of
42215 each word. We don't care what goes into the high byte of each word.
42216 Rather than trying to get zero in there, most convenient is to let
42217 it be a copy of the low byte. */
42218 op2_l
= gen_reg_rtx (qimode
);
42219 op2_h
= gen_reg_rtx (qimode
);
42220 emit_insn (gen_il (op2_l
, op2
, op2
));
42221 emit_insn (gen_ih (op2_h
, op2
, op2
));
42224 op1_l
= gen_reg_rtx (qimode
);
42225 op1_h
= gen_reg_rtx (qimode
);
42226 emit_insn (gen_il (op1_l
, op1
, op1
));
42227 emit_insn (gen_ih (op1_h
, op1
, op1
));
42228 full_interleave
= qimode
== V16QImode
;
42236 op1_l
= gen_reg_rtx (himode
);
42237 op1_h
= gen_reg_rtx (himode
);
42238 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
42239 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
42240 full_interleave
= true;
42243 gcc_unreachable ();
42246 /* Perform the operation. */
42247 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
42249 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
42251 gcc_assert (res_l
&& res_h
);
42253 /* Merge the data back into the right place. */
42255 d
.op0
= gen_lowpart (qimode
, res_l
);
42256 d
.op1
= gen_lowpart (qimode
, res_h
);
42258 d
.nelt
= GET_MODE_NUNITS (qimode
);
42259 d
.one_operand_p
= false;
42260 d
.testing_p
= false;
42262 if (full_interleave
)
42264 /* For SSE2, we used an full interleave, so the desired
42265 results are in the even elements. */
42266 for (i
= 0; i
< 32; ++i
)
42271 /* For AVX, the interleave used above was not cross-lane. So the
42272 extraction is evens but with the second and third quarter swapped.
42273 Happily, that is even one insn shorter than even extraction. */
42274 for (i
= 0; i
< 32; ++i
)
42275 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
42278 ok
= ix86_expand_vec_perm_const_1 (&d
);
42281 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42282 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
42285 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
42286 if op is CONST_VECTOR with all odd elements equal to their
42287 preceding element. */
42290 const_vector_equal_evenodd_p (rtx op
)
42292 enum machine_mode mode
= GET_MODE (op
);
42293 int i
, nunits
= GET_MODE_NUNITS (mode
);
42294 if (GET_CODE (op
) != CONST_VECTOR
42295 || nunits
!= CONST_VECTOR_NUNITS (op
))
42297 for (i
= 0; i
< nunits
; i
+= 2)
42298 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
42304 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
42305 bool uns_p
, bool odd_p
)
42307 enum machine_mode mode
= GET_MODE (op1
);
42308 enum machine_mode wmode
= GET_MODE (dest
);
42310 rtx orig_op1
= op1
, orig_op2
= op2
;
42312 if (!nonimmediate_operand (op1
, mode
))
42313 op1
= force_reg (mode
, op1
);
42314 if (!nonimmediate_operand (op2
, mode
))
42315 op2
= force_reg (mode
, op2
);
42317 /* We only play even/odd games with vectors of SImode. */
42318 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
42320 /* If we're looking for the odd results, shift those members down to
42321 the even slots. For some cpus this is faster than a PSHUFD. */
42324 /* For XOP use vpmacsdqh, but only for smult, as it is only
42326 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
42328 x
= force_reg (wmode
, CONST0_RTX (wmode
));
42329 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
42333 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
42334 if (!const_vector_equal_evenodd_p (orig_op1
))
42335 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
42336 x
, NULL
, 1, OPTAB_DIRECT
);
42337 if (!const_vector_equal_evenodd_p (orig_op2
))
42338 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
42339 x
, NULL
, 1, OPTAB_DIRECT
);
42340 op1
= gen_lowpart (mode
, op1
);
42341 op2
= gen_lowpart (mode
, op2
);
42344 if (mode
== V8SImode
)
42347 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
42349 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
42352 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
42353 else if (TARGET_SSE4_1
)
42354 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
42357 rtx s1
, s2
, t0
, t1
, t2
;
42359 /* The easiest way to implement this without PMULDQ is to go through
42360 the motions as if we are performing a full 64-bit multiply. With
42361 the exception that we need to do less shuffling of the elements. */
42363 /* Compute the sign-extension, aka highparts, of the two operands. */
42364 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42365 op1
, pc_rtx
, pc_rtx
);
42366 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
42367 op2
, pc_rtx
, pc_rtx
);
42369 /* Multiply LO(A) * HI(B), and vice-versa. */
42370 t1
= gen_reg_rtx (wmode
);
42371 t2
= gen_reg_rtx (wmode
);
42372 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
42373 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
42375 /* Multiply LO(A) * LO(B). */
42376 t0
= gen_reg_rtx (wmode
);
42377 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
42379 /* Combine and shift the highparts into place. */
42380 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
42381 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
42384 /* Combine high and low parts. */
42385 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
42392 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
42393 bool uns_p
, bool high_p
)
42395 enum machine_mode wmode
= GET_MODE (dest
);
42396 enum machine_mode mode
= GET_MODE (op1
);
42397 rtx t1
, t2
, t3
, t4
, mask
;
42402 t1
= gen_reg_rtx (mode
);
42403 t2
= gen_reg_rtx (mode
);
42404 if (TARGET_XOP
&& !uns_p
)
42406 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
42407 shuffle the elements once so that all elements are in the right
42408 place for immediate use: { A C B D }. */
42409 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
42410 const1_rtx
, GEN_INT (3)));
42411 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
42412 const1_rtx
, GEN_INT (3)));
42416 /* Put the elements into place for the multiply. */
42417 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
42418 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
42421 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
42425 /* Shuffle the elements between the lanes. After this we
42426 have { A B E F | C D G H } for each operand. */
42427 t1
= gen_reg_rtx (V4DImode
);
42428 t2
= gen_reg_rtx (V4DImode
);
42429 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
42430 const0_rtx
, const2_rtx
,
42431 const1_rtx
, GEN_INT (3)));
42432 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
42433 const0_rtx
, const2_rtx
,
42434 const1_rtx
, GEN_INT (3)));
42436 /* Shuffle the elements within the lanes. After this we
42437 have { A A B B | C C D D } or { E E F F | G G H H }. */
42438 t3
= gen_reg_rtx (V8SImode
);
42439 t4
= gen_reg_rtx (V8SImode
);
42440 mask
= GEN_INT (high_p
42441 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
42442 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
42443 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
42444 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
42446 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
42451 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
42452 uns_p
, OPTAB_DIRECT
);
42453 t2
= expand_binop (mode
,
42454 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
42455 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
42456 gcc_assert (t1
&& t2
);
42458 t3
= gen_reg_rtx (mode
);
42459 ix86_expand_vec_interleave (t3
, t1
, t2
, high_p
);
42460 emit_move_insn (dest
, gen_lowpart (wmode
, t3
));
42465 t1
= gen_reg_rtx (wmode
);
42466 t2
= gen_reg_rtx (wmode
);
42467 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
42468 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
42470 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
42474 gcc_unreachable ();
42479 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
42481 rtx res_1
, res_2
, res_3
, res_4
;
42483 res_1
= gen_reg_rtx (V4SImode
);
42484 res_2
= gen_reg_rtx (V4SImode
);
42485 res_3
= gen_reg_rtx (V2DImode
);
42486 res_4
= gen_reg_rtx (V2DImode
);
42487 ix86_expand_mul_widen_evenodd (res_3
, op1
, op2
, true, false);
42488 ix86_expand_mul_widen_evenodd (res_4
, op1
, op2
, true, true);
42490 /* Move the results in element 2 down to element 1; we don't care
42491 what goes in elements 2 and 3. Then we can merge the parts
42492 back together with an interleave.
42494 Note that two other sequences were tried:
42495 (1) Use interleaves at the start instead of psrldq, which allows
42496 us to use a single shufps to merge things back at the end.
42497 (2) Use shufps here to combine the two vectors, then pshufd to
42498 put the elements in the correct order.
42499 In both cases the cost of the reformatting stall was too high
42500 and the overall sequence slower. */
42502 emit_insn (gen_sse2_pshufd_1 (res_1
, gen_lowpart (V4SImode
, res_3
),
42503 const0_rtx
, const2_rtx
,
42504 const0_rtx
, const0_rtx
));
42505 emit_insn (gen_sse2_pshufd_1 (res_2
, gen_lowpart (V4SImode
, res_4
),
42506 const0_rtx
, const2_rtx
,
42507 const0_rtx
, const0_rtx
));
42508 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
42510 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
42514 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
42516 enum machine_mode mode
= GET_MODE (op0
);
42517 rtx t1
, t2
, t3
, t4
, t5
, t6
;
42519 if (TARGET_XOP
&& mode
== V2DImode
)
42521 /* op1: A,B,C,D, op2: E,F,G,H */
42522 op1
= gen_lowpart (V4SImode
, op1
);
42523 op2
= gen_lowpart (V4SImode
, op2
);
42525 t1
= gen_reg_rtx (V4SImode
);
42526 t2
= gen_reg_rtx (V4SImode
);
42527 t3
= gen_reg_rtx (V2DImode
);
42528 t4
= gen_reg_rtx (V2DImode
);
42531 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
42537 /* t2: (B*E),(A*F),(D*G),(C*H) */
42538 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
42540 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
42541 emit_insn (gen_xop_phadddq (t3
, t2
));
42543 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
42544 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
42546 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
42547 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
42551 enum machine_mode nmode
;
42552 rtx (*umul
) (rtx
, rtx
, rtx
);
42554 if (mode
== V2DImode
)
42556 umul
= gen_vec_widen_umult_even_v4si
;
42559 else if (mode
== V4DImode
)
42561 umul
= gen_vec_widen_umult_even_v8si
;
42565 gcc_unreachable ();
42568 /* Multiply low parts. */
42569 t1
= gen_reg_rtx (mode
);
42570 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
42572 /* Shift input vectors right 32 bits so we can multiply high parts. */
42574 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
42575 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
42577 /* Multiply high parts by low parts. */
42578 t4
= gen_reg_rtx (mode
);
42579 t5
= gen_reg_rtx (mode
);
42580 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
42581 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
42583 /* Combine and shift the highparts back. */
42584 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
42585 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
42587 /* Combine high and low parts. */
42588 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
42591 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
42592 gen_rtx_MULT (mode
, op1
, op2
));
42595 /* Calculate integer abs() using only SSE2 instructions. */
42598 ix86_expand_sse2_abs (rtx target
, rtx input
)
42600 enum machine_mode mode
= GET_MODE (target
);
42605 /* For 32-bit signed integer X, the best way to calculate the absolute
42606 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
42608 tmp0
= expand_simple_binop (mode
, ASHIFTRT
, input
,
42609 GEN_INT (GET_MODE_BITSIZE
42610 (GET_MODE_INNER (mode
)) - 1),
42611 NULL
, 0, OPTAB_DIRECT
);
42612 tmp1
= expand_simple_binop (mode
, XOR
, tmp0
, input
,
42613 NULL
, 0, OPTAB_DIRECT
);
42614 x
= expand_simple_binop (mode
, MINUS
, tmp1
, tmp0
,
42615 target
, 0, OPTAB_DIRECT
);
42618 /* For 16-bit signed integer X, the best way to calculate the absolute
42619 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
42621 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42623 x
= expand_simple_binop (mode
, SMAX
, tmp0
, input
,
42624 target
, 0, OPTAB_DIRECT
);
42627 /* For 8-bit signed integer X, the best way to calculate the absolute
42628 value of X is min ((unsigned char) X, (unsigned char) (-X)),
42629 as SSE2 provides the PMINUB insn. */
42631 tmp0
= expand_unop (mode
, neg_optab
, input
, NULL_RTX
, 0);
42633 x
= expand_simple_binop (V16QImode
, UMIN
, tmp0
, input
,
42634 target
, 0, OPTAB_DIRECT
);
42638 gcc_unreachable ();
42642 emit_move_insn (target
, x
);
42645 /* Expand an insert into a vector register through pinsr insn.
42646 Return true if successful. */
42649 ix86_expand_pinsr (rtx
*operands
)
42651 rtx dst
= operands
[0];
42652 rtx src
= operands
[3];
42654 unsigned int size
= INTVAL (operands
[1]);
42655 unsigned int pos
= INTVAL (operands
[2]);
42657 if (GET_CODE (dst
) == SUBREG
)
42659 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
42660 dst
= SUBREG_REG (dst
);
42663 if (GET_CODE (src
) == SUBREG
)
42664 src
= SUBREG_REG (src
);
42666 switch (GET_MODE (dst
))
42673 enum machine_mode srcmode
, dstmode
;
42674 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
42676 srcmode
= mode_for_size (size
, MODE_INT
, 0);
42681 if (!TARGET_SSE4_1
)
42683 dstmode
= V16QImode
;
42684 pinsr
= gen_sse4_1_pinsrb
;
42690 dstmode
= V8HImode
;
42691 pinsr
= gen_sse2_pinsrw
;
42695 if (!TARGET_SSE4_1
)
42697 dstmode
= V4SImode
;
42698 pinsr
= gen_sse4_1_pinsrd
;
42702 gcc_assert (TARGET_64BIT
);
42703 if (!TARGET_SSE4_1
)
42705 dstmode
= V2DImode
;
42706 pinsr
= gen_sse4_1_pinsrq
;
42714 if (GET_MODE (dst
) != dstmode
)
42715 d
= gen_reg_rtx (dstmode
);
42716 src
= gen_lowpart (srcmode
, src
);
42720 emit_insn (pinsr (d
, gen_lowpart (dstmode
, dst
), src
,
42721 GEN_INT (1 << pos
)));
42723 emit_move_insn (dst
, gen_lowpart (GET_MODE (dst
), d
));
42732 /* This function returns the calling abi specific va_list type node.
42733 It returns the FNDECL specific va_list type. */
42736 ix86_fn_abi_va_list (tree fndecl
)
42739 return va_list_type_node
;
42740 gcc_assert (fndecl
!= NULL_TREE
);
42742 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
42743 return ms_va_list_type_node
;
42745 return sysv_va_list_type_node
;
42748 /* Returns the canonical va_list type specified by TYPE. If there
42749 is no valid TYPE provided, it return NULL_TREE. */
42752 ix86_canonical_va_list_type (tree type
)
42756 /* Resolve references and pointers to va_list type. */
42757 if (TREE_CODE (type
) == MEM_REF
)
42758 type
= TREE_TYPE (type
);
42759 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
42760 type
= TREE_TYPE (type
);
42761 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
42762 type
= TREE_TYPE (type
);
42764 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
42766 wtype
= va_list_type_node
;
42767 gcc_assert (wtype
!= NULL_TREE
);
42769 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42771 /* If va_list is an array type, the argument may have decayed
42772 to a pointer type, e.g. by being passed to another function.
42773 In that case, unwrap both types so that we can compare the
42774 underlying records. */
42775 if (TREE_CODE (htype
) == ARRAY_TYPE
42776 || POINTER_TYPE_P (htype
))
42778 wtype
= TREE_TYPE (wtype
);
42779 htype
= TREE_TYPE (htype
);
42782 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42783 return va_list_type_node
;
42784 wtype
= sysv_va_list_type_node
;
42785 gcc_assert (wtype
!= NULL_TREE
);
42787 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42789 /* If va_list is an array type, the argument may have decayed
42790 to a pointer type, e.g. by being passed to another function.
42791 In that case, unwrap both types so that we can compare the
42792 underlying records. */
42793 if (TREE_CODE (htype
) == ARRAY_TYPE
42794 || POINTER_TYPE_P (htype
))
42796 wtype
= TREE_TYPE (wtype
);
42797 htype
= TREE_TYPE (htype
);
42800 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42801 return sysv_va_list_type_node
;
42802 wtype
= ms_va_list_type_node
;
42803 gcc_assert (wtype
!= NULL_TREE
);
42805 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
42807 /* If va_list is an array type, the argument may have decayed
42808 to a pointer type, e.g. by being passed to another function.
42809 In that case, unwrap both types so that we can compare the
42810 underlying records. */
42811 if (TREE_CODE (htype
) == ARRAY_TYPE
42812 || POINTER_TYPE_P (htype
))
42814 wtype
= TREE_TYPE (wtype
);
42815 htype
= TREE_TYPE (htype
);
42818 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
42819 return ms_va_list_type_node
;
42822 return std_canonical_va_list_type (type
);
42825 /* Iterate through the target-specific builtin types for va_list.
42826 IDX denotes the iterator, *PTREE is set to the result type of
42827 the va_list builtin, and *PNAME to its internal type.
42828 Returns zero if there is no element for this index, otherwise
42829 IDX should be increased upon the next call.
42830 Note, do not iterate a base builtin's name like __builtin_va_list.
42831 Used from c_common_nodes_and_builtins. */
42834 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
42844 *ptree
= ms_va_list_type_node
;
42845 *pname
= "__builtin_ms_va_list";
42849 *ptree
= sysv_va_list_type_node
;
42850 *pname
= "__builtin_sysv_va_list";
42858 #undef TARGET_SCHED_DISPATCH
42859 #define TARGET_SCHED_DISPATCH has_dispatch
42860 #undef TARGET_SCHED_DISPATCH_DO
42861 #define TARGET_SCHED_DISPATCH_DO do_dispatch
42862 #undef TARGET_SCHED_REASSOCIATION_WIDTH
42863 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
42864 #undef TARGET_SCHED_REORDER
42865 #define TARGET_SCHED_REORDER ix86_sched_reorder
42866 #undef TARGET_SCHED_ADJUST_PRIORITY
42867 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
42868 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
42869 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
42870 ix86_dependencies_evaluation_hook
42872 /* The size of the dispatch window is the total number of bytes of
42873 object code allowed in a window. */
42874 #define DISPATCH_WINDOW_SIZE 16
42876 /* Number of dispatch windows considered for scheduling. */
42877 #define MAX_DISPATCH_WINDOWS 3
42879 /* Maximum number of instructions in a window. */
42882 /* Maximum number of immediate operands in a window. */
42885 /* Maximum number of immediate bits allowed in a window. */
42886 #define MAX_IMM_SIZE 128
42888 /* Maximum number of 32 bit immediates allowed in a window. */
42889 #define MAX_IMM_32 4
42891 /* Maximum number of 64 bit immediates allowed in a window. */
42892 #define MAX_IMM_64 2
42894 /* Maximum total of loads or prefetches allowed in a window. */
42897 /* Maximum total of stores allowed in a window. */
42898 #define MAX_STORE 1
42904 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
42905 enum dispatch_group
{
42920 /* Number of allowable groups in a dispatch window. It is an array
42921 indexed by dispatch_group enum. 100 is used as a big number,
42922 because the number of these kind of operations does not have any
42923 effect in dispatch window, but we need them for other reasons in
42925 static unsigned int num_allowable_groups
[disp_last
] = {
42926 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
42929 char group_name
[disp_last
+ 1][16] = {
42930 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
42931 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
42932 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
42935 /* Instruction path. */
42938 path_single
, /* Single micro op. */
42939 path_double
, /* Double micro op. */
42940 path_multi
, /* Instructions with more than 2 micro op.. */
42944 /* sched_insn_info defines a window to the instructions scheduled in
42945 the basic block. It contains a pointer to the insn_info table and
42946 the instruction scheduled.
42948 Windows are allocated for each basic block and are linked
42950 typedef struct sched_insn_info_s
{
42952 enum dispatch_group group
;
42953 enum insn_path path
;
42958 /* Linked list of dispatch windows. This is a two way list of
42959 dispatch windows of a basic block. It contains information about
42960 the number of uops in the window and the total number of
42961 instructions and of bytes in the object code for this dispatch
42963 typedef struct dispatch_windows_s
{
42964 int num_insn
; /* Number of insn in the window. */
42965 int num_uops
; /* Number of uops in the window. */
42966 int window_size
; /* Number of bytes in the window. */
42967 int window_num
; /* Window number between 0 or 1. */
42968 int num_imm
; /* Number of immediates in an insn. */
42969 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
42970 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
42971 int imm_size
; /* Total immediates in the window. */
42972 int num_loads
; /* Total memory loads in the window. */
42973 int num_stores
; /* Total memory stores in the window. */
42974 int violation
; /* Violation exists in window. */
42975 sched_insn_info
*window
; /* Pointer to the window. */
42976 struct dispatch_windows_s
*next
;
42977 struct dispatch_windows_s
*prev
;
42978 } dispatch_windows
;
42980 /* Immediate valuse used in an insn. */
42981 typedef struct imm_info_s
42988 static dispatch_windows
*dispatch_window_list
;
42989 static dispatch_windows
*dispatch_window_list1
;
42991 /* Get dispatch group of insn. */
42993 static enum dispatch_group
42994 get_mem_group (rtx insn
)
42996 enum attr_memory memory
;
42998 if (INSN_CODE (insn
) < 0)
42999 return disp_no_group
;
43000 memory
= get_attr_memory (insn
);
43001 if (memory
== MEMORY_STORE
)
43004 if (memory
== MEMORY_LOAD
)
43007 if (memory
== MEMORY_BOTH
)
43008 return disp_load_store
;
43010 return disp_no_group
;
43013 /* Return true if insn is a compare instruction. */
43018 enum attr_type type
;
43020 type
= get_attr_type (insn
);
43021 return (type
== TYPE_TEST
43022 || type
== TYPE_ICMP
43023 || type
== TYPE_FCMP
43024 || GET_CODE (PATTERN (insn
)) == COMPARE
);
43027 /* Return true if a dispatch violation encountered. */
43030 dispatch_violation (void)
43032 if (dispatch_window_list
->next
)
43033 return dispatch_window_list
->next
->violation
;
43034 return dispatch_window_list
->violation
;
43037 /* Return true if insn is a branch instruction. */
43040 is_branch (rtx insn
)
43042 return (CALL_P (insn
) || JUMP_P (insn
));
43045 /* Return true if insn is a prefetch instruction. */
43048 is_prefetch (rtx insn
)
43050 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
43053 /* This function initializes a dispatch window and the list container holding a
43054 pointer to the window. */
43057 init_window (int window_num
)
43060 dispatch_windows
*new_list
;
43062 if (window_num
== 0)
43063 new_list
= dispatch_window_list
;
43065 new_list
= dispatch_window_list1
;
43067 new_list
->num_insn
= 0;
43068 new_list
->num_uops
= 0;
43069 new_list
->window_size
= 0;
43070 new_list
->next
= NULL
;
43071 new_list
->prev
= NULL
;
43072 new_list
->window_num
= window_num
;
43073 new_list
->num_imm
= 0;
43074 new_list
->num_imm_32
= 0;
43075 new_list
->num_imm_64
= 0;
43076 new_list
->imm_size
= 0;
43077 new_list
->num_loads
= 0;
43078 new_list
->num_stores
= 0;
43079 new_list
->violation
= false;
43081 for (i
= 0; i
< MAX_INSN
; i
++)
43083 new_list
->window
[i
].insn
= NULL
;
43084 new_list
->window
[i
].group
= disp_no_group
;
43085 new_list
->window
[i
].path
= no_path
;
43086 new_list
->window
[i
].byte_len
= 0;
43087 new_list
->window
[i
].imm_bytes
= 0;
43092 /* This function allocates and initializes a dispatch window and the
43093 list container holding a pointer to the window. */
43095 static dispatch_windows
*
43096 allocate_window (void)
43098 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
43099 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
43104 /* This routine initializes the dispatch scheduling information. It
43105 initiates building dispatch scheduler tables and constructs the
43106 first dispatch window. */
43109 init_dispatch_sched (void)
43111 /* Allocate a dispatch list and a window. */
43112 dispatch_window_list
= allocate_window ();
43113 dispatch_window_list1
= allocate_window ();
43118 /* This function returns true if a branch is detected. End of a basic block
43119 does not have to be a branch, but here we assume only branches end a
43123 is_end_basic_block (enum dispatch_group group
)
43125 return group
== disp_branch
;
43128 /* This function is called when the end of a window processing is reached. */
43131 process_end_window (void)
43133 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
43134 if (dispatch_window_list
->next
)
43136 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
43137 gcc_assert (dispatch_window_list
->window_size
43138 + dispatch_window_list1
->window_size
<= 48);
43144 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
43145 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
43146 for 48 bytes of instructions. Note that these windows are not dispatch
43147 windows that their sizes are DISPATCH_WINDOW_SIZE. */
43149 static dispatch_windows
*
43150 allocate_next_window (int window_num
)
43152 if (window_num
== 0)
43154 if (dispatch_window_list
->next
)
43157 return dispatch_window_list
;
43160 dispatch_window_list
->next
= dispatch_window_list1
;
43161 dispatch_window_list1
->prev
= dispatch_window_list
;
43163 return dispatch_window_list1
;
43166 /* Increment the number of immediate operands of an instruction. */
43169 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
43174 switch ( GET_CODE (*in_rtx
))
43179 (imm_values
->imm
)++;
43180 if (x86_64_immediate_operand (*in_rtx
, SImode
))
43181 (imm_values
->imm32
)++;
43183 (imm_values
->imm64
)++;
43187 (imm_values
->imm
)++;
43188 (imm_values
->imm64
)++;
43192 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
43194 (imm_values
->imm
)++;
43195 (imm_values
->imm32
)++;
43206 /* Compute number of immediate operands of an instruction. */
43209 find_constant (rtx in_rtx
, imm_info
*imm_values
)
43211 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
43212 (rtx_function
) find_constant_1
, (void *) imm_values
);
43215 /* Return total size of immediate operands of an instruction along with number
43216 of corresponding immediate-operands. It initializes its parameters to zero
43217 befor calling FIND_CONSTANT.
43218 INSN is the input instruction. IMM is the total of immediates.
43219 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
43223 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
43225 imm_info imm_values
= {0, 0, 0};
43227 find_constant (insn
, &imm_values
);
43228 *imm
= imm_values
.imm
;
43229 *imm32
= imm_values
.imm32
;
43230 *imm64
= imm_values
.imm64
;
43231 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
43234 /* This function indicates if an operand of an instruction is an
43238 has_immediate (rtx insn
)
43240 int num_imm_operand
;
43241 int num_imm32_operand
;
43242 int num_imm64_operand
;
43245 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43246 &num_imm64_operand
);
43250 /* Return single or double path for instructions. */
43252 static enum insn_path
43253 get_insn_path (rtx insn
)
43255 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
43257 if ((int)path
== 0)
43258 return path_single
;
43260 if ((int)path
== 1)
43261 return path_double
;
43266 /* Return insn dispatch group. */
43268 static enum dispatch_group
43269 get_insn_group (rtx insn
)
43271 enum dispatch_group group
= get_mem_group (insn
);
43275 if (is_branch (insn
))
43276 return disp_branch
;
43281 if (has_immediate (insn
))
43284 if (is_prefetch (insn
))
43285 return disp_prefetch
;
43287 return disp_no_group
;
43290 /* Count number of GROUP restricted instructions in a dispatch
43291 window WINDOW_LIST. */
43294 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
43296 enum dispatch_group group
= get_insn_group (insn
);
43298 int num_imm_operand
;
43299 int num_imm32_operand
;
43300 int num_imm64_operand
;
43302 if (group
== disp_no_group
)
43305 if (group
== disp_imm
)
43307 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43308 &num_imm64_operand
);
43309 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
43310 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
43311 || (num_imm32_operand
> 0
43312 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
43313 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
43314 || (num_imm64_operand
> 0
43315 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
43316 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
43317 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
43318 && num_imm64_operand
> 0
43319 && ((window_list
->num_imm_64
> 0
43320 && window_list
->num_insn
>= 2)
43321 || window_list
->num_insn
>= 3)))
43327 if ((group
== disp_load_store
43328 && (window_list
->num_loads
>= MAX_LOAD
43329 || window_list
->num_stores
>= MAX_STORE
))
43330 || ((group
== disp_load
43331 || group
== disp_prefetch
)
43332 && window_list
->num_loads
>= MAX_LOAD
)
43333 || (group
== disp_store
43334 && window_list
->num_stores
>= MAX_STORE
))
43340 /* This function returns true if insn satisfies dispatch rules on the
43341 last window scheduled. */
43344 fits_dispatch_window (rtx insn
)
43346 dispatch_windows
*window_list
= dispatch_window_list
;
43347 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
43348 unsigned int num_restrict
;
43349 enum dispatch_group group
= get_insn_group (insn
);
43350 enum insn_path path
= get_insn_path (insn
);
43353 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
43354 instructions should be given the lowest priority in the
43355 scheduling process in Haifa scheduler to make sure they will be
43356 scheduled in the same dispatch window as the reference to them. */
43357 if (group
== disp_jcc
|| group
== disp_cmp
)
43360 /* Check nonrestricted. */
43361 if (group
== disp_no_group
|| group
== disp_branch
)
43364 /* Get last dispatch window. */
43365 if (window_list_next
)
43366 window_list
= window_list_next
;
43368 if (window_list
->window_num
== 1)
43370 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
43373 || (min_insn_size (insn
) + sum
) >= 48)
43374 /* Window 1 is full. Go for next window. */
43378 num_restrict
= count_num_restricted (insn
, window_list
);
43380 if (num_restrict
> num_allowable_groups
[group
])
43383 /* See if it fits in the first window. */
43384 if (window_list
->window_num
== 0)
43386 /* The first widow should have only single and double path
43388 if (path
== path_double
43389 && (window_list
->num_uops
+ 2) > MAX_INSN
)
43391 else if (path
!= path_single
)
43397 /* Add an instruction INSN with NUM_UOPS micro-operations to the
43398 dispatch window WINDOW_LIST. */
43401 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
43403 int byte_len
= min_insn_size (insn
);
43404 int num_insn
= window_list
->num_insn
;
43406 sched_insn_info
*window
= window_list
->window
;
43407 enum dispatch_group group
= get_insn_group (insn
);
43408 enum insn_path path
= get_insn_path (insn
);
43409 int num_imm_operand
;
43410 int num_imm32_operand
;
43411 int num_imm64_operand
;
43413 if (!window_list
->violation
&& group
!= disp_cmp
43414 && !fits_dispatch_window (insn
))
43415 window_list
->violation
= true;
43417 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43418 &num_imm64_operand
);
43420 /* Initialize window with new instruction. */
43421 window
[num_insn
].insn
= insn
;
43422 window
[num_insn
].byte_len
= byte_len
;
43423 window
[num_insn
].group
= group
;
43424 window
[num_insn
].path
= path
;
43425 window
[num_insn
].imm_bytes
= imm_size
;
43427 window_list
->window_size
+= byte_len
;
43428 window_list
->num_insn
= num_insn
+ 1;
43429 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
43430 window_list
->imm_size
+= imm_size
;
43431 window_list
->num_imm
+= num_imm_operand
;
43432 window_list
->num_imm_32
+= num_imm32_operand
;
43433 window_list
->num_imm_64
+= num_imm64_operand
;
43435 if (group
== disp_store
)
43436 window_list
->num_stores
+= 1;
43437 else if (group
== disp_load
43438 || group
== disp_prefetch
)
43439 window_list
->num_loads
+= 1;
43440 else if (group
== disp_load_store
)
43442 window_list
->num_stores
+= 1;
43443 window_list
->num_loads
+= 1;
43447 /* Adds a scheduled instruction, INSN, to the current dispatch window.
43448 If the total bytes of instructions or the number of instructions in
43449 the window exceed allowable, it allocates a new window. */
43452 add_to_dispatch_window (rtx insn
)
43455 dispatch_windows
*window_list
;
43456 dispatch_windows
*next_list
;
43457 dispatch_windows
*window0_list
;
43458 enum insn_path path
;
43459 enum dispatch_group insn_group
;
43467 if (INSN_CODE (insn
) < 0)
43470 byte_len
= min_insn_size (insn
);
43471 window_list
= dispatch_window_list
;
43472 next_list
= window_list
->next
;
43473 path
= get_insn_path (insn
);
43474 insn_group
= get_insn_group (insn
);
43476 /* Get the last dispatch window. */
43478 window_list
= dispatch_window_list
->next
;
43480 if (path
== path_single
)
43482 else if (path
== path_double
)
43485 insn_num_uops
= (int) path
;
43487 /* If current window is full, get a new window.
43488 Window number zero is full, if MAX_INSN uops are scheduled in it.
43489 Window number one is full, if window zero's bytes plus window
43490 one's bytes is 32, or if the bytes of the new instruction added
43491 to the total makes it greater than 48, or it has already MAX_INSN
43492 instructions in it. */
43493 num_insn
= window_list
->num_insn
;
43494 num_uops
= window_list
->num_uops
;
43495 window_num
= window_list
->window_num
;
43496 insn_fits
= fits_dispatch_window (insn
);
43498 if (num_insn
>= MAX_INSN
43499 || num_uops
+ insn_num_uops
> MAX_INSN
43502 window_num
= ~window_num
& 1;
43503 window_list
= allocate_next_window (window_num
);
43506 if (window_num
== 0)
43508 add_insn_window (insn
, window_list
, insn_num_uops
);
43509 if (window_list
->num_insn
>= MAX_INSN
43510 && insn_group
== disp_branch
)
43512 process_end_window ();
43516 else if (window_num
== 1)
43518 window0_list
= window_list
->prev
;
43519 sum
= window0_list
->window_size
+ window_list
->window_size
;
43521 || (byte_len
+ sum
) >= 48)
43523 process_end_window ();
43524 window_list
= dispatch_window_list
;
43527 add_insn_window (insn
, window_list
, insn_num_uops
);
43530 gcc_unreachable ();
43532 if (is_end_basic_block (insn_group
))
43534 /* End of basic block is reached do end-basic-block process. */
43535 process_end_window ();
43540 /* Print the dispatch window, WINDOW_NUM, to FILE. */
43542 DEBUG_FUNCTION
static void
43543 debug_dispatch_window_file (FILE *file
, int window_num
)
43545 dispatch_windows
*list
;
43548 if (window_num
== 0)
43549 list
= dispatch_window_list
;
43551 list
= dispatch_window_list1
;
43553 fprintf (file
, "Window #%d:\n", list
->window_num
);
43554 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
43555 list
->num_insn
, list
->num_uops
, list
->window_size
);
43556 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43557 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
43559 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
43561 fprintf (file
, " insn info:\n");
43563 for (i
= 0; i
< MAX_INSN
; i
++)
43565 if (!list
->window
[i
].insn
)
43567 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
43568 i
, group_name
[list
->window
[i
].group
],
43569 i
, (void *)list
->window
[i
].insn
,
43570 i
, list
->window
[i
].path
,
43571 i
, list
->window
[i
].byte_len
,
43572 i
, list
->window
[i
].imm_bytes
);
43576 /* Print to stdout a dispatch window. */
43578 DEBUG_FUNCTION
void
43579 debug_dispatch_window (int window_num
)
43581 debug_dispatch_window_file (stdout
, window_num
);
43584 /* Print INSN dispatch information to FILE. */
43586 DEBUG_FUNCTION
static void
43587 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
43590 enum insn_path path
;
43591 enum dispatch_group group
;
43593 int num_imm_operand
;
43594 int num_imm32_operand
;
43595 int num_imm64_operand
;
43597 if (INSN_CODE (insn
) < 0)
43600 byte_len
= min_insn_size (insn
);
43601 path
= get_insn_path (insn
);
43602 group
= get_insn_group (insn
);
43603 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
43604 &num_imm64_operand
);
43606 fprintf (file
, " insn info:\n");
43607 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
43608 group_name
[group
], path
, byte_len
);
43609 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
43610 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
43613 /* Print to STDERR the status of the ready list with respect to
43614 dispatch windows. */
43616 DEBUG_FUNCTION
void
43617 debug_ready_dispatch (void)
43620 int no_ready
= number_in_ready ();
43622 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
43624 for (i
= 0; i
< no_ready
; i
++)
43625 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
43628 /* This routine is the driver of the dispatch scheduler. */
43631 do_dispatch (rtx insn
, int mode
)
43633 if (mode
== DISPATCH_INIT
)
43634 init_dispatch_sched ();
43635 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
43636 add_to_dispatch_window (insn
);
43639 /* Return TRUE if Dispatch Scheduling is supported. */
43642 has_dispatch (rtx insn
, int action
)
43644 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
|| TARGET_BDVER4
)
43645 && flag_dispatch_scheduler
)
43651 case IS_DISPATCH_ON
:
43656 return is_cmp (insn
);
43658 case DISPATCH_VIOLATION
:
43659 return dispatch_violation ();
43661 case FITS_DISPATCH_WINDOW
:
43662 return fits_dispatch_window (insn
);
43668 /* Implementation of reassociation_width target hook used by
43669 reassoc phase to identify parallelism level in reassociated
43670 tree. Statements tree_code is passed in OPC. Arguments type
43673 Currently parallel reassociation is enabled for Atom
43674 processors only and we set reassociation width to be 2
43675 because Atom may issue up to 2 instructions per cycle.
43677 Return value should be fixed if parallel reassociation is
43678 enabled for other processors. */
43681 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
43682 enum machine_mode mode
)
43686 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
43688 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
43694 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
43695 place emms and femms instructions. */
43697 static enum machine_mode
43698 ix86_preferred_simd_mode (enum machine_mode mode
)
43706 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
43708 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
43710 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
43712 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
43715 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43721 if (!TARGET_VECTORIZE_DOUBLE
)
43723 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
43725 else if (TARGET_SSE2
)
43734 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
43737 static unsigned int
43738 ix86_autovectorize_vector_sizes (void)
43740 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
43745 /* Return class of registers which could be used for pseudo of MODE
43746 and of class RCLASS for spilling instead of memory. Return NO_REGS
43747 if it is not possible or non-profitable. */
43749 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
43751 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
43752 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
43753 && INTEGER_CLASS_P (rclass
))
43754 return ALL_SSE_REGS
;
43758 /* Implement targetm.vectorize.init_cost. */
43761 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
43763 unsigned *cost
= XNEWVEC (unsigned, 3);
43764 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
43768 /* Implement targetm.vectorize.add_stmt_cost. */
43771 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
43772 struct _stmt_vec_info
*stmt_info
, int misalign
,
43773 enum vect_cost_model_location where
)
43775 unsigned *cost
= (unsigned *) data
;
43776 unsigned retval
= 0;
43778 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
43779 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
43781 /* Statements in an inner loop relative to the loop being
43782 vectorized are weighted more heavily. The value here is
43783 arbitrary and could potentially be improved with analysis. */
43784 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
43785 count
*= 50; /* FIXME. */
43787 retval
= (unsigned) (count
* stmt_cost
);
43788 cost
[where
] += retval
;
43793 /* Implement targetm.vectorize.finish_cost. */
43796 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
43797 unsigned *body_cost
, unsigned *epilogue_cost
)
43799 unsigned *cost
= (unsigned *) data
;
43800 *prologue_cost
= cost
[vect_prologue
];
43801 *body_cost
= cost
[vect_body
];
43802 *epilogue_cost
= cost
[vect_epilogue
];
43805 /* Implement targetm.vectorize.destroy_cost_data. */
43808 ix86_destroy_cost_data (void *data
)
43813 /* Validate target specific memory model bits in VAL. */
43815 static unsigned HOST_WIDE_INT
43816 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
43818 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
43821 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
43823 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
43825 warning (OPT_Winvalid_memory_model
,
43826 "Unknown architecture specific memory model");
43827 return MEMMODEL_SEQ_CST
;
43829 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
43830 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
43832 warning (OPT_Winvalid_memory_model
,
43833 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
43834 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
43836 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
43838 warning (OPT_Winvalid_memory_model
,
43839 "HLE_RELEASE not used with RELEASE or stronger memory model");
43840 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
43845 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
43846 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
43847 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
43848 or number of vecsize_mangle variants that should be emitted. */
43851 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
43852 struct cgraph_simd_clone
*clonei
,
43853 tree base_type
, int num
)
43857 if (clonei
->simdlen
43858 && (clonei
->simdlen
< 2
43859 || clonei
->simdlen
> 16
43860 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
43862 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
43863 "unsupported simdlen %d", clonei
->simdlen
);
43867 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
43868 if (TREE_CODE (ret_type
) != VOID_TYPE
)
43869 switch (TYPE_MODE (ret_type
))
43881 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
43882 "unsupported return type %qT for simd\n", ret_type
);
43889 for (t
= DECL_ARGUMENTS (node
->decl
), i
= 0; t
; t
= DECL_CHAIN (t
), i
++)
43890 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
43891 switch (TYPE_MODE (TREE_TYPE (t
)))
43903 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
43904 "unsupported argument type %qT for simd\n", TREE_TYPE (t
));
43908 if (clonei
->cilk_elemental
)
43910 /* Parse here processor clause. If not present, default to 'b'. */
43911 clonei
->vecsize_mangle
= 'b';
43913 else if (!TREE_PUBLIC (node
->decl
))
43915 /* If the function isn't exported, we can pick up just one ISA
43918 clonei
->vecsize_mangle
= 'd';
43919 else if (TARGET_AVX
)
43920 clonei
->vecsize_mangle
= 'c';
43922 clonei
->vecsize_mangle
= 'b';
43927 clonei
->vecsize_mangle
= "bcd"[num
];
43930 switch (clonei
->vecsize_mangle
)
43933 clonei
->vecsize_int
= 128;
43934 clonei
->vecsize_float
= 128;
43937 clonei
->vecsize_int
= 128;
43938 clonei
->vecsize_float
= 256;
43941 clonei
->vecsize_int
= 256;
43942 clonei
->vecsize_float
= 256;
43945 if (clonei
->simdlen
== 0)
43947 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
43948 clonei
->simdlen
= clonei
->vecsize_int
;
43950 clonei
->simdlen
= clonei
->vecsize_float
;
43951 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
43952 if (clonei
->simdlen
> 16)
43953 clonei
->simdlen
= 16;
43958 /* Add target attribute to SIMD clone NODE if needed. */
43961 ix86_simd_clone_adjust (struct cgraph_node
*node
)
43963 const char *str
= NULL
;
43964 gcc_assert (node
->decl
== cfun
->decl
);
43965 switch (node
->simdclone
->vecsize_mangle
)
43980 gcc_unreachable ();
43985 tree args
= build_tree_list (NULL_TREE
, build_string (strlen (str
), str
));
43986 bool ok
= ix86_valid_target_attribute_p (node
->decl
, NULL
, args
, 0);
43989 ix86_previous_fndecl
= NULL_TREE
;
43990 ix86_set_current_function (node
->decl
);
43993 /* If SIMD clone NODE can't be used in a vectorized loop
43994 in current function, return -1, otherwise return a badness of using it
43995 (0 if it is most desirable from vecsize_mangle point of view, 1
43996 slightly less desirable, etc.). */
43999 ix86_simd_clone_usable (struct cgraph_node
*node
)
44001 switch (node
->simdclone
->vecsize_mangle
)
44008 return TARGET_AVX2
? 2 : 1;
44012 return TARGET_AVX2
? 1 : 0;
44019 gcc_unreachable ();
44023 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
44026 ix86_float_exceptions_rounding_supported_p (void)
44028 /* For x87 floating point with standard excess precision handling,
44029 there is no adddf3 pattern (since x87 floating point only has
44030 XFmode operations) so the default hook implementation gets this
44032 return TARGET_80387
|| TARGET_SSE_MATH
;
44035 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
44038 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
44040 if (!TARGET_80387
&& !TARGET_SSE_MATH
)
44042 tree exceptions_var
= create_tmp_var (integer_type_node
, NULL
);
44045 tree fenv_index_type
= build_index_type (size_int (6));
44046 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
44047 tree fenv_var
= create_tmp_var (fenv_type
, NULL
);
44048 mark_addressable (fenv_var
);
44049 tree fenv_ptr
= build_pointer_type (fenv_type
);
44050 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
44051 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
44052 tree fnstenv
= ix86_builtins
[IX86_BUILTIN_FNSTENV
];
44053 tree fldenv
= ix86_builtins
[IX86_BUILTIN_FLDENV
];
44054 tree fnstsw
= ix86_builtins
[IX86_BUILTIN_FNSTSW
];
44055 tree fnclex
= ix86_builtins
[IX86_BUILTIN_FNCLEX
];
44056 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
44057 tree hold_fnclex
= build_call_expr (fnclex
, 0);
44058 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, hold_fnstenv
,
44060 *clear
= build_call_expr (fnclex
, 0);
44061 tree sw_var
= create_tmp_var (short_unsigned_type_node
, NULL
);
44062 mark_addressable (sw_var
);
44063 tree su_ptr
= build_pointer_type (short_unsigned_type_node
);
44064 tree sw_addr
= build1 (ADDR_EXPR
, su_ptr
, sw_var
);
44065 tree fnstsw_call
= build_call_expr (fnstsw
, 1, sw_addr
);
44066 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
44067 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
44068 exceptions_var
, exceptions_x87
);
44069 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
44070 fnstsw_call
, update_mod
);
44071 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
44072 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
44074 if (TARGET_SSE_MATH
)
44076 tree mxcsr_orig_var
= create_tmp_var (unsigned_type_node
, NULL
);
44077 tree mxcsr_mod_var
= create_tmp_var (unsigned_type_node
, NULL
);
44078 tree stmxcsr
= ix86_builtins
[IX86_BUILTIN_STMXCSR
];
44079 tree ldmxcsr
= ix86_builtins
[IX86_BUILTIN_LDMXCSR
];
44080 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
44081 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
44082 mxcsr_orig_var
, stmxcsr_hold_call
);
44083 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
44085 build_int_cst (unsigned_type_node
, 0x1f80));
44086 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
44087 build_int_cst (unsigned_type_node
, 0xffffffc0));
44088 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
44089 mxcsr_mod_var
, hold_mod_val
);
44090 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
44091 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
44092 hold_assign_orig
, hold_assign_mod
);
44093 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
44094 ldmxcsr_hold_call
);
44096 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
44099 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
44101 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
44102 ldmxcsr_clear_call
);
44104 *clear
= ldmxcsr_clear_call
;
44105 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
44106 tree exceptions_sse
= fold_convert (integer_type_node
,
44107 stxmcsr_update_call
);
44110 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
44111 exceptions_var
, exceptions_sse
);
44112 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
44113 exceptions_var
, exceptions_mod
);
44114 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
44115 exceptions_assign
);
44118 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
44119 exceptions_var
, exceptions_sse
);
44120 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
44121 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
44122 ldmxcsr_update_call
);
44124 tree atomic_feraiseexcept
44125 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
44126 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
44127 1, exceptions_var
);
44128 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
44129 atomic_feraiseexcept_call
);
44132 /* Initialize the GCC target structure. */
44133 #undef TARGET_RETURN_IN_MEMORY
44134 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
44136 #undef TARGET_LEGITIMIZE_ADDRESS
44137 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
44139 #undef TARGET_ATTRIBUTE_TABLE
44140 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
44141 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
44142 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
44143 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44144 # undef TARGET_MERGE_DECL_ATTRIBUTES
44145 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
44148 #undef TARGET_COMP_TYPE_ATTRIBUTES
44149 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
44151 #undef TARGET_INIT_BUILTINS
44152 #define TARGET_INIT_BUILTINS ix86_init_builtins
44153 #undef TARGET_BUILTIN_DECL
44154 #define TARGET_BUILTIN_DECL ix86_builtin_decl
44155 #undef TARGET_EXPAND_BUILTIN
44156 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
44158 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
44159 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
44160 ix86_builtin_vectorized_function
44162 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
44163 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
44165 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
44166 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
44168 #undef TARGET_VECTORIZE_BUILTIN_GATHER
44169 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
44171 #undef TARGET_BUILTIN_RECIPROCAL
44172 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
44174 #undef TARGET_ASM_FUNCTION_EPILOGUE
44175 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
44177 #undef TARGET_ENCODE_SECTION_INFO
44178 #ifndef SUBTARGET_ENCODE_SECTION_INFO
44179 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
44181 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
44184 #undef TARGET_ASM_OPEN_PAREN
44185 #define TARGET_ASM_OPEN_PAREN ""
44186 #undef TARGET_ASM_CLOSE_PAREN
44187 #define TARGET_ASM_CLOSE_PAREN ""
44189 #undef TARGET_ASM_BYTE_OP
44190 #define TARGET_ASM_BYTE_OP ASM_BYTE
44192 #undef TARGET_ASM_ALIGNED_HI_OP
44193 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
44194 #undef TARGET_ASM_ALIGNED_SI_OP
44195 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
44197 #undef TARGET_ASM_ALIGNED_DI_OP
44198 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
44201 #undef TARGET_PROFILE_BEFORE_PROLOGUE
44202 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
44204 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
44205 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
44207 #undef TARGET_ASM_UNALIGNED_HI_OP
44208 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
44209 #undef TARGET_ASM_UNALIGNED_SI_OP
44210 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
44211 #undef TARGET_ASM_UNALIGNED_DI_OP
44212 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
44214 #undef TARGET_PRINT_OPERAND
44215 #define TARGET_PRINT_OPERAND ix86_print_operand
44216 #undef TARGET_PRINT_OPERAND_ADDRESS
44217 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
44218 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
44219 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
44220 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
44221 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
44223 #undef TARGET_SCHED_INIT_GLOBAL
44224 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
44225 #undef TARGET_SCHED_ADJUST_COST
44226 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
44227 #undef TARGET_SCHED_ISSUE_RATE
44228 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
44229 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
44230 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
44231 ia32_multipass_dfa_lookahead
44232 #undef TARGET_SCHED_MACRO_FUSION_P
44233 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
44234 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
44235 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
44237 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
44238 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
44240 #undef TARGET_MEMMODEL_CHECK
44241 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
44243 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
44244 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
44247 #undef TARGET_HAVE_TLS
44248 #define TARGET_HAVE_TLS true
44250 #undef TARGET_CANNOT_FORCE_CONST_MEM
44251 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
44252 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
44253 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
44255 #undef TARGET_DELEGITIMIZE_ADDRESS
44256 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
44258 #undef TARGET_MS_BITFIELD_LAYOUT_P
44259 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
44262 #undef TARGET_BINDS_LOCAL_P
44263 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
44265 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
44266 #undef TARGET_BINDS_LOCAL_P
44267 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
44270 #undef TARGET_ASM_OUTPUT_MI_THUNK
44271 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
44272 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
44273 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
44275 #undef TARGET_ASM_FILE_START
44276 #define TARGET_ASM_FILE_START x86_file_start
44278 #undef TARGET_OPTION_OVERRIDE
44279 #define TARGET_OPTION_OVERRIDE ix86_option_override
44281 #undef TARGET_REGISTER_MOVE_COST
44282 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
44283 #undef TARGET_MEMORY_MOVE_COST
44284 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
44285 #undef TARGET_RTX_COSTS
44286 #define TARGET_RTX_COSTS ix86_rtx_costs
44287 #undef TARGET_ADDRESS_COST
44288 #define TARGET_ADDRESS_COST ix86_address_cost
44290 #undef TARGET_FIXED_CONDITION_CODE_REGS
44291 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
44292 #undef TARGET_CC_MODES_COMPATIBLE
44293 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
44295 #undef TARGET_MACHINE_DEPENDENT_REORG
44296 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
44298 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
44299 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
44301 #undef TARGET_BUILD_BUILTIN_VA_LIST
44302 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
44304 #undef TARGET_FOLD_BUILTIN
44305 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
44307 #undef TARGET_COMPARE_VERSION_PRIORITY
44308 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
44310 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
44311 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
44312 ix86_generate_version_dispatcher_body
44314 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
44315 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
44316 ix86_get_function_versions_dispatcher
44318 #undef TARGET_ENUM_VA_LIST_P
44319 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
44321 #undef TARGET_FN_ABI_VA_LIST
44322 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
44324 #undef TARGET_CANONICAL_VA_LIST_TYPE
44325 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
44327 #undef TARGET_EXPAND_BUILTIN_VA_START
44328 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
44330 #undef TARGET_MD_ASM_CLOBBERS
44331 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
44333 #undef TARGET_PROMOTE_PROTOTYPES
44334 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
44335 #undef TARGET_STRUCT_VALUE_RTX
44336 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
44337 #undef TARGET_SETUP_INCOMING_VARARGS
44338 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
44339 #undef TARGET_MUST_PASS_IN_STACK
44340 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
44341 #undef TARGET_FUNCTION_ARG_ADVANCE
44342 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
44343 #undef TARGET_FUNCTION_ARG
44344 #define TARGET_FUNCTION_ARG ix86_function_arg
44345 #undef TARGET_FUNCTION_ARG_BOUNDARY
44346 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
44347 #undef TARGET_PASS_BY_REFERENCE
44348 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
44349 #undef TARGET_INTERNAL_ARG_POINTER
44350 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
44351 #undef TARGET_UPDATE_STACK_BOUNDARY
44352 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
44353 #undef TARGET_GET_DRAP_RTX
44354 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
44355 #undef TARGET_STRICT_ARGUMENT_NAMING
44356 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
44357 #undef TARGET_STATIC_CHAIN
44358 #define TARGET_STATIC_CHAIN ix86_static_chain
44359 #undef TARGET_TRAMPOLINE_INIT
44360 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
44361 #undef TARGET_RETURN_POPS_ARGS
44362 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
44364 #undef TARGET_LEGITIMATE_COMBINED_INSN
44365 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
44367 #undef TARGET_ASAN_SHADOW_OFFSET
44368 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
44370 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
44371 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
44373 #undef TARGET_SCALAR_MODE_SUPPORTED_P
44374 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
44376 #undef TARGET_VECTOR_MODE_SUPPORTED_P
44377 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
44379 #undef TARGET_C_MODE_FOR_SUFFIX
44380 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
44383 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
44384 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
44387 #ifdef SUBTARGET_INSERT_ATTRIBUTES
44388 #undef TARGET_INSERT_ATTRIBUTES
44389 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
44392 #undef TARGET_MANGLE_TYPE
44393 #define TARGET_MANGLE_TYPE ix86_mangle_type
44396 #undef TARGET_STACK_PROTECT_FAIL
44397 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
44400 #undef TARGET_FUNCTION_VALUE
44401 #define TARGET_FUNCTION_VALUE ix86_function_value
44403 #undef TARGET_FUNCTION_VALUE_REGNO_P
44404 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
44406 #undef TARGET_PROMOTE_FUNCTION_MODE
44407 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
44409 #undef TARGET_MEMBER_TYPE_FORCES_BLK
44410 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
44412 #undef TARGET_INSTANTIATE_DECLS
44413 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
44415 #undef TARGET_SECONDARY_RELOAD
44416 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
44418 #undef TARGET_CLASS_MAX_NREGS
44419 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
44421 #undef TARGET_PREFERRED_RELOAD_CLASS
44422 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
44423 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
44424 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
44425 #undef TARGET_CLASS_LIKELY_SPILLED_P
44426 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
44428 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
44429 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
44430 ix86_builtin_vectorization_cost
44431 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
44432 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
44433 ix86_vectorize_vec_perm_const_ok
44434 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
44435 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
44436 ix86_preferred_simd_mode
44437 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
44438 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
44439 ix86_autovectorize_vector_sizes
44440 #undef TARGET_VECTORIZE_INIT_COST
44441 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
44442 #undef TARGET_VECTORIZE_ADD_STMT_COST
44443 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
44444 #undef TARGET_VECTORIZE_FINISH_COST
44445 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
44446 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
44447 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
44449 #undef TARGET_SET_CURRENT_FUNCTION
44450 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
44452 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
44453 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
44455 #undef TARGET_OPTION_SAVE
44456 #define TARGET_OPTION_SAVE ix86_function_specific_save
44458 #undef TARGET_OPTION_RESTORE
44459 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
44461 #undef TARGET_OPTION_PRINT
44462 #define TARGET_OPTION_PRINT ix86_function_specific_print
44464 #undef TARGET_OPTION_FUNCTION_VERSIONS
44465 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
44467 #undef TARGET_CAN_INLINE_P
44468 #define TARGET_CAN_INLINE_P ix86_can_inline_p
44470 #undef TARGET_EXPAND_TO_RTL_HOOK
44471 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
44473 #undef TARGET_LEGITIMATE_ADDRESS_P
44474 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
44476 #undef TARGET_LRA_P
44477 #define TARGET_LRA_P hook_bool_void_true
44479 #undef TARGET_REGISTER_PRIORITY
44480 #define TARGET_REGISTER_PRIORITY ix86_register_priority
44482 #undef TARGET_REGISTER_USAGE_LEVELING_P
44483 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
44485 #undef TARGET_LEGITIMATE_CONSTANT_P
44486 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
44488 #undef TARGET_FRAME_POINTER_REQUIRED
44489 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
44491 #undef TARGET_CAN_ELIMINATE
44492 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
44494 #undef TARGET_EXTRA_LIVE_ON_ENTRY
44495 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
44497 #undef TARGET_ASM_CODE_END
44498 #define TARGET_ASM_CODE_END ix86_code_end
44500 #undef TARGET_CONDITIONAL_REGISTER_USAGE
44501 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
44504 #undef TARGET_INIT_LIBFUNCS
44505 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
44508 #undef TARGET_SPILL_CLASS
44509 #define TARGET_SPILL_CLASS ix86_spill_class
44511 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
44512 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
44513 ix86_simd_clone_compute_vecsize_and_simdlen
44515 #undef TARGET_SIMD_CLONE_ADJUST
44516 #define TARGET_SIMD_CLONE_ADJUST \
44517 ix86_simd_clone_adjust
44519 #undef TARGET_SIMD_CLONE_USABLE
44520 #define TARGET_SIMD_CLONE_USABLE \
44521 ix86_simd_clone_usable
44523 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
44524 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
44525 ix86_float_exceptions_rounding_supported_p
44527 struct gcc_target targetm
= TARGET_INITIALIZER
;
44529 #include "gt-i386.h"