1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
32 #include "stringpool.h"
39 #include "diagnostic.h"
42 #include "fold-const.h"
45 #include "stor-layout.h"
48 #include "insn-attr.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
59 #include "tm-constrs.h"
62 #include "sched-int.h"
64 #include "tree-pass.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
73 #include "tree-iterator.h"
74 #include "tree-chkp.h"
77 #include "case-cfn-macros.h"
78 #include "regrename.h"
80 #include "fold-const-call.h"
82 #include "tree-ssanames.h"
84 #include "selftest-rtl.h"
85 #include "print-rtl.h"
88 #include "symbol-summary.h"
90 #include "ipa-fnsummary.h"
92 /* This file should be included last. */
93 #include "target-def.h"
95 static rtx
legitimize_dllimport_symbol (rtx
, bool);
96 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
97 static rtx
legitimize_pe_coff_symbol (rtx
, bool);
98 static void ix86_print_operand_address_as (FILE *, rtx
, addr_space_t
, bool);
99 static bool ix86_save_reg (unsigned int, bool, bool);
100 static bool ix86_function_naked (const_tree
);
102 #ifndef CHECK_STACK_LIMIT
103 #define CHECK_STACK_LIMIT (-1)
106 /* Return index of given mode in mult and division cost tables. */
107 #define MODE_INDEX(mode) \
108 ((mode) == QImode ? 0 \
109 : (mode) == HImode ? 1 \
110 : (mode) == SImode ? 2 \
111 : (mode) == DImode ? 3 \
114 /* Processor costs (relative to an add) */
115 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
116 #define COSTS_N_BYTES(N) ((N) * 2)
118 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
120 static stringop_algs ix86_size_memcpy
[2] = {
121 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
122 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
123 static stringop_algs ix86_size_memset
[2] = {
124 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
125 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}};
128 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
129 COSTS_N_BYTES (2), /* cost of an add instruction */
130 COSTS_N_BYTES (3), /* cost of a lea instruction */
131 COSTS_N_BYTES (2), /* variable shift costs */
132 COSTS_N_BYTES (3), /* constant shift costs */
133 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
134 COSTS_N_BYTES (3), /* HI */
135 COSTS_N_BYTES (3), /* SI */
136 COSTS_N_BYTES (3), /* DI */
137 COSTS_N_BYTES (5)}, /* other */
138 0, /* cost of multiply per each bit set */
139 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
140 COSTS_N_BYTES (3), /* HI */
141 COSTS_N_BYTES (3), /* SI */
142 COSTS_N_BYTES (3), /* DI */
143 COSTS_N_BYTES (5)}, /* other */
144 COSTS_N_BYTES (3), /* cost of movsx */
145 COSTS_N_BYTES (3), /* cost of movzx */
146 0, /* "large" insn */
148 2, /* cost for loading QImode using movzbl */
149 {2, 2, 2}, /* cost of loading integer registers
150 in QImode, HImode and SImode.
151 Relative to reg-reg move (2). */
152 {2, 2, 2}, /* cost of storing integer registers */
153 2, /* cost of reg,reg fld/fst */
154 {2, 2, 2}, /* cost of loading fp registers
155 in SFmode, DFmode and XFmode */
156 {2, 2, 2}, /* cost of storing fp registers
157 in SFmode, DFmode and XFmode */
158 3, /* cost of moving MMX register */
159 {3, 3}, /* cost of loading MMX registers
160 in SImode and DImode */
161 {3, 3}, /* cost of storing MMX registers
162 in SImode and DImode */
163 3, /* cost of moving SSE register */
164 {3, 3, 3}, /* cost of loading SSE registers
165 in SImode, DImode and TImode */
166 {3, 3, 3}, /* cost of storing SSE registers
167 in SImode, DImode and TImode */
168 3, /* MMX or SSE register to integer */
169 0, /* size of l1 cache */
170 0, /* size of l2 cache */
171 0, /* size of prefetch block */
172 0, /* number of parallel prefetches */
174 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
175 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
176 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
177 COSTS_N_BYTES (2), /* cost of FABS instruction. */
178 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
179 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
182 1, /* scalar_stmt_cost. */
183 1, /* scalar load_cost. */
184 1, /* scalar_store_cost. */
185 1, /* vec_stmt_cost. */
186 1, /* vec_to_scalar_cost. */
187 1, /* scalar_to_vec_cost. */
188 1, /* vec_align_load_cost. */
189 1, /* vec_unalign_load_cost. */
190 1, /* vec_store_cost. */
191 1, /* cond_taken_branch_cost. */
192 1, /* cond_not_taken_branch_cost. */
195 /* Processor costs (relative to an add) */
196 static stringop_algs i386_memcpy
[2] = {
197 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
198 DUMMY_STRINGOP_ALGS
};
199 static stringop_algs i386_memset
[2] = {
200 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
201 DUMMY_STRINGOP_ALGS
};
204 struct processor_costs i386_cost
= { /* 386 specific costs */
205 COSTS_N_INSNS (1), /* cost of an add instruction */
206 COSTS_N_INSNS (1), /* cost of a lea instruction */
207 COSTS_N_INSNS (3), /* variable shift costs */
208 COSTS_N_INSNS (2), /* constant shift costs */
209 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
210 COSTS_N_INSNS (6), /* HI */
211 COSTS_N_INSNS (6), /* SI */
212 COSTS_N_INSNS (6), /* DI */
213 COSTS_N_INSNS (6)}, /* other */
214 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
215 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
216 COSTS_N_INSNS (23), /* HI */
217 COSTS_N_INSNS (23), /* SI */
218 COSTS_N_INSNS (23), /* DI */
219 COSTS_N_INSNS (23)}, /* other */
220 COSTS_N_INSNS (3), /* cost of movsx */
221 COSTS_N_INSNS (2), /* cost of movzx */
222 15, /* "large" insn */
224 4, /* cost for loading QImode using movzbl */
225 {2, 4, 2}, /* cost of loading integer registers
226 in QImode, HImode and SImode.
227 Relative to reg-reg move (2). */
228 {2, 4, 2}, /* cost of storing integer registers */
229 2, /* cost of reg,reg fld/fst */
230 {8, 8, 8}, /* cost of loading fp registers
231 in SFmode, DFmode and XFmode */
232 {8, 8, 8}, /* cost of storing fp registers
233 in SFmode, DFmode and XFmode */
234 2, /* cost of moving MMX register */
235 {4, 8}, /* cost of loading MMX registers
236 in SImode and DImode */
237 {4, 8}, /* cost of storing MMX registers
238 in SImode and DImode */
239 2, /* cost of moving SSE register */
240 {4, 8, 16}, /* cost of loading SSE registers
241 in SImode, DImode and TImode */
242 {4, 8, 16}, /* cost of storing SSE registers
243 in SImode, DImode and TImode */
244 3, /* MMX or SSE register to integer */
245 0, /* size of l1 cache */
246 0, /* size of l2 cache */
247 0, /* size of prefetch block */
248 0, /* number of parallel prefetches */
250 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
251 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
252 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
253 COSTS_N_INSNS (22), /* cost of FABS instruction. */
254 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
255 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
258 1, /* scalar_stmt_cost. */
259 1, /* scalar load_cost. */
260 1, /* scalar_store_cost. */
261 1, /* vec_stmt_cost. */
262 1, /* vec_to_scalar_cost. */
263 1, /* scalar_to_vec_cost. */
264 1, /* vec_align_load_cost. */
265 2, /* vec_unalign_load_cost. */
266 1, /* vec_store_cost. */
267 3, /* cond_taken_branch_cost. */
268 1, /* cond_not_taken_branch_cost. */
271 static stringop_algs i486_memcpy
[2] = {
272 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
273 DUMMY_STRINGOP_ALGS
};
274 static stringop_algs i486_memset
[2] = {
275 {rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
276 DUMMY_STRINGOP_ALGS
};
279 struct processor_costs i486_cost
= { /* 486 specific costs */
280 COSTS_N_INSNS (1), /* cost of an add instruction */
281 COSTS_N_INSNS (1), /* cost of a lea instruction */
282 COSTS_N_INSNS (3), /* variable shift costs */
283 COSTS_N_INSNS (2), /* constant shift costs */
284 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
285 COSTS_N_INSNS (12), /* HI */
286 COSTS_N_INSNS (12), /* SI */
287 COSTS_N_INSNS (12), /* DI */
288 COSTS_N_INSNS (12)}, /* other */
289 1, /* cost of multiply per each bit set */
290 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
291 COSTS_N_INSNS (40), /* HI */
292 COSTS_N_INSNS (40), /* SI */
293 COSTS_N_INSNS (40), /* DI */
294 COSTS_N_INSNS (40)}, /* other */
295 COSTS_N_INSNS (3), /* cost of movsx */
296 COSTS_N_INSNS (2), /* cost of movzx */
297 15, /* "large" insn */
299 4, /* cost for loading QImode using movzbl */
300 {2, 4, 2}, /* cost of loading integer registers
301 in QImode, HImode and SImode.
302 Relative to reg-reg move (2). */
303 {2, 4, 2}, /* cost of storing integer registers */
304 2, /* cost of reg,reg fld/fst */
305 {8, 8, 8}, /* cost of loading fp registers
306 in SFmode, DFmode and XFmode */
307 {8, 8, 8}, /* cost of storing fp registers
308 in SFmode, DFmode and XFmode */
309 2, /* cost of moving MMX register */
310 {4, 8}, /* cost of loading MMX registers
311 in SImode and DImode */
312 {4, 8}, /* cost of storing MMX registers
313 in SImode and DImode */
314 2, /* cost of moving SSE register */
315 {4, 8, 16}, /* cost of loading SSE registers
316 in SImode, DImode and TImode */
317 {4, 8, 16}, /* cost of storing SSE registers
318 in SImode, DImode and TImode */
319 3, /* MMX or SSE register to integer */
320 4, /* size of l1 cache. 486 has 8kB cache
321 shared for code and data, so 4kB is
322 not really precise. */
323 4, /* size of l2 cache */
324 0, /* size of prefetch block */
325 0, /* number of parallel prefetches */
327 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
328 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
329 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
330 COSTS_N_INSNS (3), /* cost of FABS instruction. */
331 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
332 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
335 1, /* scalar_stmt_cost. */
336 1, /* scalar load_cost. */
337 1, /* scalar_store_cost. */
338 1, /* vec_stmt_cost. */
339 1, /* vec_to_scalar_cost. */
340 1, /* scalar_to_vec_cost. */
341 1, /* vec_align_load_cost. */
342 2, /* vec_unalign_load_cost. */
343 1, /* vec_store_cost. */
344 3, /* cond_taken_branch_cost. */
345 1, /* cond_not_taken_branch_cost. */
348 static stringop_algs pentium_memcpy
[2] = {
349 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
350 DUMMY_STRINGOP_ALGS
};
351 static stringop_algs pentium_memset
[2] = {
352 {libcall
, {{-1, rep_prefix_4_byte
, false}}},
353 DUMMY_STRINGOP_ALGS
};
356 struct processor_costs pentium_cost
= {
357 COSTS_N_INSNS (1), /* cost of an add instruction */
358 COSTS_N_INSNS (1), /* cost of a lea instruction */
359 COSTS_N_INSNS (4), /* variable shift costs */
360 COSTS_N_INSNS (1), /* constant shift costs */
361 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
362 COSTS_N_INSNS (11), /* HI */
363 COSTS_N_INSNS (11), /* SI */
364 COSTS_N_INSNS (11), /* DI */
365 COSTS_N_INSNS (11)}, /* other */
366 0, /* cost of multiply per each bit set */
367 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
368 COSTS_N_INSNS (25), /* HI */
369 COSTS_N_INSNS (25), /* SI */
370 COSTS_N_INSNS (25), /* DI */
371 COSTS_N_INSNS (25)}, /* other */
372 COSTS_N_INSNS (3), /* cost of movsx */
373 COSTS_N_INSNS (2), /* cost of movzx */
374 8, /* "large" insn */
376 6, /* cost for loading QImode using movzbl */
377 {2, 4, 2}, /* cost of loading integer registers
378 in QImode, HImode and SImode.
379 Relative to reg-reg move (2). */
380 {2, 4, 2}, /* cost of storing integer registers */
381 2, /* cost of reg,reg fld/fst */
382 {2, 2, 6}, /* cost of loading fp registers
383 in SFmode, DFmode and XFmode */
384 {4, 4, 6}, /* cost of storing fp registers
385 in SFmode, DFmode and XFmode */
386 8, /* cost of moving MMX register */
387 {8, 8}, /* cost of loading MMX registers
388 in SImode and DImode */
389 {8, 8}, /* cost of storing MMX registers
390 in SImode and DImode */
391 2, /* cost of moving SSE register */
392 {4, 8, 16}, /* cost of loading SSE registers
393 in SImode, DImode and TImode */
394 {4, 8, 16}, /* cost of storing SSE registers
395 in SImode, DImode and TImode */
396 3, /* MMX or SSE register to integer */
397 8, /* size of l1 cache. */
398 8, /* size of l2 cache */
399 0, /* size of prefetch block */
400 0, /* number of parallel prefetches */
402 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
403 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
404 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
405 COSTS_N_INSNS (1), /* cost of FABS instruction. */
406 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
407 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
410 1, /* scalar_stmt_cost. */
411 1, /* scalar load_cost. */
412 1, /* scalar_store_cost. */
413 1, /* vec_stmt_cost. */
414 1, /* vec_to_scalar_cost. */
415 1, /* scalar_to_vec_cost. */
416 1, /* vec_align_load_cost. */
417 2, /* vec_unalign_load_cost. */
418 1, /* vec_store_cost. */
419 3, /* cond_taken_branch_cost. */
420 1, /* cond_not_taken_branch_cost. */
424 struct processor_costs lakemont_cost
= {
425 COSTS_N_INSNS (1), /* cost of an add instruction */
426 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
427 COSTS_N_INSNS (1), /* variable shift costs */
428 COSTS_N_INSNS (1), /* constant shift costs */
429 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
430 COSTS_N_INSNS (11), /* HI */
431 COSTS_N_INSNS (11), /* SI */
432 COSTS_N_INSNS (11), /* DI */
433 COSTS_N_INSNS (11)}, /* other */
434 0, /* cost of multiply per each bit set */
435 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
436 COSTS_N_INSNS (25), /* HI */
437 COSTS_N_INSNS (25), /* SI */
438 COSTS_N_INSNS (25), /* DI */
439 COSTS_N_INSNS (25)}, /* other */
440 COSTS_N_INSNS (3), /* cost of movsx */
441 COSTS_N_INSNS (2), /* cost of movzx */
442 8, /* "large" insn */
444 6, /* cost for loading QImode using movzbl */
445 {2, 4, 2}, /* cost of loading integer registers
446 in QImode, HImode and SImode.
447 Relative to reg-reg move (2). */
448 {2, 4, 2}, /* cost of storing integer registers */
449 2, /* cost of reg,reg fld/fst */
450 {2, 2, 6}, /* cost of loading fp registers
451 in SFmode, DFmode and XFmode */
452 {4, 4, 6}, /* cost of storing fp registers
453 in SFmode, DFmode and XFmode */
454 8, /* cost of moving MMX register */
455 {8, 8}, /* cost of loading MMX registers
456 in SImode and DImode */
457 {8, 8}, /* cost of storing MMX registers
458 in SImode and DImode */
459 2, /* cost of moving SSE register */
460 {4, 8, 16}, /* cost of loading SSE registers
461 in SImode, DImode and TImode */
462 {4, 8, 16}, /* cost of storing SSE registers
463 in SImode, DImode and TImode */
464 3, /* MMX or SSE register to integer */
465 8, /* size of l1 cache. */
466 8, /* size of l2 cache */
467 0, /* size of prefetch block */
468 0, /* number of parallel prefetches */
470 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
471 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
472 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
473 COSTS_N_INSNS (1), /* cost of FABS instruction. */
474 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
475 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
478 1, /* scalar_stmt_cost. */
479 1, /* scalar load_cost. */
480 1, /* scalar_store_cost. */
481 1, /* vec_stmt_cost. */
482 1, /* vec_to_scalar_cost. */
483 1, /* scalar_to_vec_cost. */
484 1, /* vec_align_load_cost. */
485 2, /* vec_unalign_load_cost. */
486 1, /* vec_store_cost. */
487 3, /* cond_taken_branch_cost. */
488 1, /* cond_not_taken_branch_cost. */
491 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
492 (we ensure the alignment). For small blocks inline loop is still a
493 noticeable win, for bigger blocks either rep movsl or rep movsb is
494 way to go. Rep movsb has apparently more expensive startup time in CPU,
495 but after 4K the difference is down in the noise. */
496 static stringop_algs pentiumpro_memcpy
[2] = {
497 {rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
498 {8192, rep_prefix_4_byte
, false},
499 {-1, rep_prefix_1_byte
, false}}},
500 DUMMY_STRINGOP_ALGS
};
501 static stringop_algs pentiumpro_memset
[2] = {
502 {rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
503 {8192, rep_prefix_4_byte
, false},
504 {-1, libcall
, false}}},
505 DUMMY_STRINGOP_ALGS
};
507 struct processor_costs pentiumpro_cost
= {
508 COSTS_N_INSNS (1), /* cost of an add instruction */
509 COSTS_N_INSNS (1), /* cost of a lea instruction */
510 COSTS_N_INSNS (1), /* variable shift costs */
511 COSTS_N_INSNS (1), /* constant shift costs */
512 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
513 COSTS_N_INSNS (4), /* HI */
514 COSTS_N_INSNS (4), /* SI */
515 COSTS_N_INSNS (4), /* DI */
516 COSTS_N_INSNS (4)}, /* other */
517 0, /* cost of multiply per each bit set */
518 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
519 COSTS_N_INSNS (17), /* HI */
520 COSTS_N_INSNS (17), /* SI */
521 COSTS_N_INSNS (17), /* DI */
522 COSTS_N_INSNS (17)}, /* other */
523 COSTS_N_INSNS (1), /* cost of movsx */
524 COSTS_N_INSNS (1), /* cost of movzx */
525 8, /* "large" insn */
527 2, /* cost for loading QImode using movzbl */
528 {4, 4, 4}, /* cost of loading integer registers
529 in QImode, HImode and SImode.
530 Relative to reg-reg move (2). */
531 {2, 2, 2}, /* cost of storing integer registers */
532 2, /* cost of reg,reg fld/fst */
533 {2, 2, 6}, /* cost of loading fp registers
534 in SFmode, DFmode and XFmode */
535 {4, 4, 6}, /* cost of storing fp registers
536 in SFmode, DFmode and XFmode */
537 2, /* cost of moving MMX register */
538 {2, 2}, /* cost of loading MMX registers
539 in SImode and DImode */
540 {2, 2}, /* cost of storing MMX registers
541 in SImode and DImode */
542 2, /* cost of moving SSE register */
543 {2, 2, 8}, /* cost of loading SSE registers
544 in SImode, DImode and TImode */
545 {2, 2, 8}, /* cost of storing SSE registers
546 in SImode, DImode and TImode */
547 3, /* MMX or SSE register to integer */
548 8, /* size of l1 cache. */
549 256, /* size of l2 cache */
550 32, /* size of prefetch block */
551 6, /* number of parallel prefetches */
553 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
554 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
555 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
556 COSTS_N_INSNS (2), /* cost of FABS instruction. */
557 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
558 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
561 1, /* scalar_stmt_cost. */
562 1, /* scalar load_cost. */
563 1, /* scalar_store_cost. */
564 1, /* vec_stmt_cost. */
565 1, /* vec_to_scalar_cost. */
566 1, /* scalar_to_vec_cost. */
567 1, /* vec_align_load_cost. */
568 2, /* vec_unalign_load_cost. */
569 1, /* vec_store_cost. */
570 3, /* cond_taken_branch_cost. */
571 1, /* cond_not_taken_branch_cost. */
574 static stringop_algs geode_memcpy
[2] = {
575 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
576 DUMMY_STRINGOP_ALGS
};
577 static stringop_algs geode_memset
[2] = {
578 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
579 DUMMY_STRINGOP_ALGS
};
581 struct processor_costs geode_cost
= {
582 COSTS_N_INSNS (1), /* cost of an add instruction */
583 COSTS_N_INSNS (1), /* cost of a lea instruction */
584 COSTS_N_INSNS (2), /* variable shift costs */
585 COSTS_N_INSNS (1), /* constant shift costs */
586 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
587 COSTS_N_INSNS (4), /* HI */
588 COSTS_N_INSNS (7), /* SI */
589 COSTS_N_INSNS (7), /* DI */
590 COSTS_N_INSNS (7)}, /* other */
591 0, /* cost of multiply per each bit set */
592 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
593 COSTS_N_INSNS (23), /* HI */
594 COSTS_N_INSNS (39), /* SI */
595 COSTS_N_INSNS (39), /* DI */
596 COSTS_N_INSNS (39)}, /* other */
597 COSTS_N_INSNS (1), /* cost of movsx */
598 COSTS_N_INSNS (1), /* cost of movzx */
599 8, /* "large" insn */
601 1, /* cost for loading QImode using movzbl */
602 {1, 1, 1}, /* cost of loading integer registers
603 in QImode, HImode and SImode.
604 Relative to reg-reg move (2). */
605 {1, 1, 1}, /* cost of storing integer registers */
606 1, /* cost of reg,reg fld/fst */
607 {1, 1, 1}, /* cost of loading fp registers
608 in SFmode, DFmode and XFmode */
609 {4, 6, 6}, /* cost of storing fp registers
610 in SFmode, DFmode and XFmode */
612 2, /* cost of moving MMX register */
613 {2, 2}, /* cost of loading MMX registers
614 in SImode and DImode */
615 {2, 2}, /* cost of storing MMX registers
616 in SImode and DImode */
617 2, /* cost of moving SSE register */
618 {2, 2, 8}, /* cost of loading SSE registers
619 in SImode, DImode and TImode */
620 {2, 2, 8}, /* cost of storing SSE registers
621 in SImode, DImode and TImode */
622 3, /* MMX or SSE register to integer */
623 64, /* size of l1 cache. */
624 128, /* size of l2 cache. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
628 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (1), /* cost of FABS instruction. */
632 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
649 static stringop_algs k6_memcpy
[2] = {
650 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
};
652 static stringop_algs k6_memset
[2] = {
653 {libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
654 DUMMY_STRINGOP_ALGS
};
656 struct processor_costs k6_cost
= {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (3), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (3), /* DI */
665 COSTS_N_INSNS (3)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (18), /* HI */
669 COSTS_N_INSNS (18), /* SI */
670 COSTS_N_INSNS (18), /* DI */
671 COSTS_N_INSNS (18)}, /* other */
672 COSTS_N_INSNS (2), /* cost of movsx */
673 COSTS_N_INSNS (2), /* cost of movzx */
674 8, /* "large" insn */
676 3, /* cost for loading QImode using movzbl */
677 {4, 5, 4}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {2, 3, 2}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {6, 6, 6}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {4, 4, 4}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {2, 2}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {2, 2}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {2, 2, 8}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {2, 2, 8}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 6, /* MMX or SSE register to integer */
697 32, /* size of l1 cache. */
698 32, /* size of l2 cache. Some models
699 have integrated l2 cache, but
700 optimizing for k6 is not important
701 enough to worry about that. */
702 32, /* size of prefetch block */
703 1, /* number of parallel prefetches */
705 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
726 /* For some reason, Athlon deals better with REP prefix (relative to loops)
727 compared to K8. Alignment becomes important after 8 bytes for memcpy and
728 128 bytes for memset. */
729 static stringop_algs athlon_memcpy
[2] = {
730 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
731 DUMMY_STRINGOP_ALGS
};
732 static stringop_algs athlon_memset
[2] = {
733 {libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
734 DUMMY_STRINGOP_ALGS
};
736 struct processor_costs athlon_cost
= {
737 COSTS_N_INSNS (1), /* cost of an add instruction */
738 COSTS_N_INSNS (2), /* cost of a lea instruction */
739 COSTS_N_INSNS (1), /* variable shift costs */
740 COSTS_N_INSNS (1), /* constant shift costs */
741 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
742 COSTS_N_INSNS (5), /* HI */
743 COSTS_N_INSNS (5), /* SI */
744 COSTS_N_INSNS (5), /* DI */
745 COSTS_N_INSNS (5)}, /* other */
746 0, /* cost of multiply per each bit set */
747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
748 COSTS_N_INSNS (26), /* HI */
749 COSTS_N_INSNS (42), /* SI */
750 COSTS_N_INSNS (74), /* DI */
751 COSTS_N_INSNS (74)}, /* other */
752 COSTS_N_INSNS (1), /* cost of movsx */
753 COSTS_N_INSNS (1), /* cost of movzx */
754 8, /* "large" insn */
756 4, /* cost for loading QImode using movzbl */
757 {3, 4, 3}, /* cost of loading integer registers
758 in QImode, HImode and SImode.
759 Relative to reg-reg move (2). */
760 {3, 4, 3}, /* cost of storing integer registers */
761 4, /* cost of reg,reg fld/fst */
762 {4, 4, 12}, /* cost of loading fp registers
763 in SFmode, DFmode and XFmode */
764 {6, 6, 8}, /* cost of storing fp registers
765 in SFmode, DFmode and XFmode */
766 2, /* cost of moving MMX register */
767 {4, 4}, /* cost of loading MMX registers
768 in SImode and DImode */
769 {4, 4}, /* cost of storing MMX registers
770 in SImode and DImode */
771 2, /* cost of moving SSE register */
772 {4, 4, 6}, /* cost of loading SSE registers
773 in SImode, DImode and TImode */
774 {4, 4, 5}, /* cost of storing SSE registers
775 in SImode, DImode and TImode */
776 5, /* MMX or SSE register to integer */
777 64, /* size of l1 cache. */
778 256, /* size of l2 cache. */
779 64, /* size of prefetch block */
780 6, /* number of parallel prefetches */
782 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
783 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
784 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
785 COSTS_N_INSNS (2), /* cost of FABS instruction. */
786 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
787 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
790 1, /* scalar_stmt_cost. */
791 1, /* scalar load_cost. */
792 1, /* scalar_store_cost. */
793 1, /* vec_stmt_cost. */
794 1, /* vec_to_scalar_cost. */
795 1, /* scalar_to_vec_cost. */
796 1, /* vec_align_load_cost. */
797 2, /* vec_unalign_load_cost. */
798 1, /* vec_store_cost. */
799 3, /* cond_taken_branch_cost. */
800 1, /* cond_not_taken_branch_cost. */
803 /* K8 has optimized REP instruction for medium sized blocks, but for very
804 small blocks it is better to use loop. For large blocks, libcall can
805 do nontemporary accesses and beat inline considerably. */
806 static stringop_algs k8_memcpy
[2] = {
807 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
808 {-1, rep_prefix_4_byte
, false}}},
809 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
810 {-1, libcall
, false}}}};
811 static stringop_algs k8_memset
[2] = {
812 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
813 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
814 {libcall
, {{48, unrolled_loop
, false},
815 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}};
817 struct processor_costs k8_cost
= {
818 COSTS_N_INSNS (1), /* cost of an add instruction */
819 COSTS_N_INSNS (2), /* cost of a lea instruction */
820 COSTS_N_INSNS (1), /* variable shift costs */
821 COSTS_N_INSNS (1), /* constant shift costs */
822 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
823 COSTS_N_INSNS (4), /* HI */
824 COSTS_N_INSNS (3), /* SI */
825 COSTS_N_INSNS (4), /* DI */
826 COSTS_N_INSNS (5)}, /* other */
827 0, /* cost of multiply per each bit set */
828 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
829 COSTS_N_INSNS (26), /* HI */
830 COSTS_N_INSNS (42), /* SI */
831 COSTS_N_INSNS (74), /* DI */
832 COSTS_N_INSNS (74)}, /* other */
833 COSTS_N_INSNS (1), /* cost of movsx */
834 COSTS_N_INSNS (1), /* cost of movzx */
835 8, /* "large" insn */
837 4, /* cost for loading QImode using movzbl */
838 {3, 4, 3}, /* cost of loading integer registers
839 in QImode, HImode and SImode.
840 Relative to reg-reg move (2). */
841 {3, 4, 3}, /* cost of storing integer registers */
842 4, /* cost of reg,reg fld/fst */
843 {4, 4, 12}, /* cost of loading fp registers
844 in SFmode, DFmode and XFmode */
845 {6, 6, 8}, /* cost of storing fp registers
846 in SFmode, DFmode and XFmode */
847 2, /* cost of moving MMX register */
848 {3, 3}, /* cost of loading MMX registers
849 in SImode and DImode */
850 {4, 4}, /* cost of storing MMX registers
851 in SImode and DImode */
852 2, /* cost of moving SSE register */
853 {4, 3, 6}, /* cost of loading SSE registers
854 in SImode, DImode and TImode */
855 {4, 4, 5}, /* cost of storing SSE registers
856 in SImode, DImode and TImode */
857 5, /* MMX or SSE register to integer */
858 64, /* size of l1 cache. */
859 512, /* size of l2 cache. */
860 64, /* size of prefetch block */
861 /* New AMD processors never drop prefetches; if they cannot be performed
862 immediately, they are queued. We set number of simultaneous prefetches
863 to a large constant to reflect this (it probably is not a good idea not
864 to limit number of prefetches at all, as their execution also takes some
866 100, /* number of parallel prefetches */
868 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
877 4, /* scalar_stmt_cost. */
878 2, /* scalar load_cost. */
879 2, /* scalar_store_cost. */
880 5, /* vec_stmt_cost. */
881 0, /* vec_to_scalar_cost. */
882 2, /* scalar_to_vec_cost. */
883 2, /* vec_align_load_cost. */
884 3, /* vec_unalign_load_cost. */
885 3, /* vec_store_cost. */
886 3, /* cond_taken_branch_cost. */
887 2, /* cond_not_taken_branch_cost. */
890 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
891 very small blocks it is better to use loop. For large blocks, libcall can
892 do nontemporary accesses and beat inline considerably. */
893 static stringop_algs amdfam10_memcpy
[2] = {
894 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
895 {-1, rep_prefix_4_byte
, false}}},
896 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
897 {-1, libcall
, false}}}};
898 static stringop_algs amdfam10_memset
[2] = {
899 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
900 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
901 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
902 {-1, libcall
, false}}}};
903 struct processor_costs amdfam10_cost
= {
904 COSTS_N_INSNS (1), /* cost of an add instruction */
905 COSTS_N_INSNS (2), /* cost of a lea instruction */
906 COSTS_N_INSNS (1), /* variable shift costs */
907 COSTS_N_INSNS (1), /* constant shift costs */
908 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
909 COSTS_N_INSNS (4), /* HI */
910 COSTS_N_INSNS (3), /* SI */
911 COSTS_N_INSNS (4), /* DI */
912 COSTS_N_INSNS (5)}, /* other */
913 0, /* cost of multiply per each bit set */
914 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
915 COSTS_N_INSNS (35), /* HI */
916 COSTS_N_INSNS (51), /* SI */
917 COSTS_N_INSNS (83), /* DI */
918 COSTS_N_INSNS (83)}, /* other */
919 COSTS_N_INSNS (1), /* cost of movsx */
920 COSTS_N_INSNS (1), /* cost of movzx */
921 8, /* "large" insn */
923 4, /* cost for loading QImode using movzbl */
924 {3, 4, 3}, /* cost of loading integer registers
925 in QImode, HImode and SImode.
926 Relative to reg-reg move (2). */
927 {3, 4, 3}, /* cost of storing integer registers */
928 4, /* cost of reg,reg fld/fst */
929 {4, 4, 12}, /* cost of loading fp registers
930 in SFmode, DFmode and XFmode */
931 {6, 6, 8}, /* cost of storing fp registers
932 in SFmode, DFmode and XFmode */
933 2, /* cost of moving MMX register */
934 {3, 3}, /* cost of loading MMX registers
935 in SImode and DImode */
936 {4, 4}, /* cost of storing MMX registers
937 in SImode and DImode */
938 2, /* cost of moving SSE register */
939 {4, 4, 3}, /* cost of loading SSE registers
940 in SImode, DImode and TImode */
941 {4, 4, 5}, /* cost of storing SSE registers
942 in SImode, DImode and TImode */
943 3, /* MMX or SSE register to integer */
945 MOVD reg64, xmmreg Double FSTORE 4
946 MOVD reg32, xmmreg Double FSTORE 4
948 MOVD reg64, xmmreg Double FADD 3
950 MOVD reg32, xmmreg Double FADD 3
952 64, /* size of l1 cache. */
953 512, /* size of l2 cache. */
954 64, /* size of prefetch block */
955 /* New AMD processors never drop prefetches; if they cannot be performed
956 immediately, they are queued. We set number of simultaneous prefetches
957 to a large constant to reflect this (it probably is not a good idea not
958 to limit number of prefetches at all, as their execution also takes some
960 100, /* number of parallel prefetches */
962 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
963 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
964 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
965 COSTS_N_INSNS (2), /* cost of FABS instruction. */
966 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
967 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
971 4, /* scalar_stmt_cost. */
972 2, /* scalar load_cost. */
973 2, /* scalar_store_cost. */
974 6, /* vec_stmt_cost. */
975 0, /* vec_to_scalar_cost. */
976 2, /* scalar_to_vec_cost. */
977 2, /* vec_align_load_cost. */
978 2, /* vec_unalign_load_cost. */
979 2, /* vec_store_cost. */
980 2, /* cond_taken_branch_cost. */
981 1, /* cond_not_taken_branch_cost. */
984 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
985 very small blocks it is better to use loop. For large blocks, libcall
986 can do nontemporary accesses and beat inline considerably. */
987 static stringop_algs bdver1_memcpy
[2] = {
988 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
989 {-1, rep_prefix_4_byte
, false}}},
990 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
991 {-1, libcall
, false}}}};
992 static stringop_algs bdver1_memset
[2] = {
993 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
994 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
995 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
996 {-1, libcall
, false}}}};
998 const struct processor_costs bdver1_cost
= {
999 COSTS_N_INSNS (1), /* cost of an add instruction */
1000 COSTS_N_INSNS (1), /* cost of a lea instruction */
1001 COSTS_N_INSNS (1), /* variable shift costs */
1002 COSTS_N_INSNS (1), /* constant shift costs */
1003 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1004 COSTS_N_INSNS (4), /* HI */
1005 COSTS_N_INSNS (4), /* SI */
1006 COSTS_N_INSNS (6), /* DI */
1007 COSTS_N_INSNS (6)}, /* other */
1008 0, /* cost of multiply per each bit set */
1009 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1010 COSTS_N_INSNS (35), /* HI */
1011 COSTS_N_INSNS (51), /* SI */
1012 COSTS_N_INSNS (83), /* DI */
1013 COSTS_N_INSNS (83)}, /* other */
1014 COSTS_N_INSNS (1), /* cost of movsx */
1015 COSTS_N_INSNS (1), /* cost of movzx */
1016 8, /* "large" insn */
1018 4, /* cost for loading QImode using movzbl */
1019 {5, 5, 4}, /* cost of loading integer registers
1020 in QImode, HImode and SImode.
1021 Relative to reg-reg move (2). */
1022 {4, 4, 4}, /* cost of storing integer registers */
1023 2, /* cost of reg,reg fld/fst */
1024 {5, 5, 12}, /* cost of loading fp registers
1025 in SFmode, DFmode and XFmode */
1026 {4, 4, 8}, /* cost of storing fp registers
1027 in SFmode, DFmode and XFmode */
1028 2, /* cost of moving MMX register */
1029 {4, 4}, /* cost of loading MMX registers
1030 in SImode and DImode */
1031 {4, 4}, /* cost of storing MMX registers
1032 in SImode and DImode */
1033 2, /* cost of moving SSE register */
1034 {4, 4, 4}, /* cost of loading SSE registers
1035 in SImode, DImode and TImode */
1036 {4, 4, 4}, /* cost of storing SSE registers
1037 in SImode, DImode and TImode */
1038 2, /* MMX or SSE register to integer */
1040 MOVD reg64, xmmreg Double FSTORE 4
1041 MOVD reg32, xmmreg Double FSTORE 4
1043 MOVD reg64, xmmreg Double FADD 3
1045 MOVD reg32, xmmreg Double FADD 3
1047 16, /* size of l1 cache. */
1048 2048, /* size of l2 cache. */
1049 64, /* size of prefetch block */
1050 /* New AMD processors never drop prefetches; if they cannot be performed
1051 immediately, they are queued. We set number of simultaneous prefetches
1052 to a large constant to reflect this (it probably is not a good idea not
1053 to limit number of prefetches at all, as their execution also takes some
1055 100, /* number of parallel prefetches */
1056 2, /* Branch cost */
1057 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1058 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1059 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1060 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1061 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1062 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1066 6, /* scalar_stmt_cost. */
1067 4, /* scalar load_cost. */
1068 4, /* scalar_store_cost. */
1069 6, /* vec_stmt_cost. */
1070 0, /* vec_to_scalar_cost. */
1071 2, /* scalar_to_vec_cost. */
1072 4, /* vec_align_load_cost. */
1073 4, /* vec_unalign_load_cost. */
1074 4, /* vec_store_cost. */
1075 4, /* cond_taken_branch_cost. */
1076 2, /* cond_not_taken_branch_cost. */
1079 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1080 very small blocks it is better to use loop. For large blocks, libcall
1081 can do nontemporary accesses and beat inline considerably. */
1083 static stringop_algs bdver2_memcpy
[2] = {
1084 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1085 {-1, rep_prefix_4_byte
, false}}},
1086 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1087 {-1, libcall
, false}}}};
1088 static stringop_algs bdver2_memset
[2] = {
1089 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1090 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1091 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1092 {-1, libcall
, false}}}};
1094 const struct processor_costs bdver2_cost
= {
1095 COSTS_N_INSNS (1), /* cost of an add instruction */
1096 COSTS_N_INSNS (1), /* cost of a lea instruction */
1097 COSTS_N_INSNS (1), /* variable shift costs */
1098 COSTS_N_INSNS (1), /* constant shift costs */
1099 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1100 COSTS_N_INSNS (4), /* HI */
1101 COSTS_N_INSNS (4), /* SI */
1102 COSTS_N_INSNS (6), /* DI */
1103 COSTS_N_INSNS (6)}, /* other */
1104 0, /* cost of multiply per each bit set */
1105 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1106 COSTS_N_INSNS (35), /* HI */
1107 COSTS_N_INSNS (51), /* SI */
1108 COSTS_N_INSNS (83), /* DI */
1109 COSTS_N_INSNS (83)}, /* other */
1110 COSTS_N_INSNS (1), /* cost of movsx */
1111 COSTS_N_INSNS (1), /* cost of movzx */
1112 8, /* "large" insn */
1114 4, /* cost for loading QImode using movzbl */
1115 {5, 5, 4}, /* cost of loading integer registers
1116 in QImode, HImode and SImode.
1117 Relative to reg-reg move (2). */
1118 {4, 4, 4}, /* cost of storing integer registers */
1119 2, /* cost of reg,reg fld/fst */
1120 {5, 5, 12}, /* cost of loading fp registers
1121 in SFmode, DFmode and XFmode */
1122 {4, 4, 8}, /* cost of storing fp registers
1123 in SFmode, DFmode and XFmode */
1124 2, /* cost of moving MMX register */
1125 {4, 4}, /* cost of loading MMX registers
1126 in SImode and DImode */
1127 {4, 4}, /* cost of storing MMX registers
1128 in SImode and DImode */
1129 2, /* cost of moving SSE register */
1130 {4, 4, 4}, /* cost of loading SSE registers
1131 in SImode, DImode and TImode */
1132 {4, 4, 4}, /* cost of storing SSE registers
1133 in SImode, DImode and TImode */
1134 2, /* MMX or SSE register to integer */
1136 MOVD reg64, xmmreg Double FSTORE 4
1137 MOVD reg32, xmmreg Double FSTORE 4
1139 MOVD reg64, xmmreg Double FADD 3
1141 MOVD reg32, xmmreg Double FADD 3
1143 16, /* size of l1 cache. */
1144 2048, /* size of l2 cache. */
1145 64, /* size of prefetch block */
1146 /* New AMD processors never drop prefetches; if they cannot be performed
1147 immediately, they are queued. We set number of simultaneous prefetches
1148 to a large constant to reflect this (it probably is not a good idea not
1149 to limit number of prefetches at all, as their execution also takes some
1151 100, /* number of parallel prefetches */
1152 2, /* Branch cost */
1153 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1154 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1155 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1156 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1157 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1158 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1162 6, /* scalar_stmt_cost. */
1163 4, /* scalar load_cost. */
1164 4, /* scalar_store_cost. */
1165 6, /* vec_stmt_cost. */
1166 0, /* vec_to_scalar_cost. */
1167 2, /* scalar_to_vec_cost. */
1168 4, /* vec_align_load_cost. */
1169 4, /* vec_unalign_load_cost. */
1170 4, /* vec_store_cost. */
1171 4, /* cond_taken_branch_cost. */
1172 2, /* cond_not_taken_branch_cost. */
1176 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1177 very small blocks it is better to use loop. For large blocks, libcall
1178 can do nontemporary accesses and beat inline considerably. */
1179 static stringop_algs bdver3_memcpy
[2] = {
1180 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1181 {-1, rep_prefix_4_byte
, false}}},
1182 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1183 {-1, libcall
, false}}}};
1184 static stringop_algs bdver3_memset
[2] = {
1185 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1186 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1187 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1188 {-1, libcall
, false}}}};
1189 struct processor_costs bdver3_cost
= {
1190 COSTS_N_INSNS (1), /* cost of an add instruction */
1191 COSTS_N_INSNS (1), /* cost of a lea instruction */
1192 COSTS_N_INSNS (1), /* variable shift costs */
1193 COSTS_N_INSNS (1), /* constant shift costs */
1194 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1195 COSTS_N_INSNS (4), /* HI */
1196 COSTS_N_INSNS (4), /* SI */
1197 COSTS_N_INSNS (6), /* DI */
1198 COSTS_N_INSNS (6)}, /* other */
1199 0, /* cost of multiply per each bit set */
1200 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1201 COSTS_N_INSNS (35), /* HI */
1202 COSTS_N_INSNS (51), /* SI */
1203 COSTS_N_INSNS (83), /* DI */
1204 COSTS_N_INSNS (83)}, /* other */
1205 COSTS_N_INSNS (1), /* cost of movsx */
1206 COSTS_N_INSNS (1), /* cost of movzx */
1207 8, /* "large" insn */
1209 4, /* cost for loading QImode using movzbl */
1210 {5, 5, 4}, /* cost of loading integer registers
1211 in QImode, HImode and SImode.
1212 Relative to reg-reg move (2). */
1213 {4, 4, 4}, /* cost of storing integer registers */
1214 2, /* cost of reg,reg fld/fst */
1215 {5, 5, 12}, /* cost of loading fp registers
1216 in SFmode, DFmode and XFmode */
1217 {4, 4, 8}, /* cost of storing fp registers
1218 in SFmode, DFmode and XFmode */
1219 2, /* cost of moving MMX register */
1220 {4, 4}, /* cost of loading MMX registers
1221 in SImode and DImode */
1222 {4, 4}, /* cost of storing MMX registers
1223 in SImode and DImode */
1224 2, /* cost of moving SSE register */
1225 {4, 4, 4}, /* cost of loading SSE registers
1226 in SImode, DImode and TImode */
1227 {4, 4, 4}, /* cost of storing SSE registers
1228 in SImode, DImode and TImode */
1229 2, /* MMX or SSE register to integer */
1230 16, /* size of l1 cache. */
1231 2048, /* size of l2 cache. */
1232 64, /* size of prefetch block */
1233 /* New AMD processors never drop prefetches; if they cannot be performed
1234 immediately, they are queued. We set number of simultaneous prefetches
1235 to a large constant to reflect this (it probably is not a good idea not
1236 to limit number of prefetches at all, as their execution also takes some
1238 100, /* number of parallel prefetches */
1239 2, /* Branch cost */
1240 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1241 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1242 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1243 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1244 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1245 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1249 6, /* scalar_stmt_cost. */
1250 4, /* scalar load_cost. */
1251 4, /* scalar_store_cost. */
1252 6, /* vec_stmt_cost. */
1253 0, /* vec_to_scalar_cost. */
1254 2, /* scalar_to_vec_cost. */
1255 4, /* vec_align_load_cost. */
1256 4, /* vec_unalign_load_cost. */
1257 4, /* vec_store_cost. */
1258 4, /* cond_taken_branch_cost. */
1259 2, /* cond_not_taken_branch_cost. */
1262 /* BDVER4 has optimized REP instruction for medium sized blocks, but for
1263 very small blocks it is better to use loop. For large blocks, libcall
1264 can do nontemporary accesses and beat inline considerably. */
1265 static stringop_algs bdver4_memcpy
[2] = {
1266 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1267 {-1, rep_prefix_4_byte
, false}}},
1268 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1269 {-1, libcall
, false}}}};
1270 static stringop_algs bdver4_memset
[2] = {
1271 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1272 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1273 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1274 {-1, libcall
, false}}}};
1275 struct processor_costs bdver4_cost
= {
1276 COSTS_N_INSNS (1), /* cost of an add instruction */
1277 COSTS_N_INSNS (1), /* cost of a lea instruction */
1278 COSTS_N_INSNS (1), /* variable shift costs */
1279 COSTS_N_INSNS (1), /* constant shift costs */
1280 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1281 COSTS_N_INSNS (4), /* HI */
1282 COSTS_N_INSNS (4), /* SI */
1283 COSTS_N_INSNS (6), /* DI */
1284 COSTS_N_INSNS (6)}, /* other */
1285 0, /* cost of multiply per each bit set */
1286 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1287 COSTS_N_INSNS (35), /* HI */
1288 COSTS_N_INSNS (51), /* SI */
1289 COSTS_N_INSNS (83), /* DI */
1290 COSTS_N_INSNS (83)}, /* other */
1291 COSTS_N_INSNS (1), /* cost of movsx */
1292 COSTS_N_INSNS (1), /* cost of movzx */
1293 8, /* "large" insn */
1295 4, /* cost for loading QImode using movzbl */
1296 {5, 5, 4}, /* cost of loading integer registers
1297 in QImode, HImode and SImode.
1298 Relative to reg-reg move (2). */
1299 {4, 4, 4}, /* cost of storing integer registers */
1300 2, /* cost of reg,reg fld/fst */
1301 {5, 5, 12}, /* cost of loading fp registers
1302 in SFmode, DFmode and XFmode */
1303 {4, 4, 8}, /* cost of storing fp registers
1304 in SFmode, DFmode and XFmode */
1305 2, /* cost of moving MMX register */
1306 {4, 4}, /* cost of loading MMX registers
1307 in SImode and DImode */
1308 {4, 4}, /* cost of storing MMX registers
1309 in SImode and DImode */
1310 2, /* cost of moving SSE register */
1311 {4, 4, 4}, /* cost of loading SSE registers
1312 in SImode, DImode and TImode */
1313 {4, 4, 4}, /* cost of storing SSE registers
1314 in SImode, DImode and TImode */
1315 2, /* MMX or SSE register to integer */
1316 16, /* size of l1 cache. */
1317 2048, /* size of l2 cache. */
1318 64, /* size of prefetch block */
1319 /* New AMD processors never drop prefetches; if they cannot be performed
1320 immediately, they are queued. We set number of simultaneous prefetches
1321 to a large constant to reflect this (it probably is not a good idea not
1322 to limit number of prefetches at all, as their execution also takes some
1324 100, /* number of parallel prefetches */
1325 2, /* Branch cost */
1326 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1327 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1328 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1329 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1330 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1331 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1335 6, /* scalar_stmt_cost. */
1336 4, /* scalar load_cost. */
1337 4, /* scalar_store_cost. */
1338 6, /* vec_stmt_cost. */
1339 0, /* vec_to_scalar_cost. */
1340 2, /* scalar_to_vec_cost. */
1341 4, /* vec_align_load_cost. */
1342 4, /* vec_unalign_load_cost. */
1343 4, /* vec_store_cost. */
1344 4, /* cond_taken_branch_cost. */
1345 2, /* cond_not_taken_branch_cost. */
1349 /* ZNVER1 has optimized REP instruction for medium sized blocks, but for
1350 very small blocks it is better to use loop. For large blocks, libcall
1351 can do nontemporary accesses and beat inline considerably. */
1352 static stringop_algs znver1_memcpy
[2] = {
1353 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1354 {-1, rep_prefix_4_byte
, false}}},
1355 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1356 {-1, libcall
, false}}}};
1357 static stringop_algs znver1_memset
[2] = {
1358 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1359 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1360 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1361 {-1, libcall
, false}}}};
1362 struct processor_costs znver1_cost
= {
1363 COSTS_N_INSNS (1), /* cost of an add instruction. */
1364 COSTS_N_INSNS (1), /* cost of a lea instruction. */
1365 COSTS_N_INSNS (1), /* variable shift costs. */
1366 COSTS_N_INSNS (1), /* constant shift costs. */
1367 {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
1368 COSTS_N_INSNS (3), /* HI. */
1369 COSTS_N_INSNS (3), /* SI. */
1370 COSTS_N_INSNS (4), /* DI. */
1371 COSTS_N_INSNS (4)}, /* other. */
1372 0, /* cost of multiply per each bit
1374 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
1375 COSTS_N_INSNS (35), /* HI. */
1376 COSTS_N_INSNS (51), /* SI. */
1377 COSTS_N_INSNS (83), /* DI. */
1378 COSTS_N_INSNS (83)}, /* other. */
1379 COSTS_N_INSNS (1), /* cost of movsx. */
1380 COSTS_N_INSNS (1), /* cost of movzx. */
1381 8, /* "large" insn. */
1382 9, /* MOVE_RATIO. */
1383 4, /* cost for loading QImode using
1385 {5, 5, 4}, /* cost of loading integer registers
1386 in QImode, HImode and SImode.
1387 Relative to reg-reg move (2). */
1388 {4, 4, 4}, /* cost of storing integer
1390 2, /* cost of reg,reg fld/fst. */
1391 {5, 5, 12}, /* cost of loading fp registers
1392 in SFmode, DFmode and XFmode. */
1393 {4, 4, 8}, /* cost of storing fp registers
1394 in SFmode, DFmode and XFmode. */
1395 2, /* cost of moving MMX register. */
1396 {4, 4}, /* cost of loading MMX registers
1397 in SImode and DImode. */
1398 {4, 4}, /* cost of storing MMX registers
1399 in SImode and DImode. */
1400 2, /* cost of moving SSE register. */
1401 {4, 4, 4}, /* cost of loading SSE registers
1402 in SImode, DImode and TImode. */
1403 {4, 4, 4}, /* cost of storing SSE registers
1404 in SImode, DImode and TImode. */
1405 2, /* MMX or SSE register to integer. */
1406 32, /* size of l1 cache. */
1407 512, /* size of l2 cache. */
1408 64, /* size of prefetch block. */
1409 /* New AMD processors never drop prefetches; if they cannot be performed
1410 immediately, they are queued. We set number of simultaneous prefetches
1411 to a large constant to reflect this (it probably is not a good idea not
1412 to limit number of prefetches at all, as their execution also takes some
1414 100, /* number of parallel prefetches. */
1415 2, /* Branch cost. */
1416 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1417 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1418 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1419 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1420 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1421 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1425 6, /* scalar_stmt_cost. */
1426 4, /* scalar load_cost. */
1427 4, /* scalar_store_cost. */
1428 6, /* vec_stmt_cost. */
1429 0, /* vec_to_scalar_cost. */
1430 2, /* scalar_to_vec_cost. */
1431 4, /* vec_align_load_cost. */
1432 4, /* vec_unalign_load_cost. */
1433 4, /* vec_store_cost. */
1434 4, /* cond_taken_branch_cost. */
1435 2, /* cond_not_taken_branch_cost. */
1438 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1439 very small blocks it is better to use loop. For large blocks, libcall can
1440 do nontemporary accesses and beat inline considerably. */
1441 static stringop_algs btver1_memcpy
[2] = {
1442 {libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1443 {-1, rep_prefix_4_byte
, false}}},
1444 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1445 {-1, libcall
, false}}}};
1446 static stringop_algs btver1_memset
[2] = {
1447 {libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1448 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1449 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1450 {-1, libcall
, false}}}};
1451 const struct processor_costs btver1_cost
= {
1452 COSTS_N_INSNS (1), /* cost of an add instruction */
1453 COSTS_N_INSNS (2), /* cost of a lea instruction */
1454 COSTS_N_INSNS (1), /* variable shift costs */
1455 COSTS_N_INSNS (1), /* constant shift costs */
1456 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1457 COSTS_N_INSNS (4), /* HI */
1458 COSTS_N_INSNS (3), /* SI */
1459 COSTS_N_INSNS (4), /* DI */
1460 COSTS_N_INSNS (5)}, /* other */
1461 0, /* cost of multiply per each bit set */
1462 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1463 COSTS_N_INSNS (35), /* HI */
1464 COSTS_N_INSNS (51), /* SI */
1465 COSTS_N_INSNS (83), /* DI */
1466 COSTS_N_INSNS (83)}, /* other */
1467 COSTS_N_INSNS (1), /* cost of movsx */
1468 COSTS_N_INSNS (1), /* cost of movzx */
1469 8, /* "large" insn */
1471 4, /* cost for loading QImode using movzbl */
1472 {3, 4, 3}, /* cost of loading integer registers
1473 in QImode, HImode and SImode.
1474 Relative to reg-reg move (2). */
1475 {3, 4, 3}, /* cost of storing integer registers */
1476 4, /* cost of reg,reg fld/fst */
1477 {4, 4, 12}, /* cost of loading fp registers
1478 in SFmode, DFmode and XFmode */
1479 {6, 6, 8}, /* cost of storing fp registers
1480 in SFmode, DFmode and XFmode */
1481 2, /* cost of moving MMX register */
1482 {3, 3}, /* cost of loading MMX registers
1483 in SImode and DImode */
1484 {4, 4}, /* cost of storing MMX registers
1485 in SImode and DImode */
1486 2, /* cost of moving SSE register */
1487 {4, 4, 3}, /* cost of loading SSE registers
1488 in SImode, DImode and TImode */
1489 {4, 4, 5}, /* cost of storing SSE registers
1490 in SImode, DImode and TImode */
1491 3, /* MMX or SSE register to integer */
					/* MOVD reg64, xmmreg  Double  FSTORE 4
					   MOVD reg32, xmmreg  Double  FSTORE 4
					   MOVD reg64, xmmreg  Double  FADD   3
					   MOVD reg32, xmmreg  Double  FADD   3  */
1500 32, /* size of l1 cache. */
1501 512, /* size of l2 cache. */
1502 64, /* size of prefetch block */
1503 100, /* number of parallel prefetches */
1504 2, /* Branch cost */
1505 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1506 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1507 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1508 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1509 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1510 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1514 4, /* scalar_stmt_cost. */
1515 2, /* scalar load_cost. */
1516 2, /* scalar_store_cost. */
1517 6, /* vec_stmt_cost. */
1518 0, /* vec_to_scalar_cost. */
1519 2, /* scalar_to_vec_cost. */
1520 2, /* vec_align_load_cost. */
1521 2, /* vec_unalign_load_cost. */
1522 2, /* vec_store_cost. */
1523 2, /* cond_taken_branch_cost. */
1524 1, /* cond_not_taken_branch_cost. */
static stringop_algs btver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs btver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
const struct processor_costs btver2_cost = {
1538 COSTS_N_INSNS (1), /* cost of an add instruction */
1539 COSTS_N_INSNS (2), /* cost of a lea instruction */
1540 COSTS_N_INSNS (1), /* variable shift costs */
1541 COSTS_N_INSNS (1), /* constant shift costs */
1542 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1543 COSTS_N_INSNS (4), /* HI */
1544 COSTS_N_INSNS (3), /* SI */
1545 COSTS_N_INSNS (4), /* DI */
1546 COSTS_N_INSNS (5)}, /* other */
1547 0, /* cost of multiply per each bit set */
1548 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1549 COSTS_N_INSNS (35), /* HI */
1550 COSTS_N_INSNS (51), /* SI */
1551 COSTS_N_INSNS (83), /* DI */
1552 COSTS_N_INSNS (83)}, /* other */
1553 COSTS_N_INSNS (1), /* cost of movsx */
1554 COSTS_N_INSNS (1), /* cost of movzx */
1555 8, /* "large" insn */
1557 4, /* cost for loading QImode using movzbl */
1558 {3, 4, 3}, /* cost of loading integer registers
1559 in QImode, HImode and SImode.
1560 Relative to reg-reg move (2). */
1561 {3, 4, 3}, /* cost of storing integer registers */
1562 4, /* cost of reg,reg fld/fst */
1563 {4, 4, 12}, /* cost of loading fp registers
1564 in SFmode, DFmode and XFmode */
1565 {6, 6, 8}, /* cost of storing fp registers
1566 in SFmode, DFmode and XFmode */
1567 2, /* cost of moving MMX register */
1568 {3, 3}, /* cost of loading MMX registers
1569 in SImode and DImode */
1570 {4, 4}, /* cost of storing MMX registers
1571 in SImode and DImode */
1572 2, /* cost of moving SSE register */
1573 {4, 4, 3}, /* cost of loading SSE registers
1574 in SImode, DImode and TImode */
1575 {4, 4, 5}, /* cost of storing SSE registers
1576 in SImode, DImode and TImode */
1577 3, /* MMX or SSE register to integer */
					/* MOVD reg64, xmmreg  Double  FSTORE 4
					   MOVD reg32, xmmreg  Double  FSTORE 4
					   MOVD reg64, xmmreg  Double  FADD   3
					   MOVD reg32, xmmreg  Double  FADD   3  */
1586 32, /* size of l1 cache. */
1587 2048, /* size of l2 cache. */
1588 64, /* size of prefetch block */
1589 100, /* number of parallel prefetches */
1590 2, /* Branch cost */
1591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1599 4, /* scalar_stmt_cost. */
1600 2, /* scalar load_cost. */
1601 2, /* scalar_store_cost. */
1602 6, /* vec_stmt_cost. */
1603 0, /* vec_to_scalar_cost. */
1604 2, /* scalar_to_vec_cost. */
1605 2, /* vec_align_load_cost. */
1606 2, /* vec_unalign_load_cost. */
1607 2, /* vec_store_cost. */
1608 2, /* cond_taken_branch_cost. */
1609 1, /* cond_not_taken_branch_cost. */
static stringop_algs pentium4_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium4_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};

struct processor_costs pentium4_cost = {
1622 COSTS_N_INSNS (1), /* cost of an add instruction */
1623 COSTS_N_INSNS (3), /* cost of a lea instruction */
1624 COSTS_N_INSNS (4), /* variable shift costs */
1625 COSTS_N_INSNS (4), /* constant shift costs */
1626 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1627 COSTS_N_INSNS (15), /* HI */
1628 COSTS_N_INSNS (15), /* SI */
1629 COSTS_N_INSNS (15), /* DI */
1630 COSTS_N_INSNS (15)}, /* other */
1631 0, /* cost of multiply per each bit set */
1632 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1633 COSTS_N_INSNS (56), /* HI */
1634 COSTS_N_INSNS (56), /* SI */
1635 COSTS_N_INSNS (56), /* DI */
1636 COSTS_N_INSNS (56)}, /* other */
1637 COSTS_N_INSNS (1), /* cost of movsx */
1638 COSTS_N_INSNS (1), /* cost of movzx */
1639 16, /* "large" insn */
1641 2, /* cost for loading QImode using movzbl */
1642 {4, 5, 4}, /* cost of loading integer registers
1643 in QImode, HImode and SImode.
1644 Relative to reg-reg move (2). */
1645 {2, 3, 2}, /* cost of storing integer registers */
1646 2, /* cost of reg,reg fld/fst */
1647 {2, 2, 6}, /* cost of loading fp registers
1648 in SFmode, DFmode and XFmode */
1649 {4, 4, 6}, /* cost of storing fp registers
1650 in SFmode, DFmode and XFmode */
1651 2, /* cost of moving MMX register */
1652 {2, 2}, /* cost of loading MMX registers
1653 in SImode and DImode */
1654 {2, 2}, /* cost of storing MMX registers
1655 in SImode and DImode */
1656 12, /* cost of moving SSE register */
1657 {12, 12, 12}, /* cost of loading SSE registers
1658 in SImode, DImode and TImode */
1659 {2, 2, 8}, /* cost of storing SSE registers
1660 in SImode, DImode and TImode */
1661 10, /* MMX or SSE register to integer */
1662 8, /* size of l1 cache. */
1663 256, /* size of l2 cache. */
1664 64, /* size of prefetch block */
1665 6, /* number of parallel prefetches */
1666 2, /* Branch cost */
1667 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1668 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1669 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1670 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1671 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1672 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1675 1, /* scalar_stmt_cost. */
1676 1, /* scalar load_cost. */
1677 1, /* scalar_store_cost. */
1678 1, /* vec_stmt_cost. */
1679 1, /* vec_to_scalar_cost. */
1680 1, /* scalar_to_vec_cost. */
1681 1, /* vec_align_load_cost. */
1682 2, /* vec_unalign_load_cost. */
1683 1, /* vec_store_cost. */
1684 3, /* cond_taken_branch_cost. */
1685 1, /* cond_not_taken_branch_cost. */
static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};
static stringop_algs nocona_memset[2] = {
  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

struct processor_costs nocona_cost = {
1701 COSTS_N_INSNS (1), /* cost of an add instruction */
1702 COSTS_N_INSNS (1), /* cost of a lea instruction */
1703 COSTS_N_INSNS (1), /* variable shift costs */
1704 COSTS_N_INSNS (1), /* constant shift costs */
1705 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1706 COSTS_N_INSNS (10), /* HI */
1707 COSTS_N_INSNS (10), /* SI */
1708 COSTS_N_INSNS (10), /* DI */
1709 COSTS_N_INSNS (10)}, /* other */
1710 0, /* cost of multiply per each bit set */
1711 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1712 COSTS_N_INSNS (66), /* HI */
1713 COSTS_N_INSNS (66), /* SI */
1714 COSTS_N_INSNS (66), /* DI */
1715 COSTS_N_INSNS (66)}, /* other */
1716 COSTS_N_INSNS (1), /* cost of movsx */
1717 COSTS_N_INSNS (1), /* cost of movzx */
1718 16, /* "large" insn */
1719 17, /* MOVE_RATIO */
1720 4, /* cost for loading QImode using movzbl */
1721 {4, 4, 4}, /* cost of loading integer registers
1722 in QImode, HImode and SImode.
1723 Relative to reg-reg move (2). */
1724 {4, 4, 4}, /* cost of storing integer registers */
1725 3, /* cost of reg,reg fld/fst */
1726 {12, 12, 12}, /* cost of loading fp registers
1727 in SFmode, DFmode and XFmode */
1728 {4, 4, 4}, /* cost of storing fp registers
1729 in SFmode, DFmode and XFmode */
1730 6, /* cost of moving MMX register */
1731 {12, 12}, /* cost of loading MMX registers
1732 in SImode and DImode */
1733 {12, 12}, /* cost of storing MMX registers
1734 in SImode and DImode */
1735 6, /* cost of moving SSE register */
1736 {12, 12, 12}, /* cost of loading SSE registers
1737 in SImode, DImode and TImode */
1738 {12, 12, 12}, /* cost of storing SSE registers
1739 in SImode, DImode and TImode */
1740 8, /* MMX or SSE register to integer */
1741 8, /* size of l1 cache. */
1742 1024, /* size of l2 cache. */
1743 64, /* size of prefetch block */
1744 8, /* number of parallel prefetches */
1745 1, /* Branch cost */
1746 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1747 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1748 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1749 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1750 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1751 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1754 1, /* scalar_stmt_cost. */
1755 1, /* scalar load_cost. */
1756 1, /* scalar_store_cost. */
1757 1, /* vec_stmt_cost. */
1758 1, /* vec_to_scalar_cost. */
1759 1, /* scalar_to_vec_cost. */
1760 1, /* vec_align_load_cost. */
1761 2, /* vec_unalign_load_cost. */
1762 1, /* vec_store_cost. */
1763 3, /* cond_taken_branch_cost. */
1764 1, /* cond_not_taken_branch_cost. */
static stringop_algs atom_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs atom_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

struct processor_costs atom_cost = {
1778 COSTS_N_INSNS (1), /* cost of an add instruction */
1779 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1780 COSTS_N_INSNS (1), /* variable shift costs */
1781 COSTS_N_INSNS (1), /* constant shift costs */
1782 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1783 COSTS_N_INSNS (4), /* HI */
1784 COSTS_N_INSNS (3), /* SI */
1785 COSTS_N_INSNS (4), /* DI */
1786 COSTS_N_INSNS (2)}, /* other */
1787 0, /* cost of multiply per each bit set */
1788 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1789 COSTS_N_INSNS (26), /* HI */
1790 COSTS_N_INSNS (42), /* SI */
1791 COSTS_N_INSNS (74), /* DI */
1792 COSTS_N_INSNS (74)}, /* other */
1793 COSTS_N_INSNS (1), /* cost of movsx */
1794 COSTS_N_INSNS (1), /* cost of movzx */
1795 8, /* "large" insn */
1796 17, /* MOVE_RATIO */
1797 4, /* cost for loading QImode using movzbl */
1798 {4, 4, 4}, /* cost of loading integer registers
1799 in QImode, HImode and SImode.
1800 Relative to reg-reg move (2). */
1801 {4, 4, 4}, /* cost of storing integer registers */
1802 4, /* cost of reg,reg fld/fst */
1803 {12, 12, 12}, /* cost of loading fp registers
1804 in SFmode, DFmode and XFmode */
1805 {6, 6, 8}, /* cost of storing fp registers
1806 in SFmode, DFmode and XFmode */
1807 2, /* cost of moving MMX register */
1808 {8, 8}, /* cost of loading MMX registers
1809 in SImode and DImode */
1810 {8, 8}, /* cost of storing MMX registers
1811 in SImode and DImode */
1812 2, /* cost of moving SSE register */
1813 {8, 8, 8}, /* cost of loading SSE registers
1814 in SImode, DImode and TImode */
1815 {8, 8, 8}, /* cost of storing SSE registers
1816 in SImode, DImode and TImode */
1817 5, /* MMX or SSE register to integer */
1818 32, /* size of l1 cache. */
1819 256, /* size of l2 cache. */
1820 64, /* size of prefetch block */
1821 6, /* number of parallel prefetches */
1822 3, /* Branch cost */
1823 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1824 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1825 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1826 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1827 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1828 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1831 1, /* scalar_stmt_cost. */
1832 1, /* scalar load_cost. */
1833 1, /* scalar_store_cost. */
1834 1, /* vec_stmt_cost. */
1835 1, /* vec_to_scalar_cost. */
1836 1, /* scalar_to_vec_cost. */
1837 1, /* vec_align_load_cost. */
1838 2, /* vec_unalign_load_cost. */
1839 1, /* vec_store_cost. */
1840 3, /* cond_taken_branch_cost. */
1841 1, /* cond_not_taken_branch_cost. */
static stringop_algs slm_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs slm_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

struct processor_costs slm_cost = {
1855 COSTS_N_INSNS (1), /* cost of an add instruction */
1856 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1857 COSTS_N_INSNS (1), /* variable shift costs */
1858 COSTS_N_INSNS (1), /* constant shift costs */
1859 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1860 COSTS_N_INSNS (3), /* HI */
1861 COSTS_N_INSNS (3), /* SI */
1862 COSTS_N_INSNS (4), /* DI */
1863 COSTS_N_INSNS (2)}, /* other */
1864 0, /* cost of multiply per each bit set */
1865 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1866 COSTS_N_INSNS (26), /* HI */
1867 COSTS_N_INSNS (42), /* SI */
1868 COSTS_N_INSNS (74), /* DI */
1869 COSTS_N_INSNS (74)}, /* other */
1870 COSTS_N_INSNS (1), /* cost of movsx */
1871 COSTS_N_INSNS (1), /* cost of movzx */
1872 8, /* "large" insn */
1873 17, /* MOVE_RATIO */
1874 4, /* cost for loading QImode using movzbl */
1875 {4, 4, 4}, /* cost of loading integer registers
1876 in QImode, HImode and SImode.
1877 Relative to reg-reg move (2). */
1878 {4, 4, 4}, /* cost of storing integer registers */
1879 4, /* cost of reg,reg fld/fst */
1880 {12, 12, 12}, /* cost of loading fp registers
1881 in SFmode, DFmode and XFmode */
1882 {6, 6, 8}, /* cost of storing fp registers
1883 in SFmode, DFmode and XFmode */
1884 2, /* cost of moving MMX register */
1885 {8, 8}, /* cost of loading MMX registers
1886 in SImode and DImode */
1887 {8, 8}, /* cost of storing MMX registers
1888 in SImode and DImode */
1889 2, /* cost of moving SSE register */
1890 {8, 8, 8}, /* cost of loading SSE registers
1891 in SImode, DImode and TImode */
1892 {8, 8, 8}, /* cost of storing SSE registers
1893 in SImode, DImode and TImode */
1894 5, /* MMX or SSE register to integer */
1895 32, /* size of l1 cache. */
1896 256, /* size of l2 cache. */
1897 64, /* size of prefetch block */
1898 6, /* number of parallel prefetches */
1899 3, /* Branch cost */
1900 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1901 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1902 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1903 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1904 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1905 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1908 1, /* scalar_stmt_cost. */
1909 1, /* scalar load_cost. */
1910 1, /* scalar_store_cost. */
1911 1, /* vec_stmt_cost. */
1912 4, /* vec_to_scalar_cost. */
1913 1, /* scalar_to_vec_cost. */
1914 1, /* vec_align_load_cost. */
1915 2, /* vec_unalign_load_cost. */
1916 1, /* vec_store_cost. */
1917 3, /* cond_taken_branch_cost. */
1918 1, /* cond_not_taken_branch_cost. */
static stringop_algs intel_memcpy[2] = {
  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static stringop_algs intel_memset[2] = {
  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};

struct processor_costs intel_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1934 COSTS_N_INSNS (1), /* variable shift costs */
1935 COSTS_N_INSNS (1), /* constant shift costs */
1936 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1937 COSTS_N_INSNS (3), /* HI */
1938 COSTS_N_INSNS (3), /* SI */
1939 COSTS_N_INSNS (4), /* DI */
1940 COSTS_N_INSNS (2)}, /* other */
1941 0, /* cost of multiply per each bit set */
1942 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1943 COSTS_N_INSNS (26), /* HI */
1944 COSTS_N_INSNS (42), /* SI */
1945 COSTS_N_INSNS (74), /* DI */
1946 COSTS_N_INSNS (74)}, /* other */
1947 COSTS_N_INSNS (1), /* cost of movsx */
1948 COSTS_N_INSNS (1), /* cost of movzx */
1949 8, /* "large" insn */
1950 17, /* MOVE_RATIO */
1951 4, /* cost for loading QImode using movzbl */
1952 {4, 4, 4}, /* cost of loading integer registers
1953 in QImode, HImode and SImode.
1954 Relative to reg-reg move (2). */
1955 {4, 4, 4}, /* cost of storing integer registers */
1956 4, /* cost of reg,reg fld/fst */
1957 {12, 12, 12}, /* cost of loading fp registers
1958 in SFmode, DFmode and XFmode */
1959 {6, 6, 8}, /* cost of storing fp registers
1960 in SFmode, DFmode and XFmode */
1961 2, /* cost of moving MMX register */
1962 {8, 8}, /* cost of loading MMX registers
1963 in SImode and DImode */
1964 {8, 8}, /* cost of storing MMX registers
1965 in SImode and DImode */
1966 2, /* cost of moving SSE register */
1967 {8, 8, 8}, /* cost of loading SSE registers
1968 in SImode, DImode and TImode */
1969 {8, 8, 8}, /* cost of storing SSE registers
1970 in SImode, DImode and TImode */
1971 5, /* MMX or SSE register to integer */
1972 32, /* size of l1 cache. */
1973 256, /* size of l2 cache. */
1974 64, /* size of prefetch block */
1975 6, /* number of parallel prefetches */
1976 3, /* Branch cost */
1977 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1978 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1979 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1980 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1981 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1982 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1985 1, /* scalar_stmt_cost. */
1986 1, /* scalar load_cost. */
1987 1, /* scalar_store_cost. */
1988 1, /* vec_stmt_cost. */
1989 4, /* vec_to_scalar_cost. */
1990 1, /* scalar_to_vec_cost. */
1991 1, /* vec_align_load_cost. */
1992 2, /* vec_unalign_load_cost. */
1993 1, /* vec_store_cost. */
1994 3, /* cond_taken_branch_cost. */
1995 1, /* cond_not_taken_branch_cost. */
/* Generic should produce code tuned for Core-i7 (and newer chips)
   and btver1 (and newer chips).  */

static stringop_algs generic_memcpy[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs generic_memset[2] = {
  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
             {-1, libcall, false}}},
  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

struct processor_costs generic_cost = {
2013 COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     use of unnecessary temporary registers, causing regressions on several
     SPECfp benchmarks.  */
2018 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2019 COSTS_N_INSNS (1), /* variable shift costs */
2020 COSTS_N_INSNS (1), /* constant shift costs */
2021 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2022 COSTS_N_INSNS (4), /* HI */
2023 COSTS_N_INSNS (3), /* SI */
2024 COSTS_N_INSNS (4), /* DI */
2025 COSTS_N_INSNS (2)}, /* other */
2026 0, /* cost of multiply per each bit set */
2027 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2028 COSTS_N_INSNS (26), /* HI */
2029 COSTS_N_INSNS (42), /* SI */
2030 COSTS_N_INSNS (74), /* DI */
2031 COSTS_N_INSNS (74)}, /* other */
2032 COSTS_N_INSNS (1), /* cost of movsx */
2033 COSTS_N_INSNS (1), /* cost of movzx */
2034 8, /* "large" insn */
2035 17, /* MOVE_RATIO */
2036 4, /* cost for loading QImode using movzbl */
2037 {4, 4, 4}, /* cost of loading integer registers
2038 in QImode, HImode and SImode.
2039 Relative to reg-reg move (2). */
2040 {4, 4, 4}, /* cost of storing integer registers */
2041 4, /* cost of reg,reg fld/fst */
2042 {12, 12, 12}, /* cost of loading fp registers
2043 in SFmode, DFmode and XFmode */
2044 {6, 6, 8}, /* cost of storing fp registers
2045 in SFmode, DFmode and XFmode */
2046 2, /* cost of moving MMX register */
2047 {8, 8}, /* cost of loading MMX registers
2048 in SImode and DImode */
2049 {8, 8}, /* cost of storing MMX registers
2050 in SImode and DImode */
2051 2, /* cost of moving SSE register */
2052 {8, 8, 8}, /* cost of loading SSE registers
2053 in SImode, DImode and TImode */
2054 {8, 8, 8}, /* cost of storing SSE registers
2055 in SImode, DImode and TImode */
2056 5, /* MMX or SSE register to integer */
2057 32, /* size of l1 cache. */
2058 512, /* size of l2 cache. */
2059 64, /* size of prefetch block */
2060 6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
2063 3, /* Branch cost */
2064 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2065 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2066 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2067 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2068 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2069 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2072 1, /* scalar_stmt_cost. */
2073 1, /* scalar load_cost. */
2074 1, /* scalar_store_cost. */
2075 1, /* vec_stmt_cost. */
2076 1, /* vec_to_scalar_cost. */
2077 1, /* scalar_to_vec_cost. */
2078 1, /* vec_align_load_cost. */
2079 2, /* vec_unalign_load_cost. */
2080 1, /* vec_store_cost. */
2081 3, /* cond_taken_branch_cost. */
2082 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for Core family of CPUs.  */
static stringop_algs core_memcpy[2] = {
  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};
static stringop_algs core_memset[2] = {
  {libcall, {{6, loop_1_byte, true},
             {8192, rep_prefix_4_byte, true},
             {-1, libcall, false}}},
  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
             {-1, libcall, false}}}};

struct processor_costs core_cost = {
2100 COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     use of unnecessary temporary registers, causing regressions on several
     SPECfp benchmarks.  */
2105 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2106 COSTS_N_INSNS (1), /* variable shift costs */
2107 COSTS_N_INSNS (1), /* constant shift costs */
2108 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2109 COSTS_N_INSNS (4), /* HI */
2110 COSTS_N_INSNS (3), /* SI */
2111 COSTS_N_INSNS (4), /* DI */
2112 COSTS_N_INSNS (2)}, /* other */
2113 0, /* cost of multiply per each bit set */
2114 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2115 COSTS_N_INSNS (26), /* HI */
2116 COSTS_N_INSNS (42), /* SI */
2117 COSTS_N_INSNS (74), /* DI */
2118 COSTS_N_INSNS (74)}, /* other */
2119 COSTS_N_INSNS (1), /* cost of movsx */
2120 COSTS_N_INSNS (1), /* cost of movzx */
2121 8, /* "large" insn */
2122 17, /* MOVE_RATIO */
2123 4, /* cost for loading QImode using movzbl */
2124 {4, 4, 4}, /* cost of loading integer registers
2125 in QImode, HImode and SImode.
2126 Relative to reg-reg move (2). */
2127 {4, 4, 4}, /* cost of storing integer registers */
2128 4, /* cost of reg,reg fld/fst */
2129 {12, 12, 12}, /* cost of loading fp registers
2130 in SFmode, DFmode and XFmode */
2131 {6, 6, 8}, /* cost of storing fp registers
2132 in SFmode, DFmode and XFmode */
2133 2, /* cost of moving MMX register */
2134 {8, 8}, /* cost of loading MMX registers
2135 in SImode and DImode */
2136 {8, 8}, /* cost of storing MMX registers
2137 in SImode and DImode */
2138 2, /* cost of moving SSE register */
2139 {8, 8, 8}, /* cost of loading SSE registers
2140 in SImode, DImode and TImode */
2141 {8, 8, 8}, /* cost of storing SSE registers
2142 in SImode, DImode and TImode */
2143 5, /* MMX or SSE register to integer */
2144 64, /* size of l1 cache. */
2145 512, /* size of l2 cache. */
2146 64, /* size of prefetch block */
2147 6, /* number of parallel prefetches */
2148 /* FIXME perhaps more appropriate value is 5. */
2149 3, /* Branch cost */
2150 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2151 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2152 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2153 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2154 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2155 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2158 1, /* scalar_stmt_cost. */
2159 1, /* scalar load_cost. */
2160 1, /* scalar_store_cost. */
2161 1, /* vec_stmt_cost. */
2162 1, /* vec_to_scalar_cost. */
2163 1, /* scalar_to_vec_cost. */
2164 1, /* vec_align_load_cost. */
2165 2, /* vec_unalign_load_cost. */
2166 1, /* vec_store_cost. */
2167 3, /* cond_taken_branch_cost. */
2168 1, /* cond_not_taken_branch_cost. */
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
2178 /* Processor feature/optimization bitmasks. */
2179 #define m_386 (1U<<PROCESSOR_I386)
2180 #define m_486 (1U<<PROCESSOR_I486)
2181 #define m_PENT (1U<<PROCESSOR_PENTIUM)
2182 #define m_LAKEMONT (1U<<PROCESSOR_LAKEMONT)
2183 #define m_PPRO (1U<<PROCESSOR_PENTIUMPRO)
2184 #define m_PENT4 (1U<<PROCESSOR_PENTIUM4)
2185 #define m_NOCONA (1U<<PROCESSOR_NOCONA)
2186 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2187 #define m_CORE2 (1U<<PROCESSOR_CORE2)
2188 #define m_NEHALEM (1U<<PROCESSOR_NEHALEM)
2189 #define m_SANDYBRIDGE (1U<<PROCESSOR_SANDYBRIDGE)
2190 #define m_HASWELL (1U<<PROCESSOR_HASWELL)
2191 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2192 #define m_BONNELL (1U<<PROCESSOR_BONNELL)
2193 #define m_SILVERMONT (1U<<PROCESSOR_SILVERMONT)
2194 #define m_KNL (1U<<PROCESSOR_KNL)
2195 #define m_SKYLAKE_AVX512 (1U<<PROCESSOR_SKYLAKE_AVX512)
2196 #define m_INTEL (1U<<PROCESSOR_INTEL)
2198 #define m_GEODE (1U<<PROCESSOR_GEODE)
2199 #define m_K6 (1U<<PROCESSOR_K6)
2200 #define m_K6_GEODE (m_K6 | m_GEODE)
2201 #define m_K8 (1U<<PROCESSOR_K8)
2202 #define m_ATHLON (1U<<PROCESSOR_ATHLON)
2203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2204 #define m_AMDFAM10 (1U<<PROCESSOR_AMDFAM10)
2205 #define m_BDVER1 (1U<<PROCESSOR_BDVER1)
2206 #define m_BDVER2 (1U<<PROCESSOR_BDVER2)
2207 #define m_BDVER3 (1U<<PROCESSOR_BDVER3)
2208 #define m_BDVER4 (1U<<PROCESSOR_BDVER4)
2209 #define m_ZNVER1 (1U<<PROCESSOR_ZNVER1)
2210 #define m_BTVER1 (1U<<PROCESSOR_BTVER1)
2211 #define m_BTVER2 (1U<<PROCESSOR_BTVER2)
2212 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2213 #define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \
			| m_ZNVER1)
2217 #define m_GENERIC (1U<<PROCESSOR_GENERIC)
const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) name,
#include "x86-tune.def"
#undef DEF_TUNE
};
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
#undef DEF_TUNE
#define DEF_TUNE(tune, name, selector) selector,
#include "x86-tune.def"
#undef DEF_TUNE
};
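/* Illustrative sketch (not part of the build): how the DEF_TUNE X-macro
   above works.  A hypothetical x86-tune.def entry such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_K8 | m_GENERIC)

   would expand to the string "example," inside ix86_tune_feature_names and
   to the selector mask "m_K8 | m_GENERIC," inside initial_ix86_tune_features,
   because each array redefines DEF_TUNE before including x86-tune.def.  The
   entry above is made up; see x86-tune.def for the real ones.  The same
   pattern in miniature:  */
#if 0
#define DEMO_LIST \
  DEMO (FOO, "foo", 1) \
  DEMO (BAR, "bar", 2)

#define DEMO(id, name, sel) name,
static const char *demo_names[] = { DEMO_LIST };
#undef DEMO

#define DEMO(id, name, sel) sel,
static int demo_selectors[] = { DEMO_LIST };
#undef DEMO
#endif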
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6),
2247 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2250 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2253 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2256 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
  /* Mask registers.  */
  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
  /* MPX bound registers */
  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
};
2310 /* The "default" register map used in 32bit mode. */
2312 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2314 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2315 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2316 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2317 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2318 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2319 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2320 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2321 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2322 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2323 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2324 101, 102, 103, 104, /* bound registers */
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
  67, 68, 69, 70, 71, 72, 73, 74,	/* AVX-512 registers 16-23 */
  75, 76, 77, 78, 79, 80, 81, 82,	/* AVX-512 registers 24-31 */
  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
  126, 127, 128, 129,			/* bound registers */
};
2344 /* Define the register numbers to be used in Dwarf debugging information.
2345 The SVR4 reference port C compiler uses the following register numbers
2346 in its Dwarf output code:
2347 0 for %eax (gcc regno = 0)
2348 1 for %ecx (gcc regno = 2)
2349 2 for %edx (gcc regno = 1)
2350 3 for %ebx (gcc regno = 3)
2351 4 for %esp (gcc regno = 7)
2352 5 for %ebp (gcc regno = 6)
2353 6 for %esi (gcc regno = 4)
2354 7 for %edi (gcc regno = 5)
2355 The following three DWARF register numbers are never generated by
2356 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2357 believes these numbers have these meanings.
2358 8 for %eip (no gcc equivalent)
2359 9 for %eflags (gcc regno = 17)
2360 10 for %trapno (no gcc equivalent)
2361 It is not at all clear how we should number the FP stack registers
2362 for the x86 architecture. If the version of SDB on x86/svr4 were
2363 a bit less brain dead with respect to floating-point then we would
2364 have a precedent to follow with respect to DWARF register numbers
2365 for x86 FP registers, but the SDB on x86/svr4 is so completely
2366 broken with respect to FP registers that it is hardly worth thinking
2367 of it as something to strive for compatibility with.
2368 The version of x86/svr4 SDB I have at the moment does (partially)
2369 seem to believe that DWARF register number 11 is associated with
2370 the x86 register %st(0), but that's about all. Higher DWARF
2371 register numbers don't seem to be associated with anything in
2372 particular, and even for DWARF regno 11, SDB only seems to under-
2373 stand that it should say that a variable lives in %st(0) (when
2374 asked via an `=' command) if we said it was in DWARF regno 11,
2375 but SDB still prints garbage when asked for the value of the
2376 variable in question (via a `/' command).
2377 (Also note that the labels SDB prints for various FP stack regs
2378 when doing an `x' command are all wrong.)
2379 Note that these problems generally don't affect the native SVR4
2380 C compiler because it doesn't allow the use of -O with -g and
2381 because when it is *not* optimizing, it allocates a memory
2382 location for each floating-point variable, and the memory
2383 location is what gets described in the DWARF AT_location
2384 attribute for the variable in question.
2385 Regardless of the severe mental illness of the x86/svr4 SDB, we
2386 do something sensible here and we use the following DWARF
2387 register numbers. Note that these are all stack-top-relative
2389 11 for %st(0) (gcc regno = 8)
2390 12 for %st(1) (gcc regno = 9)
2391 13 for %st(2) (gcc regno = 10)
2392 14 for %st(3) (gcc regno = 11)
2393 15 for %st(4) (gcc regno = 12)
2394 16 for %st(5) (gcc regno = 13)
2395 17 for %st(6) (gcc regno = 14)
2396 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* AVX-512 registers 24-31 */
  93, 94, 95, 96, 97, 98, 99, 100,	/* Mask registers */
  101, 102, 103, 104,			/* bound registers */
};
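/* Illustrative sketch (not part of the build): reading the maps above.  GCC's
   register number 2 is %ecx and svr4_dbx_register_map[2] == 1, matching the
   "1 for %ecx (gcc regno = 2)" line in the comment; %esp (gcc regno 7) maps
   to DWARF number 4 in the SVR4 numbering but to 5 in the default map.  */
#if 0
static void
demo_check_dwarf_map (void)
{
  gcc_assert (svr4_dbx_register_map[2] == 1);	/* %ecx */
  gcc_assert (svr4_dbx_register_map[7] == 4);	/* %esp */
  gcc_assert (dbx_register_map[7] == 5);	/* %esp, default map */
}
#endif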
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
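/* Illustrative sketch (not part of the build): what the two parameter-register
   arrays above mean for a hypothetical call f (a, b, c) with three integer
   arguments.  Under the SysV ABI the arguments land in %rdi, %rsi and %rdx
   (the first three entries of x86_64_int_parameter_registers); under the
   Microsoft ABI they land in %rcx, %rdx and %r8 (the first three entries of
   x86_64_ms_abi_int_parameter_registers).  */
#if 0
extern long f (long a, long b, long c);

static long
demo_call_abi (void)
{
  /* SysV:  a -> %rdi, b -> %rsi, c -> %rdx
     MS:    a -> %rcx, b -> %rdx, c -> %r8   */
  return f (1, 2, 3);
}
#endif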
/* Additional registers that are clobbered by SYSV calls.  */
#define NUM_X86_64_MS_CLOBBERED_REGS 12
static int const x86_64_ms_sysv_extra_clobbered_registers
				 [NUM_X86_64_MS_CLOBBERED_REGS] =
{
  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
};
2444 XLOGUE_STUB_RESTORE
,
2445 XLOGUE_STUB_RESTORE_TAIL
,
2446 XLOGUE_STUB_SAVE_HFP
,
2447 XLOGUE_STUB_RESTORE_HFP
,
2448 XLOGUE_STUB_RESTORE_HFP_TAIL
,
2453 enum xlogue_stub_sets
{
2455 XLOGUE_SET_ALIGNED_PLUS_8
,
2456 XLOGUE_SET_HFP_ALIGNED_OR_REALIGN
,
2457 XLOGUE_SET_HFP_ALIGNED_PLUS_8
,
2462 /* Register save/restore layout used by out-of-line stubs. */
2463 class xlogue_layout
{
2468 HOST_WIDE_INT offset
; /* Offset used by stub base pointer (rax or
2469 rsi) to where each register is stored. */
2472 unsigned get_nregs () const {return m_nregs
;}
2473 HOST_WIDE_INT
get_stack_align_off_in () const {return m_stack_align_off_in
;}
2475 const reginfo
&get_reginfo (unsigned reg
) const
2477 gcc_assert (reg
< m_nregs
);
2481 static const char *get_stub_name (enum xlogue_stub stub
,
2482 unsigned n_extra_args
);
2484 /* Returns an rtx for the stub's symbol based upon
2485 1.) the specified stub (save, restore or restore_ret) and
2486 2.) the value of cfun->machine->call_ms2sysv_extra_regs and
     3.) whether or not stack alignment is being performed.  */
2488 static rtx
get_stub_rtx (enum xlogue_stub stub
);
  /* Returns the amount of stack space (including padding) that the stub
     needs to store registers based upon data in the machine_function.  */
  HOST_WIDE_INT get_stack_space_used () const
  {
    const struct machine_function *m = cfun->machine;
    unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1;

    gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS);
    return m_regs[last_reg].offset + STUB_INDEX_OFFSET;
  }
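  /* Illustrative worked example (not part of the build): with no extra
     registers requested (call_ms2sysv_extra_regs == 0), last_reg above is
     MIN_REGS - 1 == 11, so the space used is m_regs[11].offset plus
     STUB_INDEX_OFFSET (0x70).  The concrete offset depends on which layout
     variant is in use; see the REG_ORDER table further below.  */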
  /* Returns the offset for the base pointer used by the stub.  */
  HOST_WIDE_INT get_stub_ptr_offset () const
  {
    return STUB_INDEX_OFFSET + m_stack_align_off_in;
  }
2507 static const struct xlogue_layout
&get_instance ();
2508 static unsigned count_stub_managed_regs ();
2509 static bool is_stub_managed_reg (unsigned regno
, unsigned count
);
2511 static const HOST_WIDE_INT STUB_INDEX_OFFSET
= 0x70;
2512 static const unsigned MIN_REGS
= NUM_X86_64_MS_CLOBBERED_REGS
;
2513 static const unsigned MAX_REGS
= 18;
2514 static const unsigned MAX_EXTRA_REGS
= MAX_REGS
- MIN_REGS
;
2515 static const unsigned VARIANT_COUNT
= MAX_EXTRA_REGS
+ 1;
2516 static const unsigned STUB_NAME_MAX_LEN
= 20;
2517 static const char * const STUB_BASE_NAMES
[XLOGUE_STUB_COUNT
];
2518 static const unsigned REG_ORDER
[MAX_REGS
];
2519 static const unsigned REG_ORDER_REALIGN
[MAX_REGS
];
2523 xlogue_layout (HOST_WIDE_INT stack_align_off_in
, bool hfp
);
2524 xlogue_layout (const xlogue_layout
&);
2526 /* True if hard frame pointer is used. */
2529 /* Max number of register this layout manages. */
2532 /* Incoming offset from 16-byte alignment. */
2533 HOST_WIDE_INT m_stack_align_off_in
;
2535 /* Register order and offsets. */
2536 struct reginfo m_regs
[MAX_REGS
];
2538 /* Lazy-inited cache of symbol names for stubs. */
2539 static char s_stub_names
[2][XLOGUE_STUB_COUNT
][VARIANT_COUNT
]
2540 [STUB_NAME_MAX_LEN
];
2542 static const xlogue_layout s_instances
[XLOGUE_SET_COUNT
];
2545 const char * const xlogue_layout::STUB_BASE_NAMES
[XLOGUE_STUB_COUNT
] = {
2554 const unsigned xlogue_layout::REG_ORDER
[xlogue_layout::MAX_REGS
] = {
2555 /* The below offset values are where each register is stored for the layout
2556 relative to incoming stack pointer. The value of each m_regs[].offset will
2557 be relative to the incoming base pointer (rax or rsi) used by the stub.
2559 s_instances: 0 1 2 3
2560 Offset: realigned or aligned + 8
2561 Register aligned aligned + 8 aligned w/HFP w/HFP */
2562 XMM15_REG
, /* 0x10 0x18 0x10 0x18 */
2563 XMM14_REG
, /* 0x20 0x28 0x20 0x28 */
2564 XMM13_REG
, /* 0x30 0x38 0x30 0x38 */
2565 XMM12_REG
, /* 0x40 0x48 0x40 0x48 */
2566 XMM11_REG
, /* 0x50 0x58 0x50 0x58 */
2567 XMM10_REG
, /* 0x60 0x68 0x60 0x68 */
2568 XMM9_REG
, /* 0x70 0x78 0x70 0x78 */
2569 XMM8_REG
, /* 0x80 0x88 0x80 0x88 */
2570 XMM7_REG
, /* 0x90 0x98 0x90 0x98 */
2571 XMM6_REG
, /* 0xa0 0xa8 0xa0 0xa8 */
2572 SI_REG
, /* 0xa8 0xb0 0xa8 0xb0 */
2573 DI_REG
, /* 0xb0 0xb8 0xb0 0xb8 */
2574 BX_REG
, /* 0xb8 0xc0 0xb8 0xc0 */
2575 BP_REG
, /* 0xc0 0xc8 N/A N/A */
2576 R12_REG
, /* 0xc8 0xd0 0xc0 0xc8 */
2577 R13_REG
, /* 0xd0 0xd8 0xc8 0xd0 */
2578 R14_REG
, /* 0xd8 0xe0 0xd0 0xd8 */
2579 R15_REG
, /* 0xe0 0xe8 0xd8 0xe0 */
2582 /* Instantiate static const values. */
2583 const HOST_WIDE_INT
xlogue_layout::STUB_INDEX_OFFSET
;
2584 const unsigned xlogue_layout::MIN_REGS
;
2585 const unsigned xlogue_layout::MAX_REGS
;
2586 const unsigned xlogue_layout::MAX_EXTRA_REGS
;
2587 const unsigned xlogue_layout::VARIANT_COUNT
;
2588 const unsigned xlogue_layout::STUB_NAME_MAX_LEN
;
2590 /* Initialize xlogue_layout::s_stub_names to zero. */
2591 char xlogue_layout::s_stub_names
[2][XLOGUE_STUB_COUNT
][VARIANT_COUNT
]
2592 [STUB_NAME_MAX_LEN
];
2594 /* Instantiates all xlogue_layout instances. */
2595 const xlogue_layout
xlogue_layout::s_instances
[XLOGUE_SET_COUNT
] = {
2596 xlogue_layout (0, false),
2597 xlogue_layout (8, false),
2598 xlogue_layout (0, true),
2599 xlogue_layout (8, true)
/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const struct xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}
2622 /* Determine how many clobbered registers can be saved by the stub.
2623 Returns the count of registers the stub will save and restore. */
2625 xlogue_layout::count_stub_managed_regs ()
2627 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
2631 for (count
= i
= MIN_REGS
; i
< MAX_REGS
; ++i
)
2633 regno
= REG_ORDER
[i
];
2634 if (regno
== BP_REG
&& hfp
)
2636 if (!ix86_save_reg (regno
, false, false))
2643 /* Determine if register REGNO is a stub managed register given the
2644 total COUNT of stub managed registers. */
2646 xlogue_layout::is_stub_managed_reg (unsigned regno
, unsigned count
)
2648 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
2651 for (i
= 0; i
< count
; ++i
)
2653 gcc_assert (i
< MAX_REGS
);
2654 if (REG_ORDER
[i
] == BP_REG
&& hfp
)
2656 else if (REG_ORDER
[i
] == regno
)
2662 /* Constructor for xlogue_layout. */
2663 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in
, bool hfp
)
2664 : m_hfp (hfp
) , m_nregs (hfp
? 17 : 18),
2665 m_stack_align_off_in (stack_align_off_in
)
2667 HOST_WIDE_INT offset
= stack_align_off_in
;
2670 for (i
= j
= 0; i
< MAX_REGS
; ++i
)
2672 unsigned regno
= REG_ORDER
[i
];
2674 if (regno
== BP_REG
&& hfp
)
2676 if (SSE_REGNO_P (regno
))
2679 /* Verify that SSE regs are always aligned. */
2680 gcc_assert (!((stack_align_off_in
+ offset
) & 15));
2685 m_regs
[j
].regno
= regno
;
2686 m_regs
[j
++].offset
= offset
- STUB_INDEX_OFFSET
;
2688 gcc_assert (j
== m_nregs
);
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
		      (have_avx ? "avx" : "sse"),
		      STUB_BASE_NAMES[stub],
		      MIN_REGS + n_extra_regs);
  gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);

  return name;
}
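/* Illustrative note (not part of the build): the snprintf format above
   produces names of the form "__<isa>_<base>_<nregs>", where <isa> is "avx"
   or "sse", <base> comes from STUB_BASE_NAMES and <nregs> is MIN_REGS plus
   the number of extra registers.  For example, with AVX enabled and no extra
   registers a stub name looks like "__avx_<base>_12"; the base-name strings
   themselves are defined in STUB_BASE_NAMES above.  */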
/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}
2724 /* Define the structure for the machine field in struct function. */
2726 struct GTY(()) stack_local_entry
{
2727 unsigned short mode
;
2730 struct stack_local_entry
*next
;
2733 /* Which cpu are we scheduling for. */
2734 enum attr_cpu ix86_schedule
;
2736 /* Which cpu are we optimizing for. */
2737 enum processor_type ix86_tune
;
2739 /* Which instruction set architecture to use. */
2740 enum processor_type ix86_arch
;
2742 /* True if processor has SSE prefetch instruction. */
2743 unsigned char x86_prefetch_sse
;
/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
static rtx (*ix86_gen_clzero) (rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
2785 /* Fence to use after loop using movnt. */
2788 /* Register class used for passing given 64bit part of the argument.
2789 These represent classes as documented by the PS ABI, with the exception
2790 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2791 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2793 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2794 whenever possible (upper half does contain padding). */
2795 enum x86_64_reg_class
2798 X86_64_INTEGER_CLASS
,
2799 X86_64_INTEGERSI_CLASS
,
2806 X86_64_COMPLEX_X87_CLASS
,
2810 #define MAX_CLASSES 8
2812 /* Table of constants used by fldpi, fldln2, etc.... */
2813 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2814 static bool ext_80387_constants_init
;
2817 static struct machine_function
* ix86_init_machine_status (void);
2818 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2819 static bool ix86_function_value_regno_p (const unsigned int);
2820 static unsigned int ix86_function_arg_boundary (machine_mode
,
2822 static rtx
ix86_static_chain (const_tree
, bool);
2823 static int ix86_function_regparm (const_tree
, const_tree
);
2824 static void ix86_compute_frame_layout (void);
2825 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode
,
2827 static void ix86_add_new_builtins (HOST_WIDE_INT
, HOST_WIDE_INT
);
2828 static tree
ix86_canonical_va_list_type (tree
);
2829 static void predict_jump (int);
2830 static unsigned int split_stack_prologue_scratch_regno (void);
2831 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2833 enum ix86_function_specific_strings
2835 IX86_FUNCTION_SPECIFIC_ARCH
,
2836 IX86_FUNCTION_SPECIFIC_TUNE
,
2837 IX86_FUNCTION_SPECIFIC_MAX
2840 static char *ix86_target_string (HOST_WIDE_INT
, HOST_WIDE_INT
, int, int,
2841 const char *, const char *, enum fpmath_unit
,
2843 static void ix86_function_specific_save (struct cl_target_option
*,
2844 struct gcc_options
*opts
);
2845 static void ix86_function_specific_restore (struct gcc_options
*opts
,
2846 struct cl_target_option
*);
2847 static void ix86_function_specific_post_stream_in (struct cl_target_option
*);
2848 static void ix86_function_specific_print (FILE *, int,
2849 struct cl_target_option
*);
2850 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2851 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2852 struct gcc_options
*,
2853 struct gcc_options
*,
2854 struct gcc_options
*);
2855 static bool ix86_can_inline_p (tree
, tree
);
2856 static void ix86_set_current_function (tree
);
2857 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2859 static enum calling_abi
ix86_function_abi (const_tree
);
2862 #ifndef SUBTARGET32_DEFAULT_CPU
2863 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (combined_fn, tree, tree);

static tree ix86_veclibabi_svml (combined_fn, tree, tree);
static tree ix86_veclibabi_acml (combined_fn, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const char *const name;			/* processor name  */
  const struct processor_costs *cost;		/* Processor costs */
  const int align_loop;				/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

/* This table must be in sync with enum processor_type in i386.h.  */
static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {"generic", &generic_cost, 16, 10, 16, 10, 16},
  {"i386", &i386_cost, 4, 3, 4, 3, 4},
  {"i486", &i486_cost, 16, 15, 16, 15, 16},
  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
  {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
  {"core2", &core_cost, 16, 10, 16, 10, 16},
  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
  {"haswell", &core_cost, 16, 10, 16, 10, 16},
  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
  {"knl", &slm_cost, 16, 15, 16, 7, 16},
  {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
  {"intel", &intel_cost, 16, 15, 16, 7, 16},
  {"geode", &geode_cost, 0, 0, 0, 0, 0},
  {"k6", &k6_cost, 32, 7, 32, 7, 32},
  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
  {"k8", &k8_cost, 16, 7, 16, 7, 16},
  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
  {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
  {"znver1", &znver1_cost, 16, 15, 16, 15, 16}
};
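/* Illustrative sketch (not part of the build): how the table above is
   typically consulted.  Option handling elsewhere in this file maps the
   -mtune= / -march= selection to a processor_type index and then reads the
   per-processor cost table and alignment defaults from the matching row;
   something along these lines, with "tune" standing in for the selected
   index.  */
#if 0
static void
demo_use_target_table (enum processor_type tune)
{
  const struct processor_costs *costs = processor_target_table[tune].cost;
  int align_func = processor_target_table[tune].align_func;
  (void) costs;
  (void) align_func;
}
#endif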
2923 rest_of_handle_insert_vzeroupper (void)
2927 /* vzeroupper instructions are inserted immediately after reload to
2928 account for possible spills from 256bit registers. The pass
2929 reuses mode switching infrastructure by re-running mode insertion
2930 pass, so disable entities that have already been processed. */
2931 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2932 ix86_optimize_mode_switching
[i
] = 0;
2934 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2936 /* Call optimize_mode_switching. */
2937 g
->get_passes ()->execute_pass_mode_switching ();
2941 /* Return 1 if INSN uses or defines a hard register.
2942 Hard register uses in a memory address are ignored.
2943 Clobbers and flags definitions are ignored. */
2946 has_non_address_hard_reg (rtx_insn
*insn
)
2949 FOR_EACH_INSN_DEF (ref
, insn
)
2950 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref
))
2951 && !DF_REF_FLAGS_IS_SET (ref
, DF_REF_MUST_CLOBBER
)
2952 && DF_REF_REGNO (ref
) != FLAGS_REG
)
2955 FOR_EACH_INSN_USE (ref
, insn
)
2956 if (!DF_REF_REG_MEM_P (ref
) && HARD_REGISTER_P (DF_REF_REAL_REG (ref
)))
2962 /* Check if comparison INSN may be transformed
2963 into vector comparison. Currently we transform
2964 zero checks only which look like:
2966 (set (reg:CCZ 17 flags)
2967 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
2968 (subreg:SI (reg:DI x) 0))
2969 (const_int 0 [0]))) */
2972 convertible_comparison_p (rtx_insn
*insn
)
2977 rtx def_set
= single_set (insn
);
2979 gcc_assert (def_set
);
2981 rtx src
= SET_SRC (def_set
);
2982 rtx dst
= SET_DEST (def_set
);
2984 gcc_assert (GET_CODE (src
) == COMPARE
);
2986 if (GET_CODE (dst
) != REG
2987 || REGNO (dst
) != FLAGS_REG
2988 || GET_MODE (dst
) != CCZmode
)
2991 rtx op1
= XEXP (src
, 0);
2992 rtx op2
= XEXP (src
, 1);
2994 if (op2
!= CONST0_RTX (GET_MODE (op2
)))
2997 if (GET_CODE (op1
) != IOR
)
3000 op2
= XEXP (op1
, 1);
3001 op1
= XEXP (op1
, 0);
3005 || GET_MODE (op1
) != SImode
3006 || GET_MODE (op2
) != SImode
3007 || ((SUBREG_BYTE (op1
) != 0
3008 || SUBREG_BYTE (op2
) != GET_MODE_SIZE (SImode
))
3009 && (SUBREG_BYTE (op2
) != 0
3010 || SUBREG_BYTE (op1
) != GET_MODE_SIZE (SImode
))))
3013 op1
= SUBREG_REG (op1
);
3014 op2
= SUBREG_REG (op2
);
3018 || GET_MODE (op1
) != DImode
)
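/* Illustrative sketch (not part of the build): C source that gives rise to
   the zero-check pattern handled by convertible_comparison_p above.  On a
   32-bit target a DImode value lives in two SImode halves, so the equality
   test below is expanded as an IOR of the two halves compared against zero,
   the (compare:CCZ (ior:SI ...) (const_int 0)) form quoted in the comment
   before the function.  */
#if 0
static int
demo_dimode_zero_check (unsigned long long x)
{
  return x == 0;	/* low | high == 0 on !TARGET_64BIT */
}
#endif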
3024 /* The DImode version of scalar_to_vector_candidate_p. */
3027 dimode_scalar_to_vector_candidate_p (rtx_insn
*insn
)
3029 rtx def_set
= single_set (insn
);
3034 if (has_non_address_hard_reg (insn
))
3037 rtx src
= SET_SRC (def_set
);
3038 rtx dst
= SET_DEST (def_set
);
3040 if (GET_CODE (src
) == COMPARE
)
3041 return convertible_comparison_p (insn
);
3043 /* We are interested in DImode promotion only. */
3044 if ((GET_MODE (src
) != DImode
3045 && !CONST_INT_P (src
))
3046 || GET_MODE (dst
) != DImode
)
3049 if (!REG_P (dst
) && !MEM_P (dst
))
3052 switch (GET_CODE (src
))
3055 if (!TARGET_AVX512VL
)
3061 if (!REG_P (XEXP (src
, 1))
3062 && (!SUBREG_P (XEXP (src
, 1))
3063 || SUBREG_BYTE (XEXP (src
, 1)) != 0
3064 || !REG_P (SUBREG_REG (XEXP (src
, 1))))
3065 && (!CONST_INT_P (XEXP (src
, 1))
3066 || !IN_RANGE (INTVAL (XEXP (src
, 1)), 0, 63)))
3069 if (GET_MODE (XEXP (src
, 1)) != QImode
3070 && !CONST_INT_P (XEXP (src
, 1)))
3079 if (!REG_P (XEXP (src
, 1))
3080 && !MEM_P (XEXP (src
, 1))
3081 && !CONST_INT_P (XEXP (src
, 1)))
3084 if (GET_MODE (XEXP (src
, 1)) != DImode
3085 && !CONST_INT_P (XEXP (src
, 1)))
3104 if (!REG_P (XEXP (src
, 0))
3105 && !MEM_P (XEXP (src
, 0))
3106 && !CONST_INT_P (XEXP (src
, 0))
3107 /* Check for andnot case. */
3108 && (GET_CODE (src
) != AND
3109 || GET_CODE (XEXP (src
, 0)) != NOT
3110 || !REG_P (XEXP (XEXP (src
, 0), 0))))
3113 if (GET_MODE (XEXP (src
, 0)) != DImode
3114 && !CONST_INT_P (XEXP (src
, 0)))
/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = single_set (insn);

  if (!def_set)
    return false;

  if (has_non_address_hard_reg (insn))
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
         is optimal.  Only support store from register, standard SSE
         constant or CONST_WIDE_INT generated from piecewise store.

         ??? Verify performance impact before enabling CONST_INT for
         __int128 store.  */
      if (misaligned_operand (dst, TImode)
          && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
        return false;

      switch (GET_CODE (src))
        {
        default:
          return false;

        case REG:
        case CONST_WIDE_INT:
          return true;

        case CONST_INT:
          return standard_sse_constant_p (src, TImode);
        }
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
         optimal.  */
      return (REG_P (dst)
              && (!misaligned_operand (src, TImode)
                  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}
/* Return 1 if INSN may be converted into vector
   instruction.  */

static bool
scalar_to_vector_candidate_p (rtx_insn *insn)
{
  if (TARGET_64BIT)
    return timode_scalar_to_vector_candidate_p (insn);
  else
    return dimode_scalar_to_vector_candidate_p (insn);
}
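/* In other words, the DImode chains handle 64-bit scalar operations on
   32-bit targets, while the TImode chains handle 128-bit loads and
   stores (e.g. __int128 copies) on 64-bit targets.  */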
3189 /* The DImode version of remove_non_convertible_regs. */
3192 dimode_remove_non_convertible_regs (bitmap candidates
)
3196 bitmap regs
= BITMAP_ALLOC (NULL
);
3198 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, id
, bi
)
3200 rtx def_set
= single_set (DF_INSN_UID_GET (id
)->insn
);
3201 rtx reg
= SET_DEST (def_set
);
3204 || bitmap_bit_p (regs
, REGNO (reg
))
3205 || HARD_REGISTER_P (reg
))
3208 for (df_ref def
= DF_REG_DEF_CHAIN (REGNO (reg
));
3210 def
= DF_REF_NEXT_REG (def
))
3212 if (!bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
3216 "r%d has non convertible definition in insn %d\n",
3217 REGNO (reg
), DF_REF_INSN_UID (def
));
3219 bitmap_set_bit (regs
, REGNO (reg
));
3225 EXECUTE_IF_SET_IN_BITMAP (regs
, 0, id
, bi
)
3227 for (df_ref def
= DF_REG_DEF_CHAIN (id
);
3229 def
= DF_REF_NEXT_REG (def
))
3230 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
3233 fprintf (dump_file
, "Removing insn %d from candidates list\n",
3234 DF_REF_INSN_UID (def
));
3236 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (def
));
3243 /* For a register REGNO, scan instructions for its defs and uses.
3244 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
3247 timode_check_non_convertible_regs (bitmap candidates
, bitmap regs
,
3250 for (df_ref def
= DF_REG_DEF_CHAIN (regno
);
3252 def
= DF_REF_NEXT_REG (def
))
3254 if (!bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
3258 "r%d has non convertible def in insn %d\n",
3259 regno
, DF_REF_INSN_UID (def
));
3261 bitmap_set_bit (regs
, regno
);
3266 for (df_ref ref
= DF_REG_USE_CHAIN (regno
);
3268 ref
= DF_REF_NEXT_REG (ref
))
3270 /* Debug instructions are skipped. */
3271 if (NONDEBUG_INSN_P (DF_REF_INSN (ref
))
3272 && !bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
3276 "r%d has non convertible use in insn %d\n",
3277 regno
, DF_REF_INSN_UID (ref
));
3279 bitmap_set_bit (regs
, regno
);
3285 /* The TImode version of remove_non_convertible_regs. */
3288 timode_remove_non_convertible_regs (bitmap candidates
)
3292 bitmap regs
= BITMAP_ALLOC (NULL
);
3294 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, id
, bi
)
3296 rtx def_set
= single_set (DF_INSN_UID_GET (id
)->insn
);
3297 rtx dest
= SET_DEST (def_set
);
3298 rtx src
= SET_SRC (def_set
);
3301 || bitmap_bit_p (regs
, REGNO (dest
))
3302 || HARD_REGISTER_P (dest
))
3304 || bitmap_bit_p (regs
, REGNO (src
))
3305 || HARD_REGISTER_P (src
)))
3309 timode_check_non_convertible_regs (candidates
, regs
,
3313 timode_check_non_convertible_regs (candidates
, regs
,
3317 EXECUTE_IF_SET_IN_BITMAP (regs
, 0, id
, bi
)
3319 for (df_ref def
= DF_REG_DEF_CHAIN (id
);
3321 def
= DF_REF_NEXT_REG (def
))
3322 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
3325 fprintf (dump_file
, "Removing insn %d from candidates list\n",
3326 DF_REF_INSN_UID (def
));
3328 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (def
));
3331 for (df_ref ref
= DF_REG_USE_CHAIN (id
);
3333 ref
= DF_REF_NEXT_REG (ref
))
3334 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
3337 fprintf (dump_file
, "Removing insn %d from candidates list\n",
3338 DF_REF_INSN_UID (ref
));
3340 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (ref
));
/* For a given bitmap of insn UIDs, scan all instructions and remove an
   insn from CANDIDATES if it has both convertible and non-convertible
   definitions.

   All insns in the bitmap are conversion candidates according to
   scalar_to_vector_candidate_p.  Currently this implies that all insns
   are single_set.  */

static void
remove_non_convertible_regs (bitmap candidates)
{
  if (TARGET_64BIT)
    timode_remove_non_convertible_regs (candidates);
  else
    dimode_remove_non_convertible_regs (candidates);
}
/* Base class describing a chain of scalar insns to be converted.  */

class scalar_chain
{
 public:
  scalar_chain ();
  virtual ~scalar_chain ();

  static unsigned max_id;

  /* ID of a chain.  */
  unsigned int chain_id;
  /* A queue of instructions to be included into a chain.  */
  bitmap queue;
  /* Instructions included into a chain.  */
  bitmap insns;
  /* All registers defined by a chain.  */
  bitmap defs;
  /* Registers used in both vector and scalar modes.  */
  bitmap defs_conv;

  void build (bitmap candidates, unsigned insn_uid);
  virtual int compute_convert_gain () = 0;
  int convert ();

 protected:
  void add_to_queue (unsigned insn_uid);
  void emit_conversion_insns (rtx insns, rtx_insn *pos);

 private:
  void add_insn (bitmap candidates, unsigned insn_uid);
  void analyze_register_chain (bitmap candidates, df_ref ref);
  virtual void mark_dual_mode_def (df_ref def) = 0;
  virtual void convert_insn (rtx_insn *insn) = 0;
  virtual void convert_registers () = 0;
};

class dimode_scalar_chain : public scalar_chain
{
 public:
  int compute_convert_gain ();

 private:
  void mark_dual_mode_def (df_ref def);
  rtx replace_with_subreg (rtx x, rtx reg, rtx subreg);
  void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg);
  void convert_insn (rtx_insn *insn);
  void convert_op (rtx *op, rtx_insn *insn);
  void convert_reg (unsigned regno);
  void make_vector_copies (unsigned regno);
  void convert_registers ();
  int vector_const_cost (rtx exp);
};

class timode_scalar_chain : public scalar_chain
{
 public:
  /* Convert from TImode to V1TImode is always faster.  */
  int compute_convert_gain () { return 1; }

 private:
  void mark_dual_mode_def (df_ref def);
  void fix_debug_reg_uses (rtx reg);
  void convert_insn (rtx_insn *insn);
  /* We don't convert registers to a different size.  */
  void convert_registers () {}
};
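/* A typical use of these classes (see convert_scalars_to_vector below):
   allocate a dimode_scalar_chain or timode_scalar_chain, call build ()
   on a candidate insn, query compute_convert_gain (), and call
   convert () only when the gain is positive.  */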
unsigned scalar_chain::max_id = 0;

/* Initialize new chain.  */

scalar_chain::scalar_chain ()
{
  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}
/* Add instruction into chains' queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
             insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}
/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
dimode_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def)))
    return;

  if (dump_file)
    fprintf (dump_file,
             "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
             DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);

  bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
3499 /* Check REF's chain to add new insns into a queue
3500 and find registers requiring conversion. */
3503 scalar_chain::analyze_register_chain (bitmap candidates
, df_ref ref
)
3507 gcc_assert (bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
))
3508 || bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)));
3509 add_to_queue (DF_REF_INSN_UID (ref
));
3511 for (chain
= DF_REF_CHAIN (ref
); chain
; chain
= chain
->next
)
3513 unsigned uid
= DF_REF_INSN_UID (chain
->ref
);
3515 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain
->ref
)))
3518 if (!DF_REF_REG_MEM_P (chain
->ref
))
3520 if (bitmap_bit_p (insns
, uid
))
3523 if (bitmap_bit_p (candidates
, uid
))
3530 if (DF_REF_REG_DEF_P (chain
->ref
))
3533 fprintf (dump_file
, " r%d def in insn %d isn't convertible\n",
3534 DF_REF_REGNO (chain
->ref
), uid
);
3535 mark_dual_mode_def (chain
->ref
);
3540 fprintf (dump_file
, " r%d use in insn %d isn't convertible\n",
3541 DF_REF_REGNO (chain
->ref
), uid
);
3542 mark_dual_mode_def (ref
);
3547 /* Add instruction into a chain. */
3550 scalar_chain::add_insn (bitmap candidates
, unsigned int insn_uid
)
3552 if (bitmap_bit_p (insns
, insn_uid
))
3556 fprintf (dump_file
, " Adding insn %d to chain #%d\n", insn_uid
, chain_id
);
3558 bitmap_set_bit (insns
, insn_uid
);
3560 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
3561 rtx def_set
= single_set (insn
);
3562 if (def_set
&& REG_P (SET_DEST (def_set
))
3563 && !HARD_REGISTER_P (SET_DEST (def_set
)))
3564 bitmap_set_bit (defs
, REGNO (SET_DEST (def_set
)));
3568 for (ref
= DF_INSN_UID_DEFS (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
3569 if (!HARD_REGISTER_P (DF_REF_REG (ref
)))
3570 for (def
= DF_REG_DEF_CHAIN (DF_REF_REGNO (ref
));
3572 def
= DF_REF_NEXT_REG (def
))
3573 analyze_register_chain (candidates
, def
);
3574 for (ref
= DF_INSN_UID_USES (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
3575 if (!DF_REF_REG_MEM_P (ref
))
3576 analyze_register_chain (candidates
, ref
);
3579 /* Build new chain starting from insn INSN_UID recursively
3580 adding all dependent uses and definitions. */
3583 scalar_chain::build (bitmap candidates
, unsigned insn_uid
)
3585 queue
= BITMAP_ALLOC (NULL
);
3586 bitmap_set_bit (queue
, insn_uid
);
3589 fprintf (dump_file
, "Building chain #%d...\n", chain_id
);
3591 while (!bitmap_empty_p (queue
))
3593 insn_uid
= bitmap_first_set_bit (queue
);
3594 bitmap_clear_bit (queue
, insn_uid
);
3595 bitmap_clear_bit (candidates
, insn_uid
);
3596 add_insn (candidates
, insn_uid
);
3601 fprintf (dump_file
, "Collected chain #%d...\n", chain_id
);
3602 fprintf (dump_file
, " insns: ");
3603 dump_bitmap (dump_file
, insns
);
3604 if (!bitmap_empty_p (defs_conv
))
3608 const char *comma
= "";
3609 fprintf (dump_file
, " defs to convert: ");
3610 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, id
, bi
)
3612 fprintf (dump_file
, "%sr%d", comma
, id
);
3615 fprintf (dump_file
, "\n");
3619 BITMAP_FREE (queue
);
/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
dimode_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, V2DImode))
    return COSTS_N_INSNS (1);
  return ix86_cost->sse_load[1];
}
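/* E.g. all-zeros and all-ones are standard SSE constants that can be
   materialized with pxor / pcmpeqd at roughly one insn of cost, while
   any other immediate has to be loaded from the constant pool at
   sse_load cost.  */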
3635 /* Compute a gain for chain conversion. */
3638 dimode_scalar_chain::compute_convert_gain ()
3646 fprintf (dump_file
, "Computing gain for chain #%d...\n", chain_id
);
3648 EXECUTE_IF_SET_IN_BITMAP (insns
, 0, insn_uid
, bi
)
3650 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
3651 rtx def_set
= single_set (insn
);
3652 rtx src
= SET_SRC (def_set
);
3653 rtx dst
= SET_DEST (def_set
);
3655 if (REG_P (src
) && REG_P (dst
))
3656 gain
+= COSTS_N_INSNS (2) - ix86_cost
->sse_move
;
3657 else if (REG_P (src
) && MEM_P (dst
))
3658 gain
+= 2 * ix86_cost
->int_store
[2] - ix86_cost
->sse_store
[1];
3659 else if (MEM_P (src
) && REG_P (dst
))
3660 gain
+= 2 * ix86_cost
->int_load
[2] - ix86_cost
->sse_load
[1];
3661 else if (GET_CODE (src
) == ASHIFT
3662 || GET_CODE (src
) == ASHIFTRT
3663 || GET_CODE (src
) == LSHIFTRT
)
3665 if (CONST_INT_P (XEXP (src
, 0)))
3666 gain
-= vector_const_cost (XEXP (src
, 0));
3667 if (CONST_INT_P (XEXP (src
, 1)))
3669 gain
+= ix86_cost
->shift_const
;
3670 if (INTVAL (XEXP (src
, 1)) >= 32)
3671 gain
-= COSTS_N_INSNS (1);
3674 /* Additional gain for omitting two CMOVs. */
3675 gain
+= ix86_cost
->shift_var
+ COSTS_N_INSNS (2);
3677 else if (GET_CODE (src
) == PLUS
3678 || GET_CODE (src
) == MINUS
3679 || GET_CODE (src
) == IOR
3680 || GET_CODE (src
) == XOR
3681 || GET_CODE (src
) == AND
)
3683 gain
+= ix86_cost
->add
;
3684 /* Additional gain for andnot for targets without BMI. */
3685 if (GET_CODE (XEXP (src
, 0)) == NOT
3687 gain
+= 2 * ix86_cost
->add
;
3689 if (CONST_INT_P (XEXP (src
, 0)))
3690 gain
-= vector_const_cost (XEXP (src
, 0));
3691 if (CONST_INT_P (XEXP (src
, 1)))
3692 gain
-= vector_const_cost (XEXP (src
, 1));
3694 else if (GET_CODE (src
) == NEG
3695 || GET_CODE (src
) == NOT
)
3696 gain
+= ix86_cost
->add
- COSTS_N_INSNS (1);
3697 else if (GET_CODE (src
) == COMPARE
)
3699 /* Assume comparison cost is the same. */
3701 else if (CONST_INT_P (src
))
3704 gain
+= COSTS_N_INSNS (2);
3705 else if (MEM_P (dst
))
3706 gain
+= 2 * ix86_cost
->int_store
[2] - ix86_cost
->sse_store
[1];
3707 gain
-= vector_const_cost (src
);
3714 fprintf (dump_file
, " Instruction conversion gain: %d\n", gain
);
3716 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, insn_uid
, bi
)
3717 cost
+= DF_REG_DEF_COUNT (insn_uid
) * ix86_cost
->mmxsse_to_integer
;
3720 fprintf (dump_file
, " Registers conversion cost: %d\n", cost
);
3725 fprintf (dump_file
, " Total gain: %d\n", gain
);
3730 /* Replace REG in X with a V2DI subreg of NEW_REG. */
3733 dimode_scalar_chain::replace_with_subreg (rtx x
, rtx reg
, rtx new_reg
)
3736 return gen_rtx_SUBREG (V2DImode
, new_reg
, 0);
3738 const char *fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3740 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3743 XEXP (x
, i
) = replace_with_subreg (XEXP (x
, i
), reg
, new_reg
);
3744 else if (fmt
[i
] == 'E')
3745 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3746 XVECEXP (x
, i
, j
) = replace_with_subreg (XVECEXP (x
, i
, j
),
3753 /* Replace REG in INSN with a V2DI subreg of NEW_REG. */
3756 dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn
*insn
,
3757 rtx reg
, rtx new_reg
)
3759 replace_with_subreg (single_set (insn
), reg
, new_reg
);
3762 /* Insert generated conversion instruction sequence INSNS
3763 after instruction AFTER. New BB may be required in case
3764 instruction has EH region attached. */
3767 scalar_chain::emit_conversion_insns (rtx insns
, rtx_insn
*after
)
3769 if (!control_flow_insn_p (after
))
3771 emit_insn_after (insns
, after
);
3775 basic_block bb
= BLOCK_FOR_INSN (after
);
3776 edge e
= find_fallthru_edge (bb
->succs
);
3779 basic_block new_bb
= split_edge (e
);
3780 emit_insn_after (insns
, BB_HEAD (new_bb
));
3783 /* Make vector copies for all register REGNO definitions
3784 and replace its uses in a chain. */
3787 dimode_scalar_chain::make_vector_copies (unsigned regno
)
3789 rtx reg
= regno_reg_rtx
[regno
];
3790 rtx vreg
= gen_reg_rtx (DImode
);
3791 bool count_reg
= false;
3794 for (ref
= DF_REG_DEF_CHAIN (regno
); ref
; ref
= DF_REF_NEXT_REG (ref
))
3795 if (!bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
)))
3799 /* Detect the count register of a shift instruction. */
3800 for (use
= DF_REG_USE_CHAIN (regno
); use
; use
= DF_REF_NEXT_REG (use
))
3801 if (bitmap_bit_p (insns
, DF_REF_INSN_UID (use
)))
3803 rtx_insn
*insn
= DF_REF_INSN (use
);
3804 rtx def_set
= single_set (insn
);
3806 gcc_assert (def_set
);
3808 rtx src
= SET_SRC (def_set
);
3810 if ((GET_CODE (src
) == ASHIFT
3811 || GET_CODE (src
) == ASHIFTRT
3812 || GET_CODE (src
) == LSHIFTRT
)
3813 && !CONST_INT_P (XEXP (src
, 1))
3814 && reg_or_subregno (XEXP (src
, 1)) == regno
)
3821 rtx qreg
= gen_lowpart (QImode
, reg
);
3822 rtx tmp
= gen_reg_rtx (SImode
);
3824 if (TARGET_ZERO_EXTEND_WITH_AND
3825 && optimize_function_for_speed_p (cfun
))
3827 emit_move_insn (tmp
, const0_rtx
);
3828 emit_insn (gen_movstrictqi
3829 (gen_lowpart (QImode
, tmp
), qreg
));
3832 emit_insn (gen_rtx_SET
3833 (tmp
, gen_rtx_ZERO_EXTEND (SImode
, qreg
)));
3835 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
3837 rtx slot
= assign_386_stack_local (SImode
, SLOT_STV_TEMP
);
3838 emit_move_insn (slot
, tmp
);
3839 tmp
= copy_rtx (slot
);
3842 emit_insn (gen_zero_extendsidi2 (vreg
, tmp
));
3844 else if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
3846 rtx tmp
= assign_386_stack_local (DImode
, SLOT_STV_TEMP
);
3847 emit_move_insn (adjust_address (tmp
, SImode
, 0),
3848 gen_rtx_SUBREG (SImode
, reg
, 0));
3849 emit_move_insn (adjust_address (tmp
, SImode
, 4),
3850 gen_rtx_SUBREG (SImode
, reg
, 4));
3851 emit_move_insn (vreg
, tmp
);
3853 else if (TARGET_SSE4_1
)
3855 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3856 CONST0_RTX (V4SImode
),
3857 gen_rtx_SUBREG (SImode
, reg
, 0)));
3858 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3859 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3860 gen_rtx_SUBREG (SImode
, reg
, 4),
3865 rtx tmp
= gen_reg_rtx (DImode
);
3866 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3867 CONST0_RTX (V4SImode
),
3868 gen_rtx_SUBREG (SImode
, reg
, 0)));
3869 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, tmp
, 0),
3870 CONST0_RTX (V4SImode
),
3871 gen_rtx_SUBREG (SImode
, reg
, 4)));
3872 emit_insn (gen_vec_interleave_lowv4si
3873 (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3874 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
3875 gen_rtx_SUBREG (V4SImode
, tmp
, 0)));
3877 rtx_insn
*seq
= get_insns ();
3879 rtx_insn
*insn
= DF_REF_INSN (ref
);
3880 emit_conversion_insns (seq
, insn
);
3884 " Copied r%d to a vector register r%d for insn %d\n",
3885 regno
, REGNO (vreg
), INSN_UID (insn
));
3888 for (ref
= DF_REG_USE_CHAIN (regno
); ref
; ref
= DF_REF_NEXT_REG (ref
))
3889 if (bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
)))
3891 rtx_insn
*insn
= DF_REF_INSN (ref
);
3894 rtx def_set
= single_set (insn
);
3895 gcc_assert (def_set
);
3897 rtx src
= SET_SRC (def_set
);
3899 if ((GET_CODE (src
) == ASHIFT
3900 || GET_CODE (src
) == ASHIFTRT
3901 || GET_CODE (src
) == LSHIFTRT
)
3902 && !CONST_INT_P (XEXP (src
, 1))
3903 && reg_or_subregno (XEXP (src
, 1)) == regno
)
3904 XEXP (src
, 1) = vreg
;
3907 replace_with_subreg_in_insn (insn
, reg
, vreg
);
3910 fprintf (dump_file
, " Replaced r%d with r%d in insn %d\n",
3911 regno
, REGNO (vreg
), INSN_UID (insn
));
3915 /* Convert all definitions of register REGNO
3916 and fix its uses. Scalar copies may be created
3917 in case register is used in not convertible insn. */
3920 dimode_scalar_chain::convert_reg (unsigned regno
)
3922 bool scalar_copy
= bitmap_bit_p (defs_conv
, regno
);
3923 rtx reg
= regno_reg_rtx
[regno
];
3924 rtx scopy
= NULL_RTX
;
3928 conv
= BITMAP_ALLOC (NULL
);
3929 bitmap_copy (conv
, insns
);
3932 scopy
= gen_reg_rtx (DImode
);
3934 for (ref
= DF_REG_DEF_CHAIN (regno
); ref
; ref
= DF_REF_NEXT_REG (ref
))
3936 rtx_insn
*insn
= DF_REF_INSN (ref
);
3937 rtx def_set
= single_set (insn
);
3938 rtx src
= SET_SRC (def_set
);
3939 rtx reg
= DF_REF_REG (ref
);
3943 replace_with_subreg_in_insn (insn
, reg
, reg
);
3944 bitmap_clear_bit (conv
, INSN_UID (insn
));
3950 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC
)
3952 rtx tmp
= assign_386_stack_local (DImode
, SLOT_STV_TEMP
);
3953 emit_move_insn (tmp
, reg
);
3954 emit_move_insn (gen_rtx_SUBREG (SImode
, scopy
, 0),
3955 adjust_address (tmp
, SImode
, 0));
3956 emit_move_insn (gen_rtx_SUBREG (SImode
, scopy
, 4),
3957 adjust_address (tmp
, SImode
, 4));
3959 else if (TARGET_SSE4_1
)
3961 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
3964 (gen_rtx_SUBREG (SImode
, scopy
, 0),
3965 gen_rtx_VEC_SELECT (SImode
,
3966 gen_rtx_SUBREG (V4SImode
, reg
, 0), tmp
)));
3968 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const1_rtx
));
3971 (gen_rtx_SUBREG (SImode
, scopy
, 4),
3972 gen_rtx_VEC_SELECT (SImode
,
3973 gen_rtx_SUBREG (V4SImode
, reg
, 0), tmp
)));
3977 rtx vcopy
= gen_reg_rtx (V2DImode
);
3978 emit_move_insn (vcopy
, gen_rtx_SUBREG (V2DImode
, reg
, 0));
3979 emit_move_insn (gen_rtx_SUBREG (SImode
, scopy
, 0),
3980 gen_rtx_SUBREG (SImode
, vcopy
, 0));
3981 emit_move_insn (vcopy
,
3982 gen_rtx_LSHIFTRT (V2DImode
, vcopy
, GEN_INT (32)));
3983 emit_move_insn (gen_rtx_SUBREG (SImode
, scopy
, 4),
3984 gen_rtx_SUBREG (SImode
, vcopy
, 0));
3986 rtx_insn
*seq
= get_insns ();
3988 emit_conversion_insns (seq
, insn
);
3992 " Copied r%d to a scalar register r%d for insn %d\n",
3993 regno
, REGNO (scopy
), INSN_UID (insn
));
3997 for (ref
= DF_REG_USE_CHAIN (regno
); ref
; ref
= DF_REF_NEXT_REG (ref
))
3998 if (bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
)))
4000 if (bitmap_bit_p (conv
, DF_REF_INSN_UID (ref
)))
4002 rtx_insn
*insn
= DF_REF_INSN (ref
);
4004 rtx def_set
= single_set (insn
);
4005 gcc_assert (def_set
);
4007 rtx src
= SET_SRC (def_set
);
4008 rtx dst
= SET_DEST (def_set
);
4010 if ((GET_CODE (src
) == ASHIFT
4011 || GET_CODE (src
) == ASHIFTRT
4012 || GET_CODE (src
) == LSHIFTRT
)
4013 && !CONST_INT_P (XEXP (src
, 1))
4014 && reg_or_subregno (XEXP (src
, 1)) == regno
)
4016 rtx tmp2
= gen_reg_rtx (V2DImode
);
4021 emit_insn (gen_sse4_1_zero_extendv2qiv2di2
4022 (tmp2
, gen_rtx_SUBREG (V16QImode
, reg
, 0)));
4026 = gen_rtx_CONST_VECTOR (V2DImode
,
4027 gen_rtvec (2, GEN_INT (0xff),
4030 = validize_mem (force_const_mem (V2DImode
, vec_cst
));
4032 emit_insn (gen_rtx_SET
4034 gen_rtx_AND (V2DImode
,
4035 gen_rtx_SUBREG (V2DImode
, reg
, 0),
4038 rtx_insn
*seq
= get_insns ();
4041 emit_insn_before (seq
, insn
);
4043 XEXP (src
, 1) = gen_rtx_SUBREG (DImode
, tmp2
, 0);
4045 else if (!MEM_P (dst
) || !REG_P (src
))
4046 replace_with_subreg_in_insn (insn
, reg
, reg
);
4048 bitmap_clear_bit (conv
, INSN_UID (insn
));
4051 /* Skip debug insns and uninitialized uses. */
4052 else if (DF_REF_CHAIN (ref
)
4053 && NONDEBUG_INSN_P (DF_REF_INSN (ref
)))
4056 replace_rtx (DF_REF_INSN (ref
), reg
, scopy
);
4057 df_insn_rescan (DF_REF_INSN (ref
));
4063 /* Convert operand OP in INSN. We should handle
4064 memory operands and uninitialized registers.
4065 All other register uses are converted during
4066 registers conversion. */
4069 dimode_scalar_chain::convert_op (rtx
*op
, rtx_insn
*insn
)
4071 *op
= copy_rtx_if_shared (*op
);
4073 if (GET_CODE (*op
) == NOT
)
4075 convert_op (&XEXP (*op
, 0), insn
);
4076 PUT_MODE (*op
, V2DImode
);
4078 else if (MEM_P (*op
))
4080 rtx tmp
= gen_reg_rtx (DImode
);
4082 emit_insn_before (gen_move_insn (tmp
, *op
), insn
);
4083 *op
= gen_rtx_SUBREG (V2DImode
, tmp
, 0);
4086 fprintf (dump_file
, " Preloading operand for insn %d into r%d\n",
4087 INSN_UID (insn
), REGNO (tmp
));
4089 else if (REG_P (*op
))
4091 /* We may have not converted register usage in case
4092 this register has no definition. Otherwise it
4093 should be converted in convert_reg. */
4095 FOR_EACH_INSN_USE (ref
, insn
)
4096 if (DF_REF_REGNO (ref
) == REGNO (*op
))
4098 gcc_assert (!DF_REF_CHAIN (ref
));
4101 *op
= gen_rtx_SUBREG (V2DImode
, *op
, 0);
4103 else if (CONST_INT_P (*op
))
4106 rtx tmp
= gen_rtx_SUBREG (V2DImode
, gen_reg_rtx (DImode
), 0);
4108 /* Prefer all ones vector in case of -1. */
4109 if (constm1_operand (*op
, GET_MODE (*op
)))
4110 vec_cst
= CONSTM1_RTX (V2DImode
);
4112 vec_cst
= gen_rtx_CONST_VECTOR (V2DImode
,
4113 gen_rtvec (2, *op
, const0_rtx
));
4115 if (!standard_sse_constant_p (vec_cst
, V2DImode
))
4118 vec_cst
= validize_mem (force_const_mem (V2DImode
, vec_cst
));
4119 rtx_insn
*seq
= get_insns ();
4121 emit_insn_before (seq
, insn
);
4124 emit_insn_before (gen_move_insn (copy_rtx (tmp
), vec_cst
), insn
);
4129 gcc_assert (SUBREG_P (*op
));
4130 gcc_assert (GET_MODE (*op
) == V2DImode
);
4134 /* Convert INSN to vector mode. */
4137 dimode_scalar_chain::convert_insn (rtx_insn
*insn
)
4139 rtx def_set
= single_set (insn
);
4140 rtx src
= SET_SRC (def_set
);
4141 rtx dst
= SET_DEST (def_set
);
4144 if (MEM_P (dst
) && !REG_P (src
))
4146 /* There are no scalar integer instructions and therefore
4147 temporary register usage is required. */
4148 rtx tmp
= gen_reg_rtx (DImode
);
4149 emit_conversion_insns (gen_move_insn (dst
, tmp
), insn
);
4150 dst
= gen_rtx_SUBREG (V2DImode
, tmp
, 0);
4153 switch (GET_CODE (src
))
4158 convert_op (&XEXP (src
, 0), insn
);
4159 PUT_MODE (src
, V2DImode
);
4167 convert_op (&XEXP (src
, 0), insn
);
4168 convert_op (&XEXP (src
, 1), insn
);
4169 PUT_MODE (src
, V2DImode
);
4173 src
= XEXP (src
, 0);
4174 convert_op (&src
, insn
);
4175 subreg
= gen_reg_rtx (V2DImode
);
4176 emit_insn_before (gen_move_insn (subreg
, CONST0_RTX (V2DImode
)), insn
);
4177 src
= gen_rtx_MINUS (V2DImode
, subreg
, src
);
4181 src
= XEXP (src
, 0);
4182 convert_op (&src
, insn
);
4183 subreg
= gen_reg_rtx (V2DImode
);
4184 emit_insn_before (gen_move_insn (subreg
, CONSTM1_RTX (V2DImode
)), insn
);
4185 src
= gen_rtx_XOR (V2DImode
, src
, subreg
);
4190 convert_op (&src
, insn
);
4195 convert_op (&src
, insn
);
4199 gcc_assert (GET_MODE (src
) == V2DImode
);
4203 src
= SUBREG_REG (XEXP (XEXP (src
, 0), 0));
4205 gcc_assert ((REG_P (src
) && GET_MODE (src
) == DImode
)
4206 || (SUBREG_P (src
) && GET_MODE (src
) == V2DImode
));
4209 subreg
= gen_rtx_SUBREG (V2DImode
, src
, 0);
4211 subreg
= copy_rtx_if_shared (src
);
4212 emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg
),
4213 copy_rtx_if_shared (subreg
),
4214 copy_rtx_if_shared (subreg
)),
4216 dst
= gen_rtx_REG (CCmode
, FLAGS_REG
);
4217 src
= gen_rtx_UNSPEC (CCmode
, gen_rtvec (2, copy_rtx_if_shared (src
),
4218 copy_rtx_if_shared (src
)),
4223 convert_op (&src
, insn
);
4230 SET_SRC (def_set
) = src
;
4231 SET_DEST (def_set
) = dst
;
4233 /* Drop possible dead definitions. */
4234 PATTERN (insn
) = def_set
;
4236 INSN_CODE (insn
) = -1;
4237 recog_memoized (insn
);
4238 df_insn_rescan (insn
);
4241 /* Fix uses of converted REG in debug insns. */
4244 timode_scalar_chain::fix_debug_reg_uses (rtx reg
)
4246 if (!flag_var_tracking
)
4250 for (ref
= DF_REG_USE_CHAIN (REGNO (reg
)); ref
; ref
= next
)
4252 rtx_insn
*insn
= DF_REF_INSN (ref
);
4253 /* Make sure the next ref is for a different instruction,
4254 so that we're not affected by the rescan. */
4255 next
= DF_REF_NEXT_REG (ref
);
4256 while (next
&& DF_REF_INSN (next
) == insn
)
4257 next
= DF_REF_NEXT_REG (next
);
4259 if (DEBUG_INSN_P (insn
))
4261 /* It may be a debug insn with a TImode variable in
4263 bool changed
= false;
4264 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
4266 rtx
*loc
= DF_REF_LOC (ref
);
4267 if (REG_P (*loc
) && GET_MODE (*loc
) == V1TImode
)
4269 *loc
= gen_rtx_SUBREG (TImode
, *loc
, 0);
4274 df_insn_rescan (insn
);
/* Convert INSN from TImode to V1TImode.  */
4282 timode_scalar_chain::convert_insn (rtx_insn
*insn
)
4284 rtx def_set
= single_set (insn
);
4285 rtx src
= SET_SRC (def_set
);
4286 rtx dst
= SET_DEST (def_set
);
4288 switch (GET_CODE (dst
))
4292 rtx tmp
= find_reg_equal_equiv_note (insn
);
4294 PUT_MODE (XEXP (tmp
, 0), V1TImode
);
4295 PUT_MODE (dst
, V1TImode
);
4296 fix_debug_reg_uses (dst
);
4300 PUT_MODE (dst
, V1TImode
);
4307 switch (GET_CODE (src
))
4310 PUT_MODE (src
, V1TImode
);
4311 /* Call fix_debug_reg_uses only if SRC is never defined. */
4312 if (!DF_REG_DEF_CHAIN (REGNO (src
)))
4313 fix_debug_reg_uses (src
);
4317 PUT_MODE (src
, V1TImode
);
4320 case CONST_WIDE_INT
:
4321 if (NONDEBUG_INSN_P (insn
))
4323 /* Since there are no instructions to store 128-bit constant,
4324 temporary register usage is required. */
4325 rtx tmp
= gen_reg_rtx (V1TImode
);
4327 src
= gen_rtx_CONST_VECTOR (V1TImode
, gen_rtvec (1, src
));
4328 src
= validize_mem (force_const_mem (V1TImode
, src
));
4329 rtx_insn
*seq
= get_insns ();
4332 emit_insn_before (seq
, insn
);
4333 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
4339 switch (standard_sse_constant_p (src
, TImode
))
4342 src
= CONST0_RTX (GET_MODE (dst
));
4345 src
= CONSTM1_RTX (GET_MODE (dst
));
4350 if (NONDEBUG_INSN_P (insn
))
4352 rtx tmp
= gen_reg_rtx (V1TImode
);
4353 /* Since there are no instructions to store standard SSE
4354 constant, temporary register usage is required. */
4355 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
4364 SET_SRC (def_set
) = src
;
4365 SET_DEST (def_set
) = dst
;
4367 /* Drop possible dead definitions. */
4368 PATTERN (insn
) = def_set
;
4370 INSN_CODE (insn
) = -1;
4371 recog_memoized (insn
);
4372 df_insn_rescan (insn
);
4376 dimode_scalar_chain::convert_registers ()
4381 EXECUTE_IF_SET_IN_BITMAP (defs
, 0, id
, bi
)
4384 EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv
, defs
, 0, id
, bi
)
4385 make_vector_copies (id
);
/* Convert whole chain creating required register
   conversions and copies.  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}
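/* The number of converted insns is propagated back to
   convert_scalars_to_vector, which uses it to decide whether the stack
   must be realigned for 128-bit spills and fills.  */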
4415 /* Main STV pass function. Find and convert scalar
4416 instructions into vector mode when profitable. */
4419 convert_scalars_to_vector ()
4423 int converted_insns
= 0;
4425 bitmap_obstack_initialize (NULL
);
4426 candidates
= BITMAP_ALLOC (NULL
);
4428 calculate_dominance_info (CDI_DOMINATORS
);
4429 df_set_flags (DF_DEFER_INSN_RESCAN
);
4430 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
4431 df_md_add_problem ();
4434 /* Find all instructions we want to convert into vector mode. */
4436 fprintf (dump_file
, "Searching for mode conversion candidates...\n");
4438 FOR_EACH_BB_FN (bb
, cfun
)
4441 FOR_BB_INSNS (bb
, insn
)
4442 if (scalar_to_vector_candidate_p (insn
))
4445 fprintf (dump_file
, " insn %d is marked as a candidate\n",
4448 bitmap_set_bit (candidates
, INSN_UID (insn
));
4452 remove_non_convertible_regs (candidates
);
4454 if (bitmap_empty_p (candidates
))
4456 fprintf (dump_file
, "There are no candidates for optimization.\n");
4458 while (!bitmap_empty_p (candidates
))
4460 unsigned uid
= bitmap_first_set_bit (candidates
);
4461 scalar_chain
*chain
;
4464 chain
= new timode_scalar_chain
;
4466 chain
= new dimode_scalar_chain
;
4468 /* Find instructions chain we want to convert to vector mode.
4469 Check all uses and definitions to estimate all required
4471 chain
->build (candidates
, uid
);
4473 if (chain
->compute_convert_gain () > 0)
4474 converted_insns
+= chain
->convert ();
4477 fprintf (dump_file
, "Chain #%d conversion is not profitable\n",
4484 fprintf (dump_file
, "Total insns converted: %d\n", converted_insns
);
4486 BITMAP_FREE (candidates
);
4487 bitmap_obstack_release (NULL
);
4488 df_process_deferred_rescans ();
4490 /* Conversion means we may have 128bit register spills/fills
4491 which require aligned stack. */
4492 if (converted_insns
)
4494 if (crtl
->stack_alignment_needed
< 128)
4495 crtl
->stack_alignment_needed
= 128;
4496 if (crtl
->stack_alignment_estimated
< 128)
4497 crtl
->stack_alignment_estimated
= 128;
4498 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
4500 for (tree parm
= DECL_ARGUMENTS (current_function_decl
);
4501 parm
; parm
= DECL_CHAIN (parm
))
4503 if (TYPE_MODE (TREE_TYPE (parm
)) != TImode
)
4505 if (DECL_RTL_SET_P (parm
)
4506 && GET_MODE (DECL_RTL (parm
)) == V1TImode
)
4508 rtx r
= DECL_RTL (parm
);
4510 SET_DECL_RTL (parm
, gen_rtx_SUBREG (TImode
, r
, 0));
4512 if (DECL_INCOMING_RTL (parm
)
4513 && GET_MODE (DECL_INCOMING_RTL (parm
)) == V1TImode
)
4515 rtx r
= DECL_INCOMING_RTL (parm
);
4517 DECL_INCOMING_RTL (parm
) = gen_rtx_SUBREG (TImode
, r
, 0);
const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX && !TARGET_AVX512F
             && TARGET_VZEROUPPER && flag_expensive_optimizations
             && !optimize_size;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper
const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (timode_p == !!TARGET_64BIT
              && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector ();
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv
rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
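/* The STV pass is meant to be instantiated twice and parameterized via
   set_pass_param: once with timode_p == false (DImode chains, used for
   !TARGET_64BIT) and once with timode_p == true (TImode chains, used
   for TARGET_64BIT); the gate above selects the matching instance.  */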
/* Return true if a red-zone is in use.  */

bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
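/* The red zone is the 128-byte area below the stack pointer that the
   SysV x86-64 ABI guarantees will not be clobbered asynchronously; the
   Microsoft x64 ABI provides no such area, hence the extra check.  */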
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
                    int flags, int flags2,
                    const char *arch, const char *tune,
                    enum fpmath_unit fpmath, bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    HOST_WIDE_INT mask;		/* isa mask options */
  };
4647 /* This table is ordered so that options like -msse4.2 that imply other
4648 ISAs come first. Target string will be displayed in the same order. */
4649 static struct ix86_target_opts isa2_opts
[] =
4651 { "-mrdpid", OPTION_MASK_ISA_RDPID
},
4652 { "-msgx", OPTION_MASK_ISA_SGX
},
4653 { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW
},
4654 { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS
},
4655 { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ
}
4657 static struct ix86_target_opts isa_opts
[] =
4659 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI
},
4660 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA
},
4661 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL
},
4662 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW
},
4663 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ
},
4664 { "-mavx512er", OPTION_MASK_ISA_AVX512ER
},
4665 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF
},
4666 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD
},
4667 { "-mavx512f", OPTION_MASK_ISA_AVX512F
},
4668 { "-mavx2", OPTION_MASK_ISA_AVX2
},
4669 { "-mfma", OPTION_MASK_ISA_FMA
},
4670 { "-mxop", OPTION_MASK_ISA_XOP
},
4671 { "-mfma4", OPTION_MASK_ISA_FMA4
},
4672 { "-mf16c", OPTION_MASK_ISA_F16C
},
4673 { "-mavx", OPTION_MASK_ISA_AVX
},
4674 /* { "-msse4" OPTION_MASK_ISA_SSE4 }, */
4675 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
4676 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
4677 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
4678 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
4679 { "-msse3", OPTION_MASK_ISA_SSE3
},
4680 { "-maes", OPTION_MASK_ISA_AES
},
4681 { "-msha", OPTION_MASK_ISA_SHA
},
4682 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
4683 { "-msse2", OPTION_MASK_ISA_SSE2
},
4684 { "-msse", OPTION_MASK_ISA_SSE
},
4685 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
4686 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
4687 { "-mmmx", OPTION_MASK_ISA_MMX
},
4688 { "-mrtm", OPTION_MASK_ISA_RTM
},
4689 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
4690 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
4691 { "-madx", OPTION_MASK_ISA_ADX
},
4692 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1
},
4693 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT
},
4694 { "-mxsaves", OPTION_MASK_ISA_XSAVES
},
4695 { "-mxsavec", OPTION_MASK_ISA_XSAVEC
},
4696 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
4697 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
4698 { "-mabm", OPTION_MASK_ISA_ABM
},
4699 { "-mbmi", OPTION_MASK_ISA_BMI
},
4700 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
4701 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
4702 { "-mtbm", OPTION_MASK_ISA_TBM
},
4703 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
4704 { "-mcx16", OPTION_MASK_ISA_CX16
},
4705 { "-msahf", OPTION_MASK_ISA_SAHF
},
4706 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
4707 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
4708 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
4709 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
4710 { "-mmwaitx", OPTION_MASK_ISA_MWAITX
},
4711 { "-mclzero", OPTION_MASK_ISA_CLZERO
},
4712 { "-mpku", OPTION_MASK_ISA_PKU
},
4713 { "-mlwp", OPTION_MASK_ISA_LWP
},
4714 { "-mhle", OPTION_MASK_ISA_HLE
},
4715 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
4716 { "-mmpx", OPTION_MASK_ISA_MPX
},
4717 { "-mclwb", OPTION_MASK_ISA_CLWB
}
4721 static struct ix86_target_opts flag_opts
[] =
4723 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
4724 { "-mlong-double-128", MASK_LONG_DOUBLE_128
},
4725 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
4726 { "-m80387", MASK_80387
},
4727 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
4728 { "-malign-double", MASK_ALIGN_DOUBLE
},
4729 { "-mcld", MASK_CLD
},
4730 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
4731 { "-mieee-fp", MASK_IEEE_FP
},
4732 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
4733 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
4734 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
4735 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
4736 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
4737 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
4738 { "-mno-red-zone", MASK_NO_RED_ZONE
},
4739 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
4740 { "-mrecip", MASK_RECIP
},
4741 { "-mrtd", MASK_RTD
},
4742 { "-msseregparm", MASK_SSEREGPARM
},
4743 { "-mstack-arg-probe", MASK_STACK_PROBE
},
4744 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
4745 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
4746 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
4747 { "-mvzeroupper", MASK_VZEROUPPER
},
4748 { "-mstv", MASK_STV
},
4749 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
4750 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
4751 { "-mprefer-avx128", MASK_PREFER_AVX128
},
4752 { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES
}
4755 /* Additional flag options. */
4756 static struct ix86_target_opts flag2_opts
[] =
4758 { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY
},
4761 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (isa2_opts
)
4762 + ARRAY_SIZE (flag_opts
) + ARRAY_SIZE (flag2_opts
) + 6][2];
4765 char isa2_other
[40];
4766 char flags_other
[40];
4767 char flags2_other
[40];
4777 memset (opts
, '\0', sizeof (opts
));
4779 /* Add -march= option. */
4782 opts
[num
][0] = "-march=";
4783 opts
[num
++][1] = arch
;
4786 /* Add -mtune= option. */
4789 opts
[num
][0] = "-mtune=";
4790 opts
[num
++][1] = tune
;
4793 /* Add -m32/-m64/-mx32. */
4794 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
4796 if ((isa
& OPTION_MASK_ABI_64
) != 0)
4800 isa
&= ~ (OPTION_MASK_ISA_64BIT
4801 | OPTION_MASK_ABI_64
4802 | OPTION_MASK_ABI_X32
);
4806 opts
[num
++][0] = abi
;
4808 /* Pick out the options in isa2 options. */
4809 for (i
= 0; i
< ARRAY_SIZE (isa2_opts
); i
++)
4811 if ((isa2
& isa2_opts
[i
].mask
) != 0)
4813 opts
[num
++][0] = isa2_opts
[i
].option
;
4814 isa2
&= ~ isa2_opts
[i
].mask
;
4818 if (isa2
&& add_nl_p
)
4820 opts
[num
++][0] = isa2_other
;
4821 sprintf (isa2_other
, "(other isa2: %#" HOST_WIDE_INT_PRINT
"x)", isa2
);
4824 /* Pick out the options in isa options. */
4825 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
4827 if ((isa
& isa_opts
[i
].mask
) != 0)
4829 opts
[num
++][0] = isa_opts
[i
].option
;
4830 isa
&= ~ isa_opts
[i
].mask
;
4834 if (isa
&& add_nl_p
)
4836 opts
[num
++][0] = isa_other
;
4837 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)", isa
);
4840 /* Add flag options. */
4841 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
4843 if ((flags
& flag_opts
[i
].mask
) != 0)
4845 opts
[num
++][0] = flag_opts
[i
].option
;
4846 flags
&= ~ flag_opts
[i
].mask
;
4850 if (flags
&& add_nl_p
)
4852 opts
[num
++][0] = flags_other
;
4853 sprintf (flags_other
, "(other flags: %#x)", flags
);
4856 /* Add additional flag options. */
4857 for (i
= 0; i
< ARRAY_SIZE (flag2_opts
); i
++)
4859 if ((flags2
& flag2_opts
[i
].mask
) != 0)
4861 opts
[num
++][0] = flag2_opts
[i
].option
;
4862 flags2
&= ~ flag2_opts
[i
].mask
;
4866 if (flags2
&& add_nl_p
)
4868 opts
[num
++][0] = flags2_other
;
4869 sprintf (flags2_other
, "(other flags2: %#x)", flags2
);
4872 /* Add -fpmath= option. */
4875 opts
[num
][0] = "-mfpmath=";
4876 switch ((int) fpmath
)
4879 opts
[num
++][1] = "387";
4883 opts
[num
++][1] = "sse";
4886 case FPMATH_387
| FPMATH_SSE
:
4887 opts
[num
++][1] = "sse+387";
4899 gcc_assert (num
< ARRAY_SIZE (opts
));
4901 /* Size the string. */
4903 sep_len
= (add_nl_p
) ? 3 : 1;
4904 for (i
= 0; i
< num
; i
++)
4907 for (j
= 0; j
< 2; j
++)
4909 len
+= strlen (opts
[i
][j
]);
4912 /* Build the string. */
4913 ret
= ptr
= (char *) xmalloc (len
);
4916 for (i
= 0; i
< num
; i
++)
4920 for (j
= 0; j
< 2; j
++)
4921 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
4928 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
4936 for (j
= 0; j
< 2; j
++)
4939 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
4941 line_len
+= len2
[j
];
4946 gcc_assert (ret
+ len
>= ptr
);
/* Return true if profiling code should be emitted before the prologue,
   and false otherwise.
   Note: for x86 the "hotfix" (ms_hook_prologue) case is not supported
   here and is diagnosed elsewhere.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2,
                                   target_flags, ix86_target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Return true if T is one of the bytes we should avoid with
   -mmitigate-rop.  */

static bool
ix86_rop_should_change_byte_p (int t)
{
  return t == 0xc2 || t == 0xc3 || t == 0xca || t == 0xcb;
}
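/* 0xc3/0xc2 are the near RET / RET imm16 opcodes and 0xcb/0xca their
   far counterparts; -mmitigate-rop tries to keep these byte values out
   of immediates and displacements so that fewer unintended return
   gadgets appear in the instruction stream.  */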
static const char *stringop_alg_names[] = {
#define DEF_ENUM
#define DEF_ALG(alg, name) #name,
#include "stringop.def"
#undef DEF_ENUM
#undef DEF_ALG
};

/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
   The string is of the following form (or comma separated list of it):

     strategy_alg:max_size:[align|noalign]

   where the full size range for the strategy is either [0, max_size] or
   [min_size, max_size], in which min_size is the max_size + 1 of the
   preceding range.  The last size range must have max_size == -1.

   For example:

     -mmemcpy-strategy=libcall:-1:noalign

   this is equivalent to (for known size memcpy) -mstringop-strategy=libcall

     -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   This is to tell the compiler to use the following strategy for memset
   1) when the expected size is between [1, 16], use rep_8byte strategy;
   2) when the size is between [17, 2048], use vector_loop;
   3) when the size is > 2048, use libcall.  */
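/* As a further illustration,
   -mmemcpy-strategy=vector_loop:256:align,libcall:-1:noalign asks for
   an aligned vector loop for known sizes up to 256 bytes and a libcall
   for anything larger (algorithm names come from stringop.def).  */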
5023 struct stringop_size_range
5031 ix86_parse_stringop_strategy_string (char *strategy_str
, bool is_memset
)
5033 const struct stringop_algs
*default_algs
;
5034 stringop_size_range input_ranges
[MAX_STRINGOP_ALGS
];
5035 char *curr_range_str
, *next_range_str
;
5036 const char *opt
= is_memset
? "-mmemset_strategy=" : "-mmemcpy_strategy=";
5040 default_algs
= &ix86_cost
->memset
[TARGET_64BIT
!= 0];
5042 default_algs
= &ix86_cost
->memcpy
[TARGET_64BIT
!= 0];
5044 curr_range_str
= strategy_str
;
5051 next_range_str
= strchr (curr_range_str
, ',');
5053 *next_range_str
++ = '\0';
5055 if (3 != sscanf (curr_range_str
, "%20[^:]:%d:%10s",
5056 alg_name
, &maxs
, align
))
5058 error ("wrong argument %qs to option %qs", curr_range_str
, opt
);
5062 if (n
> 0 && (maxs
< (input_ranges
[n
- 1].max
+ 1) && maxs
!= -1))
5064 error ("size ranges of option %qs should be increasing", opt
);
5068 for (i
= 0; i
< last_alg
; i
++)
5069 if (!strcmp (alg_name
, stringop_alg_names
[i
]))
5074 error ("wrong strategy name %qs specified for option %qs",
5077 auto_vec
<const char *> candidates
;
5078 for (i
= 0; i
< last_alg
; i
++)
5079 if ((stringop_alg
) i
!= rep_prefix_8_byte
|| TARGET_64BIT
)
5080 candidates
.safe_push (stringop_alg_names
[i
]);
5084 = candidates_list_and_hint (alg_name
, s
, candidates
);
5086 inform (input_location
,
5087 "valid arguments to %qs are: %s; did you mean %qs?",
5090 inform (input_location
, "valid arguments to %qs are: %s",
5096 if ((stringop_alg
) i
== rep_prefix_8_byte
5099 /* rep; movq isn't available in 32-bit code. */
5100 error ("strategy name %qs specified for option %qs "
5101 "not supported for 32-bit code", alg_name
, opt
);
5105 input_ranges
[n
].max
= maxs
;
5106 input_ranges
[n
].alg
= (stringop_alg
) i
;
5107 if (!strcmp (align
, "align"))
5108 input_ranges
[n
].noalign
= false;
5109 else if (!strcmp (align
, "noalign"))
5110 input_ranges
[n
].noalign
= true;
5113 error ("unknown alignment %qs specified for option %qs", align
, opt
);
5117 curr_range_str
= next_range_str
;
5119 while (curr_range_str
);
5121 if (input_ranges
[n
- 1].max
!= -1)
5123 error ("the max value for the last size range should be -1"
5124 " for option %qs", opt
);
5128 if (n
> MAX_STRINGOP_ALGS
)
5130 error ("too many size ranges specified in option %qs", opt
);
5134 /* Now override the default algs array. */
5135 for (i
= 0; i
< n
; i
++)
5137 *const_cast<int *>(&default_algs
->size
[i
].max
) = input_ranges
[i
].max
;
5138 *const_cast<stringop_alg
*>(&default_algs
->size
[i
].alg
)
5139 = input_ranges
[i
].alg
;
5140 *const_cast<int *>(&default_algs
->size
[i
].noalign
)
5141 = input_ranges
[i
].noalign
;
5146 /* parse -mtune-ctrl= option. When DUMP is true,
5147 print the features that are explicitly set. */
5150 parse_mtune_ctrl_str (bool dump
)
5152 if (!ix86_tune_ctrl_string
)
5155 char *next_feature_string
= NULL
;
5156 char *curr_feature_string
= xstrdup (ix86_tune_ctrl_string
);
5157 char *orig
= curr_feature_string
;
5163 next_feature_string
= strchr (curr_feature_string
, ',');
5164 if (next_feature_string
)
5165 *next_feature_string
++ = '\0';
5166 if (*curr_feature_string
== '^')
5168 curr_feature_string
++;
5171 for (i
= 0; i
< X86_TUNE_LAST
; i
++)
5173 if (!strcmp (curr_feature_string
, ix86_tune_feature_names
[i
]))
5175 ix86_tune_features
[i
] = !clear
;
5177 fprintf (stderr
, "Explicitly %s feature %s\n",
5178 clear
? "clear" : "set", ix86_tune_feature_names
[i
]);
5182 if (i
== X86_TUNE_LAST
)
5183 error ("Unknown parameter to option -mtune-ctrl: %s",
5184 clear
? curr_feature_string
- 1 : curr_feature_string
);
5185 curr_feature_string
= next_feature_string
;
5187 while (curr_feature_string
);
/* Helper function to set ix86_tune_features.  IX86_TUNE is the
   processor type.  */

static void
set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
{
  unsigned int ix86_tune_mask = 1u << ix86_tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    {
      if (ix86_tune_no_default)
        ix86_tune_features[i] = 0;
      else
        ix86_tune_features[i]
          = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }

  if (dump)
    {
      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
      for (i = 0; i < X86_TUNE_LAST; i++)
        fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
                 ix86_tune_features[i] ? "on" : "off");
    }

  parse_mtune_ctrl_str (dump);
}
/* Default align_* from the processor table.  */

static void
ix86_default_align (struct gcc_options *opts)
{
  if (opts->x_align_loops == 0)
    {
      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (opts->x_align_jumps == 0)
    {
      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (opts->x_align_functions == 0)
    {
      opts->x_align_functions = processor_target_table[ix86_tune].align_func;
    }
}

/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook.  */

static void
ix86_override_options_after_change (void)
{
  ix86_default_align (&global_options);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  Return true if there's an error related to march
   option.  */

static bool
ix86_option_override_internal (bool main_args_p,
                               struct gcc_options *opts,
                               struct gcc_options *opts_set)
{
  int i;
  unsigned int ix86_arch_mask;
  const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
5263 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
5264 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
5265 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
5266 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
5267 #define PTA_AES (HOST_WIDE_INT_1 << 4)
5268 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
5269 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
5270 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
5271 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
5272 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
5273 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
5274 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
5275 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
5276 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
5277 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
5278 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
5279 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
5280 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
5281 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
5282 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
5283 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
5284 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
5285 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
5286 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
5287 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
5288 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
5289 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
5290 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
5291 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
5292 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
5293 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
5294 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
5295 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
5296 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
5297 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
5298 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
5299 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
5300 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
5301 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
5302 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
5303 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
5304 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
5305 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
5306 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
5307 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
5308 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
5309 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
5310 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
5311 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
5312 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
5313 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
5314 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
5315 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
5316 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
5317 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
5318 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
5319 #define PTA_MWAITX (HOST_WIDE_INT_1 << 56)
5320 #define PTA_CLZERO (HOST_WIDE_INT_1 << 57)
5321 #define PTA_NO_80387 (HOST_WIDE_INT_1 << 58)
5322 #define PTA_PKU (HOST_WIDE_INT_1 << 59)
5323 #define PTA_AVX5124VNNIW (HOST_WIDE_INT_1 << 60)
5324 #define PTA_AVX5124FMAPS (HOST_WIDE_INT_1 << 61)
5325 #define PTA_AVX512VPOPCNTDQ (HOST_WIDE_INT_1 << 62)
5326 #define PTA_SGX (HOST_WIDE_INT_1 << 63)
#define PTA_CORE2 \
  (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
   | PTA_CX16 | PTA_FXSR)
#define PTA_NEHALEM \
  (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
#define PTA_WESTMERE \
  (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
#define PTA_SANDYBRIDGE \
  (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
#define PTA_IVYBRIDGE \
  (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
#define PTA_HASWELL \
  (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
   | PTA_FMA | PTA_MOVBE | PTA_HLE)
#define PTA_BROADWELL \
  (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
#define PTA_SKYLAKE \
  (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
#define PTA_SKYLAKE_AVX512 \
  (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
   | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
#define PTA_KNL \
  (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
#define PTA_BONNELL \
  (PTA_CORE2 | PTA_MOVBE)
#define PTA_SILVERMONT \
  (PTA_WESTMERE | PTA_MOVBE)

/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"lakemont", PROCESSOR_LAKEMONT, CPU_PENTIUM, PTA_NO_80387},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"samuel-2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_FXSR},
      {"nehemiah", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_FXSR},
      {"c7", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"esther", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_FXSR},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
      {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
      {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
      {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
      {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
      {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
        PTA_SANDYBRIDGE},
      {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
        PTA_SANDYBRIDGE},
      {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
        PTA_IVYBRIDGE},
      {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
        PTA_IVYBRIDGE},
      {"haswell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
      {"core-avx2", PROCESSOR_HASWELL, CPU_HASWELL, PTA_HASWELL},
      {"broadwell", PROCESSOR_HASWELL, CPU_HASWELL, PTA_BROADWELL},
      {"skylake", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE},
      {"skylake-avx512", PROCESSOR_HASWELL, CPU_HASWELL, PTA_SKYLAKE_AVX512},
      {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
      {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
      {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
      {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
      {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
      {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR},
      {"x86-64", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"eden-x2", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
      {"nano", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_FXSR},
      {"nano-1000", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_FXSR},
      {"nano-2000", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_FXSR},
      {"nano-3000", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"nano-x2", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"eden-x4", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"nano-x4", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR},
      {"k8", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"opteron", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon64", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_FXSR},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
        | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
        | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
        | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
        | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
        | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
      {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
        | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
        | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
        | PTA_XSAVEOPT | PTA_FSGSBASE},
      {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
        | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
        | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
        | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
        | PTA_MOVBE | PTA_MWAITX},
      {"znver1", PROCESSOR_ZNVER1, CPU_ZNVER1,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
        | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
        | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
        | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
        | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES
        | PTA_SHA | PTA_LZCNT | PTA_POPCNT},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
        | PTA_FXSR | PTA_XSAVE},
      {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
        | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
        | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
      {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
        PTA_64BIT
        | PTA_HLE /* flags are only used for -march switch.  */ },
    };
  /* -mrecip options.  */
  static struct
    {
      const char *string;           /* option name */
      unsigned int mask;            /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);
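  /* pta_size bounds the linear searches over processor_alias_table that
     resolve the -march= and -mtune= strings further below.  */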
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
	 -mx32.  */
      if (TARGET_X32_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
	 on and OPTION_MASK_ABI_64 is off.  We turn off
	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
      if (TARGET_LP64_P (opts->x_ix86_isa_flags)
	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
      && TARGET_IAMCU_P (opts->x_target_flags))
    sorry ("Intel MCU psABI isn%'t supported in %s mode",
	   TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");

  if (TARGET_X32_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_64 for TARGET_X32.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
				| OPTION_MASK_ABI_X32
				| OPTION_MASK_ABI_64);
  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
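  /* With the ABI bits settled and the subtarget hooks run, the code below
     first defaults -fPIC for 64-bit Mach-O and then canonicalizes the
     -mtune=/-march= strings ("native" is read as "generic", and the
     deprecated "x86-64" tune value draws a warning) before the
     alias-table lookups.  */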
5619 /* -fPIC is the default for x86_64. */
5620 if (TARGET_MACHO
&& TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5621 opts
->x_flag_pic
= 2;
5623 /* Need to check -mtune=generic first. */
5624 if (opts
->x_ix86_tune_string
)
5626 /* As special support for cross compilers we read -mtune=native
5627 as -mtune=generic. With native compilers we won't see the
5628 -mtune=native, as it was changed by the driver. */
5629 if (!strcmp (opts
->x_ix86_tune_string
, "native"))
5631 opts
->x_ix86_tune_string
= "generic";
5633 else if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
5634 warning (OPT_Wdeprecated
,
5636 ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> "
5637 "or %<-mtune=generic%> instead as appropriate")
5638 : G_("%<target(\"tune=x86-64\")%> is deprecated; use "
5639 "%<target(\"tune=k8\")%> or %<target(\"tune=generic\")%>"
5640 " instead as appropriate"));
5644 if (opts
->x_ix86_arch_string
)
5645 opts
->x_ix86_tune_string
= opts
->x_ix86_arch_string
;
5646 if (!opts
->x_ix86_tune_string
)
5648 opts
->x_ix86_tune_string
5649 = processor_target_table
[TARGET_CPU_DEFAULT
].name
;
5650 ix86_tune_defaulted
= 1;
5653 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
5654 or defaulted. We need to use a sensible tune option. */
5655 if (!strcmp (opts
->x_ix86_tune_string
, "x86-64"))
5657 opts
->x_ix86_tune_string
= "generic";
5661 if (opts
->x_ix86_stringop_alg
== rep_prefix_8_byte
5662 && !TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5664 /* rep; movq isn't available in 32-bit code. */
5665 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
5666 opts
->x_ix86_stringop_alg
= no_stringop
;
5669 if (!opts
->x_ix86_arch_string
)
5670 opts
->x_ix86_arch_string
5671 = TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
5672 ? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
5674 ix86_arch_specified
= 1;
5676 if (opts_set
->x_ix86_pmode
)
5678 if ((TARGET_LP64_P (opts
->x_ix86_isa_flags
)
5679 && opts
->x_ix86_pmode
== PMODE_SI
)
5680 || (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
5681 && opts
->x_ix86_pmode
== PMODE_DI
))
5682 error ("address mode %qs not supported in the %s bit mode",
5683 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "short" : "long",
5684 TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? "64" : "32");
5687 opts
->x_ix86_pmode
= TARGET_LP64_P (opts
->x_ix86_isa_flags
)
5688 ? PMODE_DI
: PMODE_SI
;
5690 if (!opts_set
->x_ix86_abi
)
5691 opts
->x_ix86_abi
= DEFAULT_ABI
;
5693 if (opts
->x_ix86_abi
== MS_ABI
&& TARGET_X32_P (opts
->x_ix86_isa_flags
))
5694 error ("-mabi=ms not supported with X32 ABI");
5695 gcc_assert (opts
->x_ix86_abi
== SYSV_ABI
|| opts
->x_ix86_abi
== MS_ABI
);
5697 /* For targets using ms ABI enable ms-extensions, if not
5698 explicit turned off. For non-ms ABI we turn off this
5700 if (!opts_set
->x_flag_ms_extensions
)
5701 opts
->x_flag_ms_extensions
= (MS_ABI
== DEFAULT_ABI
);
5703 if (opts_set
->x_ix86_cmodel
)
5705 switch (opts
->x_ix86_cmodel
)
5709 if (opts
->x_flag_pic
)
5710 opts
->x_ix86_cmodel
= CM_SMALL_PIC
;
5711 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5712 error ("code model %qs not supported in the %s bit mode",
5718 if (opts
->x_flag_pic
)
5719 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
5720 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5721 error ("code model %qs not supported in the %s bit mode",
5723 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
5724 error ("code model %qs not supported in x32 mode",
5730 if (opts
->x_flag_pic
)
5731 opts
->x_ix86_cmodel
= CM_LARGE_PIC
;
5732 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5733 error ("code model %qs not supported in the %s bit mode",
5735 else if (TARGET_X32_P (opts
->x_ix86_isa_flags
))
5736 error ("code model %qs not supported in x32 mode",
5741 if (opts
->x_flag_pic
)
5742 error ("code model %s does not support PIC mode", "32");
5743 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5744 error ("code model %qs not supported in the %s bit mode",
5749 if (opts
->x_flag_pic
)
5751 error ("code model %s does not support PIC mode", "kernel");
5752 opts
->x_ix86_cmodel
= CM_32
;
5754 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5755 error ("code model %qs not supported in the %s bit mode",
5765 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
5766 use of rip-relative addressing. This eliminates fixups that
5767 would otherwise be needed if this object is to be placed in a
5768 DLL, and is essentially just as efficient as direct addressing. */
5769 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
5770 && (TARGET_RDOS
|| TARGET_PECOFF
))
5771 opts
->x_ix86_cmodel
= CM_MEDIUM_PIC
, opts
->x_flag_pic
= 1;
5772 else if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
5773 opts
->x_ix86_cmodel
= opts
->x_flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
5775 opts
->x_ix86_cmodel
= CM_32
;
5777 if (TARGET_MACHO
&& opts
->x_ix86_asm_dialect
== ASM_INTEL
)
5779 error ("-masm=intel not supported in this configuration");
5780 opts
->x_ix86_asm_dialect
= ASM_ATT
;
5782 if ((TARGET_64BIT_P (opts
->x_ix86_isa_flags
) != 0)
5783 != ((opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
5784 sorry ("%i-bit mode not compiled in",
5785 (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
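  /* Resolve -march=: find the matching processor_alias_table entry and,
     for every PTA_* bit it carries, enable the corresponding
     OPTION_MASK_ISA_* flag unless the user already set that ISA flag
     explicitly (tracked in x_ix86_isa_flags_explicit).  */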
5787 for (i
= 0; i
< pta_size
; i
++)
5788 if (! strcmp (opts
->x_ix86_arch_string
, processor_alias_table
[i
].name
))
5790 if (!strcmp (opts
->x_ix86_arch_string
, "generic"))
5793 ? G_("%<generic%> CPU can be used only for %<-mtune=%> "
5795 : G_("%<generic%> CPU can be used only for "
5796 "%<target(\"tune=\")%> attribute"));
5799 else if (!strcmp (opts
->x_ix86_arch_string
, "intel"))
5802 ? G_("%<intel%> CPU can be used only for %<-mtune=%> "
5804 : G_("%<intel%> CPU can be used only for "
5805 "%<target(\"tune=\")%> attribute"));
5809 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
5810 && !(processor_alias_table
[i
].flags
& PTA_64BIT
))
5812 error ("CPU you selected does not support x86-64 "
5817 ix86_schedule
= processor_alias_table
[i
].schedule
;
5818 ix86_arch
= processor_alias_table
[i
].processor
;
5819 /* Default cpu tuning to the architecture. */
5820 ix86_tune
= ix86_arch
;
5822 if (processor_alias_table
[i
].flags
& PTA_MMX
5823 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
5824 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
5825 if (processor_alias_table
[i
].flags
& PTA_3DNOW
5826 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
5827 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
5828 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
5829 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
5830 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
5831 if (processor_alias_table
[i
].flags
& PTA_SSE
5832 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
5833 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
5834 if (processor_alias_table
[i
].flags
& PTA_SSE2
5835 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
5836 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
5837 if (processor_alias_table
[i
].flags
& PTA_SSE3
5838 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
5839 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
5840 if (processor_alias_table
[i
].flags
& PTA_SSSE3
5841 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
5842 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
5843 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
5844 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
5845 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
5846 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
5847 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
5848 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
5849 if (processor_alias_table
[i
].flags
& PTA_AVX
5850 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
5851 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
5852 if (processor_alias_table
[i
].flags
& PTA_AVX2
5853 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
5854 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
5855 if (processor_alias_table
[i
].flags
& PTA_FMA
5856 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
5857 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
5858 if (processor_alias_table
[i
].flags
& PTA_SSE4A
5859 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
5860 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
5861 if (processor_alias_table
[i
].flags
& PTA_FMA4
5862 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
5863 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
5864 if (processor_alias_table
[i
].flags
& PTA_XOP
5865 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
5866 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
5867 if (processor_alias_table
[i
].flags
& PTA_LWP
5868 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
5869 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
5870 if (processor_alias_table
[i
].flags
& PTA_ABM
5871 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
5872 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
5873 if (processor_alias_table
[i
].flags
& PTA_BMI
5874 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
5875 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
5876 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
5877 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
5878 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
5879 if (processor_alias_table
[i
].flags
& PTA_TBM
5880 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
5881 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
5882 if (processor_alias_table
[i
].flags
& PTA_BMI2
5883 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
5884 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
5885 if (processor_alias_table
[i
].flags
& PTA_CX16
5886 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
5887 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
5888 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
5889 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
5890 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
5891 if (!(TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
5892 && (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
5893 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
5894 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
5895 if (processor_alias_table
[i
].flags
& PTA_MOVBE
5896 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
5897 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
5898 if (processor_alias_table
[i
].flags
& PTA_AES
5899 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
5900 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
5901 if (processor_alias_table
[i
].flags
& PTA_SHA
5902 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SHA
))
5903 ix86_isa_flags
|= OPTION_MASK_ISA_SHA
;
5904 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
5905 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
5906 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
5907 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
5908 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
5909 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
5910 if (processor_alias_table
[i
].flags
& PTA_RDRND
5911 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
5912 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
5913 if (processor_alias_table
[i
].flags
& PTA_F16C
5914 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
5915 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
5916 if (processor_alias_table
[i
].flags
& PTA_RTM
5917 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
5918 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
5919 if (processor_alias_table
[i
].flags
& PTA_HLE
5920 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
5921 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
5922 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
5923 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
5924 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
5925 if (processor_alias_table
[i
].flags
& PTA_RDSEED
5926 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
5927 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
5928 if (processor_alias_table
[i
].flags
& PTA_ADX
5929 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
5930 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
5931 if (processor_alias_table
[i
].flags
& PTA_FXSR
5932 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
5933 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
5934 if (processor_alias_table
[i
].flags
& PTA_XSAVE
5935 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
5936 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
5937 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
5938 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
5939 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
5940 if (processor_alias_table
[i
].flags
& PTA_AVX512F
5941 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512F
))
5942 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512F
;
5943 if (processor_alias_table
[i
].flags
& PTA_AVX512ER
5944 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512ER
))
5945 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512ER
;
5946 if (processor_alias_table
[i
].flags
& PTA_AVX512PF
5947 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512PF
))
5948 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512PF
;
5949 if (processor_alias_table
[i
].flags
& PTA_AVX512CD
5950 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512CD
))
5951 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512CD
;
5952 if (processor_alias_table
[i
].flags
& PTA_PREFETCHWT1
5953 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PREFETCHWT1
))
5954 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PREFETCHWT1
;
5955 if (processor_alias_table
[i
].flags
& PTA_CLWB
5956 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CLWB
))
5957 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CLWB
;
5958 if (processor_alias_table
[i
].flags
& PTA_CLFLUSHOPT
5959 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CLFLUSHOPT
))
5960 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CLFLUSHOPT
;
5961 if (processor_alias_table
[i
].flags
& PTA_CLZERO
5962 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_CLZERO
))
5963 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_CLZERO
;
5964 if (processor_alias_table
[i
].flags
& PTA_XSAVEC
5965 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEC
))
5966 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEC
;
5967 if (processor_alias_table
[i
].flags
& PTA_XSAVES
5968 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVES
))
5969 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_XSAVES
;
5970 if (processor_alias_table
[i
].flags
& PTA_AVX512DQ
5971 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512DQ
))
5972 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512DQ
;
5973 if (processor_alias_table
[i
].flags
& PTA_AVX512BW
5974 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512BW
))
5975 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512BW
;
5976 if (processor_alias_table
[i
].flags
& PTA_AVX512VL
5977 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512VL
))
5978 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512VL
;
5979 if (processor_alias_table
[i
].flags
& PTA_MPX
5980 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MPX
))
5981 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MPX
;
5982 if (processor_alias_table
[i
].flags
& PTA_AVX512VBMI
5983 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512VBMI
))
5984 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512VBMI
;
5985 if (processor_alias_table
[i
].flags
& PTA_AVX512IFMA
5986 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512IFMA
))
5987 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_AVX512IFMA
;
5989 if (processor_alias_table
[i
].flags
& PTA_AVX5124VNNIW
5990 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX5124VNNIW
))
5991 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX5124VNNIW
;
5992 if (processor_alias_table
[i
].flags
& PTA_AVX5124FMAPS
5993 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX5124FMAPS
))
5994 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX5124FMAPS
;
5995 if (processor_alias_table
[i
].flags
& PTA_AVX512VPOPCNTDQ
5996 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_AVX512VPOPCNTDQ
))
5997 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_AVX512VPOPCNTDQ
;
5998 if (processor_alias_table
[i
].flags
& PTA_SGX
5999 && !(opts
->x_ix86_isa_flags2_explicit
& OPTION_MASK_ISA_SGX
))
6000 opts
->x_ix86_isa_flags2
|= OPTION_MASK_ISA_SGX
;
6002 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
6003 x86_prefetch_sse
= true;
6004 if (processor_alias_table
[i
].flags
& PTA_MWAITX
6005 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_MWAITX
))
6006 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_MWAITX
;
6007 if (processor_alias_table
[i
].flags
& PTA_PKU
6008 && !(opts
->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_PKU
))
6009 opts
->x_ix86_isa_flags
|= OPTION_MASK_ISA_PKU
;
6011 /* Don't enable x87 instructions if only
6012 general registers are allowed. */
6013 if (!(opts_set
->x_ix86_target_flags
& OPTION_MASK_GENERAL_REGS_ONLY
)
6014 && !(opts_set
->x_target_flags
& MASK_80387
))
6016 if (processor_alias_table
[i
].flags
& PTA_NO_80387
)
6017 opts
->x_target_flags
&= ~MASK_80387
;
6019 opts
->x_target_flags
|= MASK_80387
;
6024 if (TARGET_X32
&& (opts
->x_ix86_isa_flags
& OPTION_MASK_ISA_MPX
))
6025 error ("Intel MPX does not support x32");
6027 if (TARGET_X32
&& (ix86_isa_flags
& OPTION_MASK_ISA_MPX
))
6028 error ("Intel MPX does not support x32");
6033 ? G_("bad value (%qs) for %<-march=%> switch")
6034 : G_("bad value (%qs) for %<target(\"arch=\")%> attribute"),
6035 opts
->x_ix86_arch_string
);
6037 auto_vec
<const char *> candidates
;
6038 for (i
= 0; i
< pta_size
; i
++)
6039 if (strcmp (processor_alias_table
[i
].name
, "generic")
6040 && strcmp (processor_alias_table
[i
].name
, "intel")
6041 && (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
6042 || (processor_alias_table
[i
].flags
& PTA_64BIT
)))
6043 candidates
.safe_push (processor_alias_table
[i
].name
);
6047 = candidates_list_and_hint (opts
->x_ix86_arch_string
, s
, candidates
);
6049 inform (input_location
,
6051 ? G_("valid arguments to %<-march=%> switch are: "
6052 "%s; did you mean %qs?")
6053 : G_("valid arguments to %<target(\"arch=\")%> attribute are: "
6054 "%s; did you mean %qs?"), s
, hint
);
6056 inform (input_location
,
6058 ? G_("valid arguments to %<-march=%> switch are: %s")
6059 : G_("valid arguments to %<target(\"arch=\")%> attribute "
6064 ix86_arch_mask
= 1u << ix86_arch
;
6065 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
6066 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
6068 for (i
= 0; i
< pta_size
; i
++)
6069 if (! strcmp (opts
->x_ix86_tune_string
, processor_alias_table
[i
].name
))
6071 ix86_schedule
= processor_alias_table
[i
].schedule
;
6072 ix86_tune
= processor_alias_table
[i
].processor
;
6073 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6075 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
6077 if (ix86_tune_defaulted
)
6079 opts
->x_ix86_tune_string
= "x86-64";
6080 for (i
= 0; i
< pta_size
; i
++)
6081 if (! strcmp (opts
->x_ix86_tune_string
,
6082 processor_alias_table
[i
].name
))
6084 ix86_schedule
= processor_alias_table
[i
].schedule
;
6085 ix86_tune
= processor_alias_table
[i
].processor
;
6088 error ("CPU you selected does not support x86-64 "
6092 /* Intel CPUs have always interpreted SSE prefetch instructions as
6093 NOPs; so, we can enable SSE prefetch instructions even when
6094 -mtune (rather than -march) points us to a processor that has them.
6095 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
6096 higher processors. */
6098 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
6099 x86_prefetch_sse
= true;
6103 if (ix86_tune_specified
&& i
== pta_size
)
6106 ? G_("bad value (%qs) for %<-mtune=%> switch")
6107 : G_("bad value (%qs) for %<target(\"tune=\")%> attribute"),
6108 opts
->x_ix86_tune_string
);
6110 auto_vec
<const char *> candidates
;
6111 for (i
= 0; i
< pta_size
; i
++)
6112 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
)
6113 || (processor_alias_table
[i
].flags
& PTA_64BIT
))
6114 candidates
.safe_push (processor_alias_table
[i
].name
);
6118 = candidates_list_and_hint (opts
->x_ix86_tune_string
, s
, candidates
);
6120 inform (input_location
,
6122 ? G_("valid arguments to %<-mtune=%> switch are: "
6123 "%s; did you mean %qs?")
6124 : G_("valid arguments to %<target(\"tune=\")%> attribute are: "
6125 "%s; did you mean %qs?"), s
, hint
);
6127 inform (input_location
,
6129 ? G_("valid arguments to %<-mtune=%> switch are: %s")
6130 : G_("valid arguments to %<target(\"tune=\")%> attribute "
6135 set_ix86_tune_features (ix86_tune
, opts
->x_ix86_dump_tunes
);
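  /* ix86_arch, ix86_tune and ix86_tune_features are settled at this
     point; the defaults computed below (frame pointer omission, unwind
     tables, stack boundaries, cost tables) are derived from them.  */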
#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
6145 /* Set the default values for switches whose default depends on TARGET_64BIT
6146 in case they weren't overwritten by command line options. */
6147 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6149 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
6150 opts
->x_flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
6151 if (opts
->x_flag_asynchronous_unwind_tables
6152 && !opts_set
->x_flag_unwind_tables
6153 && TARGET_64BIT_MS_ABI
)
6154 opts
->x_flag_unwind_tables
= 1;
6155 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
6156 opts
->x_flag_unwind_tables
6157 = opts
->x_flag_asynchronous_unwind_tables
= 1;
6158 if (opts
->x_flag_pcc_struct_return
== 2)
6159 opts
->x_flag_pcc_struct_return
= 0;
6163 if (opts
->x_optimize
>= 1 && !opts_set
->x_flag_omit_frame_pointer
)
6164 opts
->x_flag_omit_frame_pointer
6165 = !(USE_IX86_FRAME_POINTER
|| opts
->x_optimize_size
);
6166 if (opts
->x_flag_asynchronous_unwind_tables
== 2)
6167 opts
->x_flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
6168 if (opts
->x_flag_pcc_struct_return
== 2)
6170 /* Intel MCU psABI specifies that -freg-struct-return should
6171 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
6172 we check -miamcu so that -freg-struct-return is always
6173 turned on if -miamcu is used. */
6174 if (TARGET_IAMCU_P (opts
->x_target_flags
))
6175 opts
->x_flag_pcc_struct_return
= 0;
6177 opts
->x_flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
6181 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
6182 /* TODO: ix86_cost should be chosen at instruction or function granuality
6183 so for cold code we use size_cost even in !optimize_size compilation. */
6184 if (opts
->x_optimize_size
)
6185 ix86_cost
= &ix86_size_cost
;
6187 ix86_cost
= ix86_tune_cost
;
6189 /* Arrange to set up i386_stack_locals for all functions. */
6190 init_machine_status
= ix86_init_machine_status
;
6192 /* Validate -mregparm= value. */
6193 if (opts_set
->x_ix86_regparm
)
6195 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6196 warning (0, "-mregparm is ignored in 64-bit mode");
6197 else if (TARGET_IAMCU_P (opts
->x_target_flags
))
6198 warning (0, "-mregparm is ignored for Intel MCU psABI");
6199 if (opts
->x_ix86_regparm
> REGPARM_MAX
)
6201 error ("-mregparm=%d is not between 0 and %d",
6202 opts
->x_ix86_regparm
, REGPARM_MAX
);
6203 opts
->x_ix86_regparm
= 0;
6206 if (TARGET_IAMCU_P (opts
->x_target_flags
)
6207 || TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6208 opts
->x_ix86_regparm
= REGPARM_MAX
;
6210 /* Default align_* from the processor table. */
6211 ix86_default_align (opts
);
6213 /* Provide default for -mbranch-cost= value. */
6214 if (!opts_set
->x_ix86_branch_cost
)
6215 opts
->x_ix86_branch_cost
= ix86_tune_cost
->branch_cost
;
6217 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6219 opts
->x_target_flags
6220 |= TARGET_SUBTARGET64_DEFAULT
& ~opts_set
->x_target_flags
;
6222 /* Enable by default the SSE and MMX builtins. Do allow the user to
6223 explicitly disable any of these. In particular, disabling SSE and
6224 MMX for kernel code is extremely useful. */
6225 if (!ix86_arch_specified
)
6226 opts
->x_ix86_isa_flags
6227 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
6228 | TARGET_SUBTARGET64_ISA_DEFAULT
)
6229 & ~opts
->x_ix86_isa_flags_explicit
);
6231 if (TARGET_RTD_P (opts
->x_target_flags
))
6234 ? G_("%<-mrtd%> is ignored in 64bit mode")
6235 : G_("%<target(\"rtd\")%> is ignored in 64bit mode"));
6239 opts
->x_target_flags
6240 |= TARGET_SUBTARGET32_DEFAULT
& ~opts_set
->x_target_flags
;
6242 if (!ix86_arch_specified
)
6243 opts
->x_ix86_isa_flags
6244 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~opts
->x_ix86_isa_flags_explicit
;
6246 /* i386 ABI does not specify red zone. It still makes sense to use it
6247 when programmer takes care to stack from being destroyed. */
6248 if (!(opts_set
->x_target_flags
& MASK_NO_RED_ZONE
))
6249 opts
->x_target_flags
|= MASK_NO_RED_ZONE
;
6252 /* Keep nonleaf frame pointers. */
6253 if (opts
->x_flag_omit_frame_pointer
)
6254 opts
->x_target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
6255 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts
->x_target_flags
))
6256 opts
->x_flag_omit_frame_pointer
= 1;
6258 /* If we're doing fast math, we don't care about comparison order
6259 wrt NaNs. This lets us use a shorter comparison sequence. */
6260 if (opts
->x_flag_finite_math_only
)
6261 opts
->x_target_flags
&= ~MASK_IEEE_FP
;
6263 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
6264 since the insns won't need emulation. */
6265 if (ix86_tune_features
[X86_TUNE_ALWAYS_FANCY_MATH_387
])
6266 opts
->x_target_flags
&= ~MASK_NO_FANCY_MATH_387
;
6268 /* Likewise, if the target doesn't have a 387, or we've specified
6269 software floating point, don't use 387 inline intrinsics. */
6270 if (!TARGET_80387_P (opts
->x_target_flags
))
6271 opts
->x_target_flags
|= MASK_NO_FANCY_MATH_387
;
6273 /* Turn on MMX builtins for -msse. */
6274 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
))
6275 opts
->x_ix86_isa_flags
6276 |= OPTION_MASK_ISA_MMX
& ~opts
->x_ix86_isa_flags_explicit
;
6278 /* Enable SSE prefetch. */
6279 if (TARGET_SSE_P (opts
->x_ix86_isa_flags
)
6280 || (TARGET_PRFCHW_P (opts
->x_ix86_isa_flags
)
6281 && !TARGET_3DNOW_P (opts
->x_ix86_isa_flags
))
6282 || TARGET_PREFETCHWT1_P (opts
->x_ix86_isa_flags
))
6283 x86_prefetch_sse
= true;
6285 /* Enable popcnt instruction for -msse4.2 or -mabm. */
6286 if (TARGET_SSE4_2_P (opts
->x_ix86_isa_flags
)
6287 || TARGET_ABM_P (opts
->x_ix86_isa_flags
))
6288 opts
->x_ix86_isa_flags
6289 |= OPTION_MASK_ISA_POPCNT
& ~opts
->x_ix86_isa_flags_explicit
;
6291 /* Enable lzcnt instruction for -mabm. */
6292 if (TARGET_ABM_P(opts
->x_ix86_isa_flags
))
6293 opts
->x_ix86_isa_flags
6294 |= OPTION_MASK_ISA_LZCNT
& ~opts
->x_ix86_isa_flags_explicit
;
6296 /* Disable BMI, BMI2 and TBM instructions for -m16. */
6297 if (TARGET_16BIT_P(opts
->x_ix86_isa_flags
))
6298 opts
->x_ix86_isa_flags
6299 &= ~((OPTION_MASK_ISA_BMI
| OPTION_MASK_ISA_BMI2
| OPTION_MASK_ISA_TBM
)
6300 & ~opts
->x_ix86_isa_flags_explicit
);
6302 /* Validate -mpreferred-stack-boundary= value or default it to
6303 PREFERRED_STACK_BOUNDARY_DEFAULT. */
6304 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
6305 if (opts_set
->x_ix86_preferred_stack_boundary_arg
)
6307 int min
= TARGET_64BIT_P (opts
->x_ix86_isa_flags
)? 3 : 2;
6308 int max
= TARGET_SEH
? 4 : 12;
6310 if (opts
->x_ix86_preferred_stack_boundary_arg
< min
6311 || opts
->x_ix86_preferred_stack_boundary_arg
> max
)
6314 error ("-mpreferred-stack-boundary is not supported "
6317 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
6318 opts
->x_ix86_preferred_stack_boundary_arg
, min
, max
);
6321 ix86_preferred_stack_boundary
6322 = (1 << opts
->x_ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
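  /* The argument is the log2 of the boundary in bytes, e.g.
     -mpreferred-stack-boundary=4 requests (1 << 4) = 16-byte alignment,
     i.e. 128 bits.  */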
6325 /* Set the default value for -mstackrealign. */
6326 if (!opts_set
->x_ix86_force_align_arg_pointer
)
6327 opts
->x_ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
6329 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
6331 /* Validate -mincoming-stack-boundary= value or default it to
6332 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
6333 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
6334 if (opts_set
->x_ix86_incoming_stack_boundary_arg
)
6336 int min
= TARGET_64BIT_P (opts
->x_ix86_isa_flags
) ? 3 : 2;
6338 if (opts
->x_ix86_incoming_stack_boundary_arg
< min
6339 || opts
->x_ix86_incoming_stack_boundary_arg
> 12)
6340 error ("-mincoming-stack-boundary=%d is not between %d and 12",
6341 opts
->x_ix86_incoming_stack_boundary_arg
, min
);
6344 ix86_user_incoming_stack_boundary
6345 = (1 << opts
->x_ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
6346 ix86_incoming_stack_boundary
6347 = ix86_user_incoming_stack_boundary
;
6351 #ifndef NO_PROFILE_COUNTERS
6352 if (flag_nop_mcount
)
6353 error ("-mnop-mcount is not compatible with this target");
6355 if (flag_nop_mcount
&& flag_pic
)
6356 error ("-mnop-mcount is not implemented for -fPIC");
6358 /* Accept -msseregparm only if at least SSE support is enabled. */
6359 if (TARGET_SSEREGPARM_P (opts
->x_target_flags
)
6360 && ! TARGET_SSE_P (opts
->x_ix86_isa_flags
))
6362 ? G_("%<-msseregparm%> used without SSE enabled")
6363 : G_("%<target(\"sseregparm\")%> used without SSE enabled"));
6365 if (opts_set
->x_ix86_fpmath
)
6367 if (opts
->x_ix86_fpmath
& FPMATH_SSE
)
6369 if (!TARGET_SSE_P (opts
->x_ix86_isa_flags
))
6371 if (TARGET_80387_P (opts
->x_target_flags
))
6373 warning (0, "SSE instruction set disabled, using 387 arithmetics");
6374 opts
->x_ix86_fpmath
= FPMATH_387
;
6377 else if ((opts
->x_ix86_fpmath
& FPMATH_387
)
6378 && !TARGET_80387_P (opts
->x_target_flags
))
6380 warning (0, "387 instruction set disabled, using SSE arithmetics");
6381 opts
->x_ix86_fpmath
= FPMATH_SSE
;
6385 /* For all chips supporting SSE2, -mfpmath=sse performs better than
6386 fpmath=387. The second is however default at many targets since the
6387 extra 80bit precision of temporaries is considered to be part of ABI.
6388 Overwrite the default at least for -ffast-math.
6389 TODO: -mfpmath=both seems to produce same performing code with bit
6390 smaller binaries. It is however not clear if register allocation is
6391 ready for this setting.
6392 Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE
6393 codegen. We may switch to 387 with -ffast-math for size optimized
6395 else if (fast_math_flags_set_p (&global_options
)
6396 && TARGET_SSE2_P (opts
->x_ix86_isa_flags
))
6397 opts
->x_ix86_fpmath
= FPMATH_SSE
;
6399 opts
->x_ix86_fpmath
= TARGET_FPMATH_DEFAULT_P (opts
->x_ix86_isa_flags
);
6401 /* Use external vectorized library in vectorizing intrinsics. */
6402 if (opts_set
->x_ix86_veclibabi_type
)
6403 switch (opts
->x_ix86_veclibabi_type
)
6405 case ix86_veclibabi_type_svml
:
6406 ix86_veclib_handler
= ix86_veclibabi_svml
;
6409 case ix86_veclibabi_type_acml
:
6410 ix86_veclib_handler
= ix86_veclibabi_acml
;
6417 if (ix86_tune_features
[X86_TUNE_ACCUMULATE_OUTGOING_ARGS
]
6418 && !(opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
6419 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
6421 /* If stack probes are required, the space used for large function
6422 arguments on the stack must also be probed, so enable
6423 -maccumulate-outgoing-args so this happens in the prologue. */
6424 if (TARGET_STACK_PROBE_P (opts
->x_target_flags
)
6425 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
6427 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
6430 ? G_("stack probing requires %<-maccumulate-outgoing-args%> "
6432 : G_("stack probing requires "
6433 "%<target(\"accumulate-outgoing-args\")%> for "
6435 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
6438 /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
6439 so enable -maccumulate-outgoing-args when %ebp is fixed. */
6440 if (fixed_regs
[BP_REG
]
6441 && !(opts
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
6443 if (opts_set
->x_target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
)
6446 ? G_("fixed ebp register requires "
6447 "%<-maccumulate-outgoing-args%>")
6448 : G_("fixed ebp register requires "
6449 "%<target(\"accumulate-outgoing-args\")%>"));
6450 opts
->x_target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
6453 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
6456 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
6457 p
= strchr (internal_label_prefix
, 'X');
6458 internal_label_prefix_len
= p
- internal_label_prefix
;
6462 /* When scheduling description is not available, disable scheduler pass
6463 so it won't slow down the compilation and make x87 code slower. */
6464 if (!TARGET_SCHEDULE
)
6465 opts
->x_flag_schedule_insns_after_reload
= opts
->x_flag_schedule_insns
= 0;
6467 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
6468 ix86_tune_cost
->simultaneous_prefetches
,
6469 opts
->x_param_values
,
6470 opts_set
->x_param_values
);
6471 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
6472 ix86_tune_cost
->prefetch_block
,
6473 opts
->x_param_values
,
6474 opts_set
->x_param_values
);
6475 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
6476 ix86_tune_cost
->l1_cache_size
,
6477 opts
->x_param_values
,
6478 opts_set
->x_param_values
);
6479 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
6480 ix86_tune_cost
->l2_cache_size
,
6481 opts
->x_param_values
,
6482 opts_set
->x_param_values
);
6484 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
6485 if (opts
->x_flag_prefetch_loop_arrays
< 0
6487 && (opts
->x_optimize
>= 3 || opts
->x_flag_profile_use
)
6488 && !opts
->x_optimize_size
6489 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
6490 opts
->x_flag_prefetch_loop_arrays
= 1;
6492 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
6493 can be opts->x_optimized to ap = __builtin_next_arg (0). */
6494 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && !opts
->x_flag_split_stack
)
6495 targetm
.expand_builtin_va_start
= NULL
;
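  /* Pick the Pmode-specific RTL expander helpers: the ix86_gen_*
     function pointers below are set to the DImode variants for 64-bit
     pointers and to the SImode variants otherwise.  */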
6497 if (TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6499 ix86_gen_leave
= gen_leave_rex64
;
6500 if (Pmode
== DImode
)
6502 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
6503 ix86_gen_tls_local_dynamic_base_64
6504 = gen_tls_local_dynamic_base_64_di
;
6508 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
6509 ix86_gen_tls_local_dynamic_base_64
6510 = gen_tls_local_dynamic_base_64_si
;
6514 ix86_gen_leave
= gen_leave
;
6516 if (Pmode
== DImode
)
6518 ix86_gen_add3
= gen_adddi3
;
6519 ix86_gen_sub3
= gen_subdi3
;
6520 ix86_gen_sub3_carry
= gen_subdi3_carry
;
6521 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
6522 ix86_gen_andsp
= gen_anddi3
;
6523 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
6524 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
6525 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
6526 ix86_gen_monitor
= gen_sse3_monitor_di
;
6527 ix86_gen_monitorx
= gen_monitorx_di
;
6528 ix86_gen_clzero
= gen_clzero_di
;
6532 ix86_gen_add3
= gen_addsi3
;
6533 ix86_gen_sub3
= gen_subsi3
;
6534 ix86_gen_sub3_carry
= gen_subsi3_carry
;
6535 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
6536 ix86_gen_andsp
= gen_andsi3
;
6537 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
6538 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
6539 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
6540 ix86_gen_monitor
= gen_sse3_monitor_si
;
6541 ix86_gen_monitorx
= gen_monitorx_si
;
6542 ix86_gen_clzero
= gen_clzero_si
;
6546 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
6547 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
))
6548 opts
->x_target_flags
|= MASK_CLD
& ~opts_set
->x_target_flags
;
6551 /* Set the default value for -mfentry. */
6552 if (!opts_set
->x_flag_fentry
)
6553 opts
->x_flag_fentry
= TARGET_SEH
;
6556 if (!TARGET_64BIT_P (opts
->x_ix86_isa_flags
) && opts
->x_flag_pic
6557 && opts
->x_flag_fentry
)
6558 sorry ("-mfentry isn%'t supported for 32-bit in combination "
6560 else if (TARGET_SEH
&& !opts
->x_flag_fentry
)
6561 sorry ("-mno-fentry isn%'t compatible with SEH");
6564 if (TARGET_SEH
&& TARGET_CALL_MS2SYSV_XLOGUES
)
6565 sorry ("-mcall-ms2sysv-xlogues isn%'t currently supported with SEH");
6567 if (!(opts_set
->x_target_flags
& MASK_VZEROUPPER
))
6568 opts
->x_target_flags
|= MASK_VZEROUPPER
;
6569 if (!(opts_set
->x_target_flags
& MASK_STV
))
6570 opts
->x_target_flags
|= MASK_STV
;
6571 /* Disable STV if -mpreferred-stack-boundary={2,3} or
6572 -mincoming-stack-boundary={2,3} or -mstackrealign - the needed
6573 stack realignment will be extra cost the pass doesn't take into
6574 account and the pass can't realign the stack. */
6575 if (ix86_preferred_stack_boundary
< 128
6576 || ix86_incoming_stack_boundary
< 128
6577 || opts
->x_ix86_force_align_arg_pointer
)
6578 opts
->x_target_flags
&= ~MASK_STV
;
6579 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL
]
6580 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
6581 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
6582 if (!ix86_tune_features
[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL
]
6583 && !(opts_set
->x_target_flags
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
6584 opts
->x_target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
6585 /* Enable 128-bit AVX instruction generation
6586 for the auto-vectorizer. */
6587 if (TARGET_AVX128_OPTIMAL
6588 && !(opts_set
->x_target_flags
& MASK_PREFER_AVX128
))
6589 opts
->x_target_flags
|= MASK_PREFER_AVX128
;
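  /* Parse -mrecip=<list>: each comma-separated token selects a
     RECIP_MASK_* bit from recip_options[] above; e.g.
     -mrecip=vec-div,vec-sqrt enables RECIP_MASK_VEC_DIV and
     RECIP_MASK_VEC_SQRT.  */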
6591 if (opts
->x_ix86_recip_name
)
6593 char *p
= ASTRDUP (opts
->x_ix86_recip_name
);
6595 unsigned int mask
, i
;
6598 while ((q
= strtok (p
, ",")) != NULL
)
6609 if (!strcmp (q
, "default"))
6610 mask
= RECIP_MASK_ALL
;
6613 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
6614 if (!strcmp (q
, recip_options
[i
].string
))
6616 mask
= recip_options
[i
].mask
;
6620 if (i
== ARRAY_SIZE (recip_options
))
6622 error ("unknown option for -mrecip=%s", q
);
6624 mask
= RECIP_MASK_NONE
;
6628 opts
->x_recip_mask_explicit
|= mask
;
6630 opts
->x_recip_mask
&= ~mask
;
6632 opts
->x_recip_mask
|= mask
;
6636 if (TARGET_RECIP_P (opts
->x_target_flags
))
6637 opts
->x_recip_mask
|= RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
;
6638 else if (opts_set
->x_target_flags
& MASK_RECIP
)
6639 opts
->x_recip_mask
&= ~(RECIP_MASK_ALL
& ~opts
->x_recip_mask_explicit
);
6641 /* Default long double to 64-bit for 32-bit Bionic and to __float128
6642 for 64-bit Bionic. Also default long double to 64-bit for Intel
6644 if ((TARGET_HAS_BIONIC
|| TARGET_IAMCU
)
6645 && !(opts_set
->x_target_flags
6646 & (MASK_LONG_DOUBLE_64
| MASK_LONG_DOUBLE_128
)))
6647 opts
->x_target_flags
|= (TARGET_64BIT
6648 ? MASK_LONG_DOUBLE_128
6649 : MASK_LONG_DOUBLE_64
);
6651 /* Only one of them can be active. */
6652 gcc_assert ((opts
->x_target_flags
& MASK_LONG_DOUBLE_64
) == 0
6653 || (opts
->x_target_flags
& MASK_LONG_DOUBLE_128
) == 0);
6655 /* Handle stack protector */
6656 if (!opts_set
->x_ix86_stack_protector_guard
)
6657 opts
->x_ix86_stack_protector_guard
6658 = TARGET_HAS_BIONIC
? SSP_GLOBAL
: SSP_TLS
;
6660 #ifdef TARGET_THREAD_SSP_OFFSET
6661 ix86_stack_protector_guard_offset
= TARGET_THREAD_SSP_OFFSET
;
6664 if (global_options_set
.x_ix86_stack_protector_guard_offset_str
)
6667 const char *str
= ix86_stack_protector_guard_offset_str
;
6672 #if defined(INT64_T_IS_LONG)
6673 offset
= strtol (str
, &endp
, 0);
6675 offset
= strtoll (str
, &endp
, 0);
6678 if (!*str
|| *endp
|| errno
)
6679 error ("%qs is not a valid number "
6680 "in -mstack-protector-guard-offset=", str
);
6682 if (!IN_RANGE (offset
, HOST_WIDE_INT_C (-0x80000000),
6683 HOST_WIDE_INT_C (0x7fffffff)))
6684 error ("%qs is not a valid offset "
6685 "in -mstack-protector-guard-offset=", str
);
6687 ix86_stack_protector_guard_offset
= offset
;
6690 ix86_stack_protector_guard_reg
= DEFAULT_TLS_SEG_REG
;
6692 /* The kernel uses a different segment register for performance
6693 reasons; a system call would not have to trash the userspace
6694 segment register, which would be expensive. */
6695 if (ix86_cmodel
== CM_KERNEL
)
6696 ix86_stack_protector_guard_reg
= ADDR_SPACE_SEG_GS
;
6698 if (global_options_set
.x_ix86_stack_protector_guard_reg_str
)
6700 const char *str
= ix86_stack_protector_guard_reg_str
;
6701 addr_space_t seg
= ADDR_SPACE_GENERIC
;
6703 /* Discard optional register prefix. */
6707 if (strlen (str
) == 2 && str
[1] == 's')
6710 seg
= ADDR_SPACE_SEG_FS
;
6711 else if (str
[0] == 'g')
6712 seg
= ADDR_SPACE_SEG_GS
;
6715 if (seg
== ADDR_SPACE_GENERIC
)
6716 error ("%qs is not a valid base register "
6717 "in -mstack-protector-guard-reg=",
6718 ix86_stack_protector_guard_reg_str
);
6720 ix86_stack_protector_guard_reg
= seg
;
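  /* The parsing above accepts only "fs" and "gs" (optionally written
     with a leading "%"); they map to ADDR_SPACE_SEG_FS and
     ADDR_SPACE_SEG_GS respectively.  */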
6723 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
6724 if (opts
->x_ix86_tune_memcpy_strategy
)
6726 char *str
= xstrdup (opts
->x_ix86_tune_memcpy_strategy
);
6727 ix86_parse_stringop_strategy_string (str
, false);
6731 if (opts
->x_ix86_tune_memset_strategy
)
6733 char *str
= xstrdup (opts
->x_ix86_tune_memset_strategy
);
6734 ix86_parse_stringop_strategy_string (str
, true);
6738 /* Save the initial options in case the user does function specific
6741 target_option_default_node
= target_option_current_node
6742 = build_target_option_node (opts
);
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true, &global_options, &global_options_set);
}

/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */

static char *
ix86_offload_options (void)
{
  if (TARGET_LP64)
    return xstrdup ("-foffload-abi=lp64");
  return xstrdup ("-foffload-abi=ilp32");
}
6764 /* Update register usage after having seen the compiler flags. */
6767 ix86_conditional_register_usage (void)
6771 /* If there are no caller-saved registers, preserve all registers.
6772 except fixed_regs and registers used for function return value
6773 since aggregate_value_p checks call_used_regs[regno] on return
6775 if (cfun
&& cfun
->machine
->no_caller_saved_registers
)
6776 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6777 if (!fixed_regs
[i
] && !ix86_function_value_regno_p (i
))
6778 call_used_regs
[i
] = 0;
6780 /* For 32-bit targets, squash the REX registers. */
6783 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
6784 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6785 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
6786 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6787 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
6788 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6791 /* See the definition of CALL_USED_REGISTERS in i386.h. */
6792 c_mask
= CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI
);
6794 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
6796 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6798 /* Set/reset conditionally defined registers from
6799 CALL_USED_REGISTERS initializer. */
6800 if (call_used_regs
[i
] > 1)
6801 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
6803 /* Calculate registers of CLOBBERED_REGS register set
6804 as call used registers from GENERAL_REGS register set. */
6805 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
6806 && call_used_regs
[i
])
6807 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
6810 /* If MMX is disabled, squash the registers. */
6812 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6813 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
6814 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6816 /* If SSE is disabled, squash the registers. */
6818 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6819 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
6820 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6822 /* If the FPU is disabled, squash the registers. */
6823 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
6824 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
6825 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
6826 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6828 /* If AVX512F is disabled, squash the registers. */
6829 if (! TARGET_AVX512F
)
6831 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
6832 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6834 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
6835 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6838 /* If MPX is disabled, squash the registers. */
6840 for (i
= FIRST_BND_REG
; i
<= LAST_BND_REG
; i
++)
6841 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
6845 /* Save the current options */
6848 ix86_function_specific_save (struct cl_target_option
*ptr
,
6849 struct gcc_options
*opts
)
6851 ptr
->arch
= ix86_arch
;
6852 ptr
->schedule
= ix86_schedule
;
6853 ptr
->prefetch_sse
= x86_prefetch_sse
;
6854 ptr
->tune
= ix86_tune
;
6855 ptr
->branch_cost
= ix86_branch_cost
;
6856 ptr
->tune_defaulted
= ix86_tune_defaulted
;
6857 ptr
->arch_specified
= ix86_arch_specified
;
6858 ptr
->x_ix86_isa_flags_explicit
= opts
->x_ix86_isa_flags_explicit
;
6859 ptr
->x_ix86_isa_flags2_explicit
= opts
->x_ix86_isa_flags2_explicit
;
6860 ptr
->x_recip_mask_explicit
= opts
->x_recip_mask_explicit
;
6861 ptr
->x_ix86_arch_string
= opts
->x_ix86_arch_string
;
6862 ptr
->x_ix86_tune_string
= opts
->x_ix86_tune_string
;
6863 ptr
->x_ix86_cmodel
= opts
->x_ix86_cmodel
;
6864 ptr
->x_ix86_abi
= opts
->x_ix86_abi
;
6865 ptr
->x_ix86_asm_dialect
= opts
->x_ix86_asm_dialect
;
6866 ptr
->x_ix86_branch_cost
= opts
->x_ix86_branch_cost
;
6867 ptr
->x_ix86_dump_tunes
= opts
->x_ix86_dump_tunes
;
6868 ptr
->x_ix86_force_align_arg_pointer
= opts
->x_ix86_force_align_arg_pointer
;
6869 ptr
->x_ix86_force_drap
= opts
->x_ix86_force_drap
;
6870 ptr
->x_ix86_incoming_stack_boundary_arg
= opts
->x_ix86_incoming_stack_boundary_arg
;
6871 ptr
->x_ix86_pmode
= opts
->x_ix86_pmode
;
6872 ptr
->x_ix86_preferred_stack_boundary_arg
= opts
->x_ix86_preferred_stack_boundary_arg
;
6873 ptr
->x_ix86_recip_name
= opts
->x_ix86_recip_name
;
6874 ptr
->x_ix86_regparm
= opts
->x_ix86_regparm
;
6875 ptr
->x_ix86_section_threshold
= opts
->x_ix86_section_threshold
;
6876 ptr
->x_ix86_sse2avx
= opts
->x_ix86_sse2avx
;
6877 ptr
->x_ix86_stack_protector_guard
= opts
->x_ix86_stack_protector_guard
;
6878 ptr
->x_ix86_stringop_alg
= opts
->x_ix86_stringop_alg
;
6879 ptr
->x_ix86_tls_dialect
= opts
->x_ix86_tls_dialect
;
6880 ptr
->x_ix86_tune_ctrl_string
= opts
->x_ix86_tune_ctrl_string
;
6881 ptr
->x_ix86_tune_memcpy_strategy
= opts
->x_ix86_tune_memcpy_strategy
;
6882 ptr
->x_ix86_tune_memset_strategy
= opts
->x_ix86_tune_memset_strategy
;
6883 ptr
->x_ix86_tune_no_default
= opts
->x_ix86_tune_no_default
;
6884 ptr
->x_ix86_veclibabi_type
= opts
->x_ix86_veclibabi_type
;
6886 /* The fields are char but the variables are not; make sure the
6887 values fit in the fields. */
6888 gcc_assert (ptr
->arch
== ix86_arch
);
6889 gcc_assert (ptr
->schedule
== ix86_schedule
);
6890 gcc_assert (ptr
->tune
== ix86_tune
);
6891 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
/* Restore the current options */

static void
ix86_function_specific_restore (struct gcc_options *opts,
				struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask;
  int i;

  /* We don't change -fPIC.  */
  opts->x_flag_pic = flag_pic;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  x86_prefetch_sse = ptr->prefetch_sse;
  opts->x_ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
  opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
  opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
  opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
  opts->x_ix86_abi = ptr->x_ix86_abi;
  opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
  opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
  opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
  opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
  opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
  opts->x_ix86_incoming_stack_boundary_arg
    = ptr->x_ix86_incoming_stack_boundary_arg;
  opts->x_ix86_pmode = ptr->x_ix86_pmode;
  opts->x_ix86_preferred_stack_boundary_arg
    = ptr->x_ix86_preferred_stack_boundary_arg;
  opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
  opts->x_ix86_regparm = ptr->x_ix86_regparm;
  opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
  opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
  opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
  opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
  opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
  opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
  opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
  opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
  opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
  opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  /* TODO: ix86_cost should be chosen at instruction or function granularity
     so for cold code we use size_cost even in !optimize_size compilation.  */
  if (opts->x_optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    set_ix86_tune_features (ix86_tune, false);
}
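/* Illustrative example (not part of this file): the save/restore pair
   above is what services per-function target overrides such as the
   pragma below; each override gets its own cl_target_option snapshot.
   The function name is hypothetical.  */
#if 0
#pragma GCC push_options
#pragma GCC target ("avx2")                /* saved via the hook above */
static int
sum_avx2 (const int *v, int n)
{
  int s = 0;
  for (int i = 0; i < n; i++)
    s += v[i];
  return s;
}
#pragma GCC pop_options                    /* options restored here */
#endif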
6964 /* Adjust target options after streaming them in. This is mainly about
6965 reconciling them with global options. */
6968 ix86_function_specific_post_stream_in (struct cl_target_option
*ptr
)
6970 /* flag_pic is a global option, but ix86_cmodel is target saved option
6971 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
6972 for PIC, or error out. */
6974 switch (ptr
->x_ix86_cmodel
)
6977 ptr
->x_ix86_cmodel
= CM_SMALL_PIC
;
6981 ptr
->x_ix86_cmodel
= CM_MEDIUM_PIC
;
6985 ptr
->x_ix86_cmodel
= CM_LARGE_PIC
;
6989 error ("code model %s does not support PIC mode", "kernel");
6996 switch (ptr
->x_ix86_cmodel
)
6999 ptr
->x_ix86_cmodel
= CM_SMALL
;
7003 ptr
->x_ix86_cmodel
= CM_MEDIUM
;
7007 ptr
->x_ix86_cmodel
= CM_LARGE
;
7015 /* Print the current options */
7018 ix86_function_specific_print (FILE *file
, int indent
,
7019 struct cl_target_option
*ptr
)
7022 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_ix86_isa_flags2
,
7023 ptr
->x_target_flags
, ptr
->x_ix86_target_flags
,
7024 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
7026 gcc_assert (ptr
->arch
< PROCESSOR_max
);
7027 fprintf (file
, "%*sarch = %d (%s)\n",
7029 ptr
->arch
, processor_target_table
[ptr
->arch
].name
);
7031 gcc_assert (ptr
->tune
< PROCESSOR_max
);
7032 fprintf (file
, "%*stune = %d (%s)\n",
7034 ptr
->tune
, processor_target_table
[ptr
->tune
].name
);
7036 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
7040 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
7041 free (target_string
);
7046 /* Inner function to process the attribute((target(...))), take an argument and
7047 set the current options from the argument. If we have a list, recursively go
7051 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
7052 struct gcc_options
*opts
,
7053 struct gcc_options
*opts_set
,
7054 struct gcc_options
*enum_opts_set
)
7059 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
7060 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
7061 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
7062 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
7063 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
7079 enum ix86_opt_type type
;
7084 IX86_ATTR_ISA ("sgx", OPT_msgx
),
7085 IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps
),
7086 IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw
),
7087 IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq
),
7089 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi
),
7090 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma
),
7091 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl
),
7092 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw
),
7093 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq
),
7094 IX86_ATTR_ISA ("avx512er", OPT_mavx512er
),
7095 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf
),
7096 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd
),
7097 IX86_ATTR_ISA ("avx512f", OPT_mavx512f
),
7098 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
7099 IX86_ATTR_ISA ("fma", OPT_mfma
),
7100 IX86_ATTR_ISA ("xop", OPT_mxop
),
7101 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
7102 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
7103 IX86_ATTR_ISA ("avx", OPT_mavx
),
7104 IX86_ATTR_ISA ("sse4", OPT_msse4
),
7105 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
7106 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
7107 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
7108 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
7109 IX86_ATTR_ISA ("sse3", OPT_msse3
),
7110 IX86_ATTR_ISA ("aes", OPT_maes
),
7111 IX86_ATTR_ISA ("sha", OPT_msha
),
7112 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
7113 IX86_ATTR_ISA ("sse2", OPT_msse2
),
7114 IX86_ATTR_ISA ("sse", OPT_msse
),
7115 IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa
),
7116 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
7117 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
7118 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
7119 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
7120 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
7121 IX86_ATTR_ISA ("adx", OPT_madx
),
7122 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1
),
7123 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt
),
7124 IX86_ATTR_ISA ("xsaves", OPT_mxsaves
),
7125 IX86_ATTR_ISA ("xsavec", OPT_mxsavec
),
7126 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
7127 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
7128 IX86_ATTR_ISA ("abm", OPT_mabm
),
7129 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
7130 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
7131 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
7132 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
7133 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
7134 IX86_ATTR_ISA ("cx16", OPT_mcx16
),
7135 IX86_ATTR_ISA ("sahf", OPT_msahf
),
7136 IX86_ATTR_ISA ("movbe", OPT_mmovbe
),
7137 IX86_ATTR_ISA ("crc32", OPT_mcrc32
),
7138 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
7139 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
7140 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx
),
7141 IX86_ATTR_ISA ("clzero", OPT_mclzero
),
7142 IX86_ATTR_ISA ("pku", OPT_mpku
),
7143 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
7144 IX86_ATTR_ISA ("hle", OPT_mhle
),
7145 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
7146 IX86_ATTR_ISA ("mpx", OPT_mmpx
),
7147 IX86_ATTR_ISA ("clwb", OPT_mclwb
),
7148 IX86_ATTR_ISA ("rdpid", OPT_mrdpid
),
7151 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
7153 /* string options */
7154 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
7155 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
7158 IX86_ATTR_YES ("cld",
7162 IX86_ATTR_NO ("fancy-math-387",
7163 OPT_mfancy_math_387
,
7164 MASK_NO_FANCY_MATH_387
),
7166 IX86_ATTR_YES ("ieee-fp",
7170 IX86_ATTR_YES ("inline-all-stringops",
7171 OPT_minline_all_stringops
,
7172 MASK_INLINE_ALL_STRINGOPS
),
7174 IX86_ATTR_YES ("inline-stringops-dynamically",
7175 OPT_minline_stringops_dynamically
,
7176 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
7178 IX86_ATTR_NO ("align-stringops",
7179 OPT_mno_align_stringops
,
7180 MASK_NO_ALIGN_STRINGOPS
),
7182 IX86_ATTR_YES ("recip",
7188 /* If this is a list, recurse to get the options. */
7189 if (TREE_CODE (args
) == TREE_LIST
)
7193 for (; args
; args
= TREE_CHAIN (args
))
7194 if (TREE_VALUE (args
)
7195 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
7196 p_strings
, opts
, opts_set
,
7203 else if (TREE_CODE (args
) != STRING_CST
)
7205 error ("attribute %<target%> argument not a string");
7209 /* Handle multiple arguments separated by commas. */
7210 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
7212 while (next_optstr
&& *next_optstr
!= '\0')
7214 char *p
= next_optstr
;
7216 char *comma
= strchr (next_optstr
, ',');
7217 const char *opt_string
;
7218 size_t len
, opt_len
;
7223 enum ix86_opt_type type
= ix86_opt_unknown
;
7229 len
= comma
- next_optstr
;
7230 next_optstr
= comma
+ 1;
7238 /* Recognize no-xxx. */
7239 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
7248 /* Find the option. */
7251 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
7253 type
= attrs
[i
].type
;
7254 opt_len
= attrs
[i
].len
;
7255 if (ch
== attrs
[i
].string
[0]
7256 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
7259 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
7262 mask
= attrs
[i
].mask
;
7263 opt_string
= attrs
[i
].string
;
7268 /* Process the option. */
7271 error ("attribute(target(\"%s\")) is unknown", orig_p
);
7275 else if (type
== ix86_opt_isa
)
7277 struct cl_decoded_option decoded
;
7279 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
7280 ix86_handle_option (opts
, opts_set
,
7281 &decoded
, input_location
);
7284 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
7286 if (type
== ix86_opt_no
)
7287 opt_set_p
= !opt_set_p
;
7290 opts
->x_target_flags
|= mask
;
7292 opts
->x_target_flags
&= ~mask
;
7295 else if (type
== ix86_opt_str
)
7299 error ("option(\"%s\") was already specified", opt_string
);
7303 p_strings
[opt
] = xstrdup (p
+ opt_len
);
7306 else if (type
== ix86_opt_enum
)
7311 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
7313 set_option (opts
, enum_opts_set
, opt
, value
,
7314 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
7318 error ("attribute(target(\"%s\")) is unknown", orig_p
);
7330 /* Release allocated strings. */
7332 release_options_strings (char **option_strings
)
7334 /* Free up memory allocated to hold the strings */
7335 for (unsigned i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
7336 free (option_strings
[i
]);
7339 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
7342 ix86_valid_target_attribute_tree (tree args
,
7343 struct gcc_options
*opts
,
7344 struct gcc_options
*opts_set
)
7346 const char *orig_arch_string
= opts
->x_ix86_arch_string
;
7347 const char *orig_tune_string
= opts
->x_ix86_tune_string
;
7348 enum fpmath_unit orig_fpmath_set
= opts_set
->x_ix86_fpmath
;
7349 int orig_tune_defaulted
= ix86_tune_defaulted
;
7350 int orig_arch_specified
= ix86_arch_specified
;
7351 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
7353 struct cl_target_option
*def
7354 = TREE_TARGET_OPTION (target_option_default_node
);
7355 struct gcc_options enum_opts_set
;
7357 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
7359 /* Process each of the options on the chain. */
7360 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
, opts
,
7361 opts_set
, &enum_opts_set
))
7362 return error_mark_node
;
7364 /* If the changed options are different from the default, rerun
7365 ix86_option_override_internal, and then save the options away.
7366 The string options are attribute options, and will be undone
7367 when we copy the save structure. */
7368 if (opts
->x_ix86_isa_flags
!= def
->x_ix86_isa_flags
7369 || opts
->x_ix86_isa_flags2
!= def
->x_ix86_isa_flags2
7370 || opts
->x_target_flags
!= def
->x_target_flags
7371 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
7372 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
7373 || enum_opts_set
.x_ix86_fpmath
)
7375 /* If we are using the default tune= or arch=, undo the string assigned,
7376 and use the default. */
7377 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
7379 opts
->x_ix86_arch_string
7380 = ggc_strdup (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]);
7382 /* If arch= is set, clear all bits in x_ix86_isa_flags,
7383 except for ISA_64BIT, ABI_64, ABI_X32, and CODE16. */
7384 opts
->x_ix86_isa_flags
&= (OPTION_MASK_ISA_64BIT
7385 | OPTION_MASK_ABI_64
7386 | OPTION_MASK_ABI_X32
7387 | OPTION_MASK_CODE16
);
7388 opts
->x_ix86_isa_flags2
= 0;
7390 else if (!orig_arch_specified
)
7391 opts
->x_ix86_arch_string
= NULL
;
7393 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
7394 opts
->x_ix86_tune_string
7395 = ggc_strdup (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]);
7396 else if (orig_tune_defaulted
)
7397 opts
->x_ix86_tune_string
= NULL
;
7399 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
7400 if (enum_opts_set
.x_ix86_fpmath
)
7401 opts_set
->x_ix86_fpmath
= (enum fpmath_unit
) 1;
7403 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
7404 bool r
= ix86_option_override_internal (false, opts
, opts_set
);
7407 release_options_strings (option_strings
);
7408 return error_mark_node
;
7411 /* Add any builtin functions with the new isa if any. */
7412 ix86_add_new_builtins (opts
->x_ix86_isa_flags
, opts
->x_ix86_isa_flags2
);
7414 /* Save the current options unless we are validating options for
7416 t
= build_target_option_node (opts
);
7418 opts
->x_ix86_arch_string
= orig_arch_string
;
7419 opts
->x_ix86_tune_string
= orig_tune_string
;
7420 opts_set
->x_ix86_fpmath
= orig_fpmath_set
;
7422 release_options_strings (option_strings
);
7428 /* Hook to validate attribute((target("string"))). */
7431 ix86_valid_target_attribute_p (tree fndecl
,
7432 tree
ARG_UNUSED (name
),
7434 int ARG_UNUSED (flags
))
7436 struct gcc_options func_options
;
7437 tree new_target
, new_optimize
;
7440 /* attribute((target("default"))) does nothing, beyond
7441 affecting multi-versioning. */
7442 if (TREE_VALUE (args
)
7443 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
7444 && TREE_CHAIN (args
) == NULL_TREE
7445 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
7448 tree old_optimize
= build_optimization_node (&global_options
);
7450 /* Get the optimization options of the current function. */
7451 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
7454 func_optimize
= old_optimize
;
7456 /* Init func_options. */
7457 memset (&func_options
, 0, sizeof (func_options
));
7458 init_options_struct (&func_options
, NULL
);
7459 lang_hooks
.init_options_struct (&func_options
);
7461 cl_optimization_restore (&func_options
,
7462 TREE_OPTIMIZATION (func_optimize
));
7464 /* Initialize func_options to the default before its target options can
7466 cl_target_option_restore (&func_options
,
7467 TREE_TARGET_OPTION (target_option_default_node
));
7469 new_target
= ix86_valid_target_attribute_tree (args
, &func_options
,
7470 &global_options_set
);
7472 new_optimize
= build_optimization_node (&func_options
);
7474 if (new_target
== error_mark_node
)
7477 else if (fndecl
&& new_target
)
7479 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
7481 if (old_optimize
!= new_optimize
)
7482 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
7485 finalize_options_struct (&func_options
);
7491 /* Hook to determine if one function can safely inline another. */
7494 ix86_can_inline_p (tree caller
, tree callee
)
7496 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
7497 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
7499 callee_tree
= target_option_default_node
;
7501 caller_tree
= target_option_default_node
;
7502 if (callee_tree
== caller_tree
)
7505 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
7506 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
7509 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
7510 function can inline a SSE2 function but a SSE2 function can't inline
7512 if (((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
7513 != callee_opts
->x_ix86_isa_flags
)
7514 || ((caller_opts
->x_ix86_isa_flags2
& callee_opts
->x_ix86_isa_flags2
)
7515 != callee_opts
->x_ix86_isa_flags2
))
7518 /* See if we have the same non-isa options. */
7519 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
7522 /* See if arch, tune, etc. are the same. */
7523 else if (caller_opts
->arch
!= callee_opts
->arch
)
7526 else if (caller_opts
->tune
!= callee_opts
->tune
)
7529 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
7530 /* If the calle doesn't use FP expressions differences in
7531 ix86_fpmath can be ignored. We are called from FEs
7532 for multi-versioning call optimization, so beware of
7533 ipa_fn_summaries not available. */
7534 && (! ipa_fn_summaries
7535 || ipa_fn_summaries
->get
7536 (cgraph_node::get (callee
))->fp_expressions
))
7539 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
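/* Illustrative example (not part of this file): because the callee's ISA
   flags must be a subset of the caller's, the AVX2 helper below is not
   inlined into a caller compiled without AVX2.  Names are hypothetical.  */
#if 0
__attribute__ ((target ("avx2")))
static int
scale_avx2 (int x)
{
  return x * 3;
}

int
plain_caller (int x)            /* compiled without -mavx2 */
{
  return scale_avx2 (x);        /* stays an out-of-line call */
}
#endif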
7549 /* Remember the last target of ix86_set_current_function. */
7550 static GTY(()) tree ix86_previous_fndecl
;
7552 /* Set targets globals to the default (or current #pragma GCC target
7553 if active). Invalidate ix86_previous_fndecl cache. */
7556 ix86_reset_previous_fndecl (void)
7558 tree new_tree
= target_option_current_node
;
7559 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
7560 if (TREE_TARGET_GLOBALS (new_tree
))
7561 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
7562 else if (new_tree
== target_option_default_node
)
7563 restore_target_globals (&default_target_globals
);
7565 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
7566 ix86_previous_fndecl
= NULL_TREE
;
7569 /* Set the func_type field from the function FNDECL. */
7572 ix86_set_func_type (tree fndecl
)
7574 if (cfun
->machine
->func_type
== TYPE_UNKNOWN
)
7576 if (lookup_attribute ("interrupt",
7577 TYPE_ATTRIBUTES (TREE_TYPE (fndecl
))))
7579 if (ix86_function_naked (fndecl
))
7580 error_at (DECL_SOURCE_LOCATION (fndecl
),
7581 "interrupt and naked attributes are not compatible");
7584 for (tree arg
= DECL_ARGUMENTS (fndecl
);
7586 arg
= TREE_CHAIN (arg
))
7588 cfun
->machine
->no_caller_saved_registers
= true;
7589 cfun
->machine
->func_type
7590 = nargs
== 2 ? TYPE_EXCEPTION
: TYPE_INTERRUPT
;
7592 ix86_optimize_mode_switching
[X86_DIRFLAG
] = 1;
7594 /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */
7595 if (write_symbols
!= NO_DEBUG
&& write_symbols
!= DWARF2_DEBUG
)
7596 sorry ("Only DWARF debug format is supported for interrupt "
7597 "service routine.");
7601 cfun
->machine
->func_type
= TYPE_NORMAL
;
7602 if (lookup_attribute ("no_caller_saved_registers",
7603 TYPE_ATTRIBUTES (TREE_TYPE (fndecl
))))
7604 cfun
->machine
->no_caller_saved_registers
= true;
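/* Illustrative example (not part of this file): a handler with one
   argument is classified as TYPE_INTERRUPT, one with two arguments
   (frame plus error code) as TYPE_EXCEPTION; both imply that no
   caller-saved registers may be clobbered.  Names are hypothetical.  */
#if 0
struct interrupt_frame;
typedef unsigned long long uword_t;        /* 64-bit example */

__attribute__ ((interrupt))
void
isr_timer (struct interrupt_frame *frame)                     /* TYPE_INTERRUPT */
{
  (void) frame;
}

__attribute__ ((interrupt))
void
isr_page_fault (struct interrupt_frame *frame, uword_t code)  /* TYPE_EXCEPTION */
{
  (void) frame;
  (void) code;
}
#endif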
7609 /* Establish appropriate back-end context for processing the function
7610 FNDECL. The argument might be NULL to indicate processing at top
7611 level, outside of any function scope. */
7613 ix86_set_current_function (tree fndecl
)
7615 /* Only change the context if the function changes. This hook is called
7616 several times in the course of compiling a function, and we don't want to
7617 slow things down too much or call target_reinit when it isn't safe. */
7618 if (fndecl
== ix86_previous_fndecl
)
7620 /* There may be 2 function bodies for the same function FNDECL,
7621 one is extern inline and one isn't. Call ix86_set_func_type
7622 to set the func_type field. */
7623 if (fndecl
!= NULL_TREE
)
7624 ix86_set_func_type (fndecl
);
7629 if (ix86_previous_fndecl
== NULL_TREE
)
7630 old_tree
= target_option_current_node
;
7631 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
))
7632 old_tree
= DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
);
7634 old_tree
= target_option_default_node
;
7636 if (fndecl
== NULL_TREE
)
7638 if (old_tree
!= target_option_current_node
)
7639 ix86_reset_previous_fndecl ();
7643 ix86_set_func_type (fndecl
);
7645 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
7646 if (new_tree
== NULL_TREE
)
7647 new_tree
= target_option_default_node
;
7649 if (old_tree
!= new_tree
)
7651 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
7652 if (TREE_TARGET_GLOBALS (new_tree
))
7653 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
7654 else if (new_tree
== target_option_default_node
)
7655 restore_target_globals (&default_target_globals
);
7657 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
7659 ix86_previous_fndecl
= fndecl
;
7661 static bool prev_no_caller_saved_registers
;
7663 /* 64-bit MS and SYSV ABI have different set of call used registers.
7664 Avoid expensive re-initialization of init_regs each time we switch
7665 function context. */
7667 && (call_used_regs
[SI_REG
]
7668 == (cfun
->machine
->call_abi
== MS_ABI
)))
7670 /* Need to re-initialize init_regs if caller-saved registers are
7672 else if (prev_no_caller_saved_registers
7673 != cfun
->machine
->no_caller_saved_registers
)
7676 if (cfun
->machine
->func_type
!= TYPE_NORMAL
7677 || cfun
->machine
->no_caller_saved_registers
)
7679 /* Don't allow MPX, SSE, MMX nor x87 instructions since they
7680 may change processor state. */
7684 else if (TARGET_SSE
)
7686 else if (TARGET_MMX
)
7688 else if (TARGET_80387
)
7694 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
7695 sorry ("%s instructions aren't allowed in %s service routine",
7696 isa
, (cfun
->machine
->func_type
== TYPE_EXCEPTION
7697 ? "exception" : "interrupt"));
7699 sorry ("%s instructions aren't allowed in function with "
7700 "no_caller_saved_registers attribute", isa
);
7701 /* Don't issue the same error twice. */
7702 cfun
->machine
->func_type
= TYPE_NORMAL
;
7703 cfun
->machine
->no_caller_saved_registers
= false;
7707 prev_no_caller_saved_registers
7708 = cfun
->machine
->no_caller_saved_registers
;
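/* Illustrative example (not part of this file): inside a function with
   the attribute below (or inside an interrupt handler) the checks above
   reject SSE/MMX/x87 code, because that state would otherwise have to be
   preserved as well.  The function name is hypothetical.  */
#if 0
__attribute__ ((no_caller_saved_registers))
void
event_hook (int code)
{
  static volatile int last_code;
  last_code = code;        /* integer-only work is fine; using 'double'
			      here would trigger the diagnostics above */
}
#endif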
7712 /* Return true if this goes in large data/bss. */
7715 ix86_in_large_data_p (tree exp
)
7717 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
7720 if (exp
== NULL_TREE
)
7723 /* Functions are never large data. */
7724 if (TREE_CODE (exp
) == FUNCTION_DECL
)
7727 /* Automatic variables are never large data. */
7728 if (VAR_P (exp
) && !is_global_var (exp
))
7731 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
7733 const char *section
= DECL_SECTION_NAME (exp
);
7734 if (strcmp (section
, ".ldata") == 0
7735 || strcmp (section
, ".lbss") == 0)
7741 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
  /* If this is an incomplete type with size 0, then we can't put it
     in data because it might be too big when completed.  Also,
     int_size_in_bytes returns -1 if the size can vary or is larger than
     an integer, in which case it is also safer to assume that it goes
     into large data.  */
7748 if (size
<= 0 || size
> ix86_section_threshold
)
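/* Illustrative example (not part of this file): with -mcmodel=medium and
   the default -mlarge-data-threshold, an object larger than the threshold
   is classified as large data by the predicate above and placed in
   .ldata/.lbss by the section hooks below.  Names are hypothetical.  */
#if 0
static char big_buffer[1 << 20];        /* above the threshold -> .lbss */
static char small_buffer[64];           /* below the threshold -> .bss  */
#endif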
7755 /* i386-specific section flag to mark large sections. */
7756 #define SECTION_LARGE SECTION_MACH_DEP
7758 /* Switch to the appropriate section for output of DECL.
7759 DECL is either a `VAR_DECL' node or a constant of some sort.
7760 RELOC indicates whether forming the initial value of DECL requires
7761 link-time relocations. */
7763 ATTRIBUTE_UNUSED
static section
*
7764 x86_64_elf_select_section (tree decl
, int reloc
,
7765 unsigned HOST_WIDE_INT align
)
7767 if (ix86_in_large_data_p (decl
))
7769 const char *sname
= NULL
;
7770 unsigned int flags
= SECTION_WRITE
| SECTION_LARGE
;
7771 switch (categorize_decl_for_section (decl
, reloc
))
7776 case SECCAT_DATA_REL
:
7777 sname
= ".ldata.rel";
7779 case SECCAT_DATA_REL_LOCAL
:
7780 sname
= ".ldata.rel.local";
7782 case SECCAT_DATA_REL_RO
:
7783 sname
= ".ldata.rel.ro";
7785 case SECCAT_DATA_REL_RO_LOCAL
:
7786 sname
= ".ldata.rel.ro.local";
7790 flags
|= SECTION_BSS
;
7793 case SECCAT_RODATA_MERGE_STR
:
7794 case SECCAT_RODATA_MERGE_STR_INIT
:
7795 case SECCAT_RODATA_MERGE_CONST
:
7797 flags
&= ~SECTION_WRITE
;
7799 case SECCAT_SRODATA
:
7806 /* We don't split these for medium model. Place them into
7807 default sections and hope for best. */
7812 /* We might get called with string constants, but get_named_section
7813 doesn't like them as they are not DECLs. Also, we need to set
7814 flags in that case. */
7816 return get_section (sname
, flags
, NULL
);
7817 return get_named_section (decl
, sname
, reloc
);
7820 return default_elf_select_section (decl
, reloc
, align
);
7823 /* Select a set of attributes for section NAME based on the properties
7824 of DECL and whether or not RELOC indicates that DECL's initializer
7825 might contain runtime relocations. */
7827 static unsigned int ATTRIBUTE_UNUSED
7828 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
7830 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
7832 if (ix86_in_large_data_p (decl
))
7833 flags
|= SECTION_LARGE
;
7835 if (decl
== NULL_TREE
7836 && (strcmp (name
, ".ldata.rel.ro") == 0
7837 || strcmp (name
, ".ldata.rel.ro.local") == 0))
7838 flags
|= SECTION_RELRO
;
7840 if (strcmp (name
, ".lbss") == 0
7841 || strncmp (name
, ".lbss.", 5) == 0
7842 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
7843 flags
|= SECTION_BSS
;
7848 /* Build up a unique section name, expressed as a
7849 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
7850 RELOC indicates whether the initial value of EXP requires
7851 link-time relocations. */
7853 static void ATTRIBUTE_UNUSED
7854 x86_64_elf_unique_section (tree decl
, int reloc
)
7856 if (ix86_in_large_data_p (decl
))
7858 const char *prefix
= NULL
;
7859 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
7860 bool one_only
= DECL_COMDAT_GROUP (decl
) && !HAVE_COMDAT_GROUP
;
7862 switch (categorize_decl_for_section (decl
, reloc
))
7865 case SECCAT_DATA_REL
:
7866 case SECCAT_DATA_REL_LOCAL
:
7867 case SECCAT_DATA_REL_RO
:
7868 case SECCAT_DATA_REL_RO_LOCAL
:
7869 prefix
= one_only
? ".ld" : ".ldata";
7872 prefix
= one_only
? ".lb" : ".lbss";
7875 case SECCAT_RODATA_MERGE_STR
:
7876 case SECCAT_RODATA_MERGE_STR_INIT
:
7877 case SECCAT_RODATA_MERGE_CONST
:
7878 prefix
= one_only
? ".lr" : ".lrodata";
7880 case SECCAT_SRODATA
:
7887 /* We don't split these for medium model. Place them into
7888 default sections and hope for best. */
7893 const char *name
, *linkonce
;
7896 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
7897 name
= targetm
.strip_name_encoding (name
);
7899 /* If we're using one_only, then there needs to be a .gnu.linkonce
7900 prefix to the section name. */
7901 linkonce
= one_only
? ".gnu.linkonce" : "";
7903 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
7905 set_decl_section_name (decl
, string
);
7909 default_unique_section (decl
, reloc
);
7912 #ifdef COMMON_ASM_OP
7914 #ifndef LARGECOMM_SECTION_ASM_OP
7915 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
7918 /* This says how to output assembler code to declare an
7919 uninitialized external linkage data object.
7921 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
7924 x86_elf_aligned_decl_common (FILE *file
, tree decl
,
7925 const char *name
, unsigned HOST_WIDE_INT size
,
7928 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
7929 && size
> (unsigned int)ix86_section_threshold
)
7931 switch_to_section (get_named_section (decl
, ".lbss", 0));
7932 fputs (LARGECOMM_SECTION_ASM_OP
, file
);
7935 fputs (COMMON_ASM_OP
, file
);
7936 assemble_name (file
, name
);
7937 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
7938 size
, align
/ BITS_PER_UNIT
);
7942 /* Utility function for targets to use in implementing
7943 ASM_OUTPUT_ALIGNED_BSS. */
7946 x86_output_aligned_bss (FILE *file
, tree decl
, const char *name
,
7947 unsigned HOST_WIDE_INT size
, int align
)
7949 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
7950 && size
> (unsigned int)ix86_section_threshold
)
7951 switch_to_section (get_named_section (decl
, ".lbss", 0));
7953 switch_to_section (bss_section
);
7954 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
7955 #ifdef ASM_DECLARE_OBJECT_NAME
7956 last_assemble_variable_decl
= decl
;
7957 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
7959 /* Standard thing is just output label for the object. */
7960 ASM_OUTPUT_LABEL (file
, name
);
7961 #endif /* ASM_DECLARE_OBJECT_NAME */
7962 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
7979 /* Decide whether we can make a sibling call to a function. DECL is the
7980 declaration of the function being targeted by the call and EXP is the
7981 CALL_EXPR representing the call. */
7984 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
7986 tree type
, decl_or_type
;
7988 bool bind_global
= decl
&& !targetm
.binds_local_p (decl
);
7990 if (ix86_function_naked (current_function_decl
))
7993 /* Sibling call isn't OK if there are no caller-saved registers
7994 since all registers must be preserved before return. */
7995 if (cfun
->machine
->no_caller_saved_registers
)
7998 /* If we are generating position-independent code, we cannot sibcall
7999 optimize direct calls to global functions, as the PLT requires
8000 %ebx be live. (Darwin does not have a PLT.) */
8008 /* If we need to align the outgoing stack, then sibcalling would
8009 unalign the stack, which may break the called function. */
8010 if (ix86_minimum_incoming_stack_boundary (true)
8011 < PREFERRED_STACK_BOUNDARY
)
8016 decl_or_type
= decl
;
8017 type
= TREE_TYPE (decl
);
8021 /* We're looking at the CALL_EXPR, we need the type of the function. */
8022 type
= CALL_EXPR_FN (exp
); /* pointer expression */
8023 type
= TREE_TYPE (type
); /* pointer type */
8024 type
= TREE_TYPE (type
); /* function type */
8025 decl_or_type
= type
;
8028 /* Check that the return value locations are the same. Like
8029 if we are returning floats on the 80387 register stack, we cannot
8030 make a sibcall from a function that doesn't return a float to a
8031 function that does or, conversely, from a function that does return
8032 a float to a function that doesn't; the necessary stack adjustment
8033 would not be executed. This is also the place we notice
8034 differences in the return value ABI. Note that it is ok for one
8035 of the functions to have void return type as long as the return
8036 value of the other is passed in a register. */
8037 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
8038 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
8040 if (STACK_REG_P (a
) || STACK_REG_P (b
))
8042 if (!rtx_equal_p (a
, b
))
8045 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
8047 else if (!rtx_equal_p (a
, b
))
8052 /* The SYSV ABI has more call-clobbered registers;
8053 disallow sibcalls from MS to SYSV. */
8054 if (cfun
->machine
->call_abi
== MS_ABI
8055 && ix86_function_type_abi (type
) == SYSV_ABI
)
8060 /* If this call is indirect, we'll need to be able to use a
8061 call-clobbered register for the address of the target function.
8062 Make sure that all such registers are not used for passing
8063 parameters. Note that DLLIMPORT functions and call to global
8064 function via GOT slot are indirect. */
8066 || (bind_global
&& flag_pic
&& !flag_plt
)
8067 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
8069 /* Check if regparm >= 3 since arg_reg_available is set to
8070 false if regparm == 0. If regparm is 1 or 2, there is
8071 always a call-clobbered register available.
8073 ??? The symbol indirect call doesn't need a call-clobbered
8074 register. But we don't know if this is a symbol indirect
8075 call or not here. */
8076 if (ix86_function_regparm (type
, NULL
) >= 3
8077 && !cfun
->machine
->arg_reg_available
)
8082 /* Otherwise okay. That also includes certain types of indirect calls. */
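/* Illustrative example (not part of this file): the first tail call below
   can become a sibcall; on ia32 the second cannot, because the float
   return value travels through the x87 register stack and the required
   stack adjustment would be skipped.  Names are hypothetical.  */
#if 0
extern int do_int_work (int);
extern float do_float_work (float);

int
tail_ok (int x)
{
  return do_int_work (x + 1);        /* eligible for a jmp-style sibcall */
}

void
tail_blocked (float x)
{
  do_float_work (x);                 /* differing return ABI blocks it */
}
#endif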
8086 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
8087 and "sseregparm" calling convention attributes;
8088 arguments as in struct attribute_spec.handler. */
8091 ix86_handle_cconv_attribute (tree
*node
, tree name
,
8096 if (TREE_CODE (*node
) != FUNCTION_TYPE
8097 && TREE_CODE (*node
) != METHOD_TYPE
8098 && TREE_CODE (*node
) != FIELD_DECL
8099 && TREE_CODE (*node
) != TYPE_DECL
)
8101 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
8103 *no_add_attrs
= true;
8107 /* Can combine regparm with all attributes but fastcall, and thiscall. */
8108 if (is_attribute_p ("regparm", name
))
8112 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
8114 error ("fastcall and regparm attributes are not compatible");
8117 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
8119 error ("regparam and thiscall attributes are not compatible");
8122 cst
= TREE_VALUE (args
);
8123 if (TREE_CODE (cst
) != INTEGER_CST
)
8125 warning (OPT_Wattributes
,
8126 "%qE attribute requires an integer constant argument",
8128 *no_add_attrs
= true;
8130 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
8132 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
8134 *no_add_attrs
= true;
8142 /* Do not warn when emulating the MS ABI. */
8143 if ((TREE_CODE (*node
) != FUNCTION_TYPE
8144 && TREE_CODE (*node
) != METHOD_TYPE
)
8145 || ix86_function_type_abi (*node
) != MS_ABI
)
8146 warning (OPT_Wattributes
, "%qE attribute ignored",
8148 *no_add_attrs
= true;
8152 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
8153 if (is_attribute_p ("fastcall", name
))
8155 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
8157 error ("fastcall and cdecl attributes are not compatible");
8159 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
8161 error ("fastcall and stdcall attributes are not compatible");
8163 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
8165 error ("fastcall and regparm attributes are not compatible");
8167 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
8169 error ("fastcall and thiscall attributes are not compatible");
8173 /* Can combine stdcall with fastcall (redundant), regparm and
8175 else if (is_attribute_p ("stdcall", name
))
8177 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
8179 error ("stdcall and cdecl attributes are not compatible");
8181 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
8183 error ("stdcall and fastcall attributes are not compatible");
8185 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
8187 error ("stdcall and thiscall attributes are not compatible");
8191 /* Can combine cdecl with regparm and sseregparm. */
8192 else if (is_attribute_p ("cdecl", name
))
8194 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
8196 error ("stdcall and cdecl attributes are not compatible");
8198 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
8200 error ("fastcall and cdecl attributes are not compatible");
8202 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
8204 error ("cdecl and thiscall attributes are not compatible");
8207 else if (is_attribute_p ("thiscall", name
))
8209 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
8210 warning (OPT_Wattributes
, "%qE attribute is used for non-class method",
8212 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
8214 error ("stdcall and thiscall attributes are not compatible");
8216 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
8218 error ("fastcall and thiscall attributes are not compatible");
8220 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
8222 error ("cdecl and thiscall attributes are not compatible");
8226 /* Can combine sseregparm with all attributes. */
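/* Illustrative example (not part of this file): regparm combines with
   stdcall or cdecl, but the handler above rejects mixing fastcall with
   regparm (and the other combinations diagnosed above).  Declarations
   are hypothetical and ia32-only.  */
#if 0
__attribute__ ((stdcall, regparm (2)))
int add2 (int a, int b);             /* accepted */

__attribute__ ((fastcall, regparm (2)))
int add2_bad (int a, int b);         /* "not compatible" error */
#endif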
8231 /* The transactional memory builtins are implicitly regparm or fastcall
8232 depending on the ABI. Override the generic do-nothing attribute that
8233 these builtins were declared with, and replace it with one of the two
8234 attributes that we expect elsewhere. */
8237 ix86_handle_tm_regparm_attribute (tree
*node
, tree
, tree
,
8238 int flags
, bool *no_add_attrs
)
8242 /* In no case do we want to add the placeholder attribute. */
8243 *no_add_attrs
= true;
8245 /* The 64-bit ABI is unchanged for transactional memory. */
8249 /* ??? Is there a better way to validate 32-bit windows? We have
8250 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
8251 if (CHECK_STACK_LIMIT
> 0)
8252 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
8255 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
8256 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
8258 decl_attributes (node
, alt
, flags
);
8263 /* This function determines from TYPE the calling-convention. */
8266 ix86_get_callcvt (const_tree type
)
8268 unsigned int ret
= 0;
8273 return IX86_CALLCVT_CDECL
;
8275 attrs
= TYPE_ATTRIBUTES (type
);
8276 if (attrs
!= NULL_TREE
)
8278 if (lookup_attribute ("cdecl", attrs
))
8279 ret
|= IX86_CALLCVT_CDECL
;
8280 else if (lookup_attribute ("stdcall", attrs
))
8281 ret
|= IX86_CALLCVT_STDCALL
;
8282 else if (lookup_attribute ("fastcall", attrs
))
8283 ret
|= IX86_CALLCVT_FASTCALL
;
8284 else if (lookup_attribute ("thiscall", attrs
))
8285 ret
|= IX86_CALLCVT_THISCALL
;
8287 /* Regparam isn't allowed for thiscall and fastcall. */
8288 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
8290 if (lookup_attribute ("regparm", attrs
))
8291 ret
|= IX86_CALLCVT_REGPARM
;
8292 if (lookup_attribute ("sseregparm", attrs
))
8293 ret
|= IX86_CALLCVT_SSEREGPARM
;
8296 if (IX86_BASE_CALLCVT(ret
) != 0)
8300 is_stdarg
= stdarg_p (type
);
8301 if (TARGET_RTD
&& !is_stdarg
)
8302 return IX86_CALLCVT_STDCALL
| ret
;
8306 || TREE_CODE (type
) != METHOD_TYPE
8307 || ix86_function_type_abi (type
) != MS_ABI
)
8308 return IX86_CALLCVT_CDECL
| ret
;
8310 return IX86_CALLCVT_THISCALL
;
8313 /* Return 0 if the attributes for two types are incompatible, 1 if they
8314 are compatible, and 2 if they are nearly compatible (which causes a
8315 warning to be generated). */
8318 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
8320 unsigned int ccvt1
, ccvt2
;
8322 if (TREE_CODE (type1
) != FUNCTION_TYPE
8323 && TREE_CODE (type1
) != METHOD_TYPE
)
8326 ccvt1
= ix86_get_callcvt (type1
);
8327 ccvt2
= ix86_get_callcvt (type2
);
8330 if (ix86_function_regparm (type1
, NULL
)
8331 != ix86_function_regparm (type2
, NULL
))
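/* Illustrative example (not part of this file): because the calling
   conventions differ, the two function types below compare as
   incompatible and the assignment is diagnosed on ia32.  Names are
   hypothetical.  */
#if 0
typedef int (__attribute__ ((stdcall)) *stdcall_fn) (int);

int
plain_cdecl (int x)
{
  return x;
}

stdcall_fn fp = plain_cdecl;         /* incompatible pointer types */
#endif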
8337 /* Return the regparm value for a function with the indicated TYPE and DECL.
8338 DECL may be NULL when calling function indirectly
8339 or considering a libcall. */
8342 ix86_function_regparm (const_tree type
, const_tree decl
)
8349 return (ix86_function_type_abi (type
) == SYSV_ABI
8350 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
8351 ccvt
= ix86_get_callcvt (type
);
8352 regparm
= ix86_regparm
;
8354 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
8356 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
8359 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
8363 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
8365 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
8368 /* Use register calling convention for local functions when possible. */
8370 && TREE_CODE (decl
) == FUNCTION_DECL
)
8372 cgraph_node
*target
= cgraph_node::get (decl
);
8374 target
= target
->function_symbol ();
	  /* Caller and callee must agree on the calling convention, so
	     checking only the caller's 'optimize' here would mean that with
	     __attribute__((optimize (...))) the caller could use the regparm
	     convention and the callee not, or vice versa.  Instead look at
	     whether the callee is optimized or not.  */
8381 if (target
&& opt_for_fn (target
->decl
, optimize
)
8382 && !(profile_flag
&& !flag_fentry
))
8384 cgraph_local_info
*i
= &target
->local
;
8385 if (i
&& i
->local
&& i
->can_change_signature
)
8387 int local_regparm
, globals
= 0, regno
;
8389 /* Make sure no regparm register is taken by a
8390 fixed register variable. */
8391 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
;
8393 if (fixed_regs
[local_regparm
])
8396 /* We don't want to use regparm(3) for nested functions as
8397 these use a static chain pointer in the third argument. */
8398 if (local_regparm
== 3 && DECL_STATIC_CHAIN (target
->decl
))
8401 /* Save a register for the split stack. */
8402 if (flag_split_stack
)
8404 if (local_regparm
== 3)
8406 else if (local_regparm
== 2
8407 && DECL_STATIC_CHAIN (target
->decl
))
	      /* Each fixed register usage increases register pressure,
		 so fewer registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
8415 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
8416 if (fixed_regs
[regno
])
8420 = globals
< local_regparm
? local_regparm
- globals
: 0;
8422 if (local_regparm
> regparm
)
8423 regparm
= local_regparm
;
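/* Illustrative example (not part of this file): on ia32, regparm (3)
   passes the first three integer arguments in EAX, EDX and ECX instead
   of on the stack; that register count is what the function above
   computes.  The function name is hypothetical.  */
#if 0
__attribute__ ((regparm (3)))
int
madd (int a, int b, int c)
{
  return a * b + c;
}
#endif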
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling a function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by an error.  This is used in situations where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */
8440 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
8442 gcc_assert (!TARGET_64BIT
);
8444 /* Use SSE registers to pass SFmode and DFmode arguments if requested
8445 by the sseregparm attribute. */
8446 if (TARGET_SSEREGPARM
8447 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
8454 error ("calling %qD with attribute sseregparm without "
8455 "SSE/SSE2 enabled", decl
);
8457 error ("calling %qT with attribute sseregparm without "
8458 "SSE/SSE2 enabled", type
);
8469 cgraph_node
*target
= cgraph_node::get (decl
);
8471 target
= target
->function_symbol ();
8473 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
8474 (and DFmode for SSE2) arguments in SSE registers. */
8476 /* TARGET_SSE_MATH */
8477 && (target_opts_for_fn (target
->decl
)->x_ix86_fpmath
& FPMATH_SSE
)
8478 && opt_for_fn (target
->decl
, optimize
)
8479 && !(profile_flag
&& !flag_fentry
))
8481 cgraph_local_info
*i
= &target
->local
;
8482 if (i
&& i
->local
&& i
->can_change_signature
)
8484 /* Refuse to produce wrong code when local function with SSE enabled
8485 is called from SSE disabled function.
8486 FIXME: We need a way to detect these cases cross-ltrans partition
8487 and avoid using SSE calling conventions on local functions called
8488 from function with SSE disabled. For now at least delay the
8489 warning until we know we are going to produce wrong code.
8491 if (!TARGET_SSE
&& warn
)
8493 return TARGET_SSE2_P (target_opts_for_fn (target
->decl
)
8494 ->x_ix86_isa_flags
) ? 2 : 1;
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
8537 /* Value is the number of bytes of arguments automatically
8538 popped when returning from a subroutine call.
8539 FUNDECL is the declaration node of the function (as a tree),
8540 FUNTYPE is the data type of the function (as a tree),
8541 or for a library call it is an identifier node for the subroutine name.
8542 SIZE is the number of bytes of arguments passed on the stack.
8544 On the 80386, the RTD insn may be used to pop them if the number
8545 of args is fixed, but if the number is variable then the caller
8546 must pop them all. RTD can't be used for library calls now
8547 because the library is compiled with the Unix compiler.
8548 Use of RTD is a selectable option, since it is incompatible with
8549 standard Unix calling sequences. If the option is not selected,
8550 the caller must always pop the args.
8552 The attribute stdcall is equivalent to RTD on a per module basis. */
8555 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
8559 /* None of the 64-bit ABIs pop arguments. */
8563 ccvt
= ix86_get_callcvt (funtype
);
8565 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
8566 | IX86_CALLCVT_THISCALL
)) != 0
8567 && ! stdarg_p (funtype
))
8570 /* Lose any fake structure return argument if it is passed on the stack. */
8571 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
8572 && !ix86_keep_aggregate_return_pointer (funtype
))
8574 int nregs
= ix86_function_regparm (funtype
, fundecl
);
8576 return GET_MODE_SIZE (Pmode
);
8582 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8585 ix86_legitimate_combined_insn (rtx_insn
*insn
)
8589 /* Check operand constraints in case hard registers were propagated
8590 into insn pattern. This check prevents combine pass from
8591 generating insn patterns with invalid hard register operands.
8592 These invalid insns can eventually confuse reload to error out
8593 with a spill failure. See also PRs 46829 and 46843. */
8595 gcc_assert (INSN_CODE (insn
) >= 0);
8597 extract_insn (insn
);
8598 preprocess_constraints (insn
);
8600 int n_operands
= recog_data
.n_operands
;
8601 int n_alternatives
= recog_data
.n_alternatives
;
8602 for (i
= 0; i
< n_operands
; i
++)
8604 rtx op
= recog_data
.operand
[i
];
8605 machine_mode mode
= GET_MODE (op
);
8606 const operand_alternative
*op_alt
;
8611 /* A unary operator may be accepted by the predicate, but it
8612 is irrelevant for matching constraints. */
8618 if (REG_P (SUBREG_REG (op
))
8619 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
8620 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
8621 GET_MODE (SUBREG_REG (op
)),
8624 op
= SUBREG_REG (op
);
8627 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
8630 op_alt
= recog_op_alt
;
8632 /* Operand has no constraints, anything is OK. */
8633 win
= !n_alternatives
;
8635 alternative_mask preferred
= get_preferred_alternatives (insn
);
8636 for (j
= 0; j
< n_alternatives
; j
++, op_alt
+= n_operands
)
8638 if (!TEST_BIT (preferred
, j
))
8640 if (op_alt
[i
].anything_ok
8641 || (op_alt
[i
].matches
!= -1
8643 (recog_data
.operand
[i
],
8644 recog_data
.operand
[op_alt
[i
].matches
]))
8645 || reg_fits_class_p (op
, op_alt
[i
].cl
, offset
, mode
))
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
				     : HOST_WIDE_INT_C (0x7fff8000))
		     : (HOST_WIDE_INT_1 << 29);
}
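/* Illustrative sketch (not part of this file): AddressSanitizer maps
   every 8 application bytes to one shadow byte at (addr >> 3) + offset,
   where the offset is the value returned by the hook above (0x7fff8000
   for x86-64 Linux LP64).  The helper name is hypothetical.  */
#if 0
#include <stdint.h>

static inline uint8_t *
asan_shadow_for (uintptr_t addr)
{
  const uintptr_t shadow_offset = 0x7fff8000u;   /* LP64, non-Mach-O */
  return (uint8_t *) ((addr >> 3) + shadow_offset);
}
#endif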
8669 /* Argument support functions. */
8671 /* Return true when register may be used to pass function parameters. */
8673 ix86_function_arg_regno_p (int regno
)
8676 enum calling_abi call_abi
;
8677 const int *parm_regs
;
8679 if (TARGET_MPX
&& BND_REGNO_P (regno
))
8685 return (regno
< REGPARM_MAX
8686 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
8688 return (regno
< REGPARM_MAX
8689 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
8690 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
8691 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
8692 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
8695 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
8696 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
8699 /* TODO: The function should depend on current function ABI but
8700 builtins.c would need updating then. Therefore we use the
8702 call_abi
= ix86_cfun_abi ();
8704 /* RAX is used as hidden argument to va_arg functions. */
8705 if (call_abi
== SYSV_ABI
&& regno
== AX_REG
)
8708 if (call_abi
== MS_ABI
)
8709 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
8711 parm_regs
= x86_64_int_parameter_registers
;
8713 for (i
= 0; i
< (call_abi
== MS_ABI
8714 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
8715 if (regno
== parm_regs
[i
])
8720 /* Return if we do not know how to pass TYPE solely in registers. */
8723 ix86_must_pass_in_stack (machine_mode mode
, const_tree type
)
8725 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
8728 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
8729 The layout_type routine is crafty and tries to trick us into passing
8730 currently unsupported vector types on the stack by using TImode. */
8731 return (!TARGET_64BIT
&& mode
== TImode
8732 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* We add this as a workaround in order to use the libc_has_function
   hook from the machine description.  */

bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class);
}
8759 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
8760 specifying the call abi used. */
8762 ix86_function_type_abi (const_tree fntype
)
8764 enum calling_abi abi
= ix86_abi
;
8766 if (fntype
== NULL_TREE
|| TYPE_ATTRIBUTES (fntype
) == NULL_TREE
)
8770 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
8773 if (TARGET_X32
&& !warned
)
8775 error ("X32 does not support ms_abi attribute");
8781 else if (abi
== MS_ABI
8782 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
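/* Illustrative example (not part of this file): on x86-64 the per-type
   attributes below override the default ABI reported by
   ix86_function_type_abi, which changes both the argument registers and
   the call-clobbered set at the call site.  Declarations are
   hypothetical.  */
#if 0
__attribute__ ((ms_abi))   int win_style (int a, int b);    /* RCX, RDX */
__attribute__ ((sysv_abi)) int sysv_style (int a, int b);   /* RDI, RSI */
#endif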
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
   specifying the call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  return cfun ? cfun->machine->call_abi : ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}

static bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}
8825 /* Write the extra assembler code needed to declare a function properly. */
8828 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
8831 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
8835 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
8836 unsigned int filler_cc
= 0xcccccccc;
8838 for (i
= 0; i
< filler_count
; i
+= 4)
8839 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
8842 #ifdef SUBTARGET_ASM_UNWIND_INIT
8843 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
8846 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
8848 /* Output magic byte marker, if hot-patch attribute is set. */
8853 /* leaq [%rsp + 0], %rsp */
8854 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
8859 /* movl.s %edi, %edi
8861 movl.s %esp, %ebp */
8862 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file
);
/* Implementation of the call ABI switching target hook.  Depending on
   FNDECL, the specific call register sets are selected.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
8876 /* Return 1 if pseudo register should be created and used to hold
8877 GOT address for PIC code. */
8879 ix86_use_pseudo_pic_reg (void)
8882 && (ix86_cmodel
== CM_SMALL_PIC
8889 /* Initialize large model PIC register. */
8892 ix86_init_large_pic_reg (unsigned int tmp_regno
)
8894 rtx_code_label
*label
;
8897 gcc_assert (Pmode
== DImode
);
8898 label
= gen_label_rtx ();
8900 LABEL_PRESERVE_P (label
) = 1;
8901 tmp_reg
= gen_rtx_REG (Pmode
, tmp_regno
);
8902 gcc_assert (REGNO (pic_offset_table_rtx
) != tmp_regno
);
8903 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
8905 emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
8906 emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
8907 pic_offset_table_rtx
, tmp_reg
));
8908 const char *name
= LABEL_NAME (label
);
8909 PUT_CODE (label
, NOTE
);
8910 NOTE_KIND (label
) = NOTE_INSN_DELETED_LABEL
;
8911 NOTE_DELETED_LABEL_NAME (label
) = name
;
8914 /* Create and initialize PIC register if required. */
8916 ix86_init_pic_reg (void)
8921 if (!ix86_use_pseudo_pic_reg ())
8928 if (ix86_cmodel
== CM_LARGE_PIC
)
8929 ix86_init_large_pic_reg (R11_REG
);
8931 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
8935 /* If there is future mcount call in the function it is more profitable
8936 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
8937 rtx reg
= crtl
->profile
8938 ? gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
)
8939 : pic_offset_table_rtx
;
8940 rtx_insn
*insn
= emit_insn (gen_set_got (reg
));
8941 RTX_FRAME_RELATED_P (insn
) = 1;
8943 emit_move_insn (pic_offset_table_rtx
, reg
);
8944 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
8950 entry_edge
= single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
8951 insert_insn_on_edge (seq
, entry_edge
);
8952 commit_one_edge_insertion (entry_edge
);
8955 /* Initialize a variable CUM of type CUMULATIVE_ARGS
8956 for a call to a function whose data type is FNTYPE.
8957 For a library call, FNTYPE is 0. */
8960 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
8961 tree fntype
, /* tree ptr for function decl */
8962 rtx libname
, /* SYMBOL_REF of library name or 0 */
8966 struct cgraph_local_info
*i
= NULL
;
8967 struct cgraph_node
*target
= NULL
;
8969 memset (cum
, 0, sizeof (*cum
));
8973 target
= cgraph_node::get (fndecl
);
8976 target
= target
->function_symbol ();
8977 i
= cgraph_node::local_info (target
->decl
);
8978 cum
->call_abi
= ix86_function_abi (target
->decl
);
8981 cum
->call_abi
= ix86_function_abi (fndecl
);
8984 cum
->call_abi
= ix86_function_type_abi (fntype
);
8986 cum
->caller
= caller
;
8988 /* Set up the number of registers to use for passing arguments. */
8989 cum
->nregs
= ix86_regparm
;
8992 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
8993 ? X86_64_REGPARM_MAX
8994 : X86_64_MS_REGPARM_MAX
);
8998 cum
->sse_nregs
= SSE_REGPARM_MAX
;
9001 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
9002 ? X86_64_SSE_REGPARM_MAX
9003 : X86_64_MS_SSE_REGPARM_MAX
);
9007 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
9008 cum
->warn_avx512f
= true;
9009 cum
->warn_avx
= true;
9010 cum
->warn_sse
= true;
9011 cum
->warn_mmx
= true;
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if a function uses
     va_start, so for local functions maybe_vaarg can be made more aggressive.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
9019 if (i
&& i
->local
&& i
->can_change_signature
)
9020 fntype
= TREE_TYPE (target
->decl
);
9021 cum
->stdarg
= stdarg_p (fntype
);
9022 cum
->maybe_vaarg
= (fntype
9023 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
9026 cum
->bnd_regno
= FIRST_BND_REG
;
9027 cum
->bnds_in_bt
= 0;
9028 cum
->force_bnd_pass
= 0;
9033 /* If there are variable arguments, then we won't pass anything
9034 in registers in 32-bit mode. */
9035 if (stdarg_p (fntype
))
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is a scratch register available for indirect
	     functions.  */
	  cfun->machine->arg_reg_available = true;
9044 cum
->warn_avx512f
= false;
9045 cum
->warn_avx
= false;
9046 cum
->warn_sse
= false;
9047 cum
->warn_mmx
= false;
9051 /* Use ecx and edx registers if function has fastcall attribute,
9052 else look for regparm information. */
9055 unsigned int ccvt
= ix86_get_callcvt (fntype
);
9056 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
9059 cum
->fastcall
= 1; /* Same first register as in fastcall. */
9061 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
9067 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
9070 /* Set up the number of SSE registers used for passing SFmode
9071 and DFmode arguments. Warn for mismatching ABI. */
9072 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
9075 cfun
->machine
->arg_reg_available
= (cum
->nregs
> 0);
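/* For reference (added commentary, not part of the original sources): with
   these settings the SysV x86-64 ABI passes up to 6 integer arguments in
   RDI, RSI, RDX, RCX, R8 and R9 and up to 8 float/vector arguments in
   XMM0-XMM7, while the Microsoft x64 ABI uses 4 positional slots shared
   between RCX, RDX, R8, R9 and XMM0-XMM3.  In 32-bit mode the default is
   ix86_regparm (normally 0) integer registers.  */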
9078 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
9079 But in the case of vector types, it is some vector mode.
9081 When we have only some of our vector isa extensions enabled, then there
9082 are some modes for which vector_mode_supported_p is false. For these
9083 modes, the generic vector support in gcc will choose some non-vector mode
9084 in order to implement the type. By computing the natural mode, we'll
9085 select the proper ABI location for the operand and not depend on whatever
9086 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if CUM
   isn't NULL.

   If INT_RETURN is true, warn about the ABI change if the vector mode isn't
   available for the function return value.  */
9096 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
9099 machine_mode mode
= TYPE_MODE (type
);
9101 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
9103 HOST_WIDE_INT size
= int_size_in_bytes (type
);
9104 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
9105 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
9106 && TYPE_VECTOR_SUBPARTS (type
) > 1)
9108 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
9110 /* There are no XFmode vector modes. */
9111 if (innermode
== XFmode
)
9114 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
9115 mode
= MIN_MODE_VECTOR_FLOAT
;
9117 mode
= MIN_MODE_VECTOR_INT
;
9119 /* Get the mode which has this inner mode and number of units. */
9120 FOR_EACH_MODE_FROM (mode
, mode
)
9121 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
9122 && GET_MODE_INNER (mode
) == innermode
)
9124 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
9126 static bool warnedavx512f
;
9127 static bool warnedavx512f_ret
;
9129 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
9131 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
9132 "without AVX512F enabled changes the ABI"))
9133 warnedavx512f
= true;
9135 else if (in_return
&& !warnedavx512f_ret
)
9137 if (warning (OPT_Wpsabi
, "AVX512F vector return "
9138 "without AVX512F enabled changes the ABI"))
9139 warnedavx512f_ret
= true;
9142 return TYPE_MODE (type
);
9144 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
9146 static bool warnedavx
;
9147 static bool warnedavx_ret
;
9149 if (cum
&& cum
->warn_avx
&& !warnedavx
)
9151 if (warning (OPT_Wpsabi
, "AVX vector argument "
9152 "without AVX enabled changes the ABI"))
9155 else if (in_return
&& !warnedavx_ret
)
9157 if (warning (OPT_Wpsabi
, "AVX vector return "
9158 "without AVX enabled changes the ABI"))
9159 warnedavx_ret
= true;
9162 return TYPE_MODE (type
);
9164 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
9168 static bool warnedsse
;
9169 static bool warnedsse_ret
;
9171 if (cum
&& cum
->warn_sse
&& !warnedsse
)
9173 if (warning (OPT_Wpsabi
, "SSE vector argument "
9174 "without SSE enabled changes the ABI"))
9177 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
9179 if (warning (OPT_Wpsabi
, "SSE vector return "
9180 "without SSE enabled changes the ABI"))
9181 warnedsse_ret
= true;
9184 else if ((size
== 8 && !TARGET_64BIT
)
9186 || cfun
->machine
->func_type
== TYPE_NORMAL
)
9190 static bool warnedmmx
;
9191 static bool warnedmmx_ret
;
9193 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
9195 if (warning (OPT_Wpsabi
, "MMX vector argument "
9196 "without MMX enabled changes the ABI"))
9199 else if (in_return
&& !warnedmmx_ret
)
9201 if (warning (OPT_Wpsabi
, "MMX vector return "
9202 "without MMX enabled changes the ABI"))
9203 warnedmmx_ret
= true;
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
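/* Design note (added commentary, not part of the original sources): the
   PARALLEL form pairs the register with a zero byte offset, which is how
   the middle end expects a BLKmode value that nevertheless lives entirely
   in a single hard register to be described.  */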
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
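/* Worked example (added commentary, not part of the original sources): for
   an eightbyte containing both an int and a float, e.g.
   struct { int i; float f; }, the int classifies as INTEGERSI and the
   float as SSE; rule #4 merges them to INTEGER, so the whole 8-byte struct
   is passed in a single integer register.  */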
9286 /* Classify the argument of type TYPE and mode MODE.
9287 CLASSES will be filled by the register class used to pass each word
9288 of the operand. The number of words is returned. In case the parameter
9289 should be passed in memory, 0 is returned. As a special case for zero
9290 sized containers, classes[0] will be NO_CLASS and 1 is returned.
9292 BIT_OFFSET is used internally for handling records and specifies offset
9293 of the offset in bits modulo 512 to avoid overflow cases.
9295 See the x86-64 PS ABI for details.
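/* Worked example (added commentary, not part of the original sources,
   assuming the SysV x86-64 ABI): for struct { double d; long l; } the
   first eightbyte classifies as SSEDF and the second as INTEGER, so
   classify_argument returns 2; an aggregate larger than 16 bytes that is
   not a single SSE vector yields 0 and is passed in memory.  */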
9299 classify_argument (machine_mode mode
, const_tree type
,
9300 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
9302 HOST_WIDE_INT bytes
=
9303 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
9304 int words
= CEIL (bytes
+ (bit_offset
% 64) / 8, UNITS_PER_WORD
);
9306 /* Variable sized entities are always passed/returned in memory. */
9310 if (mode
!= VOIDmode
9311 && targetm
.calls
.must_pass_in_stack (mode
, type
))
9314 if (type
&& AGGREGATE_TYPE_P (type
))
9318 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
9320 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
9324 for (i
= 0; i
< words
; i
++)
9325 classes
[i
] = X86_64_NO_CLASS
;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle it as a special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}
9335 /* Classify each field of record and merge classes. */
9336 switch (TREE_CODE (type
))
9339 /* And now merge the fields of structure. */
9340 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
9342 if (TREE_CODE (field
) == FIELD_DECL
)
9346 if (TREE_TYPE (field
) == error_mark_node
)
9349 /* Bitfields are always classified as integer. Handle them
9350 early, since later code would consider them to be
9351 misaligned integers. */
9352 if (DECL_BIT_FIELD (field
))
9354 for (i
= (int_bit_position (field
)
9355 + (bit_offset
% 64)) / 8 / 8;
9356 i
< ((int_bit_position (field
) + (bit_offset
% 64))
9357 + tree_to_shwi (DECL_SIZE (field
))
9360 merge_classes (X86_64_INTEGER_CLASS
,
9367 type
= TREE_TYPE (field
);
9369 /* Flexible array member is ignored. */
9370 if (TYPE_MODE (type
) == BLKmode
9371 && TREE_CODE (type
) == ARRAY_TYPE
9372 && TYPE_SIZE (type
) == NULL_TREE
9373 && TYPE_DOMAIN (type
) != NULL_TREE
9374 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
9379 if (!warned
&& warn_psabi
)
9382 inform (input_location
,
9383 "the ABI of passing struct with"
9384 " a flexible array member has"
9385 " changed in GCC 4.4");
9389 num
= classify_argument (TYPE_MODE (type
), type
,
9391 (int_bit_position (field
)
9392 + bit_offset
) % 512);
9395 pos
= (int_bit_position (field
)
9396 + (bit_offset
% 64)) / 8 / 8;
9397 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
9399 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
9406 /* Arrays are handled as small records. */
9409 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
9410 TREE_TYPE (type
), subclasses
, bit_offset
);
9414 /* The partial classes are now full classes. */
9415 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
9416 subclasses
[0] = X86_64_SSE_CLASS
;
9417 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
9418 && !((bit_offset
% 64) == 0 && bytes
== 4))
9419 subclasses
[0] = X86_64_INTEGER_CLASS
;
9421 for (i
= 0; i
< words
; i
++)
9422 classes
[i
] = subclasses
[i
% num
];
9427 case QUAL_UNION_TYPE
:
9428 /* Unions are similar to RECORD_TYPE but offset is always 0.
9430 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
9432 if (TREE_CODE (field
) == FIELD_DECL
)
9436 if (TREE_TYPE (field
) == error_mark_node
)
9439 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
9440 TREE_TYPE (field
), subclasses
,
9444 for (i
= 0; i
< num
&& i
< words
; i
++)
9445 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
9460 if (classes
[0] != X86_64_SSE_CLASS
)
9463 for (i
= 1; i
< words
; i
++)
9464 if (classes
[i
] != X86_64_SSEUP_CLASS
)
9468 /* Final merger cleanup. */
9469 for (i
= 0; i
< words
; i
++)
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
[i
] == X86_64_MEMORY_CLASS
)
9476 /* The X86_64_SSEUP_CLASS should be always preceded by
9477 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
9478 if (classes
[i
] == X86_64_SSEUP_CLASS
9479 && classes
[i
- 1] != X86_64_SSE_CLASS
9480 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
9482 /* The first one should never be X86_64_SSEUP_CLASS. */
9483 gcc_assert (i
!= 0);
9484 classes
[i
] = X86_64_SSE_CLASS
;
9487 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
9488 everything should be passed in memory. */
9489 if (classes
[i
] == X86_64_X87UP_CLASS
9490 && (classes
[i
- 1] != X86_64_X87_CLASS
))
9494 /* The first one should never be X86_64_X87UP_CLASS. */
9495 gcc_assert (i
!= 0);
9496 if (!warned
&& warn_psabi
)
9499 inform (input_location
,
9500 "the ABI of passing union with long double"
9501 " has changed in GCC 4.4");
  /* Compute alignment needed.  We align all types to natural boundaries with
     the exception of XFmode, which is aligned to 64 bits.  */
9511 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
9513 int mode_alignment
= GET_MODE_BITSIZE (mode
);
9516 mode_alignment
= 128;
9517 else if (mode
== XCmode
)
9518 mode_alignment
= 256;
9519 if (COMPLEX_MODE_P (mode
))
9520 mode_alignment
/= 2;
9521 /* Misaligned fields are always returned in memory. */
9522 if (bit_offset
% mode_alignment
)
9526 /* for V1xx modes, just use the base mode */
9527 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
9528 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
9529 mode
= GET_MODE_INNER (mode
);
9531 /* Classification of atomic types. */
9536 classes
[0] = X86_64_SSE_CLASS
;
9539 classes
[0] = X86_64_SSE_CLASS
;
9540 classes
[1] = X86_64_SSEUP_CLASS
;
9550 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
9552 /* Analyze last 128 bits only. */
9553 size
= (size
- 1) & 0x7f;
9557 classes
[0] = X86_64_INTEGERSI_CLASS
;
9562 classes
[0] = X86_64_INTEGER_CLASS
;
9565 else if (size
< 64+32)
9567 classes
[0] = X86_64_INTEGER_CLASS
;
9568 classes
[1] = X86_64_INTEGERSI_CLASS
;
9571 else if (size
< 64+64)
9573 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
9581 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
9585 /* OImode shouldn't be used directly. */
9590 if (!(bit_offset
% 64))
9591 classes
[0] = X86_64_SSESF_CLASS
;
9593 classes
[0] = X86_64_SSE_CLASS
;
9596 classes
[0] = X86_64_SSEDF_CLASS
;
9599 classes
[0] = X86_64_X87_CLASS
;
9600 classes
[1] = X86_64_X87UP_CLASS
;
9603 classes
[0] = X86_64_SSE_CLASS
;
9604 classes
[1] = X86_64_SSEUP_CLASS
;
9607 classes
[0] = X86_64_SSE_CLASS
;
9608 if (!(bit_offset
% 64))
9614 if (!warned
&& warn_psabi
)
9617 inform (input_location
,
9618 "the ABI of passing structure with complex float"
9619 " member has changed in GCC 4.4");
9621 classes
[1] = X86_64_SSESF_CLASS
;
9625 classes
[0] = X86_64_SSEDF_CLASS
;
9626 classes
[1] = X86_64_SSEDF_CLASS
;
9629 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
      /* This mode is larger than 16 bytes.  */
9640 classes
[0] = X86_64_SSE_CLASS
;
9641 classes
[1] = X86_64_SSEUP_CLASS
;
9642 classes
[2] = X86_64_SSEUP_CLASS
;
9643 classes
[3] = X86_64_SSEUP_CLASS
;
9651 classes
[0] = X86_64_SSE_CLASS
;
9652 classes
[1] = X86_64_SSEUP_CLASS
;
9653 classes
[2] = X86_64_SSEUP_CLASS
;
9654 classes
[3] = X86_64_SSEUP_CLASS
;
9655 classes
[4] = X86_64_SSEUP_CLASS
;
9656 classes
[5] = X86_64_SSEUP_CLASS
;
9657 classes
[6] = X86_64_SSEUP_CLASS
;
9658 classes
[7] = X86_64_SSEUP_CLASS
;
9666 classes
[0] = X86_64_SSE_CLASS
;
9667 classes
[1] = X86_64_SSEUP_CLASS
;
9675 classes
[0] = X86_64_SSE_CLASS
;
9681 gcc_assert (VECTOR_MODE_P (mode
));
9686 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
9688 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
9689 classes
[0] = X86_64_INTEGERSI_CLASS
;
9691 classes
[0] = X86_64_INTEGER_CLASS
;
9692 classes
[1] = X86_64_INTEGER_CLASS
;
9693 return 1 + (bytes
> 8);
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}
9740 /* Construct container for the argument used by GCC interface. See
9741 FUNCTION_ARG for the detailed description. */
9744 construct_container (machine_mode mode
, machine_mode orig_mode
,
9745 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
9746 const int *intreg
, int sse_regno
)
9748 /* The following variables hold the static issued_error state. */
9749 static bool issued_sse_arg_error
;
9750 static bool issued_sse_ret_error
;
9751 static bool issued_x87_ret_error
;
9753 machine_mode tmpmode
;
9755 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
9756 enum x86_64_reg_class regclass
[MAX_CLASSES
];
9760 int needed_sseregs
, needed_intregs
;
9761 rtx exp
[MAX_CLASSES
];
9764 n
= classify_argument (mode
, type
, regclass
, 0);
9767 if (examine_argument (mode
, type
, in_return
, &needed_intregs
,
9770 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
9773 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
9774 some less clueful developer tries to use floating-point anyway. */
9775 if (needed_sseregs
&& !TARGET_SSE
)
9779 if (!issued_sse_ret_error
)
9781 error ("SSE register return with SSE disabled");
9782 issued_sse_ret_error
= true;
9785 else if (!issued_sse_arg_error
)
9787 error ("SSE register argument with SSE disabled");
9788 issued_sse_arg_error
= true;
9793 /* Likewise, error if the ABI requires us to return values in the
9794 x87 registers and the user specified -mno-80387. */
9795 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
9796 for (i
= 0; i
< n
; i
++)
9797 if (regclass
[i
] == X86_64_X87_CLASS
9798 || regclass
[i
] == X86_64_X87UP_CLASS
9799 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
9801 if (!issued_x87_ret_error
)
9803 error ("x87 register return with x87 disabled");
9804 issued_x87_ret_error
= true;
9809 /* First construct simple cases. Avoid SCmode, since we want to use
9810 single register to pass this type. */
9811 if (n
== 1 && mode
!= SCmode
)
9812 switch (regclass
[0])
9814 case X86_64_INTEGER_CLASS
:
9815 case X86_64_INTEGERSI_CLASS
:
9816 return gen_rtx_REG (mode
, intreg
[0]);
9817 case X86_64_SSE_CLASS
:
9818 case X86_64_SSESF_CLASS
:
9819 case X86_64_SSEDF_CLASS
:
9820 if (mode
!= BLKmode
)
9821 return gen_reg_or_parallel (mode
, orig_mode
,
9822 SSE_REGNO (sse_regno
));
9824 case X86_64_X87_CLASS
:
9825 case X86_64_COMPLEX_X87_CLASS
:
9826 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
9827 case X86_64_NO_CLASS
:
9828 /* Zero sized array, struct or class. */
9834 && regclass
[0] == X86_64_SSE_CLASS
9835 && regclass
[1] == X86_64_SSEUP_CLASS
9837 return gen_reg_or_parallel (mode
, orig_mode
,
9838 SSE_REGNO (sse_regno
));
9840 && regclass
[0] == X86_64_SSE_CLASS
9841 && regclass
[1] == X86_64_SSEUP_CLASS
9842 && regclass
[2] == X86_64_SSEUP_CLASS
9843 && regclass
[3] == X86_64_SSEUP_CLASS
9845 return gen_reg_or_parallel (mode
, orig_mode
,
9846 SSE_REGNO (sse_regno
));
9848 && regclass
[0] == X86_64_SSE_CLASS
9849 && regclass
[1] == X86_64_SSEUP_CLASS
9850 && regclass
[2] == X86_64_SSEUP_CLASS
9851 && regclass
[3] == X86_64_SSEUP_CLASS
9852 && regclass
[4] == X86_64_SSEUP_CLASS
9853 && regclass
[5] == X86_64_SSEUP_CLASS
9854 && regclass
[6] == X86_64_SSEUP_CLASS
9855 && regclass
[7] == X86_64_SSEUP_CLASS
9857 return gen_reg_or_parallel (mode
, orig_mode
,
9858 SSE_REGNO (sse_regno
));
9860 && regclass
[0] == X86_64_X87_CLASS
9861 && regclass
[1] == X86_64_X87UP_CLASS
)
9862 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
9865 && regclass
[0] == X86_64_INTEGER_CLASS
9866 && regclass
[1] == X86_64_INTEGER_CLASS
9867 && (mode
== CDImode
|| mode
== TImode
)
9868 && intreg
[0] + 1 == intreg
[1])
9869 return gen_rtx_REG (mode
, intreg
[0]);
9871 /* Otherwise figure out the entries of the PARALLEL. */
9872 for (i
= 0; i
< n
; i
++)
9876 switch (regclass
[i
])
9878 case X86_64_NO_CLASS
:
9880 case X86_64_INTEGER_CLASS
:
9881 case X86_64_INTEGERSI_CLASS
:
9882 /* Merge TImodes on aligned occasions here too. */
9883 if (i
* 8 + 8 > bytes
)
9885 unsigned int tmpbits
= (bytes
- i
* 8) * BITS_PER_UNIT
;
9886 if (!int_mode_for_size (tmpbits
, 0).exists (&tmpmode
))
	    /* We've requested 24 bytes that we don't have a mode for.
	       Use DImode.  */
9891 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
9896 = gen_rtx_EXPR_LIST (VOIDmode
,
9897 gen_rtx_REG (tmpmode
, *intreg
),
9901 case X86_64_SSESF_CLASS
:
9903 = gen_rtx_EXPR_LIST (VOIDmode
,
9904 gen_rtx_REG (SFmode
,
9905 SSE_REGNO (sse_regno
)),
9909 case X86_64_SSEDF_CLASS
:
9911 = gen_rtx_EXPR_LIST (VOIDmode
,
9912 gen_rtx_REG (DFmode
,
9913 SSE_REGNO (sse_regno
)),
9917 case X86_64_SSE_CLASS
:
9925 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
9935 && regclass
[1] == X86_64_SSEUP_CLASS
9936 && regclass
[2] == X86_64_SSEUP_CLASS
9937 && regclass
[3] == X86_64_SSEUP_CLASS
);
9943 && regclass
[1] == X86_64_SSEUP_CLASS
9944 && regclass
[2] == X86_64_SSEUP_CLASS
9945 && regclass
[3] == X86_64_SSEUP_CLASS
9946 && regclass
[4] == X86_64_SSEUP_CLASS
9947 && regclass
[5] == X86_64_SSEUP_CLASS
9948 && regclass
[6] == X86_64_SSEUP_CLASS
9949 && regclass
[7] == X86_64_SSEUP_CLASS
);
9957 = gen_rtx_EXPR_LIST (VOIDmode
,
9958 gen_rtx_REG (tmpmode
,
9959 SSE_REGNO (sse_regno
)),
9968 /* Empty aligned struct, union or class. */
9972 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
9973 for (i
= 0; i
< nexps
; i
++)
9974 XVECEXP (ret
, 0, i
) = exp
[i
];
9978 /* Update the data in CUM to advance over an argument of mode MODE
9979 and data type TYPE. (TYPE is null for libcalls where that information
9980 may not be available.)
   Return the number of integer registers advanced over.  */
9985 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
9986 const_tree type
, HOST_WIDE_INT bytes
,
9987 HOST_WIDE_INT words
)
9990 bool error_p
= false;
9994 /* Intel MCU psABI passes scalars and aggregates no larger than 8
9995 bytes in registers. */
9996 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
10016 cum
->words
+= words
;
10017 cum
->nregs
-= words
;
10018 cum
->regno
+= words
;
10019 if (cum
->nregs
>= 0)
10021 if (cum
->nregs
<= 0)
10024 cfun
->machine
->arg_reg_available
= false;
10030 /* OImode shouldn't be used directly. */
10031 gcc_unreachable ();
10034 if (cum
->float_in_sse
== -1)
10036 if (cum
->float_in_sse
< 2)
10040 if (cum
->float_in_sse
== -1)
10042 if (cum
->float_in_sse
< 1)
10065 if (!type
|| !AGGREGATE_TYPE_P (type
))
10067 cum
->sse_words
+= words
;
10068 cum
->sse_nregs
-= 1;
10069 cum
->sse_regno
+= 1;
10070 if (cum
->sse_nregs
<= 0)
10072 cum
->sse_nregs
= 0;
10073 cum
->sse_regno
= 0;
10084 if (!type
|| !AGGREGATE_TYPE_P (type
))
10086 cum
->mmx_words
+= words
;
10087 cum
->mmx_nregs
-= 1;
10088 cum
->mmx_regno
+= 1;
10089 if (cum
->mmx_nregs
<= 0)
10091 cum
->mmx_nregs
= 0;
10092 cum
->mmx_regno
= 0;
10099 cum
->float_in_sse
= 0;
10100 error ("calling %qD with SSE calling convention without "
10101 "SSE/SSE2 enabled", cum
->decl
);
10102 sorry ("this is a GCC bug that can be worked around by adding "
10103 "attribute used to function called");
static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}
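/* Example (added commentary, not part of the original sources): under the
   SysV x86-64 ABI the seventh integer argument no longer fits once
   cum->nregs has reached zero, so the else branch above rounds cum->words
   up to the argument's boundary and advances it by the argument's size in
   words; that argument then lives on the stack.  */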
static int
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
      return 1;
    }
  return 0;
}
10155 /* Update the data in CUM to advance over an argument of mode MODE and
10156 data type TYPE. (TYPE is null for libcalls where that information
10157 may not be available.) */
10160 ix86_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
10161 const_tree type
, bool named
)
10163 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10164 HOST_WIDE_INT bytes
, words
;
10167 /* The argument of interrupt handler is a special case and is
10168 handled in ix86_function_arg. */
10169 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
10172 if (mode
== BLKmode
)
10173 bytes
= int_size_in_bytes (type
);
10175 bytes
= GET_MODE_SIZE (mode
);
10176 words
= CEIL (bytes
, UNITS_PER_WORD
);
10179 mode
= type_natural_mode (type
, NULL
, false);
10181 if ((type
&& POINTER_BOUNDS_TYPE_P (type
))
10182 || POINTER_BOUNDS_MODE_P (mode
))
10184 /* If we pass bounds in BT then just update remained bounds count. */
10185 if (cum
->bnds_in_bt
)
10191 /* Update remained number of bounds to force. */
10192 if (cum
->force_bnd_pass
)
10193 cum
->force_bnd_pass
--;
10200 /* The first arg not going to Bounds Tables resets this counter. */
10201 cum
->bnds_in_bt
= 0;
10202 /* For unnamed args we always pass bounds to avoid bounds mess when
10203 passed and received types do not match. If bounds do not follow
10204 unnamed arg, still pretend required number of bounds were passed. */
10205 if (cum
->force_bnd_pass
)
10207 cum
->bnd_regno
+= cum
->force_bnd_pass
;
10208 cum
->force_bnd_pass
= 0;
10213 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
10215 if (call_abi
== MS_ABI
)
10216 nregs
= function_arg_advance_ms_64 (cum
, bytes
, words
);
10218 nregs
= function_arg_advance_64 (cum
, mode
, type
, words
, named
);
10221 nregs
= function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
10223 /* For stdarg we expect bounds to be passed for each value passed
10226 cum
->force_bnd_pass
= nregs
;
10227 /* For pointers passed in memory we expect bounds passed in Bounds
10231 /* Track if there are outgoing arguments on stack. */
10233 cfun
->machine
->outgoing_args_on_stack
= true;
10235 cum
->bnds_in_bt
= chkp_type_bounds_count (type
);
10239 /* Define where to put the arguments to a function.
10240 Value is zero to push the argument on the stack,
10241 or a hard register in which to store the argument.
10243 MODE is the argument's machine mode.
10244 TYPE is the data type of the argument (as a tree).
10245 This is null for libcalls where that information may
10247 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10248 the preceding args and about the function being called.
10249 NAMED is nonzero if this argument is a named parameter
10250 (otherwise it is an extra parameter matching an ellipsis). */
10253 function_arg_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
10254 machine_mode orig_mode
, const_tree type
,
10255 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
10257 bool error_p
= false;
10259 /* Avoid the AL settings for the Unix64 ABI. */
10260 if (mode
== VOIDmode
)
10261 return constm1_rtx
;
10265 /* Intel MCU psABI passes scalars and aggregates no larger than 8
10266 bytes in registers. */
10267 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
10286 if (words
<= cum
->nregs
)
10288 int regno
= cum
->regno
;
	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
10295 if (mode
== BLKmode
10297 || (type
&& AGGREGATE_TYPE_P (type
)))
10300 /* ECX not EAX is the first allocated register. */
10301 if (regno
== AX_REG
)
10304 return gen_rtx_REG (mode
, regno
);
10309 if (cum
->float_in_sse
== -1)
10311 if (cum
->float_in_sse
< 2)
10315 if (cum
->float_in_sse
== -1)
10317 if (cum
->float_in_sse
< 1)
10321 /* In 32bit, we pass TImode in xmm registers. */
10328 if (!type
|| !AGGREGATE_TYPE_P (type
))
10330 if (cum
->sse_nregs
)
10331 return gen_reg_or_parallel (mode
, orig_mode
,
10332 cum
->sse_regno
+ FIRST_SSE_REG
);
10338 /* OImode and XImode shouldn't be used directly. */
10339 gcc_unreachable ();
10353 if (!type
|| !AGGREGATE_TYPE_P (type
))
10355 if (cum
->sse_nregs
)
10356 return gen_reg_or_parallel (mode
, orig_mode
,
10357 cum
->sse_regno
+ FIRST_SSE_REG
);
10367 if (!type
|| !AGGREGATE_TYPE_P (type
))
10369 if (cum
->mmx_nregs
)
10370 return gen_reg_or_parallel (mode
, orig_mode
,
10371 cum
->mmx_regno
+ FIRST_MMX_REG
);
10377 cum
->float_in_sse
= 0;
10378 error ("calling %qD with SSE calling convention without "
10379 "SSE/SSE2 enabled", cum
->decl
);
10380 sorry ("this is a GCC bug that can be worked around by adding "
10381 "attribute used to function called");
10388 function_arg_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
10389 machine_mode orig_mode
, const_tree type
, bool named
)
10391 /* Handle a hidden AL argument containing number of registers
10392 for varargs x86-64 functions. */
10393 if (mode
== VOIDmode
)
10394 return GEN_INT (cum
->maybe_vaarg
10395 ? (cum
->sse_nregs
< 0
10396 ? X86_64_SSE_REGPARM_MAX
10417 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
10423 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
10425 &x86_64_int_parameter_registers
[cum
->regno
],
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use a value of -2 to specify that the current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregate types passed in registers.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
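/* Example (added commentary, not part of the original sources): the
   Microsoft x64 ABI assigns argument slots positionally, so for
   f (int, double, int) the arguments go in ECX, XMM1 and R8D; a named
   double consumes an XMM slot, while an unnamed (variadic) one is passed
   in both register files, as built by the PARALLEL above.  */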
10477 /* Return where to put the arguments to a function.
10478 Return zero to push the argument on the stack, or a hard register in which to store the argument.
10480 MODE is the argument's machine mode. TYPE is the data type of the
10481 argument. It is null for libcalls where that information may not be
10482 available. CUM gives information about the preceding args and about
10483 the function being called. NAMED is nonzero if this argument is a
10484 named parameter (otherwise it is an extra parameter matching an
10488 ix86_function_arg (cumulative_args_t cum_v
, machine_mode omode
,
10489 const_tree type
, bool named
)
10491 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10492 machine_mode mode
= omode
;
10493 HOST_WIDE_INT bytes
, words
;
10496 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
10498 gcc_assert (type
!= NULL_TREE
);
10499 if (POINTER_TYPE_P (type
))
10501 /* This is the pointer argument. */
10502 gcc_assert (TYPE_MODE (type
) == Pmode
);
10503 /* It is at -WORD(AP) in the current frame in interrupt and
10504 exception handlers. */
10505 arg
= plus_constant (Pmode
, arg_pointer_rtx
, -UNITS_PER_WORD
);
10509 gcc_assert (cfun
->machine
->func_type
== TYPE_EXCEPTION
10510 && TREE_CODE (type
) == INTEGER_TYPE
10511 && TYPE_MODE (type
) == word_mode
);
10512 /* The error code is the word-mode integer argument at
10513 -2 * WORD(AP) in the current frame of the exception
10515 arg
= gen_rtx_MEM (word_mode
,
10516 plus_constant (Pmode
,
10518 -2 * UNITS_PER_WORD
));
10523 /* All pointer bounds arguments are handled separately here. */
10524 if ((type
&& POINTER_BOUNDS_TYPE_P (type
))
10525 || POINTER_BOUNDS_MODE_P (mode
))
10527 /* Return NULL if bounds are forced to go in Bounds Table. */
10528 if (cum
->bnds_in_bt
)
10530 /* Return the next available bound reg if any. */
10531 else if (cum
->bnd_regno
<= LAST_BND_REG
)
10532 arg
= gen_rtx_REG (BNDmode
, cum
->bnd_regno
);
10533 /* Return the next special slot number otherwise. */
10535 arg
= GEN_INT (cum
->bnd_regno
- LAST_BND_REG
- 1);
10540 if (mode
== BLKmode
)
10541 bytes
= int_size_in_bytes (type
);
10543 bytes
= GET_MODE_SIZE (mode
);
10544 words
= CEIL (bytes
, UNITS_PER_WORD
);
10546 /* To simplify the code below, represent vector types with a vector mode
10547 even if MMX/SSE are not active. */
10548 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
10549 mode
= type_natural_mode (type
, cum
, false);
10553 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
10555 if (call_abi
== MS_ABI
)
10556 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
10558 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
10561 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
10563 /* Track if there are outgoing arguments on stack. */
10564 if (arg
== NULL_RTX
&& cum
->caller
)
10565 cfun
->machine
->outgoing_args_on_stack
= true;
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
			const_tree type, bool)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* Bounds are never passed by reference.  */
  if ((type && POINTER_BOUNDS_TYPE_P (type))
      || POINTER_BOUNDS_MODE_P (mode))
    return false;

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (mode);

	  if (type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      else if (type && int_size_in_bytes (type) == -1)
	return true;
    }

  return false;
}
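/* Example (added commentary, not part of the original sources): under the
   Microsoft x64 convention a 12-byte struct or a 16-byte __m128 argument
   is therefore passed by reference, while an 8-byte struct is passed by
   value in a register; under the SysV ABI only variable-sized objects
   take this path.  */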
10620 /* Return true when TYPE should be 128bit aligned for 32bit argument
10621 passing ABI. XXX: This function is obsolete and is only used for
10622 checking psABI compatibility with previous versions of GCC. */
10625 ix86_compat_aligned_value_p (const_tree type
)
10627 machine_mode mode
= TYPE_MODE (type
);
10628 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
10632 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
10634 if (TYPE_ALIGN (type
) < 128)
10637 if (AGGREGATE_TYPE_P (type
))
10639 /* Walk the aggregates recursively. */
10640 switch (TREE_CODE (type
))
10644 case QUAL_UNION_TYPE
:
10648 /* Walk all the structure fields. */
10649 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
10651 if (TREE_CODE (field
) == FIELD_DECL
10652 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
	  /* Just for use if some languages pass arrays by value.  */
10660 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
10665 gcc_unreachable ();
10671 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
10672 XXX: This function is obsolete and is only used for checking psABI
10673 compatibility with previous versions of GCC. */
10675 static unsigned int
10676 ix86_compat_function_arg_boundary (machine_mode mode
,
10677 const_tree type
, unsigned int align
)
10679 /* In 32bit, only _Decimal128 and __float128 are aligned to their
10680 natural boundaries. */
10681 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
10692 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
10693 align
= PARM_BOUNDARY
;
10697 if (!ix86_compat_aligned_value_p (type
))
10698 align
= PARM_BOUNDARY
;
10701 if (align
> BIGGEST_ALIGNMENT
)
10702 align
= BIGGEST_ALIGNMENT
;
10706 /* Return true when TYPE should be 128bit aligned for 32bit argument
10710 ix86_contains_aligned_value_p (const_tree type
)
10712 machine_mode mode
= TYPE_MODE (type
);
10714 if (mode
== XFmode
|| mode
== XCmode
)
10717 if (TYPE_ALIGN (type
) < 128)
10720 if (AGGREGATE_TYPE_P (type
))
10722 /* Walk the aggregates recursively. */
10723 switch (TREE_CODE (type
))
10727 case QUAL_UNION_TYPE
:
10731 /* Walk all the structure fields. */
10732 for (field
= TYPE_FIELDS (type
);
10734 field
= DECL_CHAIN (field
))
10736 if (TREE_CODE (field
) == FIELD_DECL
10737 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
	  /* Just for use if some languages pass arrays by value.  */
10745 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
10750 gcc_unreachable ();
10754 return TYPE_ALIGN (type
) >= 128;
10759 /* Gives the alignment boundary, in bits, of an argument with the
10760 specified mode and type. */
10762 static unsigned int
10763 ix86_function_arg_boundary (machine_mode mode
, const_tree type
)
10765 unsigned int align
;
10768 /* Since the main variant type is used for call, we convert it to
10769 the main variant type. */
10770 type
= TYPE_MAIN_VARIANT (type
);
10771 align
= TYPE_ALIGN (type
);
10774 align
= GET_MODE_ALIGNMENT (mode
);
10775 if (align
< PARM_BOUNDARY
)
10776 align
= PARM_BOUNDARY
;
10779 static bool warned
;
10780 unsigned int saved_align
= align
;
10784 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
10787 if (mode
== XFmode
|| mode
== XCmode
)
10788 align
= PARM_BOUNDARY
;
10790 else if (!ix86_contains_aligned_value_p (type
))
10791 align
= PARM_BOUNDARY
;
10794 align
= PARM_BOUNDARY
;
10799 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
10803 inform (input_location
,
10804 "The ABI for passing parameters with %d-byte"
10805 " alignment has changed in GCC 4.6",
10806 align
/ BITS_PER_UNIT
);
10813 /* Return true if N is a possible register number of function value. */
10816 ix86_function_value_regno_p (const unsigned int regno
)
10823 return (!TARGET_64BIT
|| ix86_cfun_abi () != MS_ABI
);
10826 return TARGET_64BIT
&& ix86_cfun_abi () != MS_ABI
;
10830 return chkp_function_instrumented_p (current_function_decl
);
10832 /* Complex values are returned in %st(0)/%st(1) pair. */
10835 /* TODO: The function should depend on current function ABI but
10836 builtins.c would need updating then. Therefore we use the
10838 if (TARGET_64BIT
&& ix86_cfun_abi () == MS_ABI
)
10840 return TARGET_FLOAT_RETURNS_IN_80387
;
10842 /* Complex values are returned in %xmm0/%xmm1 pair. */
10848 if (TARGET_MACHO
|| TARGET_64BIT
)
10856 /* Define how to find the value returned by a function.
10857 VALTYPE is the data type of the value (as a tree).
10858 If the precise function being called is known, FUNC is its FUNCTION_DECL;
10859 otherwise, FUNC is 0. */
10862 function_value_32 (machine_mode orig_mode
, machine_mode mode
,
10863 const_tree fntype
, const_tree fn
)
10865 unsigned int regno
;
10867 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
10868 we normally prevent this case when mmx is not available. However
10869 some ABIs may require the result to be returned like DImode. */
10870 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
10871 regno
= FIRST_MMX_REG
;
10873 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
10874 we prevent this case when sse is not available. However some ABIs
10875 may require the result to be returned like integer TImode. */
10876 else if (mode
== TImode
10877 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
10878 regno
= FIRST_SSE_REG
;
10880 /* 32-byte vector modes in %ymm0. */
10881 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
10882 regno
= FIRST_SSE_REG
;
10884 /* 64-byte vector modes in %zmm0. */
10885 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
10886 regno
= FIRST_SSE_REG
;
10888 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
10889 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
10890 regno
= FIRST_FLOAT_REG
;
10892 /* Most things go in %eax. */
10895 /* Override FP return register with %xmm0 for local functions when
10896 SSE math is enabled or for functions with sseregparm attribute. */
10897 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
10899 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
10900 if (sse_level
== -1)
10902 error ("calling %qD with SSE calling convention without "
10903 "SSE/SSE2 enabled", fn
);
10904 sorry ("this is a GCC bug that can be worked around by adding "
10905 "attribute used to function called");
10907 else if ((sse_level
>= 1 && mode
== SFmode
)
10908 || (sse_level
== 2 && mode
== DFmode
))
10909 regno
= FIRST_SSE_REG
;
10912 /* OImode shouldn't be used directly. */
10913 gcc_assert (mode
!= OImode
);
10915 return gen_rtx_REG (orig_mode
, regno
);
10919 function_value_64 (machine_mode orig_mode
, machine_mode mode
,
10920 const_tree valtype
)
10924 /* Handle libcalls, which don't provide a type node. */
10925 if (valtype
== NULL
)
10927 unsigned int regno
;
10939 regno
= FIRST_SSE_REG
;
10943 regno
= FIRST_FLOAT_REG
;
10951 return gen_rtx_REG (mode
, regno
);
10953 else if (POINTER_TYPE_P (valtype
))
10955 /* Pointers are always returned in word_mode. */
10959 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
10960 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
10961 x86_64_int_return_registers
, 0);
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
10966 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
10972 function_value_ms_64 (machine_mode orig_mode
, machine_mode mode
,
10973 const_tree valtype
)
10975 unsigned int regno
= AX_REG
;
10979 switch (GET_MODE_SIZE (mode
))
10982 if (valtype
!= NULL_TREE
10983 && !VECTOR_INTEGER_TYPE_P (valtype
)
10984 && !VECTOR_INTEGER_TYPE_P (valtype
)
10985 && !INTEGRAL_TYPE_P (valtype
)
10986 && !VECTOR_FLOAT_TYPE_P (valtype
))
10988 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
10989 && !COMPLEX_MODE_P (mode
))
10990 regno
= FIRST_SSE_REG
;
10994 if (mode
== SFmode
|| mode
== DFmode
)
10995 regno
= FIRST_SSE_REG
;
11001 return gen_rtx_REG (orig_mode
, regno
);
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       machine_mode orig_mode, machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
      || POINTER_BOUNDS_MODE_P (mode))
    return gen_rtx_REG (BNDmode, FIRST_BND_REG);
  else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode, valtype);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
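/* Example (added commentary, not part of the original sources): returning
   a double from a SysV x86-64 function classifies as SSEDF and comes back
   in XMM0, whereas in 32-bit mode it is returned in %st(0) unless
   -mno-fp-ret-in-387 or a local/sseregparm SSE-return override applies.  */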
/* Return an RTX representing a place where a function returns
   or receives pointer bounds, or NULL if no bounds are returned.
11039 VALTYPE is a data type of a value returned by the function.
11041 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
11042 or FUNCTION_TYPE of the function.
11044 If OUTGOING is false, return a place in which the caller will
11045 see the return value. Otherwise, return a place where a
11046 function returns a value. */
11049 ix86_function_value_bounds (const_tree valtype
,
11050 const_tree fntype_or_decl ATTRIBUTE_UNUSED
,
11051 bool outgoing ATTRIBUTE_UNUSED
)
11053 rtx res
= NULL_RTX
;
11055 if (BOUNDED_TYPE_P (valtype
))
11056 res
= gen_rtx_REG (BNDmode
, FIRST_BND_REG
);
11057 else if (chkp_type_has_pointer (valtype
))
11061 bitmap_iterator bi
;
11062 unsigned i
, bnd_no
= 0;
11064 bitmap_obstack_initialize (NULL
);
11065 slots
= BITMAP_ALLOC (NULL
);
11066 chkp_find_bound_slots (valtype
, slots
);
11068 EXECUTE_IF_SET_IN_BITMAP (slots
, 0, i
, bi
)
11070 rtx reg
= gen_rtx_REG (BNDmode
, FIRST_BND_REG
+ bnd_no
);
11071 rtx offs
= GEN_INT (i
* POINTER_SIZE
/ BITS_PER_UNIT
);
11072 gcc_assert (bnd_no
< 2);
11073 bounds
[bnd_no
++] = gen_rtx_EXPR_LIST (VOIDmode
, reg
, offs
);
11076 res
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (bnd_no
, bounds
));
11078 BITMAP_FREE (slots
);
11079 bitmap_obstack_release (NULL
);
11087 /* Pointer function arguments and return values are promoted to
11088 word_mode for normal functions. */
11090 static machine_mode
11091 ix86_promote_function_mode (const_tree type
, machine_mode mode
,
11092 int *punsignedp
, const_tree fntype
,
11095 if (cfun
->machine
->func_type
== TYPE_NORMAL
11096 && type
!= NULL_TREE
11097 && POINTER_TYPE_P (type
))
11099 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
11102 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

static rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
11124 /* Return true iff type is returned in memory. */
11127 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
11129 #ifdef SUBTARGET_RETURN_IN_MEMORY
11130 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
11132 const machine_mode mode
= type_natural_mode (type
, NULL
, true);
11133 HOST_WIDE_INT size
;
11135 if (POINTER_BOUNDS_TYPE_P (type
))
11140 if (ix86_function_type_abi (fntype
) == MS_ABI
)
11142 size
= int_size_in_bytes (type
);
11144 /* __m128 is returned in xmm0. */
11145 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
)
11146 || INTEGRAL_TYPE_P (type
)
11147 || VECTOR_FLOAT_TYPE_P (type
))
11148 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
11149 && !COMPLEX_MODE_P (mode
)
11150 && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
      /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
11154 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
11158 int needed_intregs
, needed_sseregs
;
11160 return examine_argument (mode
, type
, 1,
11161 &needed_intregs
, &needed_sseregs
);
11166 size
= int_size_in_bytes (type
);
11168 /* Intel MCU psABI returns scalars and aggregates no larger than 8
11169 bytes in registers. */
11171 return VECTOR_MODE_P (mode
) || size
< 0 || size
> 8;
11173 if (mode
== BLKmode
)
11176 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
11179 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
11181 /* User-created vectors small enough to fit in EAX. */
      /* Unless the ABI prescribes otherwise,
	 MMX/3dNow values are returned in MM0 if available.  */
11189 return TARGET_VECT8_RETURNS
|| !TARGET_MMX
;
11191 /* SSE values are returned in XMM0 if available. */
11193 return !TARGET_SSE
;
11195 /* AVX values are returned in YMM0 if available. */
11197 return !TARGET_AVX
;
11199 /* AVX512F values are returned in ZMM0 if available. */
11201 return !TARGET_AVX512F
;
11204 if (mode
== XFmode
)
11210 /* OImode shouldn't be used directly. */
11211 gcc_assert (mode
!= OImode
);
11219 /* Create the va_list data type. */
11222 ix86_build_builtin_va_list_64 (void)
11224 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
11226 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
11227 type_decl
= build_decl (BUILTINS_LOCATION
,
11228 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
11230 f_gpr
= build_decl (BUILTINS_LOCATION
,
11231 FIELD_DECL
, get_identifier ("gp_offset"),
11232 unsigned_type_node
);
11233 f_fpr
= build_decl (BUILTINS_LOCATION
,
11234 FIELD_DECL
, get_identifier ("fp_offset"),
11235 unsigned_type_node
);
11236 f_ovf
= build_decl (BUILTINS_LOCATION
,
11237 FIELD_DECL
, get_identifier ("overflow_arg_area"),
11239 f_sav
= build_decl (BUILTINS_LOCATION
,
11240 FIELD_DECL
, get_identifier ("reg_save_area"),
11243 va_list_gpr_counter_field
= f_gpr
;
11244 va_list_fpr_counter_field
= f_fpr
;
11246 DECL_FIELD_CONTEXT (f_gpr
) = record
;
11247 DECL_FIELD_CONTEXT (f_fpr
) = record
;
11248 DECL_FIELD_CONTEXT (f_ovf
) = record
;
11249 DECL_FIELD_CONTEXT (f_sav
) = record
;
11251 TYPE_STUB_DECL (record
) = type_decl
;
11252 TYPE_NAME (record
) = type_decl
;
11253 TYPE_FIELDS (record
) = f_gpr
;
11254 DECL_CHAIN (f_gpr
) = f_fpr
;
11255 DECL_CHAIN (f_fpr
) = f_ovf
;
11256 DECL_CHAIN (f_ovf
) = f_sav
;
11258 layout_type (record
);
11260 TYPE_ATTRIBUTES (record
) = tree_cons (get_identifier ("sysv_abi va_list"),
11261 NULL_TREE
, TYPE_ATTRIBUTES (record
));
11263 /* The correct type is an array type of one element. */
11264 return build_array_type (record
, build_index_type (size_zero_node
));
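/* For reference (added commentary, not part of the original sources): the
   record built above corresponds to the familiar SysV x86-64 layout

	typedef struct {
	  unsigned int gp_offset;
	  unsigned int fp_offset;
	  void *overflow_arg_area;
	  void *reg_save_area;
	} __va_list_tag;
	typedef __va_list_tag va_list[1];
   */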
11267 /* Setup the builtin va_list data type and for 64-bit the additional
11268 calling convention specific va_list data types. */
11271 ix86_build_builtin_va_list (void)
11275 /* Initialize ABI specific va_list builtin types.
11277 In lto1, we can encounter two va_list types:
11278 - one as a result of the type-merge across TUs, and
11279 - the one constructed here.
11280 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
11281 a type identity check in canonical_va_list_type based on
11282 TYPE_MAIN_VARIANT (which we used to have) will not work.
11283 Instead, we tag each va_list_type_node with its unique attribute, and
11284 look for the attribute in the type identity check in
11285 canonical_va_list_type.
11287 Tagging sysv_va_list_type_node directly with the attribute is
11288 problematic since it's a array of one record, which will degrade into a
11289 pointer to record when used as parameter (see build_va_arg comments for
11290 an example), dropping the attribute in the process. So we tag the
11293 /* For SYSV_ABI we use an array of one record. */
11294 sysv_va_list_type_node
= ix86_build_builtin_va_list_64 ();
11296 /* For MS_ABI we use plain pointer to argument area. */
11297 tree char_ptr_type
= build_pointer_type (char_type_node
);
11298 tree attr
= tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE
,
11299 TYPE_ATTRIBUTES (char_ptr_type
));
11300 ms_va_list_type_node
= build_type_attribute_variant (char_ptr_type
, attr
);
11302 return ((ix86_abi
== MS_ABI
)
11303 ? ms_va_list_type_node
11304 : sysv_va_list_type_node
);
11308 /* For i386 we use plain pointer to argument area. */
11309 return build_pointer_type (char_type_node
);
11313 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
11316 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
11318 rtx save_area
, mem
;
11319 alias_set_type set
;
11322 /* GPR size of varargs save area. */
11323 if (cfun
->va_list_gpr_size
)
11324 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
11326 ix86_varargs_gpr_size
= 0;
11328 /* FPR size of varargs save area. We don't need it if we don't pass
11329 anything in SSE registers. */
11330 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
11331 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
11333 ix86_varargs_fpr_size
= 0;
11335 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
11338 save_area
= frame_pointer_rtx
;
11339 set
= get_varargs_alias_set ();
11341 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
11342 if (max
> X86_64_REGPARM_MAX
)
11343 max
= X86_64_REGPARM_MAX
;
11345 for (i
= cum
->regno
; i
< max
; i
++)
11347 mem
= gen_rtx_MEM (word_mode
,
11348 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
11349 MEM_NOTRAP_P (mem
) = 1;
11350 set_mem_alias_set (mem
, set
);
11351 emit_move_insn (mem
,
11352 gen_rtx_REG (word_mode
,
11353 x86_64_int_parameter_registers
[i
]));
11356 if (ix86_varargs_fpr_size
)
11358 machine_mode smode
;
11359 rtx_code_label
*label
;
11362 /* Now emit code to save SSE registers. The AX parameter contains number
11363 of SSE parameter registers used to call this function, though all we
11364 actually check here is the zero/non-zero status. */
11366 label
= gen_label_rtx ();
11367 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
11368 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
11371 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
11372 we used movdqa (i.e. TImode) instead? Perhaps even better would
11373 be if we could determine the real mode of the data, via a hook
11374 into pass_stdarg. Ignore all that for now. */
11376 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
11377 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
11379 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
11380 if (max
> X86_64_SSE_REGPARM_MAX
)
11381 max
= X86_64_SSE_REGPARM_MAX
;
11383 for (i
= cum
->sse_regno
; i
< max
; ++i
)
11385 mem
= plus_constant (Pmode
, save_area
,
11386 i
* 16 + ix86_varargs_gpr_size
);
11387 mem
= gen_rtx_MEM (smode
, mem
);
11388 MEM_NOTRAP_P (mem
) = 1;
11389 set_mem_alias_set (mem
, set
);
11390 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
11392 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
11395 emit_label (label
);
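/* For reference (added commentary, not part of the original sources): the
   register save area laid out above holds 6 * 8 = 48 bytes of general
   registers followed by up to 8 * 16 = 128 bytes of SSE registers;
   gp_offset and fp_offset in the va_list index into it, and the SSE stores
   are skipped at run time when the hidden AL argument is zero.  */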
11400 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
11402 alias_set_type set
= get_varargs_alias_set ();
  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;
11410 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
11414 mem
= gen_rtx_MEM (Pmode
,
11415 plus_constant (Pmode
, virtual_incoming_args_rtx
,
11416 i
* UNITS_PER_WORD
));
11417 MEM_NOTRAP_P (mem
) = 1;
11418 set_mem_alias_set (mem
, set
);
11420 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
11421 emit_move_insn (mem
, reg
);
11426 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
11427 tree type
, int *, int no_rtl
)
11429 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
11430 CUMULATIVE_ARGS next_cum
;
11433 /* This argument doesn't appear to be used anymore. Which is good,
11434 because the old code here didn't suppress rtl generation. */
11435 gcc_assert (!no_rtl
);
11440 fntype
= TREE_TYPE (current_function_decl
);
11442 /* For varargs, we do not want to skip the dummy va_dcl argument.
11443 For stdargs, we do want to skip the last named argument. */
11445 if (stdarg_p (fntype
))
11446 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
11449 if (cum
->call_abi
== MS_ABI
)
11450 setup_incoming_varargs_ms_64 (&next_cum
);
11452 setup_incoming_varargs_64 (&next_cum
);
static void
ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
				   machine_mode mode,
				   tree type,
				   int *pretend_size ATTRIBUTE_UNUSED,
				   int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;
  rtx save_area;
  int bnd_reg, i, max;

  gcc_assert (!no_rtl);

  /* Do nothing if we use plain pointer to argument area.  */
  if (!TARGET_64BIT || cum->call_abi == MS_ABI)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);
  save_area = frame_pointer_rtx;

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
  if (chkp_function_instrumented_p (current_function_decl))
    for (i = cum->regno; i < max; i++)
      {
	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
	rtx ptr = gen_rtx_REG (Pmode,
			       x86_64_int_parameter_registers[i]);
	rtx bounds;

	if (bnd_reg <= LAST_BND_REG)
	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
	else
	  {
	    rtx ldx_addr
	      = plus_constant (Pmode, arg_pointer_rtx,
			       (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
	    bounds = gen_reg_rtx (BNDmode);
	    emit_insn (BNDmode == BND64mode
		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
	  }

	emit_insn (BNDmode == BND64mode
		   ? gen_bnd64_stx (addr, ptr, bounds)
		   : gen_bnd32_stx (addr, ptr, bounds));

	bnd_reg++;
      }
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg;
	  rtx_insn *seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);

	  /* Store zero bounds for va_list.  */
	  if (chkp_function_instrumented_p (current_function_decl))
	    chkp_expand_bounds_reset_for_mem (valist,
					      make_tree (TREE_TYPE (valist),
							 next));
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  /* Store zero bounds for overflow area pointer.  */
  if (chkp_function_instrumented_p (current_function_decl))
    chkp_expand_bounds_reset_for_mem (ovf, t);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      /* Store zero bounds for save area pointer.  */
      if (chkp_function_instrumented_p (current_function_decl))
	chkp_expand_bounds_reset_for_mem (sav, t);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
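/* Illustrative example (not taken from the original sources): for a SysV
   x86-64 function that has consumed n_gpr integer and n_fpr SSE argument
   registers before the "...", the code above seeds the va_list as

	gp_offset = n_gpr * 8
	fp_offset = n_fpr * 16 + 8 * X86_64_REGPARM_MAX   (i.e. + 48 when
							    X86_64_REGPARM_MAX == 6)

   while overflow_arg_area points past the named stack arguments and
   reg_save_area points at the block stored by setup_incoming_varargs_64.  */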
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is consecutive block
	 on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (SUBREG_P (mem))
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}

/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
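/* Illustrative note (not from the original sources): the strings above are
   decimal expansions of log10(2), ln(2), log2(e), log2(10) and pi, i.e.
   exactly the values the x87 fldlg2/fldln2/fldl2e/fldl2t/fldpi instructions
   push, so real_identical can match a CONST_DOUBLE against table entry I
   and the caller can emit the corresponding one-instruction load.  */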
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}

/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}

/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return const_double_from_real_value (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all bits 0 and 2 if X is all bits 1
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	case 0:
	  /* VOIDmode */
	  gcc_unreachable ();
	default:
	  break;
	}
    }

  return 0;
}
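/* Illustrative example (not from the original sources): with -msse2, an
   all-ones V4SImode constant makes this function return 2, and
   standard_sse_constant_opcode below then materializes it with
   "pcmpeqd %xmm0, %xmm0" (or the AVX "vpcmpeqd" form), while an all-zero
   vector (return value 1) is produced with an xor of the register with
   itself.  */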
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx x)
{
  machine_mode mode;

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_XI:
	  return "vpxord\t%g0, %g0, %g0";
	case MODE_OI:
	  return (TARGET_AVX512VL
		  ? "vpxord\t%x0, %x0, %x0"
		  : "vpxor\t%x0, %x0, %x0");
	case MODE_TI:
	  return (TARGET_AVX512VL
		  ? "vpxord\t%t0, %t0, %t0"
		  : "%vpxor\t%0, %d0");

	case MODE_V8DF:
	  return (TARGET_AVX512DQ
		  ? "vxorpd\t%g0, %g0, %g0"
		  : "vpxorq\t%g0, %g0, %g0");
	case MODE_V4DF:
	  return "vxorpd\t%x0, %x0, %x0";
	case MODE_V2DF:
	  return "%vxorpd\t%0, %d0";

	case MODE_V16SF:
	  return (TARGET_AVX512DQ
		  ? "vxorps\t%g0, %g0, %g0"
		  : "vpxord\t%g0, %g0, %g0");
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	default:
	  gcc_unreachable ();
	}
    }
  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  return (TARGET_AVX
		  ? "vpcmpeqd\t%0, %0, %0"
		  : "pcmpeqd\t%0, %0");

	default:
	  gcc_unreachable ();
	}
    }

  gcc_unreachable ();
}
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Don't use `ret' instruction in interrupt handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  frame = cfun->machine->frame;
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
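/* Illustrative example (not from the original sources): with hidden
   link-once support and regno naming %ebx, the sprintf above yields the
   familiar symbol "__x86.get_pc_thunk.bx", the helper 32-bit -fpic code
   calls to obtain its own PC in %ebx.  */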
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("%!ret", NULL);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate an "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate an "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
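/* Illustrative example (not from the original sources): on x86-64 gen_push
   builds RTL of the shape

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

   which matches the push insn pattern, and gen_pop the mirror-image
   post_inc form; the frame-state bookkeeping in gen_push keeps
   m->fs.sp_offset/cfa_offset in sync with the stack adjustment.  */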
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return true if REGNO is used by the epilogue.  */

int
ix86_epilogue_uses (int regno)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  return (epilogue_completed
	  && cfun->machine->no_caller_saved_registers
	  && !fixed_regs[regno]
	  && !STACK_REGNO_P (regno)
	  && !MMX_REGNO_P (regno));
}

/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
ix86_hard_regno_scratch_ok (unsigned int regno)
{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after epilogue and use REGNO as scratch
     register only if it has been used before to avoid saving and
     restoring it.  */
  return (!cfun->machine->no_caller_saved_registers
	  || (!epilogue_completed
	      && df_regs_ever_live_p (regno)));
}

/* Return true if register class CL should be an additional allocno
   class.  */

static bool
ix86_additional_allocno_class_p (reg_class_t cl)
{
  return cl == MOD4_SSE_REGS;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  if (cfun->machine->no_caller_saved_registers)
    {
      /* Don't preserve registers used for function return value.  */
      rtx reg = crtl->return_rtx;
      if (reg)
	{
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;

	  reg = crtl->return_bnd;
	  if (reg)
	    {
	      i = REGNO (reg);
	      nregs = REG_NREGS (reg);
	      while (nregs-- > 0)
		if ((i + nregs) == regno)
		  return false;
	    }
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	     _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame = cfun->machine->frame;

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
static void warn_once_call_ms2sysv_xlogues (const char *feature)
{
  static bool warned_once = false;
  if (!warned_once)
    {
      warning (0, "-mcall-ms2sysv-xlogues is not compatible with %s",
	       feature);
      warned_once = true;
    }
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
	}

      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
	}

      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.  */
  if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor
	  || ix86_incoming_stack_boundary < 128))
    {
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	m->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  if (stack_realign_fp)
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }

	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
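/* Illustrative examples (not from the original sources): a zero offset
   from %rbx costs no displacement bytes, while %rbp/%r13 still need a
   one-byte disp8; offsets in [-128, 127] cost one byte, anything larger
   costs four; and %rsp/%r12 always add one byte for the mandatory SIB,
   so "8(%rsp)" scores 2 while "8(%rbx)" scores 1.  */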
/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
sp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
      return false;
    }
  return fs.sp_valid;
}

/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
fp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset >= fs.sp_realigned_offset);
      return false;
    }
  return fs.fp_valid;
}
/* Choose a base register based upon alignment requested, speed and/or
   size.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
/* Return an RTX that points to CFA_OFFSET within the stack frame and
   the alignment of address.  If ALIGN is non-null, it should point to
   an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
   alignment value.

   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align)
{
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  /* If a specific alignment is requested, try to get a base register
     with that alignment first.  */
  if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);

  if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);

  gcc_assert (base_reg != NULL);
  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx_insn *insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
13503 static GTY(()) rtx queued_cfa_restores
;
/* Add a REG_CFA_RESTORE REG note to INSN or queue them until the next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red-zone till return, as unwinders can find the same
   value in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}

/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;

  if (!queued_cfa_restores)
    return;

  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
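
/* Illustrative note (not from the original source): callers normally queue
   restore notes with a NULL insn and let the next stack adjustment flush
   them, e.g.

     ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
     ...
     pro_epilogue_adjust_stack (...);  -- calls ix86_add_queued_cfa_restore_notes

   so the REG_CFA_RESTORE notes end up attached to the insn that actually
   releases the stack slots.  */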
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;

      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);

  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	valid = m->fs.drap_valid;
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
}
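
/* Usage sketch (not part of the original source): a typical prologue-side
   call allocates ALLOCATE bytes and updates the CFA only while the stack
   pointer is still the CFA register, e.g.

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   A negative STYLE marks the adjustment as frame related, per the comment
   above.  */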
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use a callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 registers in the epilogue, DRAP must not use a caller-saved
	 register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Handle a "force_align_arg_pointer" attribute.  */

static tree
ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
					       tree, int, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of an interrupt handler is aligned to 128 bits in 64-bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't the sibcall check, and the
     estimated stack alignment is 128 bits.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
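
/* Example (added for illustration; actual values depend on target defaults):
   MIN_STACK_BOUNDARY is the word size in bits, so a 32-bit function built
   with -mstackrealign and a 128-bit estimated alignment gets a 32-bit
   incoming boundary here, while the x86-64 SysV default typically keeps the
   usual 128-bit (16-byte) incoming boundary.  */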
13763 /* Update incoming stack boundary and estimated stack alignment. */
13766 ix86_update_stack_boundary (void)
13768 ix86_incoming_stack_boundary
13769 = ix86_minimum_incoming_stack_boundary (false);
13771 /* x86_64 vararg needs 16byte stack alignment for register save
13775 && crtl
->stack_alignment_estimated
< 128)
13776 crtl
->stack_alignment_estimated
= 128;
13778 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
13779 if (ix86_tls_descriptor_calls_expanded_in_cfun
13780 && crtl
->preferred_stack_boundary
< 128)
13781 crtl
->preferred_stack_boundary
= 128;
13784 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
13785 needed or an rtx for DRAP otherwise. */
13788 ix86_get_drap_rtx (void)
13790 /* We must use DRAP if there are outgoing arguments on stack and
13791 ACCUMULATE_OUTGOING_ARGS is false. */
13792 if (ix86_force_drap
13793 || (cfun
->machine
->outgoing_args_on_stack
13794 && !ACCUMULATE_OUTGOING_ARGS
))
13795 crtl
->need_drap
= true;
13797 if (stack_realign_drap
)
13799 /* Assign DRAP to vDRAP and returns vDRAP */
13800 unsigned int regno
= find_drap_reg ();
13803 rtx_insn
*seq
, *insn
;
13805 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
13806 crtl
->drap_reg
= arg_ptr
;
13809 drap_vreg
= copy_to_reg (arg_ptr
);
13810 seq
= get_insns ();
13813 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
13816 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
13817 RTX_FRAME_RELATED_P (insn
) = 1;
13825 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
13828 ix86_internal_arg_pointer (void)
13830 return virtual_incoming_args_rtx
;
13833 struct scratch_reg
{
13838 /* Return a short-lived scratch register for use on function entry.
13839 In 32-bit mode, it is valid only after the registers are saved
13840 in the prologue. This register must be released by means of
13841 release_scratch_register_on_entry once it is dead. */
13844 get_scratch_register_on_entry (struct scratch_reg
*sr
)
13852 /* We always use R11 in 64-bit mode. */
13857 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
13859 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
13861 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
13862 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
13863 int regparm
= ix86_function_regparm (fntype
, decl
);
13865 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
13867 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
13868 for the static chain register. */
13869 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
13870 && drap_regno
!= AX_REG
)
13872 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
13873 for the static chain register. */
13874 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
13876 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
13878 /* ecx is the static chain register. */
13879 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
13881 && drap_regno
!= CX_REG
)
13883 else if (ix86_save_reg (BX_REG
, true, false))
13885 /* esi is the static chain register. */
13886 else if (!(regparm
== 3 && static_chain_p
)
13887 && ix86_save_reg (SI_REG
, true, false))
13889 else if (ix86_save_reg (DI_REG
, true, false))
13893 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
13898 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
13901 rtx_insn
*insn
= emit_insn (gen_push (sr
->reg
));
13902 RTX_FRAME_RELATED_P (insn
) = 1;
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  struct machine_function *m = cfun->machine;
  rtx x, insn = emit_insn (gen_pop (sr->reg));

  /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
  RTX_FRAME_RELATED_P (insn) = 1;
  x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
  x = gen_rtx_SET (stack_pointer_rtx, x);
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
  m->fs.sp_offset -= UNITS_PER_WORD;
}
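
/* Usage sketch (not part of the original source): the two helpers bracket
   code that needs a temporary on function entry, e.g.

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg ...
     release_scratch_register_on_entry (&sr);

   as done by ix86_adjust_stack_and_probe and ix86_emit_probe_stack_range
   below.  */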
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
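
/* For illustration: with the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12,
   PROBE_INTERVAL is 4096 bytes, i.e. the probes touch one page at a time.  */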
13927 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
13930 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
13935 const int dope
= 4 * UNITS_PER_WORD
;
13936 rtx size_rtx
= GEN_INT (size
), last
;
13938 /* See if we have a constant small number of probes to generate. If so,
13939 that's the easy case. The run-time loop is made up of 9 insns in the
13940 generic case while the compile-time loop is made up of 3+2*(n-1) insns
13941 for n # of intervals. */
13942 if (size
<= 4 * PROBE_INTERVAL
)
13944 HOST_WIDE_INT i
, adjust
;
13945 bool first_probe
= true;
13947 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
13948 values of N from 1 until it exceeds SIZE. If only one probe is
13949 needed, this will not generate any code. Then adjust and probe
13950 to PROBE_INTERVAL + SIZE. */
13951 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
13955 adjust
= 2 * PROBE_INTERVAL
+ dope
;
13956 first_probe
= false;
13959 adjust
= PROBE_INTERVAL
;
13961 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
13962 plus_constant (Pmode
, stack_pointer_rtx
,
13964 emit_stack_probe (stack_pointer_rtx
);
13968 adjust
= size
+ PROBE_INTERVAL
+ dope
;
13970 adjust
= size
+ PROBE_INTERVAL
- i
;
13972 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
13973 plus_constant (Pmode
, stack_pointer_rtx
,
13975 emit_stack_probe (stack_pointer_rtx
);
13977 /* Adjust back to account for the additional first interval. */
13978 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
13979 plus_constant (Pmode
, stack_pointer_rtx
,
13980 PROBE_INTERVAL
+ dope
)));
13983 /* Otherwise, do the same as above, but in a loop. Note that we must be
13984 extra careful with variables wrapping around because we might be at
13985 the very top (or the very bottom) of the address space and we have
13986 to be able to handle this case properly; in particular, we use an
13987 equality test for the loop condition. */
13990 HOST_WIDE_INT rounded_size
;
13991 struct scratch_reg sr
;
13993 get_scratch_register_on_entry (&sr
);
13996 /* Step 1: round SIZE to the previous multiple of the interval. */
13998 rounded_size
= ROUND_DOWN (size
, PROBE_INTERVAL
);
14001 /* Step 2: compute initial and final value of the loop counter. */
14003 /* SP = SP_0 + PROBE_INTERVAL. */
14004 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
14005 plus_constant (Pmode
, stack_pointer_rtx
,
14006 - (PROBE_INTERVAL
+ dope
))));
14008 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
14009 if (rounded_size
<= (HOST_WIDE_INT_1
<< 31))
14010 emit_insn (gen_rtx_SET (sr
.reg
,
14011 plus_constant (Pmode
, stack_pointer_rtx
,
14015 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
14016 emit_insn (gen_rtx_SET (sr
.reg
,
14017 gen_rtx_PLUS (Pmode
, sr
.reg
,
14018 stack_pointer_rtx
)));
14022 /* Step 3: the loop
14026 SP = SP + PROBE_INTERVAL
14029 while (SP != LAST_ADDR)
14031 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
14032 values of N from 1 until it is equal to ROUNDED_SIZE. */
14034 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
14037 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
14038 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
14040 if (size
!= rounded_size
)
14042 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
14043 plus_constant (Pmode
, stack_pointer_rtx
,
14044 rounded_size
- size
)));
14045 emit_stack_probe (stack_pointer_rtx
);
14048 /* Adjust back to account for the additional first interval. */
14049 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
14050 plus_constant (Pmode
, stack_pointer_rtx
,
14051 PROBE_INTERVAL
+ dope
)));
14053 release_scratch_register_on_entry (&sr
);
14056 /* Even if the stack pointer isn't the CFA register, we need to correctly
14057 describe the adjustments made to it, in particular differentiate the
14058 frame-related ones from the frame-unrelated ones. */
14061 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
14062 XVECEXP (expr
, 0, 0)
14063 = gen_rtx_SET (stack_pointer_rtx
,
14064 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
14065 XVECEXP (expr
, 0, 1)
14066 = gen_rtx_SET (stack_pointer_rtx
,
14067 plus_constant (Pmode
, stack_pointer_rtx
,
14068 PROBE_INTERVAL
+ dope
+ size
));
14069 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
14070 RTX_FRAME_RELATED_P (last
) = 1;
14072 cfun
->machine
->fs
.sp_offset
+= size
;
14075 /* Make sure nothing is scheduled before we are done. */
14076 emit_insn (gen_blockage ());
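
/* Illustrative sketch (not from the original source): for SIZE equal to
   2 * PROBE_INTERVAL the unrolled path above emits roughly

     SP -= 2 * PROBE_INTERVAL + dope;   probe at (SP)
     SP -= PROBE_INTERVAL;              probe at (SP)
     SP += PROBE_INTERVAL + dope;

   which nets out to SP -= SIZE with every touched page probed.  */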
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
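
/* Illustrative output (not from the original source): in AT&T syntax the
   loop printed above looks roughly like

   .LPSRL0:
	sub	$4096, %rsp
	or	$0, (%rsp)
	cmp	%r11, %rsp
	jne	.LPSRL0

   where 4096 stands in for PROBE_INTERVAL and %r11 for the scratch register
   holding LAST_ADDR.  */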
14115 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
14116 inclusive. These are offsets from the current stack pointer. */
14119 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
14121 /* See if we have a constant small number of probes to generate. If so,
14122 that's the easy case. The run-time loop is made up of 6 insns in the
14123 generic case while the compile-time loop is made up of n insns for n #
14125 if (size
<= 6 * PROBE_INTERVAL
)
14129 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
14130 it exceeds SIZE. If only one probe is needed, this will not
14131 generate any code. Then probe at FIRST + SIZE. */
14132 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
14133 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
14136 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
14140 /* Otherwise, do the same as above, but in a loop. Note that we must be
14141 extra careful with variables wrapping around because we might be at
14142 the very top (or the very bottom) of the address space and we have
14143 to be able to handle this case properly; in particular, we use an
14144 equality test for the loop condition. */
14147 HOST_WIDE_INT rounded_size
, last
;
14148 struct scratch_reg sr
;
14150 get_scratch_register_on_entry (&sr
);
14153 /* Step 1: round SIZE to the previous multiple of the interval. */
14155 rounded_size
= ROUND_DOWN (size
, PROBE_INTERVAL
);
14158 /* Step 2: compute initial and final value of the loop counter. */
14160 /* TEST_OFFSET = FIRST. */
14161 emit_move_insn (sr
.reg
, GEN_INT (-first
));
14163 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
14164 last
= first
+ rounded_size
;
14167 /* Step 3: the loop
14171 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
14174 while (TEST_ADDR != LAST_ADDR)
14176 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
14177 until it is equal to ROUNDED_SIZE. */
14179 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
14182 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
14183 that SIZE is equal to ROUNDED_SIZE. */
14185 if (size
!= rounded_size
)
14186 emit_stack_probe (plus_constant (Pmode
,
14187 gen_rtx_PLUS (Pmode
,
14190 rounded_size
- size
));
14192 release_scratch_register_on_entry (&sr
);
14195 /* Make sure nothing is scheduled before we are done. */
14196 emit_insn (gen_blockage ());
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
14238 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
14239 will guide prologue/epilogue to be generated in correct form. */
14242 ix86_finalize_stack_frame_flags (void)
14244 /* Check if stack realign is really needed after reload, and
14245 stores result in cfun */
14246 unsigned int incoming_stack_boundary
14247 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
14248 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
14249 unsigned int stack_alignment
14250 = (crtl
->is_leaf
&& !ix86_current_function_calls_tls_descriptor
14251 ? crtl
->max_used_stack_slot_alignment
14252 : crtl
->stack_alignment_needed
);
14253 unsigned int stack_realign
14254 = (incoming_stack_boundary
< stack_alignment
);
14255 bool recompute_frame_layout_p
= false;
14257 if (crtl
->stack_realign_finalized
)
/* After stack_realign_needed is finalized, we can no longer
14261 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
14265 /* If the only reason for frame_pointer_needed is that we conservatively
14266 assumed stack realignment might be needed or -fno-omit-frame-pointer
14267 is used, but in the end nothing that needed the stack alignment had
14268 been spilled nor stack access, clear frame_pointer_needed and say we
14269 don't need stack realignment. */
14270 if ((stack_realign
|| !flag_omit_frame_pointer
)
14271 && frame_pointer_needed
14273 && crtl
->sp_is_unchanging
14274 && !ix86_current_function_calls_tls_descriptor
14275 && !crtl
->accesses_prior_frames
14276 && !cfun
->calls_alloca
14277 && !crtl
->calls_eh_return
14278 /* See ira_setup_eliminable_regset for the rationale. */
14279 && !(STACK_CHECK_MOVING_SP
14280 && flag_stack_check
14282 && cfun
->can_throw_non_call_exceptions
)
14283 && !ix86_frame_pointer_required ()
14284 && get_frame_size () == 0
14285 && ix86_nsaved_sseregs () == 0
14286 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
14288 HARD_REG_SET set_up_by_prologue
, prologue_used
;
14291 CLEAR_HARD_REG_SET (prologue_used
);
14292 CLEAR_HARD_REG_SET (set_up_by_prologue
);
14293 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
14294 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
14295 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
14296 HARD_FRAME_POINTER_REGNUM
);
14298 /* The preferred stack alignment is the minimum stack alignment. */
14299 if (stack_alignment
> crtl
->preferred_stack_boundary
)
14300 stack_alignment
= crtl
->preferred_stack_boundary
;
14302 bool require_stack_frame
= false;
14304 FOR_EACH_BB_FN (bb
, cfun
)
14307 FOR_BB_INSNS (bb
, insn
)
14308 if (NONDEBUG_INSN_P (insn
)
14309 && requires_stack_frame_p (insn
, prologue_used
,
14310 set_up_by_prologue
))
14312 require_stack_frame
= true;
14316 /* Find the maximum stack alignment. */
14317 subrtx_iterator::array_type array
;
14318 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
14320 && (reg_mentioned_p (stack_pointer_rtx
,
14322 || reg_mentioned_p (frame_pointer_rtx
,
14325 unsigned int alignment
= MEM_ALIGN (*iter
);
14326 if (alignment
> stack_alignment
)
14327 stack_alignment
= alignment
;
14333 if (require_stack_frame
)
14335 /* Stack frame is required. If stack alignment needed is less
14336 than incoming stack boundary, don't realign stack. */
14337 stack_realign
= incoming_stack_boundary
< stack_alignment
;
14338 if (!stack_realign
)
14340 crtl
->max_used_stack_slot_alignment
14341 = incoming_stack_boundary
;
14342 crtl
->stack_alignment_needed
14343 = incoming_stack_boundary
;
14344 /* Also update preferred_stack_boundary for leaf
14346 crtl
->preferred_stack_boundary
14347 = incoming_stack_boundary
;
14352 /* If drap has been set, but it actually isn't live at the
14353 start of the function, there is no reason to set it up. */
14354 if (crtl
->drap_reg
)
14356 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
14357 if (! REGNO_REG_SET_P (DF_LR_IN (bb
),
14358 REGNO (crtl
->drap_reg
)))
14360 crtl
->drap_reg
= NULL_RTX
;
14361 crtl
->need_drap
= false;
14365 cfun
->machine
->no_drap_save_restore
= true;
14367 frame_pointer_needed
= false;
14368 stack_realign
= false;
14369 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
14370 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
14371 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
14372 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
14373 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
14374 df_finish_pass (true);
14375 df_scan_alloc (NULL
);
14377 df_compute_regs_ever_live (true);
14380 if (flag_var_tracking
)
14382 /* Since frame pointer is no longer available, replace it with
14383 stack pointer - UNITS_PER_WORD in debug insns. */
14385 for (ref
= DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM
);
14388 rtx_insn
*insn
= DF_REF_INSN (ref
);
14389 /* Make sure the next ref is for a different instruction,
14390 so that we're not affected by the rescan. */
14391 next
= DF_REF_NEXT_REG (ref
);
14392 while (next
&& DF_REF_INSN (next
) == insn
)
14393 next
= DF_REF_NEXT_REG (next
);
14395 if (DEBUG_INSN_P (insn
))
14397 bool changed
= false;
14398 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
14400 rtx
*loc
= DF_REF_LOC (ref
);
14401 if (*loc
== hard_frame_pointer_rtx
)
14403 *loc
= plus_constant (Pmode
,
14410 df_insn_rescan (insn
);
14415 recompute_frame_layout_p
= true;
14419 if (crtl
->stack_realign_needed
!= stack_realign
)
14420 recompute_frame_layout_p
= true;
14421 crtl
->stack_realign_needed
= stack_realign
;
14422 crtl
->stack_realign_finalized
= true;
14423 if (recompute_frame_layout_p
)
14424 ix86_compute_frame_layout ();
/* Delete SET_GOT right after the entry block if it is allocated to reg.  */

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  rtx vec = XVECEXP (pat, 0, 0);
	  if (GET_CODE (vec) == SET
	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  if (offset)
    addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
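
/* Example (added for illustration): gen_frame_store (reg, rax, -16) builds
   the RTL (set (mem (plus rax -16)) reg), i.e. a store of REG at RAX - 16,
   while gen_frame_load builds the mirror-image load.  These SETs are
   collected into the PARALLELs used by the out-of-line ms2sysv save and
   restore stubs below.  */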
14474 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame
&frame
)
14476 struct machine_function
*m
= cfun
->machine
;
14477 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
14478 + m
->call_ms2sysv_extra_regs
;
14479 rtvec v
= rtvec_alloc (ncregs
+ 1);
14480 unsigned int align
, i
, vi
= 0;
14483 rtx rax
= gen_rtx_REG (word_mode
, AX_REG
);
14484 const struct xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
14485 HOST_WIDE_INT allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
14487 /* AL should only be live with sysv_abi. */
14488 gcc_assert (!ix86_eax_live_at_start_p ());
14490 /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
14491 we've actually realigned the stack or not. */
14492 align
= GET_MODE_ALIGNMENT (V4SFmode
);
14493 addr
= choose_baseaddr (frame
.stack_realign_offset
14494 + xlogue
.get_stub_ptr_offset (), &align
);
14495 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
14496 emit_insn (gen_rtx_SET (rax
, addr
));
14498 /* Allocate stack if not already done. */
14500 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
14501 GEN_INT (-allocate
), -1, false);
14503 /* Get the stub symbol. */
14504 sym
= xlogue
.get_stub_rtx (frame_pointer_needed
? XLOGUE_STUB_SAVE_HFP
14505 : XLOGUE_STUB_SAVE
);
14506 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
14508 for (i
= 0; i
< ncregs
; ++i
)
14510 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
14511 rtx reg
= gen_rtx_REG ((SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
),
14513 RTVEC_ELT (v
, vi
++) = gen_frame_store (reg
, rax
, -r
.offset
);
14516 gcc_assert (vi
== (unsigned)GET_NUM_ELEM (v
));
14518 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, v
));
14519 RTX_FRAME_RELATED_P (insn
) = true;
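
/* Illustrative note (not from the original source): the PARALLEL emitted
   above pairs a USE of the out-of-line stub symbol with one gen_frame_store
   per clobbered register, so the unwinder sees the stub call as a plain
   sequence of register saves relative to the RAX base set up beforehand.  */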
14522 /* Expand the prologue into a bunch of separate insns. */
14525 ix86_expand_prologue (void)
14527 struct machine_function
*m
= cfun
->machine
;
14529 struct ix86_frame frame
;
14530 HOST_WIDE_INT allocate
;
14531 bool int_registers_saved
;
14532 bool sse_registers_saved
;
14533 rtx static_chain
= NULL_RTX
;
14535 if (ix86_function_naked (current_function_decl
))
14538 ix86_finalize_stack_frame_flags ();
14540 /* DRAP should not coexist with stack_realign_fp */
14541 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
14543 memset (&m
->fs
, 0, sizeof (m
->fs
));
14545 /* Initialize CFA state for before the prologue. */
14546 m
->fs
.cfa_reg
= stack_pointer_rtx
;
14547 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
14549 /* Track SP offset to the CFA. We continue tracking this after we've
14550 swapped the CFA register away from SP. In the case of re-alignment
14551 this is fudged; we're interested to offsets within the local frame. */
14552 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
14553 m
->fs
.sp_valid
= true;
14554 m
->fs
.sp_realigned
= false;
14558 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
14560 /* We should have already generated an error for any use of
14561 ms_hook on a nested function. */
14562 gcc_checking_assert (!ix86_static_chain_on_stack
);
14564 /* Check if profiling is active and we shall use profiling before
14565 prologue variant. If so sorry. */
14566 if (crtl
->profile
&& flag_fentry
!= 0)
14567 sorry ("ms_hook_prologue attribute isn%'t compatible "
14568 "with -mfentry for 32-bit");
14570 /* In ix86_asm_output_function_label we emitted:
14571 8b ff movl.s %edi,%edi
14573 8b ec movl.s %esp,%ebp
14575 This matches the hookable function prologue in Win32 API
14576 functions in Microsoft Windows XP Service Pack 2 and newer.
14577 Wine uses this to enable Windows apps to hook the Win32 API
14578 functions provided by Wine.
14580 What that means is that we've already set up the frame pointer. */
14582 if (frame_pointer_needed
14583 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
14587 /* We've decided to use the frame pointer already set up.
14588 Describe this to the unwinder by pretending that both
14589 push and mov insns happen right here.
14591 Putting the unwind info here at the end of the ms_hook
14592 is done so that we can make absolutely certain we get
14593 the required byte sequence at the start of the function,
14594 rather than relying on an assembler that can produce
14595 the exact encoding required.
14597 However it does mean (in the unpatched case) that we have
14598 a 1 insn window where the asynchronous unwind info is
14599 incorrect. However, if we placed the unwind info at
14600 its correct location we would have incorrect unwind info
14601 in the patched case. Which is probably all moot since
14602 I don't expect Wine generates dwarf2 unwind info for the
14603 system libraries that use this feature. */
14605 insn
= emit_insn (gen_blockage ());
14607 push
= gen_push (hard_frame_pointer_rtx
);
14608 mov
= gen_rtx_SET (hard_frame_pointer_rtx
,
14609 stack_pointer_rtx
);
14610 RTX_FRAME_RELATED_P (push
) = 1;
14611 RTX_FRAME_RELATED_P (mov
) = 1;
14613 RTX_FRAME_RELATED_P (insn
) = 1;
14614 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
14615 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
14617 /* Note that gen_push incremented m->fs.cfa_offset, even
14618 though we didn't emit the push insn here. */
14619 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
14620 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
14621 m
->fs
.fp_valid
= true;
14625 /* The frame pointer is not needed so pop %ebp again.
14626 This leaves us with a pristine state. */
14627 emit_insn (gen_pop (hard_frame_pointer_rtx
));
14631 /* The first insn of a function that accepts its static chain on the
14632 stack is to push the register that would be filled in by a direct
14633 call. This insn will be skipped by the trampoline. */
14634 else if (ix86_static_chain_on_stack
)
14636 static_chain
= ix86_static_chain (cfun
->decl
, false);
14637 insn
= emit_insn (gen_push (static_chain
));
14638 emit_insn (gen_blockage ());
14640 /* We don't want to interpret this push insn as a register save,
14641 only as a stack adjustment. The real copy of the register as
14642 a save will be done later, if needed. */
14643 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
14644 t
= gen_rtx_SET (stack_pointer_rtx
, t
);
14645 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
14646 RTX_FRAME_RELATED_P (insn
) = 1;
14649 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
14650 of DRAP is needed and stack realignment is really needed after reload */
14651 if (stack_realign_drap
)
14653 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
14655 /* Can't use DRAP in interrupt function. */
14656 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
14657 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
14658 "in interrupt service routine. This may be worked "
14659 "around by avoiding functions with aggregate return.");
14661 /* Only need to push parameter pointer reg if it is caller saved. */
14662 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
14664 /* Push arg pointer reg */
14665 insn
= emit_insn (gen_push (crtl
->drap_reg
));
14666 RTX_FRAME_RELATED_P (insn
) = 1;
14669 /* Grab the argument pointer. */
14670 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
14671 insn
= emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
14672 RTX_FRAME_RELATED_P (insn
) = 1;
14673 m
->fs
.cfa_reg
= crtl
->drap_reg
;
14674 m
->fs
.cfa_offset
= 0;
14676 /* Align the stack. */
14677 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
14679 GEN_INT (-align_bytes
)));
14680 RTX_FRAME_RELATED_P (insn
) = 1;
14682 /* Replicate the return address on the stack so that return
14683 address can be reached via (argp - 1) slot. This is needed
14684 to implement macro RETURN_ADDR_RTX and intrinsic function
14685 expand_builtin_return_addr etc. */
14686 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
14687 t
= gen_frame_mem (word_mode
, t
);
14688 insn
= emit_insn (gen_push (t
));
14689 RTX_FRAME_RELATED_P (insn
) = 1;
14691 /* For the purposes of frame and register save area addressing,
14692 we've started over with a new frame. */
14693 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
14694 m
->fs
.realigned
= true;
14698 /* Replicate static chain on the stack so that static chain
14699 can be reached via (argp - 2) slot. This is needed for
14700 nested function with stack realignment. */
14701 insn
= emit_insn (gen_push (static_chain
));
14702 RTX_FRAME_RELATED_P (insn
) = 1;
14706 int_registers_saved
= (frame
.nregs
== 0);
14707 sse_registers_saved
= (frame
.nsseregs
== 0);
14709 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
14711 /* Note: AT&T enter does NOT have reversed args. Enter is probably
14712 slower on all targets. Also sdb doesn't like it. */
14713 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
14714 RTX_FRAME_RELATED_P (insn
) = 1;
14716 /* Push registers now, before setting the frame pointer
14718 if (!int_registers_saved
14720 && !frame
.save_regs_using_mov
)
14722 ix86_emit_save_regs ();
14723 int_registers_saved
= true;
14724 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
14727 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
14729 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
14730 RTX_FRAME_RELATED_P (insn
) = 1;
14732 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
14733 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
14734 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
14735 m
->fs
.fp_valid
= true;
14739 if (!int_registers_saved
)
14741 /* If saving registers via PUSH, do so now. */
14742 if (!frame
.save_regs_using_mov
)
14744 ix86_emit_save_regs ();
14745 int_registers_saved
= true;
14746 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
14749 /* When using red zone we may start register saving before allocating
14750 the stack frame saving one cycle of the prologue. However, avoid
14751 doing this if we have to probe the stack; at least on x86_64 the
14752 stack probe can turn into a call that clobbers a red zone location. */
14753 else if (ix86_using_red_zone ()
14754 && (! TARGET_STACK_PROBE
14755 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
14757 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
14758 int_registers_saved
= true;
14762 if (stack_realign_fp
)
14764 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
14765 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
14767 /* Record last valid frame pointer offset. */
14768 m
->fs
.sp_realigned_fp_last
= frame
.reg_save_offset
;
14770 /* The computation of the size of the re-aligned stack frame means
14771 that we must allocate the size of the register save area before
14772 performing the actual alignment. Otherwise we cannot guarantee
14773 that there's enough storage above the realignment point. */
14774 allocate
= frame
.reg_save_offset
- m
->fs
.sp_offset
14775 + frame
.stack_realign_allocate
;
14777 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
14778 GEN_INT (-allocate
), -1, false);
14780 /* Align the stack. */
14781 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
14783 GEN_INT (-align_bytes
)));
14784 m
->fs
.sp_offset
= ROUND_UP (m
->fs
.sp_offset
, align_bytes
);
14785 m
->fs
.sp_realigned_offset
= m
->fs
.sp_offset
14786 - frame
.stack_realign_allocate
;
14787 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
14788 Beyond this point, stack access should be done via choose_baseaddr or
14789 by using sp_valid_at and fp_valid_at to determine the correct base
14790 register. Henceforth, any CFA offset should be thought of as logical
14791 and not physical. */
14792 gcc_assert (m
->fs
.sp_realigned_offset
>= m
->fs
.sp_realigned_fp_last
);
14793 gcc_assert (m
->fs
.sp_realigned_offset
== frame
.stack_realign_offset
);
14794 m
->fs
.sp_realigned
= true;
14796 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
14797 is needed to describe where a register is saved using a realigned
14798 stack pointer, so we need to invalidate the stack pointer for that
14801 m
->fs
.sp_valid
= false;
14804 if (m
->call_ms2sysv
)
14805 ix86_emit_outlined_ms2sysv_save (frame
);
14807 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
14809 if (flag_stack_usage_info
)
14811 /* We start to count from ARG_POINTER. */
14812 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
14814 /* If it was realigned, take into account the fake frame. */
14815 if (stack_realign_drap
)
14817 if (ix86_static_chain_on_stack
)
14818 stack_size
+= UNITS_PER_WORD
;
14820 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
14821 stack_size
+= UNITS_PER_WORD
;
14823 /* This over-estimates by 1 minimal-stack-alignment-unit but
14824 mitigates that by counting in the new return address slot. */
14825 current_function_dynamic_stack_size
14826 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
14829 current_function_static_stack_size
= stack_size
;
14832 /* On SEH target with very large frame size, allocate an area to save
14833 SSE registers (as the very large allocation won't be described). */
14835 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
14836 && !sse_registers_saved
)
14838 HOST_WIDE_INT sse_size
=
14839 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
14841 gcc_assert (int_registers_saved
);
14843 /* No need to do stack checking as the area will be immediately
14845 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
14846 GEN_INT (-sse_size
), -1,
14847 m
->fs
.cfa_reg
== stack_pointer_rtx
);
14848 allocate
-= sse_size
;
14849 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
14850 sse_registers_saved
= true;
14853 /* The stack has already been decremented by the instruction calling us
14854 so probe if the size is non-negative to preserve the protection area. */
14855 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
14857 /* We expect the GP registers to be saved when probes are used. */
14858 gcc_assert (int_registers_saved
);
14860 if (STACK_CHECK_MOVING_SP
)
14862 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
14863 && allocate
<= PROBE_INTERVAL
))
14865 ix86_adjust_stack_and_probe (allocate
);
14871 HOST_WIDE_INT size
= allocate
;
14873 if (TARGET_64BIT
&& size
>= HOST_WIDE_INT_C (0x80000000))
14874 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
14876 if (TARGET_STACK_PROBE
)
14878 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
14880 if (size
> PROBE_INTERVAL
)
14881 ix86_emit_probe_stack_range (0, size
);
14884 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
14888 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
14890 if (size
> PROBE_INTERVAL
&& size
> STACK_CHECK_PROTECT
)
14891 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
,
14892 size
- STACK_CHECK_PROTECT
);
14895 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
14902 else if (!ix86_target_stack_probe ()
14903 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
14905 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
14906 GEN_INT (-allocate
), -1,
14907 m
->fs
.cfa_reg
== stack_pointer_rtx
);
14911 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
14913 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
14914 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
14915 bool eax_live
= ix86_eax_live_at_start_p ();
14916 bool r10_live
= false;
14919 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
14923 insn
= emit_insn (gen_push (eax
));
14924 allocate
-= UNITS_PER_WORD
;
14925 /* Note that SEH directives need to continue tracking the stack
14926 pointer even after the frame pointer has been set up. */
14927 if (sp_is_cfa_reg
|| TARGET_SEH
)
14930 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
14931 RTX_FRAME_RELATED_P (insn
) = 1;
14932 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
14933 gen_rtx_SET (stack_pointer_rtx
,
14934 plus_constant (Pmode
, stack_pointer_rtx
,
14935 -UNITS_PER_WORD
)));
14941 r10
= gen_rtx_REG (Pmode
, R10_REG
);
14942 insn
= emit_insn (gen_push (r10
));
14943 allocate
-= UNITS_PER_WORD
;
14944 if (sp_is_cfa_reg
|| TARGET_SEH
)
14947 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
14948 RTX_FRAME_RELATED_P (insn
) = 1;
14949 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
14950 gen_rtx_SET (stack_pointer_rtx
,
14951 plus_constant (Pmode
, stack_pointer_rtx
,
14952 -UNITS_PER_WORD
)));
14956 emit_move_insn (eax
, GEN_INT (allocate
));
14957 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
14959 /* Use the fact that AX still contains ALLOCATE. */
14960 adjust_stack_insn
= (Pmode
== DImode
14961 ? gen_pro_epilogue_adjust_stack_di_sub
14962 : gen_pro_epilogue_adjust_stack_si_sub
);
14964 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
14965 stack_pointer_rtx
, eax
));
14967 if (sp_is_cfa_reg
|| TARGET_SEH
)
14970 m
->fs
.cfa_offset
+= allocate
;
14971 RTX_FRAME_RELATED_P (insn
) = 1;
14972 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
14973 gen_rtx_SET (stack_pointer_rtx
,
14974 plus_constant (Pmode
, stack_pointer_rtx
,
14977 m
->fs
.sp_offset
+= allocate
;
14979 /* Use stack_pointer_rtx for relative addressing so that code
14980 works for realigned stack, too. */
14981 if (r10_live
&& eax_live
)
14983 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
14984 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
14985 gen_frame_mem (word_mode
, t
));
14986 t
= plus_constant (Pmode
, t
, UNITS_PER_WORD
);
14987 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
14988 gen_frame_mem (word_mode
, t
));
14990 else if (eax_live
|| r10_live
)
14992 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
14993 emit_move_insn (gen_rtx_REG (word_mode
,
14994 (eax_live
? AX_REG
: R10_REG
)),
14995 gen_frame_mem (word_mode
, t
));
14998 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
/* If we haven't already set up the frame pointer, do so now.  */
15001 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
15003 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
15004 GEN_INT (frame
.stack_pointer_offset
15005 - frame
.hard_frame_pointer_offset
));
15006 insn
= emit_insn (insn
);
15007 RTX_FRAME_RELATED_P (insn
) = 1;
15008 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
15010 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
15011 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
15012 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
15013 m
->fs
.fp_valid
= true;
15016 if (!int_registers_saved
)
15017 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
15018 if (!sse_registers_saved
)
15019 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
15021 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
15023 if (!TARGET_64BIT
&& pic_offset_table_rtx
&& crtl
->profile
&& !flag_fentry
)
15025 rtx pic
= gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
);
15026 insn
= emit_insn (gen_set_got (pic
));
15027 RTX_FRAME_RELATED_P (insn
) = 1;
15028 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
15029 emit_insn (gen_prologue_use (pic
));
/* Delete an already emitted SET_GOT if it exists and is allocated to
   REAL_PIC_OFFSET_TABLE_REGNUM.  */
15032 ix86_elim_entry_set_got (pic
);
15035 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
15037 /* vDRAP is setup but after reload it turns out stack realign
15038 isn't necessary, here we will emit prologue to setup DRAP
15039 without stack realign adjustment */
15040 t
= choose_baseaddr (0, NULL
);
15041 emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
15044 /* Prevent instructions from being scheduled into register save push
15045 sequence when access to the redzone area is done through frame pointer.
15046 The offset between the frame pointer and the stack pointer is calculated
15047 relative to the value of the stack pointer at the end of the function
15048 prologue, and moving instructions that access redzone area via frame
15049 pointer inside push sequence violates this assumption. */
15050 if (frame_pointer_needed
&& frame
.red_zone_size
)
15051 emit_insn (gen_memory_blockage ());
15053 /* SEH requires that the prologue end within 256 bytes of the start of
15054 the function. Prevent instruction schedules that would extend that.
15055 Further, prevent alloca modifications to the stack pointer from being
15056 combined with prologue modifications. */
15058 emit_insn (gen_prologue_use (stack_pointer_rtx
));
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omit the emit and only attach the notes.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
				 m->fs.fp_offset);
    }
}
15164 First register is restored from CFA - CFA_OFFSET. */
15166 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
15167 bool maybe_eh_return
)
15169 struct machine_function
*m
= cfun
->machine
;
15170 unsigned int regno
;
15172 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
15173 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
15175 rtx reg
= gen_rtx_REG (word_mode
, regno
);
15179 mem
= choose_baseaddr (cfa_offset
, NULL
);
15180 mem
= gen_frame_mem (word_mode
, mem
);
15181 insn
= emit_move_insn (reg
, mem
);
15183 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
15185 /* Previously we'd represented the CFA as an expression
15186 like *(%ebp - 8). We've just popped that value from
15187 the stack, which means we need to reset the CFA to
15188 the drap register. This will remain until we restore
15189 the stack pointer. */
15190 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
15191 RTX_FRAME_RELATED_P (insn
) = 1;
15193 /* This means that the DRAP register is valid for addressing. */
15194 m
->fs
.drap_valid
= true;
15197 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
15199 cfa_offset
-= UNITS_PER_WORD
;
15203 /* Emit code to restore saved registers using MOV insns.
15204 First register is restored from CFA - CFA_OFFSET. */
15206 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
15207 bool maybe_eh_return
)
15209 unsigned int regno
;
15211 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
15212 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
15214 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
15216 unsigned int align
= GET_MODE_ALIGNMENT (V4SFmode
);
15218 mem
= choose_baseaddr (cfa_offset
, &align
);
15219 mem
= gen_rtx_MEM (V4SFmode
, mem
);
/* The location alignment depends upon the base register.  */
15222 align
= MIN (GET_MODE_ALIGNMENT (V4SFmode
), align
);
15223 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
15224 set_mem_align (mem
, align
);
15225 emit_insn (gen_rtx_SET (reg
, mem
));
15227 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
15229 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
15234 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame
&frame
,
15235 bool use_call
, int style
)
15237 struct machine_function
*m
= cfun
->machine
;
15238 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
15239 + m
->call_ms2sysv_extra_regs
;
15241 unsigned int elems_needed
, align
, i
, vi
= 0;
15244 rtx rsi
= gen_rtx_REG (word_mode
, SI_REG
);
15245 rtx r10
= NULL_RTX
;
15246 const struct xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
15247 HOST_WIDE_INT stub_ptr_offset
= xlogue
.get_stub_ptr_offset ();
15248 HOST_WIDE_INT rsi_offset
= frame
.stack_realign_offset
+ stub_ptr_offset
;
15249 rtx rsi_frame_load
= NULL_RTX
;
15250 HOST_WIDE_INT rsi_restore_offset
= (HOST_WIDE_INT
)-1;
15251 enum xlogue_stub stub
;
15253 gcc_assert (!m
->fs
.fp_valid
|| frame_pointer_needed
);
15255 /* If using a realigned stack, we should never start with padding. */
15256 gcc_assert (!stack_realign_fp
|| !xlogue
.get_stack_align_off_in ());
15258 /* Setup RSI as the stub's base pointer. */
15259 align
= GET_MODE_ALIGNMENT (V4SFmode
);
15260 tmp
= choose_baseaddr (rsi_offset
, &align
);
15261 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
15262 emit_insn (gen_rtx_SET (rsi
, tmp
));
15264 /* Get a symbol for the stub. */
15265 if (frame_pointer_needed
)
15266 stub
= use_call
? XLOGUE_STUB_RESTORE_HFP
15267 : XLOGUE_STUB_RESTORE_HFP_TAIL
;
15269 stub
= use_call
? XLOGUE_STUB_RESTORE
15270 : XLOGUE_STUB_RESTORE_TAIL
;
15271 sym
= xlogue
.get_stub_rtx (stub
);
15273 elems_needed
= ncregs
;
15277 elems_needed
+= frame_pointer_needed
? 5 : 3;
15278 v
= rtvec_alloc (elems_needed
);
15280 /* We call the epilogue stub when we need to pop incoming args or we are
15281 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
15282 epilogue stub and it is the tail-call. */
15284 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
15287 RTVEC_ELT (v
, vi
++) = ret_rtx
;
15288 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
15289 if (frame_pointer_needed
)
15291 rtx rbp
= gen_rtx_REG (DImode
, BP_REG
);
15292 gcc_assert (m
->fs
.fp_valid
);
15293 gcc_assert (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
);
15295 tmp
= gen_rtx_PLUS (DImode
, rbp
, GEN_INT (8));
15296 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, tmp
);
15297 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (rbp
, gen_rtx_MEM (DImode
, rbp
));
15298 tmp
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
15299 RTVEC_ELT (v
, vi
++) = gen_rtx_CLOBBER (VOIDmode
, tmp
);
15303 /* If no hard frame pointer, we set R10 to the SP restore value. */
15304 gcc_assert (!m
->fs
.fp_valid
);
15305 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
15306 gcc_assert (m
->fs
.sp_valid
);
15308 r10
= gen_rtx_REG (DImode
, R10_REG
);
15309 tmp
= gen_rtx_PLUS (Pmode
, rsi
, GEN_INT (stub_ptr_offset
));
15310 emit_insn (gen_rtx_SET (r10
, tmp
));
15312 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, r10
);
15316 /* Generate frame load insns and restore notes. */
15317 for (i
= 0; i
< ncregs
; ++i
)
15319 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
15320 machine_mode mode
= SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
;
15321 rtx reg
, frame_load
;
15323 reg
= gen_rtx_REG (mode
, r
.regno
);
15324 frame_load
= gen_frame_load (reg
, rsi
, r
.offset
);
15326 /* Save RSI frame load insn & note to add last. */
15327 if (r
.regno
== SI_REG
)
15329 gcc_assert (!rsi_frame_load
);
15330 rsi_frame_load
= frame_load
;
15331 rsi_restore_offset
= r
.offset
;
15335 RTVEC_ELT (v
, vi
++) = frame_load
;
15336 ix86_add_cfa_restore_note (NULL
, reg
, r
.offset
);
15340 /* Add RSI frame load & restore note at the end. */
15341 gcc_assert (rsi_frame_load
);
15342 gcc_assert (rsi_restore_offset
!= (HOST_WIDE_INT
)-1);
15343 RTVEC_ELT (v
, vi
++) = rsi_frame_load
;
15344 ix86_add_cfa_restore_note (NULL
, gen_rtx_REG (DImode
, SI_REG
),
15345 rsi_restore_offset
);
15347 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
15348 if (!use_call
&& !frame_pointer_needed
)
15350 gcc_assert (m
->fs
.sp_valid
);
15351 gcc_assert (!m
->fs
.sp_realigned
);
15353 /* At this point, R10 should point to frame.stack_realign_offset. */
15354 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
15355 m
->fs
.cfa_offset
+= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
15356 m
->fs
.sp_offset
= frame
.stack_realign_offset
;
15359 gcc_assert (vi
== (unsigned int)GET_NUM_ELEM (v
));
15360 tmp
= gen_rtx_PARALLEL (VOIDmode
, v
);
15362 insn
= emit_insn (tmp
);
15365 insn
= emit_jump_insn (tmp
);
15366 JUMP_LABEL (insn
) = ret_rtx
;
15368 if (frame_pointer_needed
)
15369 ix86_emit_leave (insn
);
15372 /* Need CFA adjust note. */
15373 tmp
= gen_rtx_SET (stack_pointer_rtx
, r10
);
15374 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, tmp
);
15378 RTX_FRAME_RELATED_P (insn
) = true;
15379 ix86_add_queued_cfa_restore_notes (insn
);
15381 /* If we're not doing a tail-call, we need to adjust the stack. */
15382 if (use_call
&& m
->fs
.sp_valid
)
15384 HOST_WIDE_INT dealloc
= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
15385 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
15386 GEN_INT (dealloc
), style
,
15387 m
->fs
.cfa_reg
== stack_pointer_rtx
);
15391 /* Restore function stack, frame, and registers. */
15394 ix86_expand_epilogue (int style
)
15396 struct machine_function
*m
= cfun
->machine
;
15397 struct machine_frame_state frame_state_save
= m
->fs
;
15398 struct ix86_frame frame
;
15399 bool restore_regs_via_mov
;
15401 bool restore_stub_is_tail
= false;
15403 if (ix86_function_naked (current_function_decl
))
15405 /* The program should not reach this point. */
15406 emit_insn (gen_ud2 ());
15410 ix86_finalize_stack_frame_flags ();
15413 m
->fs
.sp_realigned
= stack_realign_fp
;
15414 m
->fs
.sp_valid
= stack_realign_fp
15415 || !frame_pointer_needed
15416 || crtl
->sp_is_unchanging
;
15417 gcc_assert (!m
->fs
.sp_valid
15418 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
15420 /* The FP must be valid if the frame pointer is present. */
15421 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
15422 gcc_assert (!m
->fs
.fp_valid
15423 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
15425 /* We must have *some* valid pointer to the stack frame. */
15426 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
15428 /* The DRAP is never valid at this point. */
15429 gcc_assert (!m
->fs
.drap_valid
);
15431 /* See the comment about red zone and frame
15432 pointer usage in ix86_expand_prologue. */
15433 if (frame_pointer_needed
&& frame
.red_zone_size
)
15434 emit_insn (gen_memory_blockage ());
15436 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
15437 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
15439 /* Determine the CFA offset of the end of the red-zone. */
15440 m
->fs
.red_zone_offset
= 0;
15441 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
15443 /* The red-zone begins below return address and error code in
15444 exception handler. */
15445 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
15447 /* When the register save area is in the aligned portion of
15448 the stack, determine the maximum runtime displacement that
15449 matches up with the aligned frame. */
15450 if (stack_realign_drap
)
15451 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
15455 /* Special care must be taken for the normal return case of a function
15456 using eh_return: the eax and edx registers are marked as saved, but
15457 not restored along this path. Adjust the save location to match. */
15458 if (crtl
->calls_eh_return
&& style
!= 2)
15459 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
15461 /* EH_RETURN requires the use of moves to function properly. */
15462 if (crtl
->calls_eh_return
)
15463 restore_regs_via_mov
= true;
15464 /* SEH requires the use of pops to identify the epilogue. */
15465 else if (TARGET_SEH
)
15466 restore_regs_via_mov
= false;
15467 /* If we're only restoring one register and sp cannot be used then
15468 use a move instruction to restore the register, since it's
15469 less work than reloading sp and popping the register. */
15470 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
15471 restore_regs_via_mov
= true;
15472 else if (TARGET_EPILOGUE_USING_MOVE
15473 && cfun
->machine
->use_fast_prologue_epilogue
15474 && (frame
.nregs
> 1
15475 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
15476 restore_regs_via_mov
= true;
15477 else if (frame_pointer_needed
15479 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
15480 restore_regs_via_mov
= true;
15481 else if (frame_pointer_needed
15482 && TARGET_USE_LEAVE
15483 && cfun
->machine
->use_fast_prologue_epilogue
15484 && frame
.nregs
== 1)
15485 restore_regs_via_mov
= true;
15487 restore_regs_via_mov
= false;
15489 if (restore_regs_via_mov
|| frame
.nsseregs
)
15491 /* Ensure that the entire register save area is addressable via
15492 the stack pointer, if we will restore SSE regs via sp. */
15494 && m
->fs
.sp_offset
> 0x7fffffff
15495 && sp_valid_at (frame
.stack_realign_offset
)
15496 && (frame
.nsseregs
+ frame
.nregs
) != 0)
15498 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
15499 GEN_INT (m
->fs
.sp_offset
15500 - frame
.sse_reg_save_offset
),
15502 m
->fs
.cfa_reg
== stack_pointer_rtx
);
15506 /* If there are any SSE registers to restore, then we have to do it
15507 via moves, since there's obviously no pop for SSE regs. */
15508 if (frame
.nsseregs
)
15509 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
15512 if (m
->call_ms2sysv
)
15514 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
15516 /* We cannot use a tail-call for the stub if:
15517 1. We have to pop incoming args,
15518 2. We have additional int regs to restore, or
15519 3. A sibling call will be the tail-call, or
15520 4. We are emitting an eh_return_internal epilogue.
15522 TODO: Item 4 has not yet been tested!
15524 If any of the above are true, we will call the stub rather than
      use it as the tail-call. */
15526 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
15527 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
15530 /* If using out-of-line stub that is a tail-call, then...*/
15531 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
15533 /* TODO: paranoid tests. (remove eventually) */
15534 gcc_assert (m
->fs
.sp_valid
);
15535 gcc_assert (!m
->fs
.sp_realigned
);
15536 gcc_assert (!m
->fs
.fp_valid
);
15537 gcc_assert (!m
->fs
.realigned
);
15538 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
15539 gcc_assert (!crtl
->drap_reg
);
15540 gcc_assert (!frame
.nregs
);
15542 else if (restore_regs_via_mov
)
15547 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
15549 /* eh_return epilogues need %ecx added to the stack pointer. */
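/* On this target EH_RETURN_STACKADJ_RTX is the %ecx register, which is
   why the assert below checks that the DRAP register is not %ecx.  */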
15552 rtx sa
= EH_RETURN_STACKADJ_RTX
;
15555 /* %ecx can't be used for both DRAP register and eh_return. */
15556 if (crtl
->drap_reg
)
15557 gcc_assert (REGNO (crtl
->drap_reg
) != CX_REG
);
15559 /* regparm nested functions don't work with eh_return. */
15560 gcc_assert (!ix86_static_chain_on_stack
);
15562 if (frame_pointer_needed
)
15564 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
15565 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
15566 emit_insn (gen_rtx_SET (sa
, t
));
15568 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
15569 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
15571 /* Note that we use SA as a temporary CFA, as the return
15572 address is at the proper place relative to it. We
15573 pretend this happens at the FP restore insn because
15574 prior to this insn the FP would be stored at the wrong
15575 offset relative to SA, and after this insn we have no
15576 other reasonable register to use for the CFA. We don't
15577 bother resetting the CFA to the SP for the duration of
15578 the return insn. */
15579 add_reg_note (insn
, REG_CFA_DEF_CFA
,
15580 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
15581 ix86_add_queued_cfa_restore_notes (insn
);
15582 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
15583 RTX_FRAME_RELATED_P (insn
) = 1;
15585 m
->fs
.cfa_reg
= sa
;
15586 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
15587 m
->fs
.fp_valid
= false;
15589 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
15590 const0_rtx
, style
, false);
15594 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
15595 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
15596 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
15597 ix86_add_queued_cfa_restore_notes (insn
);
15599 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
15600 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
15602 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
15603 add_reg_note (insn
, REG_CFA_DEF_CFA
,
15604 plus_constant (Pmode
, stack_pointer_rtx
,
15606 RTX_FRAME_RELATED_P (insn
) = 1;
15609 m
->fs
.sp_offset
= UNITS_PER_WORD
;
15610 m
->fs
.sp_valid
= true;
15611 m
->fs
.sp_realigned
= false;
15616 /* SEH requires that the function end with (1) a stack adjustment
15617 if necessary, (2) a sequence of pops, and (3) a return or
15618 jump instruction. Prevent insns from the function body from
15619 being scheduled into this sequence. */
15622 /* Prevent a catch region from being adjacent to the standard
15623 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda nor
15624 several other flags that would be interesting to test are
      set up yet. */
15626 if (flag_non_call_exceptions
)
15627 emit_insn (gen_nops (const1_rtx
));
15629 emit_insn (gen_blockage ());
15632 /* First step is to deallocate the stack frame so that we can
15633 pop the registers. If the stack pointer was realigned, it needs
15634 to be restored now. Also do it on SEH target for very large
15635 frame as the emitted instructions aren't allowed by the ABI
      in epilogues. */
15637 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
15639 && (m
->fs
.sp_offset
- frame
.reg_save_offset
15640 >= SEH_MAX_FRAME_SIZE
)))
15642 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
15643 GEN_INT (m
->fs
.fp_offset
15644 - frame
.reg_save_offset
),
15647 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
15649 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
15650 GEN_INT (m
->fs
.sp_offset
15651 - frame
.reg_save_offset
),
15653 m
->fs
.cfa_reg
== stack_pointer_rtx
);
15656 ix86_emit_restore_regs_using_pop ();
15659 /* If we used a stack pointer and haven't already got rid of it,
15661 if (m
->fs
.fp_valid
)
15663 /* If the stack pointer is valid and pointing at the frame
15664 pointer store address, then we only need a pop. */
15665 if (sp_valid_at (frame
.hfp_save_offset
)
15666 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
15667 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
15668 /* Leave results in shorter dependency chains on CPUs that are
15669 able to grok it fast. */
15670 else if (TARGET_USE_LEAVE
15671 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
15672 || !cfun
->machine
->use_fast_prologue_epilogue
)
15673 ix86_emit_leave (NULL
);
15676 pro_epilogue_adjust_stack (stack_pointer_rtx
,
15677 hard_frame_pointer_rtx
,
15678 const0_rtx
, style
, !using_drap
);
15679 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
15685 int param_ptr_offset
= UNITS_PER_WORD
;
15688 gcc_assert (stack_realign_drap
);
15690 if (ix86_static_chain_on_stack
)
15691 param_ptr_offset
+= UNITS_PER_WORD
;
15692 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
15693 param_ptr_offset
+= UNITS_PER_WORD
;
15695 insn
= emit_insn (gen_rtx_SET
15696 (stack_pointer_rtx
,
15697 gen_rtx_PLUS (Pmode
,
15699 GEN_INT (-param_ptr_offset
))));
15700 m
->fs
.cfa_reg
= stack_pointer_rtx
;
15701 m
->fs
.cfa_offset
= param_ptr_offset
;
15702 m
->fs
.sp_offset
= param_ptr_offset
;
15703 m
->fs
.realigned
= false;
15705 add_reg_note (insn
, REG_CFA_DEF_CFA
,
15706 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
15707 GEN_INT (param_ptr_offset
)));
15708 RTX_FRAME_RELATED_P (insn
) = 1;
15710 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
15711 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
15714 /* At this point the stack pointer must be valid, and we must have
15715 restored all of the registers. We may not have deallocated the
15716 entire stack frame. We've delayed this until now because it may
15717 be possible to merge the local stack deallocation with the
15718 deallocation forced by ix86_static_chain_on_stack. */
15719 gcc_assert (m
->fs
.sp_valid
);
15720 gcc_assert (!m
->fs
.sp_realigned
);
15721 gcc_assert (!m
->fs
.fp_valid
);
15722 gcc_assert (!m
->fs
.realigned
);
15723 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
15725 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
15726 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
15730 ix86_add_queued_cfa_restore_notes (get_last_insn ());
15732 /* Sibcall epilogues don't want a return instruction. */
15735 m
->fs
= frame_state_save
;
15739 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
15740 emit_jump_insn (gen_interrupt_return ());
15741 else if (crtl
->args
.pops_args
&& crtl
->args
.size
)
15743 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
15745 /* i386 can only pop 64K bytes. If asked to pop more, pop return
15746 address, do explicit add, and jump indirectly to the caller. */
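/* The sequence emitted below is roughly

     popl  %ecx               ; return address -> %ecx
     addl  $pops_args, %esp   ; release the argument area
     jmp   *%ecx              ; return to the caller

   together with the CFA notes that describe it.  */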
15748 if (crtl
->args
.pops_args
>= 65536)
15750 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
15753 /* There is no "pascal" calling convention in any 64bit ABI. */
15754 gcc_assert (!TARGET_64BIT
);
15756 insn
= emit_insn (gen_pop (ecx
));
15757 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
15758 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
15760 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
15761 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
15762 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
15763 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
15764 RTX_FRAME_RELATED_P (insn
) = 1;
15766 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
15768 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
15771 emit_jump_insn (gen_simple_return_pop_internal (popc
));
15773 else if (!m
->call_ms2sysv
|| !restore_stub_is_tail
)
15774 emit_jump_insn (gen_simple_return_internal ());
15776 /* Restore the state back to the state from the prologue,
15777 so that it's correct for the next epilogue. */
15778 m
->fs
= frame_state_save
;
15781 /* Reset from the function's potential modifications. */
15784 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
)
15786 if (pic_offset_table_rtx
15787 && !ix86_use_pseudo_pic_reg ())
15788 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
15792 rtx_insn
*insn
= get_last_insn ();
15793 rtx_insn
*deleted_debug_label
= NULL
;
15795 /* Mach-O doesn't support labels at the end of objects, so if
15796 it looks like we might want one, take special action.
15797 First, collect any sequence of deleted debug labels. */
15800 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
15802 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
15803 notes only, instead set their CODE_LABEL_NUMBER to -1,
15804 otherwise there would be code generation differences
15805 in between -g and -g0. */
15806 if (NOTE_P (insn
) && NOTE_KIND (insn
)
15807 == NOTE_INSN_DELETED_DEBUG_LABEL
)
15808 deleted_debug_label
= insn
;
15809 insn
= PREV_INSN (insn
);
15815 then this needs to be detected, so skip past the barrier. */
15817 if (insn
&& BARRIER_P (insn
))
15818 insn
= PREV_INSN (insn
);
15820 /* Up to now we've only seen notes or barriers. */
15825 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
15826 /* Trailing label. */
15827 fputs ("\tnop\n", file
);
15828 else if (cfun
&& ! cfun
->is_thunk
)
15830 /* See if we have a completely empty function body, skipping
15831 the special case of the picbase thunk emitted as asm. */
15832 while (insn
&& ! INSN_P (insn
))
15833 insn
= PREV_INSN (insn
);
15834 /* If we don't find any insns, we've got an empty function body;
15835 I.e. completely empty - without a return or branch. This is
15836 taken as the case where a function body has been removed
15837 because it contains an inline __builtin_unreachable(). GCC
15838 declares that reaching __builtin_unreachable() means UB so
15839 we're not obliged to do anything special; however, we want
15840 non-zero-sized function bodies. To meet this, and help the
15841 user out, let's trap the case. */
15843 fputs ("\tud2\n", file
);
15846 else if (deleted_debug_label
)
15847 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
15848 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
15849 CODE_LABEL_NUMBER (insn
) = -1;
15853 /* Return a scratch register to use in the split stack prologue. The
15854 split stack prologue is used for -fsplit-stack. It is the first
15855 instructions in the function, even before the regular prologue.
15856 The scratch register can be any caller-saved register which is not
15857 used for parameters or for the static chain. */
15859 static unsigned int
15860 split_stack_prologue_scratch_regno (void)
15866 bool is_fastcall
, is_thiscall
;
15869 is_fastcall
= (lookup_attribute ("fastcall",
15870 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
15872 is_thiscall
= (lookup_attribute ("thiscall",
15873 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
15875 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
15879 if (DECL_STATIC_CHAIN (cfun
->decl
))
15881 sorry ("-fsplit-stack does not support fastcall with "
15882 "nested function");
15883 return INVALID_REGNUM
;
15887 else if (is_thiscall
)
15889 if (!DECL_STATIC_CHAIN (cfun
->decl
))
15893 else if (regparm
< 3)
15895 if (!DECL_STATIC_CHAIN (cfun
->decl
))
15901 sorry ("-fsplit-stack does not support 2 register "
15902 "parameters for a nested function");
15903 return INVALID_REGNUM
;
15910 /* FIXME: We could make this work by pushing a register
15911 around the addition and comparison. */
15912 sorry ("-fsplit-stack does not support 3 register parameters");
15913 return INVALID_REGNUM
;
15918 /* A SYMBOL_REF for the function which allocates new stack space for -fsplit-stack. */
15921 static GTY(()) rtx split_stack_fn
;
15923 /* A SYMBOL_REF for the more-stack function when using the large model. */
15926 static GTY(()) rtx split_stack_fn_large
;
15928 /* Return location of the stack guard value in the TLS block. */
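/* On Linux/glibc this is a memory reference in the TLS segment address
   space, i.e. %fs:<offset> in 64-bit mode or %gs:<offset> in 32-bit mode,
   where <offset> comes from TARGET_THREAD_SPLIT_STACK_OFFSET.  */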
15931 ix86_split_stack_guard (void)
15934 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
15937 gcc_assert (flag_split_stack
);
15939 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15940 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15942 gcc_unreachable ();
15945 r = GEN_INT (offset);
15946 r = gen_const_mem (Pmode, r);
15947 set_mem_addr_space (r, as);
15952 /* Handle -fsplit-stack. These are the first instructions in the
15953 function, even before the regular prologue. */
15956 ix86_expand_split_stack_prologue (void)
15958 struct ix86_frame frame
;
15959 HOST_WIDE_INT allocate
;
15960 unsigned HOST_WIDE_INT args_size
;
15961 rtx_code_label
*label
;
15962 rtx limit
, current
, allocate_rtx
, call_insn
, call_fusage
;
15963 rtx scratch_reg
= NULL_RTX
;
15964 rtx_code_label
*varargs_label
= NULL
;
15967 gcc_assert (flag_split_stack
&& reload_completed
);
15969 ix86_finalize_stack_frame_flags ();
15970 frame
= cfun
->machine
->frame
;
15971 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
15973 /* This is the label we will branch to if we have enough stack
15974 space. We expect the basic block reordering pass to reverse this
15975 branch if optimizing, so that we branch in the unlikely case. */
15976 label
= gen_label_rtx ();
15978 /* We need to compare the stack pointer minus the frame size with
15979 the stack boundary in the TCB. The stack boundary always gives
15980 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
15981 can compare directly. Otherwise we need to do an addition. */
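/* In pseudo-code the guard check below is

     if (sp - allocate >= guard)
       goto label;              -- enough stack, __morestack is skipped

   and the subtraction is omitted (sp is compared directly against the
   guard) whenever allocate < SPLIT_STACK_AVAILABLE.  */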
15983 limit
= ix86_split_stack_guard ();
15985 if (allocate
< SPLIT_STACK_AVAILABLE
)
15986 current
= stack_pointer_rtx
;
15989 unsigned int scratch_regno
;
15992 /* We need a scratch register to hold the stack pointer minus
15993 the required frame size. Since this is the very start of the
15994 function, the scratch register can be any caller-saved
15995 register which is not used for parameters. */
15996 offset
= GEN_INT (- allocate
);
15997 scratch_regno
= split_stack_prologue_scratch_regno ();
15998 if (scratch_regno
== INVALID_REGNUM
)
16000 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
16001 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
16003 /* We don't use ix86_gen_add3 in this case because it will
16004 want to split to lea, but when not optimizing the insn
16005 will not be split after this point. */
16006 emit_insn (gen_rtx_SET (scratch_reg
,
16007 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16012 emit_move_insn (scratch_reg
, offset
);
16013 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
16014 stack_pointer_rtx
));
16016 current
= scratch_reg
;
16019 ix86_expand_branch (GEU
, current
, limit
, label
);
16020 rtx_insn
*jump_insn
= get_last_insn ();
16021 JUMP_LABEL (jump_insn
) = label
;
16023 /* Mark the jump as very likely to be taken. */
16024 add_reg_br_prob_note (jump_insn
, profile_probability::very_likely ());
16026 if (split_stack_fn
== NULL_RTX
)
16028 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
16029 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
16031 fn
= split_stack_fn
;
16033 /* Get more stack space. We pass in the desired stack space and the
16034 size of the arguments to copy to the new stack. In 32-bit mode
16035 we push the parameters; __morestack will return on a new stack
16036 anyhow. In 64-bit mode we pass the parameters in r10 and
16038 allocate_rtx
= GEN_INT (allocate
);
16039 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
16040 call_fusage
= NULL_RTX
;
16041 rtx pop
= NULL_RTX
;
16046 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
16047 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
16049 /* If this function uses a static chain, it will be in %r10.
16050 Preserve it across the call to __morestack. */
16051 if (DECL_STATIC_CHAIN (cfun
->decl
))
16055 rax
= gen_rtx_REG (word_mode
, AX_REG
);
16056 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
16057 use_reg (&call_fusage
, rax
);
16060 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
16063 HOST_WIDE_INT argval
;
16065 gcc_assert (Pmode
== DImode
);
16066 /* When using the large model we need to load the address
16067 into a register, and we've run out of registers. So we
16068 switch to a different calling convention, and we call a
16069 different function: __morestack_large. We pass the
16070 argument size in the upper 32 bits of r10 and pass the
16071 frame size in the lower 32 bits. */
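/* E.g. with args_size == 0x20 and allocate == 0x1000 the value loaded
   into %r10 below is 0x0000002000001000: argument size in the high half,
   frame size in the low half.  */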
16072 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
16073 gcc_assert ((args_size
& 0xffffffff) == args_size
);
16075 if (split_stack_fn_large
== NULL_RTX
)
16077 split_stack_fn_large
=
16078 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
16079 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
16081 if (ix86_cmodel
== CM_LARGE_PIC
)
16083 rtx_code_label
*label
;
16086 label
= gen_label_rtx ();
16087 emit_label (label
);
16088 LABEL_PRESERVE_P (label
) = 1;
16089 emit_insn (gen_set_rip_rex64 (reg10
, label
));
16090 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
16091 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
16092 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
16094 x
= gen_rtx_CONST (Pmode
, x
);
16095 emit_move_insn (reg11
, x
);
16096 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
16097 x
= gen_const_mem (Pmode
, x
);
16098 emit_move_insn (reg11
, x
);
16101 emit_move_insn (reg11
, split_stack_fn_large
);
16105 argval
= ((args_size
<< 16) << 16) + allocate
;
16106 emit_move_insn (reg10
, GEN_INT (argval
));
16110 emit_move_insn (reg10
, allocate_rtx
);
16111 emit_move_insn (reg11
, GEN_INT (args_size
));
16112 use_reg (&call_fusage
, reg11
);
16115 use_reg (&call_fusage
, reg10
);
16119 rtx_insn
*insn
= emit_insn (gen_push (GEN_INT (args_size
)));
16120 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (UNITS_PER_WORD
));
16121 insn
= emit_insn (gen_push (allocate_rtx
));
16122 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (2 * UNITS_PER_WORD
));
16123 pop
= GEN_INT (2 * UNITS_PER_WORD
);
16125 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
16126 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
16128 add_function_usage_to (call_insn
, call_fusage
);
16130 add_reg_note (call_insn
, REG_ARGS_SIZE
, GEN_INT (0));
16131 /* Indicate that this function can't jump to non-local gotos. */
16132 make_reg_eh_region_note_nothrow_nononlocal (as_a
<rtx_insn
*> (call_insn
));
16134 /* In order to make call/return prediction work right, we now need
16135 to execute a return instruction. See
16136 libgcc/config/i386/morestack.S for the details on how this works.
16138 For flow purposes gcc must not see this as a return
16139 instruction--we need control flow to continue at the subsequent
16140 label. Therefore, we use an unspec. */
16141 gcc_assert (crtl
->args
.pops_args
< 65536);
16142 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
16144 /* If we are in 64-bit mode and this function uses a static chain,
16145 we saved %r10 in %rax before calling __morestack. */
16146 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
16147 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
16148 gen_rtx_REG (word_mode
, AX_REG
));
16150 /* If this function calls va_start, we need to store a pointer to
16151 the arguments on the old stack, because they may not have been
16152 all copied to the new stack. At this point the old stack can be
16153 found at the frame pointer value used by __morestack, because
16154 __morestack has set that up before calling back to us. Here we
16155 store that pointer in a scratch register, and in
16156 ix86_expand_prologue we store the scratch register in a stack
16158 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
16160 unsigned int scratch_regno
;
16164 scratch_regno
= split_stack_prologue_scratch_regno ();
16165 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
16166 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* 64-bit:
   fp -> old fp value
16170 return address within this function
16171 return address of caller of this function
   stack arguments
16173 So we add three words to get to the stack arguments.
   32-bit:
   fp -> old fp value
16177 return address within this function
16178 first argument to __morestack
16179 second argument to __morestack
16180 return address of caller of this function
   stack arguments
16182 So we add five words to get to the stack arguments. */
16184 words
= TARGET_64BIT
? 3 : 5;
16185 emit_insn (gen_rtx_SET (scratch_reg
,
16186 gen_rtx_PLUS (Pmode
, frame_reg
,
16187 GEN_INT (words
* UNITS_PER_WORD
))));
16189 varargs_label
= gen_label_rtx ();
16190 emit_jump_insn (gen_jump (varargs_label
));
16191 JUMP_LABEL (get_last_insn ()) = varargs_label
;
16196 emit_label (label
);
16197 LABEL_NUSES (label
) = 1;
16199 /* If this function calls va_start, we now have to set the scratch
16200 register for the case where we do not call __morestack. In this
16201 case we need to set it based on the stack pointer. */
16202 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
16204 emit_insn (gen_rtx_SET (scratch_reg
,
16205 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
16206 GEN_INT (UNITS_PER_WORD
))));
16208 emit_label (varargs_label
);
16209 LABEL_NUSES (varargs_label
) = 1;
16213 /* We may have to tell the dataflow pass that the split stack prologue
16214 is initializing a scratch register. */
16217 ix86_live_on_entry (bitmap regs
)
16219 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
16221 gcc_assert (flag_split_stack
);
16222 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
16226 /* Extract the parts of an RTL expression that is a valid memory address
16227 for an instruction. Return 0 if the structure of the address is
16228 grossly off. Return -1 if the address contains ASHIFT, so it is not
16229 strictly valid, but still used for computing length of lea instruction. */
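/* For example, the address

     (plus:DI (plus:DI (reg:DI bx)
                       (mult:DI (reg:DI cx) (const_int 4)))
              (const_int 8))

   i.e. 8(%rbx,%rcx,4), is decomposed into base = %rbx, index = %rcx,
   scale = 4 and disp = 8.  */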
16232 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
16234 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
16235 rtx base_reg
, index_reg
;
16236 HOST_WIDE_INT scale
= 1;
16237 rtx scale_rtx
= NULL_RTX
;
16240 addr_space_t seg
= ADDR_SPACE_GENERIC
;
16242 /* Allow zero-extended SImode addresses,
16243 they will be emitted with addr32 prefix. */
16244 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
16246 if (GET_CODE (addr
) == ZERO_EXTEND
16247 && GET_MODE (XEXP (addr
, 0)) == SImode
)
16249 addr
= XEXP (addr
, 0);
16250 if (CONST_INT_P (addr
))
16253 else if (GET_CODE (addr
) == AND
16254 && const_32bit_mask (XEXP (addr
, 1), DImode
))
16256 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0), DImode
);
16257 if (addr
== NULL_RTX
)
16260 if (CONST_INT_P (addr
))
16265 /* Allow SImode subregs of DImode addresses,
16266 they will be emitted with addr32 prefix. */
16267 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
16269 if (SUBREG_P (addr
)
16270 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
16272 addr
= SUBREG_REG (addr
);
16273 if (CONST_INT_P (addr
))
16280 else if (SUBREG_P (addr
))
16282 if (REG_P (SUBREG_REG (addr
)))
16287 else if (GET_CODE (addr
) == PLUS
)
16289 rtx addends
[4], op
;
16297 addends
[n
++] = XEXP (op
, 1);
16300 while (GET_CODE (op
) == PLUS
);
16305 for (i
= n
; i
>= 0; --i
)
16308 switch (GET_CODE (op
))
16313 index
= XEXP (op
, 0);
16314 scale_rtx
= XEXP (op
, 1);
16320 index
= XEXP (op
, 0);
16321 tmp
= XEXP (op
, 1);
16322 if (!CONST_INT_P (tmp
))
16324 scale
= INTVAL (tmp
);
16325 if ((unsigned HOST_WIDE_INT
) scale
> 3)
16327 scale
= 1 << scale
;
16332 if (GET_CODE (op
) != UNSPEC
)
16337 if (XINT (op
, 1) == UNSPEC_TP
16338 && TARGET_TLS_DIRECT_SEG_REFS
16339 && seg
== ADDR_SPACE_GENERIC
)
16340 seg
= DEFAULT_TLS_SEG_REG
;
16346 if (!REG_P (SUBREG_REG (op
)))
16373 else if (GET_CODE (addr
) == MULT
)
16375 index
= XEXP (addr
, 0); /* index*scale */
16376 scale_rtx
= XEXP (addr
, 1);
16378 else if (GET_CODE (addr
) == ASHIFT
)
16380 /* We're called for lea too, which implements ashift on occasion. */
16381 index
= XEXP (addr
, 0);
16382 tmp
= XEXP (addr
, 1);
16383 if (!CONST_INT_P (tmp
))
16385 scale
= INTVAL (tmp
);
16386 if ((unsigned HOST_WIDE_INT
) scale
> 3)
16388 scale
= 1 << scale
;
16392 disp
= addr
; /* displacement */
16398 else if (SUBREG_P (index
)
16399 && REG_P (SUBREG_REG (index
)))
16405 /* Extract the integral value of scale. */
16408 if (!CONST_INT_P (scale_rtx
))
16410 scale
= INTVAL (scale_rtx
);
16413 base_reg
= base
&& SUBREG_P (base
) ? SUBREG_REG (base
) : base
;
16414 index_reg
= index
&& SUBREG_P (index
) ? SUBREG_REG (index
) : index
;
16416 /* Avoid useless 0 displacement. */
16417 if (disp
== const0_rtx
&& (base
|| index
))
16420 /* Allow arg pointer and stack pointer as index if there is no scaling. */
16421 if (base_reg
&& index_reg
&& scale
== 1
16422 && (REGNO (index_reg
) == ARG_POINTER_REGNUM
16423 || REGNO (index_reg
) == FRAME_POINTER_REGNUM
16424 || REGNO (index_reg
) == SP_REG
))
16426 std::swap (base
, index
);
16427 std::swap (base_reg
, index_reg
);
16430 /* Special case: %ebp cannot be encoded as a base without a displacement.
16432 if (!disp
&& base_reg
16433 && (REGNO (base_reg
) == ARG_POINTER_REGNUM
16434 || REGNO (base_reg
) == FRAME_POINTER_REGNUM
16435 || REGNO (base_reg
) == BP_REG
16436 || REGNO (base_reg
) == R13_REG
))
16439 /* Special case: on K6, [%esi] makes the instruction vector decoded.
16440 Avoid this by transforming to [%esi+0].
16441 Reload calls address legitimization without cfun defined, so we need
16442 to test cfun for being non-NULL. */
16443 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
16444 && base_reg
&& !index_reg
&& !disp
16445 && REGNO (base_reg
) == SI_REG
)
16448 /* Special case: encode reg+reg instead of reg*2. */
16449 if (!base
&& index
&& scale
== 2)
16450 base
= index
, base_reg
= index_reg
, scale
= 1;
16452 /* Special case: scaling cannot be encoded without base or displacement. */
16453 if (!base
&& !disp
&& index
&& scale
!= 1)
16457 out
->index
= index
;
16459 out
->scale
= scale
;
16465 /* Return cost of the memory address x.
16466 For i386, it is better to use a complex address than let gcc copy
16467 the address into a reg and make a new pseudo. But not if the address
16468 requires two regs - that would mean more pseudos with longer lifetimes. */
16471 ix86_address_cost (rtx x
, machine_mode
, addr_space_t
, bool)
16473 struct ix86_address parts
;
16475 int ok
= ix86_decompose_address (x
, &parts
);
16479 if (parts
.base
&& SUBREG_P (parts
.base
))
16480 parts
.base
= SUBREG_REG (parts
.base
);
16481 if (parts
.index
&& SUBREG_P (parts
.index
))
16482 parts
.index
= SUBREG_REG (parts
.index
);
16484 /* Attempt to minimize number of registers in the address by increasing
16485 address cost for each used register. We don't increase address cost
16486 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
16487 is not invariant itself it most likely means that base or index is not
16488 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
16489 which is not profitable for x86. */
16491 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
16492 && (current_pass
->type
== GIMPLE_PASS
16493 || !pic_offset_table_rtx
16494 || !REG_P (parts
.base
)
16495 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.base
)))
16499 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
16500 && (current_pass
->type
== GIMPLE_PASS
16501 || !pic_offset_table_rtx
16502 || !REG_P (parts
.index
)
16503 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.index
)))
16506 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
16507 since its predecode logic can't detect the length of instructions
16508 and it falls back to vector decoding. Increase cost of such
16509 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
16510 to split such addresses or even refuse such addresses at all.
16512 The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]
16517 The first and last case may be avoidable by explicitly coding the zero in
16518 memory address, but I don't have an AMD-K6 machine handy to check this theory. */
16522 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
16523 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
16524 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
16530 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
16531 this is used to form addresses to local data when -fPIC is in use. */
16535 darwin_local_data_pic (rtx disp
)
16537 return (GET_CODE (disp
) == UNSPEC
16538 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
16541 /* True if operand X should be loaded from GOT. */
16544 ix86_force_load_from_GOT_p (rtx x
)
16546 return ((TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
16547 && !TARGET_PECOFF
&& !TARGET_MACHO
16548 && !flag_plt
&& !flag_pic
16549 && ix86_cmodel
!= CM_LARGE
16550 && GET_CODE (x
) == SYMBOL_REF
16551 && SYMBOL_REF_FUNCTION_P (x
)
16552 && !SYMBOL_REF_LOCAL_P (x
));
16555 /* Determine if a given RTX is a valid constant. We already know this
16556 satisfies CONSTANT_P. */
16559 ix86_legitimate_constant_p (machine_mode mode
, rtx x
)
16561 /* Pointer bounds constants are not valid. */
16562 if (POINTER_BOUNDS_MODE_P (GET_MODE (x
)))
16565 switch (GET_CODE (x
))
16570 if (GET_CODE (x
) == PLUS
)
16572 if (!CONST_INT_P (XEXP (x
, 1)))
16577 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
16580 /* Only some unspecs are valid as "constants". */
16581 if (GET_CODE (x
) == UNSPEC
)
16582 switch (XINT (x
, 1))
16585 case UNSPEC_GOTOFF
:
16586 case UNSPEC_PLTOFF
:
16587 return TARGET_64BIT
;
16589 case UNSPEC_NTPOFF
:
16590 x
= XVECEXP (x
, 0, 0);
16591 return (GET_CODE (x
) == SYMBOL_REF
16592 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
16593 case UNSPEC_DTPOFF
:
16594 x
= XVECEXP (x
, 0, 0);
16595 return (GET_CODE (x
) == SYMBOL_REF
16596 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
16601 /* We must have drilled down to a symbol. */
16602 if (GET_CODE (x
) == LABEL_REF
)
16604 if (GET_CODE (x
) != SYMBOL_REF
)
16609 /* TLS symbols are never valid. */
16610 if (SYMBOL_REF_TLS_MODEL (x
))
16613 /* DLLIMPORT symbols are never valid. */
16614 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
16615 && SYMBOL_REF_DLLIMPORT_P (x
))
16619 /* mdynamic-no-pic */
16620 if (MACHO_DYNAMIC_NO_PIC_P
)
16621 return machopic_symbol_defined_p (x
);
16624 /* External function address should be loaded
16625 via the GOT slot to avoid PLT. */
16626 if (ix86_force_load_from_GOT_p (x
))
16631 CASE_CONST_SCALAR_INT
:
16640 if (!standard_sse_constant_p (x
, mode
))
16648 if (!standard_sse_constant_p (x
, mode
))
16655 /* Otherwise we handle everything else in the move patterns. */
16659 /* Determine if it's legal to put X into the constant pool. This
16660 is not possible for the address of thread-local symbols, which
16661 is checked above. */
16664 ix86_cannot_force_const_mem (machine_mode mode
, rtx x
)
16666 /* We can put any immediate constant in memory. */
16667 switch (GET_CODE (x
))
16676 return !ix86_legitimate_constant_p (mode
, x
);
16679 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
16683 is_imported_p (rtx x
)
16685 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
16686 || GET_CODE (x
) != SYMBOL_REF
)
16689 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
16693 /* Nonzero if the constant value X is a legitimate general operand
16694 when generating PIC code. It is given that flag_pic is on and
16695 that X satisfies CONSTANT_P. */
16698 legitimate_pic_operand_p (rtx x
)
16702 switch (GET_CODE (x
))
16705 inner
= XEXP (x
, 0);
16706 if (GET_CODE (inner
) == PLUS
16707 && CONST_INT_P (XEXP (inner
, 1)))
16708 inner
= XEXP (inner
, 0);
16710 /* Only some unspecs are valid as "constants". */
16711 if (GET_CODE (inner
) == UNSPEC
)
16712 switch (XINT (inner
, 1))
16715 case UNSPEC_GOTOFF
:
16716 case UNSPEC_PLTOFF
:
16717 return TARGET_64BIT
;
16719 x
= XVECEXP (inner
, 0, 0);
16720 return (GET_CODE (x
) == SYMBOL_REF
16721 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
16722 case UNSPEC_MACHOPIC_OFFSET
:
16723 return legitimate_pic_address_disp_p (x
);
16731 return legitimate_pic_address_disp_p (x
);
16738 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
16742 legitimate_pic_address_disp_p (rtx disp
)
16746 /* In 64bit mode we can allow direct addresses of symbols and labels
16747 when they are not dynamic symbols. */
16750 rtx op0
= disp
, op1
;
16752 switch (GET_CODE (disp
))
16758 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
16760 op0
= XEXP (XEXP (disp
, 0), 0);
16761 op1
= XEXP (XEXP (disp
, 0), 1);
16762 if (!CONST_INT_P (op1
)
16763 || INTVAL (op1
) >= 16*1024*1024
16764 || INTVAL (op1
) < -16*1024*1024)
16766 if (GET_CODE (op0
) == LABEL_REF
)
16768 if (GET_CODE (op0
) == CONST
16769 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
16770 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
16772 if (GET_CODE (op0
) == UNSPEC
16773 && XINT (op0
, 1) == UNSPEC_PCREL
)
16775 if (GET_CODE (op0
) != SYMBOL_REF
)
16780 /* TLS references should always be enclosed in UNSPEC.
16781 The dllimported symbol always needs to be resolved. */
16782 if (SYMBOL_REF_TLS_MODEL (op0
)
16783 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
16788 if (is_imported_p (op0
))
16791 if (SYMBOL_REF_FAR_ADDR_P (op0
)
16792 || !SYMBOL_REF_LOCAL_P (op0
))
16795 /* Function-symbols need to be resolved only for
      the large model.
16797 For the small model we don't need to resolve anything
      here. */
16799 if ((ix86_cmodel
!= CM_LARGE_PIC
16800 && SYMBOL_REF_FUNCTION_P (op0
))
16801 || ix86_cmodel
== CM_SMALL_PIC
)
16803 /* Non-external symbols don't need to be resolved for
16804 the large and medium models. */
16805 if ((ix86_cmodel
== CM_LARGE_PIC
16806 || ix86_cmodel
== CM_MEDIUM_PIC
)
16807 && !SYMBOL_REF_EXTERNAL_P (op0
))
16810 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
16811 && (SYMBOL_REF_LOCAL_P (op0
)
16812 || (HAVE_LD_PIE_COPYRELOC
16814 && !SYMBOL_REF_WEAK (op0
)
16815 && !SYMBOL_REF_FUNCTION_P (op0
)))
16816 && ix86_cmodel
!= CM_LARGE_PIC
)
16824 if (GET_CODE (disp
) != CONST
)
16826 disp
= XEXP (disp
, 0);
16830 /* It is unsafe to allow PLUS expressions here; the offset could exceed the
16831 allowed distance within GOT tables. We should not need these anyway. */
16832 if (GET_CODE (disp
) != UNSPEC
16833 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
16834 && XINT (disp
, 1) != UNSPEC_GOTOFF
16835 && XINT (disp
, 1) != UNSPEC_PCREL
16836 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
16839 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
16840 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
16846 if (GET_CODE (disp
) == PLUS
)
16848 if (!CONST_INT_P (XEXP (disp
, 1)))
16850 disp
= XEXP (disp
, 0);
16854 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
16857 if (GET_CODE (disp
) != UNSPEC
)
16860 switch (XINT (disp
, 1))
16865 /* We need to check for both symbols and labels because VxWorks loads
16866 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
16868 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
16869 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
16870 case UNSPEC_GOTOFF
:
16871 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
16872 While the ABI also specifies a 32bit relocation, we don't produce it in the
16873 small PIC model at all. */
16874 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
16875 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
16877 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
16879 case UNSPEC_GOTTPOFF
:
16880 case UNSPEC_GOTNTPOFF
:
16881 case UNSPEC_INDNTPOFF
:
16884 disp
= XVECEXP (disp
, 0, 0);
16885 return (GET_CODE (disp
) == SYMBOL_REF
16886 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
16887 case UNSPEC_NTPOFF
:
16888 disp
= XVECEXP (disp
, 0, 0);
16889 return (GET_CODE (disp
) == SYMBOL_REF
16890 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
16891 case UNSPEC_DTPOFF
:
16892 disp
= XVECEXP (disp
, 0, 0);
16893 return (GET_CODE (disp
) == SYMBOL_REF
16894 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
16900 /* Determine if op is suitable RTX for an address register.
16901 Return naked register if a register or a register subreg is
16902 found, otherwise return NULL_RTX. */
16905 ix86_validate_address_register (rtx op
)
16907 machine_mode mode
= GET_MODE (op
);
16909 /* Only SImode or DImode registers can form the address. */
16910 if (mode
!= SImode
&& mode
!= DImode
)
16915 else if (SUBREG_P (op
))
16917 rtx reg
= SUBREG_REG (op
);
16922 mode
= GET_MODE (reg
);
16924 /* Don't allow SUBREGs that span more than a word. It can
16925 lead to spill failures when the register is one word out
16926 of a two word structure. */
16927 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
16930 /* Allow only SUBREGs of non-eliminable hard registers. */
16931 if (register_no_elim_operand (reg
, mode
))
16935 /* Op is not a register. */
16939 /* Recognizes RTL expressions that are valid memory addresses for an
16940 instruction. The MODE argument is the machine mode for the MEM
16941 expression that wants to use this address.
16943 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
16944 convert common non-canonical forms to canonical form so that they will
      be recognized. */
16948 ix86_legitimate_address_p (machine_mode
, rtx addr
, bool strict
)
16950 struct ix86_address parts
;
16951 rtx base
, index
, disp
;
16952 HOST_WIDE_INT scale
;
16955 if (ix86_decompose_address (addr
, &parts
) <= 0)
16956 /* Decomposition failed. */
16960 index
= parts
.index
;
16962 scale
= parts
.scale
;
16965 /* Validate base register. */
16968 rtx reg
= ix86_validate_address_register (base
);
16970 if (reg
== NULL_RTX
)
16973 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
16974 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
16975 /* Base is not valid. */
16979 /* Validate index register. */
16982 rtx reg
= ix86_validate_address_register (index
);
16984 if (reg
== NULL_RTX
)
16987 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
16988 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
16989 /* Index is not valid. */
16993 /* Index and base should have the same mode. */
16995 && GET_MODE (base
) != GET_MODE (index
))
16998 /* Address override works only on the (%reg) part of %fs:(%reg). */
16999 if (seg
!= ADDR_SPACE_GENERIC
17000 && ((base
&& GET_MODE (base
) != word_mode
)
17001 || (index
&& GET_MODE (index
) != word_mode
)))
17004 /* Validate scale factor. */
17008 /* Scale without index. */
17011 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
17012 /* Scale is not a valid multiplier. */
17016 /* Validate displacement. */
17019 if (GET_CODE (disp
) == CONST
17020 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
17021 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
17022 switch (XINT (XEXP (disp
, 0), 1))
17024 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
17025 when used. While the ABI also specifies 32bit relocations, we
17026 don't produce them at all and use IP relative instead.
17027 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
17028 should be loaded via GOT. */
17031 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
17032 goto is_legitimate_pic
;
17034 case UNSPEC_GOTOFF
:
17035 gcc_assert (flag_pic
);
17037 goto is_legitimate_pic
;
17039 /* 64bit address unspec. */
17042 case UNSPEC_GOTPCREL
:
17043 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
17044 goto is_legitimate_pic
;
17047 gcc_assert (flag_pic
);
17048 goto is_legitimate_pic
;
17050 case UNSPEC_GOTTPOFF
:
17051 case UNSPEC_GOTNTPOFF
:
17052 case UNSPEC_INDNTPOFF
:
17053 case UNSPEC_NTPOFF
:
17054 case UNSPEC_DTPOFF
:
17058 /* Invalid address unspec. */
17062 else if (SYMBOLIC_CONST (disp
)
17066 && MACHOPIC_INDIRECT
17067 && !machopic_operand_p (disp
)
17073 if (TARGET_64BIT
&& (index
|| base
))
17075 /* foo@dtpoff(%rX) is ok. */
17076 if (GET_CODE (disp
) != CONST
17077 || GET_CODE (XEXP (disp
, 0)) != PLUS
17078 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
17079 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
17080 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
17081 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
17082 /* Non-constant pic memory reference. */
17085 else if ((!TARGET_MACHO
|| flag_pic
)
17086 && ! legitimate_pic_address_disp_p (disp
))
17087 /* Displacement is an invalid pic construct. */
17090 else if (MACHO_DYNAMIC_NO_PIC_P
17091 && !ix86_legitimate_constant_p (Pmode
, disp
))
17092 /* displacement must be referenced via non_lazy_pointer */
17096 /* This code used to verify that a symbolic pic displacement
17097 includes the pic_offset_table_rtx register.
17099 While this is a good idea, unfortunately such constructs may
17100 be created by the "adds using lea" optimization for incorrect code.
17109 This code is nonsensical, but results in addressing the
17110 GOT table with a pic_offset_table_rtx base. We can't
17111 just refuse it easily, since it gets matched by the
17112 "addsi3" pattern, which later gets split to lea when the
17113 output register differs from the input. While this
17114 could be handled by a separate addsi pattern for this case
17115 that never results in lea, disabling this test seems to be the
17116 easier and correct fix for the crash. */
17118 else if (GET_CODE (disp
) != LABEL_REF
17119 && !CONST_INT_P (disp
)
17120 && (GET_CODE (disp
) != CONST
17121 || !ix86_legitimate_constant_p (Pmode
, disp
))
17122 && (GET_CODE (disp
) != SYMBOL_REF
17123 || !ix86_legitimate_constant_p (Pmode
, disp
)))
17124 /* Displacement is not constant. */
17126 else if (TARGET_64BIT
17127 && !x86_64_immediate_operand (disp
, VOIDmode
))
17128 /* Displacement is out of range. */
17130 /* In x32 mode, constant addresses are sign extended to 64bit, so
17131 we have to prevent addresses from 0x80000000 to 0xffffffff. */
17132 else if (TARGET_X32
&& !(index
|| base
)
17133 && CONST_INT_P (disp
)
17134 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
17138 /* Everything looks valid. */
17142 /* Determine if a given RTX is a valid constant address. */
17145 constant_address_p (rtx x
)
17147 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
17150 /* Return a unique alias set for the GOT. */
17152 static alias_set_type
17153 ix86_GOT_alias_set (void)
17155 static alias_set_type set
= -1;
17157 set
= new_alias_set ();
17161 /* Return a legitimate reference for ORIG (an address) using the
17162 register REG. If REG is 0, a new pseudo is generated.
17164 There are two types of references that must be handled:
17166 1. Global data references must load the address from the GOT, via
17167 the PIC reg. An insn is emitted to do this load, and the reg is
17170 2. Static data references, constant pool addresses, and code labels
17171 compute the address as an offset from the GOT, whose base is in
17172 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
17173 differentiate them from global data objects. The returned
17174 address is the PIC reg + an unspec constant.
17176 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
17177 reg also appears in the address. */
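/* For instance, on ia32 with -fpic a reference to a preemptible global
   `foo' becomes a load from foo@GOT(%ebx), whereas a file-local `bar' is
   computed as %ebx + bar@GOTOFF without a memory load.  */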
17180 legitimize_pic_address (rtx orig
, rtx reg
)
17183 rtx new_rtx
= orig
;
17186 if (TARGET_MACHO
&& !TARGET_64BIT
)
17189 reg
= gen_reg_rtx (Pmode
);
17190 /* Use the generic Mach-O PIC machinery. */
17191 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
17195 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
17197 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
17202 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
17204 else if ((!TARGET_64BIT
17205 || /* TARGET_64BIT && */ ix86_cmodel
!= CM_SMALL_PIC
)
17207 && gotoff_operand (addr
, Pmode
))
17209 /* This symbol may be referenced via a displacement
17210 from the PIC base address (@GOTOFF). */
17211 if (GET_CODE (addr
) == CONST
)
17212 addr
= XEXP (addr
, 0);
17214 if (GET_CODE (addr
) == PLUS
)
17216 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
17218 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
17221 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
17223 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
17226 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
17230 gcc_assert (REG_P (reg
));
17231 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
17232 new_rtx
, reg
, 1, OPTAB_DIRECT
);
17235 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
17237 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
17238 /* We can't use @GOTOFF for text labels
17239 on VxWorks, see gotoff_operand. */
17240 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
17242 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
17246 /* For x64 PE-COFF there is no GOT table,
17247 so we use address directly. */
17248 if (TARGET_64BIT
&& TARGET_PECOFF
)
17250 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
17251 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
17253 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
17255 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
17257 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
17258 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
17259 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
17263 /* This symbol must be referenced via a load
17264 from the Global Offset Table (@GOT). */
17265 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
17266 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
17268 new_rtx
= force_reg (Pmode
, new_rtx
);
17269 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
17270 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
17271 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
17274 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
17278 if (CONST_INT_P (addr
)
17279 && !x86_64_immediate_operand (addr
, VOIDmode
))
17280 new_rtx
= copy_to_suggested_reg (addr
, reg
, Pmode
);
17281 else if (GET_CODE (addr
) == CONST
)
17283 addr
= XEXP (addr
, 0);
17285 /* We must match stuff we generate before. Assume the only
17286 unspecs that can get here are ours. Not that we could do
17287 anything with them anyway.... */
17288 if (GET_CODE (addr
) == UNSPEC
17289 || (GET_CODE (addr
) == PLUS
17290 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
17292 gcc_assert (GET_CODE (addr
) == PLUS
);
17295 if (GET_CODE (addr
) == PLUS
)
17297 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
17299 /* Check first to see if this is a constant
17300 offset from a @GOTOFF symbol reference. */
17302 && gotoff_operand (op0
, Pmode
)
17303 && CONST_INT_P (op1
))
17307 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
17309 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
17310 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
17314 gcc_assert (REG_P (reg
));
17315 new_rtx
= expand_simple_binop (Pmode
, PLUS
,
17316 pic_offset_table_rtx
,
17322 = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
17326 if (INTVAL (op1
) < -16*1024*1024
17327 || INTVAL (op1
) >= 16*1024*1024)
17329 if (!x86_64_immediate_operand (op1
, Pmode
))
17330 op1
= force_reg (Pmode
, op1
);
17333 = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
17339 rtx base
= legitimize_pic_address (op0
, reg
);
17340 machine_mode mode
= GET_MODE (base
);
17342 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
17344 if (CONST_INT_P (new_rtx
))
17346 if (INTVAL (new_rtx
) < -16*1024*1024
17347 || INTVAL (new_rtx
) >= 16*1024*1024)
17349 if (!x86_64_immediate_operand (new_rtx
, mode
))
17350 new_rtx
= force_reg (mode
, new_rtx
);
17353 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
17356 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
17360 /* For %rip addressing, we have to use
17361 just disp32, not base nor index. */
17363 && (GET_CODE (base
) == SYMBOL_REF
17364 || GET_CODE (base
) == LABEL_REF
))
17365 base
= force_reg (mode
, base
);
17366 if (GET_CODE (new_rtx
) == PLUS
17367 && CONSTANT_P (XEXP (new_rtx
, 1)))
17369 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
17370 new_rtx
= XEXP (new_rtx
, 1);
17372 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
17380 /* Load the thread pointer. If TO_REG is true, force it into a register. */
17383 get_thread_pointer (machine_mode tp_mode
, bool to_reg
)
17385 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
17387 if (GET_MODE (tp
) != tp_mode
)
17389 gcc_assert (GET_MODE (tp
) == SImode
);
17390 gcc_assert (tp_mode
== DImode
);
17392 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
17396 tp
= copy_to_mode_reg (tp_mode
, tp
);
17401 /* Construct the SYMBOL_REF for the tls_get_addr function. */
17403 static GTY(()) rtx ix86_tls_symbol
;
17406 ix86_tls_get_addr (void)
17408 if (!ix86_tls_symbol
)
17411 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
17412 ? "___tls_get_addr" : "__tls_get_addr");
17414 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
17417 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
17419 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
17421 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
17422 gen_rtx_CONST (Pmode
, unspec
));
17425 return ix86_tls_symbol
;
17428 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
17430 static GTY(()) rtx ix86_tls_module_base_symbol
;
17433 ix86_tls_module_base (void)
17435 if (!ix86_tls_module_base_symbol
)
17437 ix86_tls_module_base_symbol
17438 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
17440 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
17441 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
17444 return ix86_tls_module_base_symbol
;
17447 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
17448 false if we expect this to be used for a memory address and true if
17449 we expect to load the address into a register. */
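/* The expansion depends on the TLS model: local-exec accesses end up as
   thread-pointer-relative references of the form %fs:x@tpoff, while
   global-dynamic expands to a call to __tls_get_addr (___tls_get_addr
   for 32-bit GNU TLS, see ix86_tls_get_addr below).  */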
17452 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
17454 rtx dest
, base
, off
;
17455 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
17456 machine_mode tp_mode
= Pmode
;
17459 /* Fall back to global dynamic model if tool chain cannot support local
17461 if (TARGET_SUN_TLS
&& !TARGET_64BIT
17462 && !HAVE_AS_IX86_TLSLDMPLT
&& !HAVE_AS_IX86_TLSLDM
17463 && model
== TLS_MODEL_LOCAL_DYNAMIC
)
17464 model
= TLS_MODEL_GLOBAL_DYNAMIC
;
17468 case TLS_MODEL_GLOBAL_DYNAMIC
:
17469 dest
= gen_reg_rtx (Pmode
);
17473 if (flag_pic
&& !TARGET_PECOFF
)
17474 pic
= pic_offset_table_rtx
;
17477 pic
= gen_reg_rtx (Pmode
);
17478 emit_insn (gen_set_got (pic
));
17482 if (TARGET_GNU2_TLS
)
17485 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
17487 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
17489 tp
= get_thread_pointer (Pmode
, true);
17490 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
17492 if (GET_MODE (x
) != Pmode
)
17493 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
17495 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
17499 rtx caddr
= ix86_tls_get_addr ();
17503 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
17508 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
17509 insns
= get_insns ();
17512 if (GET_MODE (x
) != Pmode
)
17513 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
17515 RTL_CONST_CALL_P (insns
) = 1;
17516 emit_libcall_block (insns
, dest
, rax
, x
);
17519 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
17523 case TLS_MODEL_LOCAL_DYNAMIC
:
17524 base
= gen_reg_rtx (Pmode
);
17529 pic
= pic_offset_table_rtx
;
17532 pic
= gen_reg_rtx (Pmode
);
17533 emit_insn (gen_set_got (pic
));
17537 if (TARGET_GNU2_TLS
)
17539 rtx tmp
= ix86_tls_module_base ();
17542 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
17544 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
17546 tp
= get_thread_pointer (Pmode
, true);
17547 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
17548 gen_rtx_MINUS (Pmode
, tmp
, tp
));
17552 rtx caddr
= ix86_tls_get_addr ();
17556 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
17562 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
17563 insns
= get_insns ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
   share the LD_BASE result with other LD model accesses.  */
17568 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
17569 UNSPEC_TLS_LD_BASE
);
17571 RTL_CONST_CALL_P (insns
) = 1;
17572 emit_libcall_block (insns
, base
, rax
, eqv
);
17575 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
17578 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
17579 off
= gen_rtx_CONST (Pmode
, off
);
17581 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
17583 if (TARGET_GNU2_TLS
)
17585 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
17587 if (GET_MODE (x
) != Pmode
)
17588 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
17590 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
17594 case TLS_MODEL_INITIAL_EXEC
:
17597 if (TARGET_SUN_TLS
&& !TARGET_X32
)
/* The Sun linker took the AMD64 TLS spec literally
   and can only handle %rax as destination of the
   initial executable code sequence.  */
17603 dest
= gen_reg_rtx (DImode
);
17604 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
/* Generate DImode references to avoid %fs:(%reg32)
   problems and linker IE->LE relaxation bug.  */
17612 type
= UNSPEC_GOTNTPOFF
;
17616 pic
= pic_offset_table_rtx
;
17617 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
17619 else if (!TARGET_ANY_GNU_TLS
)
17621 pic
= gen_reg_rtx (Pmode
);
17622 emit_insn (gen_set_got (pic
));
17623 type
= UNSPEC_GOTTPOFF
;
17628 type
= UNSPEC_INDNTPOFF
;
17631 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
17632 off
= gen_rtx_CONST (tp_mode
, off
);
17634 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
17635 off
= gen_const_mem (tp_mode
, off
);
17636 set_mem_alias_set (off
, ix86_GOT_alias_set ());
17638 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
17640 base
= get_thread_pointer (tp_mode
,
17641 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
17642 off
= force_reg (tp_mode
, off
);
17643 dest
= gen_rtx_PLUS (tp_mode
, base
, off
);
17644 if (tp_mode
!= Pmode
)
17645 dest
= convert_to_mode (Pmode
, dest
, 1);
17649 base
= get_thread_pointer (Pmode
, true);
17650 dest
= gen_reg_rtx (Pmode
);
17651 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
17655 case TLS_MODEL_LOCAL_EXEC
:
17656 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
17657 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
17658 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
17659 off
= gen_rtx_CONST (Pmode
, off
);
17661 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
17663 base
= get_thread_pointer (Pmode
,
17664 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
17665 return gen_rtx_PLUS (Pmode
, base
, off
);
17669 base
= get_thread_pointer (Pmode
, true);
17670 dest
= gen_reg_rtx (Pmode
);
17671 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
17676 gcc_unreachable ();
/* Return true if OP refers to a TLS address.  */
17684 ix86_tls_address_pattern_p (rtx op
)
17686 subrtx_var_iterator::array_type array
;
17687 FOR_EACH_SUBRTX_VAR (iter
, array
, op
, ALL
)
17692 rtx
*x
= &XEXP (op
, 0);
17693 while (GET_CODE (*x
) == PLUS
)
17696 for (i
= 0; i
< 2; i
++)
17698 rtx u
= XEXP (*x
, i
);
17699 if (GET_CODE (u
) == ZERO_EXTEND
)
17701 if (GET_CODE (u
) == UNSPEC
17702 && XINT (u
, 1) == UNSPEC_TP
)
17708 iter
.skip_subrtxes ();
/* Rewrite *LOC so that it refers to a default TLS address space.  */
17717 ix86_rewrite_tls_address_1 (rtx
*loc
)
17719 subrtx_ptr_iterator::array_type array
;
17720 FOR_EACH_SUBRTX_PTR (iter
, array
, loc
, ALL
)
17725 rtx addr
= XEXP (*loc
, 0);
17727 while (GET_CODE (*x
) == PLUS
)
17730 for (i
= 0; i
< 2; i
++)
17732 rtx u
= XEXP (*x
, i
);
17733 if (GET_CODE (u
) == ZERO_EXTEND
)
17735 if (GET_CODE (u
) == UNSPEC
17736 && XINT (u
, 1) == UNSPEC_TP
)
17738 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
17740 *x
= XEXP (*x
, 1 - i
);
17742 *loc
= replace_equiv_address_nv (*loc
, addr
, true);
17743 set_mem_addr_space (*loc
, as
);
17750 iter
.skip_subrtxes ();
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to the default TLS address space.  */
17758 ix86_rewrite_tls_address (rtx pattern
)
17760 pattern
= copy_insn (pattern
);
17761 ix86_rewrite_tls_address_1 (&pattern
);
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */
17769 struct dllimport_hasher
: ggc_cache_ptr_hash
<tree_map
>
17771 static inline hashval_t
hash (tree_map
*m
) { return m
->hash
; }
17773 equal (tree_map
*a
, tree_map
*b
)
17775 return a
->base
.from
== b
->base
.from
;
17779 keep_cache_entry (tree_map
*&m
)
17781 return ggc_marked_p (m
->base
.from
);
17785 static GTY((cache
)) hash_table
<dllimport_hasher
> *dllimport_map
;
17788 get_dllimport_decl (tree decl
, bool beimport
)
17790 struct tree_map
*h
, in
;
17792 const char *prefix
;
17793 size_t namelen
, prefixlen
;
17798 if (!dllimport_map
)
17799 dllimport_map
= hash_table
<dllimport_hasher
>::create_ggc (512);
17801 in
.hash
= htab_hash_pointer (decl
);
17802 in
.base
.from
= decl
;
17803 tree_map
**loc
= dllimport_map
->find_slot_with_hash (&in
, in
.hash
, INSERT
);
17808 *loc
= h
= ggc_alloc
<tree_map
> ();
17810 h
->base
.from
= decl
;
17811 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
17812 VAR_DECL
, NULL
, ptr_type_node
);
17813 DECL_ARTIFICIAL (to
) = 1;
17814 DECL_IGNORED_P (to
) = 1;
17815 DECL_EXTERNAL (to
) = 1;
17816 TREE_READONLY (to
) = 1;
17818 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
17819 name
= targetm
.strip_name_encoding (name
);
17821 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
17822 ? "*__imp_" : "*__imp__";
17824 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
17825 namelen
= strlen (name
);
17826 prefixlen
= strlen (prefix
);
17827 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
17828 memcpy (imp_name
, prefix
, prefixlen
);
17829 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
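/* For example (illustrative): with a non-empty user label prefix (as on
   32-bit mingw) a dllimport reference to "foo" yields the assembler name
   "*__imp__foo"; with an empty prefix it yields "*__imp_foo"; and a
   refptr stub becomes "*refptr.foo" or "*.refptr.foo" respectively.  */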
17831 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
17832 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
17833 SET_SYMBOL_REF_DECL (rtl
, to
);
17834 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
17837 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
17838 #ifdef SUB_TARGET_RECORD_STUB
17839 SUB_TARGET_RECORD_STUB (name
);
17843 rtl
= gen_const_mem (Pmode
, rtl
);
17844 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
17846 SET_DECL_RTL (to
, rtl
);
17847 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */
17856 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
17861 gcc_assert (SYMBOL_REF_DECL (symbol
));
17862 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
17864 x
= DECL_RTL (imp_decl
);
17866 x
= force_reg (Pmode
, x
);
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */
17874 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
17879 gcc_assert (SYMBOL_REF_DECL (symbol
));
17880 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
17882 x
= DECL_RTL (imp_decl
);
17884 x
= force_reg (Pmode
, x
);
/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */
17892 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
17894 if (!TARGET_PECOFF
)
17897 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
17899 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
17900 return legitimize_dllimport_symbol (addr
, inreg
);
17901 if (GET_CODE (addr
) == CONST
17902 && GET_CODE (XEXP (addr
, 0)) == PLUS
17903 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
17904 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
17906 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
17907 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
17911 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
17913 if (GET_CODE (addr
) == SYMBOL_REF
17914 && !is_imported_p (addr
)
17915 && SYMBOL_REF_EXTERNAL_P (addr
)
17916 && SYMBOL_REF_DECL (addr
))
17917 return legitimize_pe_coff_extern_decl (addr
, inreg
);
17919 if (GET_CODE (addr
) == CONST
17920 && GET_CODE (XEXP (addr
, 0)) == PLUS
17921 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
17922 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
17923 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
17924 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
17926 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
17927 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
17951 ix86_legitimize_address (rtx x
, rtx
, machine_mode mode
)
17953 bool changed
= false;
17956 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
17958 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
17959 if (GET_CODE (x
) == CONST
17960 && GET_CODE (XEXP (x
, 0)) == PLUS
17961 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
17962 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
17964 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
17965 (enum tls_model
) log
, false);
17966 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
17969 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
17971 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
17976 if (flag_pic
&& SYMBOLIC_CONST (x
))
17977 return legitimize_pic_address (x
, 0);
17980 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
17981 return machopic_indirect_data_reference (x
, 0);
/* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
17985 if (GET_CODE (x
) == ASHIFT
17986 && CONST_INT_P (XEXP (x
, 1))
17987 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
17990 log
= INTVAL (XEXP (x
, 1));
17991 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
17992 GEN_INT (1 << log
));
17995 if (GET_CODE (x
) == PLUS
)
/* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
17999 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
18000 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
18001 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
18004 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
18005 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
18006 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
18007 GEN_INT (1 << log
));
18010 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
18011 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
18012 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
18015 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
18016 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
18017 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
18018 GEN_INT (1 << log
));
/* Put multiply first if it isn't already.  */
18022 if (GET_CODE (XEXP (x
, 1)) == MULT
)
18024 std::swap (XEXP (x
, 0), XEXP (x
, 1));
/* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
   into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
   created by virtual register instantiation, register elimination, and
   similar optimizations.  */
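/* For instance (illustrative): (plus (mult (reg A) (const_int 4))
   (plus (reg B) (const_int 12))) becomes
   (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12)),
   i.e. the canonical base + index*scale + displacement shape,
   corresponding to the AT&T operand 12(%B,%A,4).  */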
18032 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
18035 x
= gen_rtx_PLUS (Pmode
,
18036 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
18037 XEXP (XEXP (x
, 1), 0)),
18038 XEXP (XEXP (x
, 1), 1));
/* Canonicalize
   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
   into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
18044 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
18045 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
18046 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
18047 && CONSTANT_P (XEXP (x
, 1)))
18050 rtx other
= NULL_RTX
;
18052 if (CONST_INT_P (XEXP (x
, 1)))
18054 constant
= XEXP (x
, 1);
18055 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
18057 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
18059 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
18060 other
= XEXP (x
, 1);
18068 x
= gen_rtx_PLUS (Pmode
,
18069 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
18070 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
18071 plus_constant (Pmode
, other
,
18072 INTVAL (constant
)));
18076 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
18079 if (GET_CODE (XEXP (x
, 0)) == MULT
)
18082 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
18085 if (GET_CODE (XEXP (x
, 1)) == MULT
)
18088 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
18092 && REG_P (XEXP (x
, 1))
18093 && REG_P (XEXP (x
, 0)))
18096 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
18099 x
= legitimize_pic_address (x
, 0);
18102 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
18105 if (REG_P (XEXP (x
, 0)))
18107 rtx temp
= gen_reg_rtx (Pmode
);
18108 rtx val
= force_operand (XEXP (x
, 1), temp
);
18111 val
= convert_to_mode (Pmode
, val
, 1);
18112 emit_move_insn (temp
, val
);
18115 XEXP (x
, 1) = temp
;
18119 else if (REG_P (XEXP (x
, 1)))
18121 rtx temp
= gen_reg_rtx (Pmode
);
18122 rtx val
= force_operand (XEXP (x
, 0), temp
);
18125 val
= convert_to_mode (Pmode
, val
, 1);
18126 emit_move_insn (temp
, val
);
18129 XEXP (x
, 0) = temp
;
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */
18143 output_pic_addr_const (FILE *file
, rtx x
, int code
)
18147 switch (GET_CODE (x
))
18150 gcc_assert (flag_pic
);
18155 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
18156 output_addr_const (file
, x
);
18159 const char *name
= XSTR (x
, 0);
/* Mark the decl as referenced so that cgraph will
   output the function.  */
18163 if (SYMBOL_REF_DECL (x
))
18164 mark_decl_referenced (SYMBOL_REF_DECL (x
));
18167 if (MACHOPIC_INDIRECT
18168 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
18169 name
= machopic_indirection_name (x
, /*stub_p=*/true);
18171 assemble_name (file
, name
);
18173 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
18174 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
18175 fputs ("@PLT", file
);
18182 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
18183 assemble_name (asm_out_file
, buf
);
18187 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
/* This used to output parentheses around the expression,
   but that does not work on the 386 (either ATT or BSD assembler).  */
18193 output_pic_addr_const (file
, XEXP (x
, 0), code
);
/* We can't handle floating point constants;
   TARGET_PRINT_OPERAND must handle them.  */
18199 output_operand_lossage ("floating constant misused");
/* Some assemblers need integer constants to appear first.  */
18204 if (CONST_INT_P (XEXP (x
, 0)))
18206 output_pic_addr_const (file
, XEXP (x
, 0), code
);
18208 output_pic_addr_const (file
, XEXP (x
, 1), code
);
18212 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
18213 output_pic_addr_const (file
, XEXP (x
, 1), code
);
18215 output_pic_addr_const (file
, XEXP (x
, 0), code
);
18221 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
18222 output_pic_addr_const (file
, XEXP (x
, 0), code
);
18224 output_pic_addr_const (file
, XEXP (x
, 1), code
);
18226 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
18230 gcc_assert (XVECLEN (x
, 0) == 1);
18231 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
18232 switch (XINT (x
, 1))
18235 fputs ("@GOT", file
);
18237 case UNSPEC_GOTOFF
:
18238 fputs ("@GOTOFF", file
);
18240 case UNSPEC_PLTOFF
:
18241 fputs ("@PLTOFF", file
);
18244 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
18245 "(%rip)" : "[rip]", file
);
18247 case UNSPEC_GOTPCREL
:
18248 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
18249 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
18251 case UNSPEC_GOTTPOFF
:
18252 /* FIXME: This might be @TPOFF in Sun ld too. */
18253 fputs ("@gottpoff", file
);
18256 fputs ("@tpoff", file
);
18258 case UNSPEC_NTPOFF
:
18260 fputs ("@tpoff", file
);
18262 fputs ("@ntpoff", file
);
18264 case UNSPEC_DTPOFF
:
18265 fputs ("@dtpoff", file
);
18267 case UNSPEC_GOTNTPOFF
:
18269 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
18270 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
18272 fputs ("@gotntpoff", file
);
18274 case UNSPEC_INDNTPOFF
:
18275 fputs ("@indntpoff", file
);
18278 case UNSPEC_MACHOPIC_OFFSET
:
18280 machopic_output_function_base_name (file
);
18284 output_operand_lossage ("invalid UNSPEC as operand");
18290 output_operand_lossage ("invalid expression as operand");
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */
18297 static void ATTRIBUTE_UNUSED
18298 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
18300 fputs (ASM_LONG
, file
);
18301 output_addr_const (file
, x
);
18302 fputs ("@dtpoff", file
);
18308 fputs (", 0", file
);
18311 gcc_unreachable ();
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */
18320 ix86_pic_register_p (rtx x
)
18322 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
18323 return (pic_offset_table_rtx
18324 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
18325 else if (!REG_P (x
))
18327 else if (pic_offset_table_rtx
)
18329 if (REGNO (x
) == REGNO (pic_offset_table_rtx
))
18331 if (HARD_REGISTER_P (x
)
18332 && !HARD_REGISTER_P (pic_offset_table_rtx
)
18333 && ORIGINAL_REGNO (x
) == REGNO (pic_offset_table_rtx
))
18338 return REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */
18345 ix86_delegitimize_tls_address (rtx orig_x
)
18347 rtx x
= orig_x
, unspec
;
18348 struct ix86_address addr
;
18350 if (!TARGET_TLS_DIRECT_SEG_REFS
)
18354 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
18356 if (ix86_decompose_address (x
, &addr
) == 0
18357 || addr
.seg
!= DEFAULT_TLS_SEG_REG
18358 || addr
.disp
== NULL_RTX
18359 || GET_CODE (addr
.disp
) != CONST
)
18361 unspec
= XEXP (addr
.disp
, 0);
18362 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
18363 unspec
= XEXP (unspec
, 0);
18364 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
18366 x
= XVECEXP (unspec
, 0, 0);
18367 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
18368 if (unspec
!= XEXP (addr
.disp
, 0))
18369 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
18372 rtx idx
= addr
.index
;
18373 if (addr
.scale
!= 1)
18374 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
18375 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
18378 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
18379 if (MEM_P (orig_x
))
18380 x
= replace_equiv_address_nv (orig_x
, x
);
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */
18401 ix86_delegitimize_address_1 (rtx x
, bool base_term_p
)
18403 rtx orig_x
= delegitimize_mem_from_attrs (x
);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;
18421 if (GET_CODE (x
) == CONST
18422 && GET_CODE (XEXP (x
, 0)) == PLUS
18423 && GET_MODE (XEXP (x
, 0)) == Pmode
18424 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
18425 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
18426 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
/* find_base_{value,term} only care about MEMs with arg_pointer_rtx
   base.  A CONST can't be arg_pointer_rtx based.  */
18430 if (base_term_p
&& MEM_P (orig_x
))
18432 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
18433 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
18434 if (MEM_P (orig_x
))
18435 x
= replace_equiv_address_nv (orig_x
, x
);
18439 if (GET_CODE (x
) == CONST
18440 && GET_CODE (XEXP (x
, 0)) == UNSPEC
18441 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
18442 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
18443 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
18445 x
= XVECEXP (XEXP (x
, 0), 0, 0);
18446 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
18448 x
= lowpart_subreg (GET_MODE (orig_x
), x
, GET_MODE (x
));
18455 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
18456 return ix86_delegitimize_tls_address (orig_x
);
/* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
   and -mcmodel=medium -fpic.  */
18462 if (GET_CODE (x
) != PLUS
18463 || GET_CODE (XEXP (x
, 1)) != CONST
)
18464 return ix86_delegitimize_tls_address (orig_x
);
18466 if (ix86_pic_register_p (XEXP (x
, 0)))
18467 /* %ebx + GOT/GOTOFF */
18469 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
18471 /* %ebx + %reg * scale + GOT/GOTOFF */
18472 reg_addend
= XEXP (x
, 0);
18473 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
18474 reg_addend
= XEXP (reg_addend
, 1);
18475 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
18476 reg_addend
= XEXP (reg_addend
, 0);
18479 reg_addend
= NULL_RTX
;
18480 addend
= XEXP (x
, 0);
18484 addend
= XEXP (x
, 0);
18486 x
= XEXP (XEXP (x
, 1), 0);
18487 if (GET_CODE (x
) == PLUS
18488 && CONST_INT_P (XEXP (x
, 1)))
18490 const_addend
= XEXP (x
, 1);
18494 if (GET_CODE (x
) == UNSPEC
18495 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
18496 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
18497 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
18498 && !MEM_P (orig_x
) && !addend
)))
18499 result
= XVECEXP (x
, 0, 0);
18501 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
18502 && !MEM_P (orig_x
))
18503 result
= XVECEXP (x
, 0, 0);
18506 return ix86_delegitimize_tls_address (orig_x
);
/* For (PLUS something CONST_INT) both find_base_{value,term} just
   recurse on the first operand.  */
18510 if (const_addend
&& !base_term_p
)
18511 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
18513 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
/* If the rest of original X doesn't involve the PIC register, add
   addend and subtract pic_offset_table_rtx.  This can happen e.g.

	leal (%ebx, %ecx, 4), %ecx
	movl foo@GOTOFF(%ecx), %edx

   in which case we return (%ecx - %ebx) + foo
   or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
   and reload has completed.  */
18525 if (pic_offset_table_rtx
18526 && (!reload_completed
|| !ix86_use_pseudo_pic_reg ()))
18527 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
18528 pic_offset_table_rtx
),
18530 else if (pic_offset_table_rtx
&& !TARGET_MACHO
&& !TARGET_VXWORKS_RTP
)
18532 rtx tmp
= gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
18533 tmp
= gen_rtx_MINUS (Pmode
, copy_rtx (addend
), tmp
);
18534 result
= gen_rtx_PLUS (Pmode
, tmp
, result
);
18539 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
18541 result
= lowpart_subreg (GET_MODE (orig_x
), result
, Pmode
);
18542 if (result
== NULL_RTX
)
/* The normal instantiation of the above template.  */
18551 ix86_delegitimize_address (rtx x
)
18553 return ix86_delegitimize_address_1 (x
, false);
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */
18561 ix86_find_base_term (rtx x
)
18567 if (GET_CODE (x
) != CONST
)
18569 term
= XEXP (x
, 0);
18570 if (GET_CODE (term
) == PLUS
18571 && CONST_INT_P (XEXP (term
, 1)))
18572 term
= XEXP (term
, 0);
18573 if (GET_CODE (term
) != UNSPEC
18574 || (XINT (term
, 1) != UNSPEC_GOTPCREL
18575 && XINT (term
, 1) != UNSPEC_PCREL
))
18578 return XVECEXP (term
, 0, 0);
18581 return ix86_delegitimize_address_1 (x
, true);
18585 put_condition_code (enum rtx_code code
, machine_mode mode
, bool reverse
,
18586 bool fp
, FILE *file
)
18588 const char *suffix
;
18590 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
18592 code
= ix86_fp_compare_code_to_integer (code
);
18596 code
= reverse_condition (code
);
18647 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
/* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
   Those same assemblers have the same but opposite lossage on cmov.  */
18653 if (mode
== CCmode
)
18654 suffix
= fp
? "nbe" : "a";
18656 gcc_unreachable ();
18672 gcc_unreachable ();
18676 if (mode
== CCmode
)
18678 else if (mode
== CCCmode
)
18679 suffix
= fp
? "b" : "c";
18681 gcc_unreachable ();
18697 gcc_unreachable ();
18701 if (mode
== CCmode
)
18703 else if (mode
== CCCmode
)
18704 suffix
= fp
? "nb" : "nc";
18706 gcc_unreachable ();
18709 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
18713 if (mode
== CCmode
)
18716 gcc_unreachable ();
18719 suffix
= fp
? "u" : "p";
18722 suffix
= fp
? "nu" : "np";
18725 gcc_unreachable ();
18727 fputs (suffix
, file
);
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */
18744 print_reg (rtx x
, int code
, FILE *file
)
18748 unsigned int regno
;
18751 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18756 gcc_assert (TARGET_64BIT
);
18757 fputs ("rip", file
);
18761 if (code
== 'y' && STACK_TOP_P (x
))
18763 fputs ("st(0)", file
);
18769 else if (code
== 'b')
18771 else if (code
== 'k')
18773 else if (code
== 'q')
18775 else if (code
== 'h')
18777 else if (code
== 'x')
18779 else if (code
== 't')
18781 else if (code
== 'g')
18784 msize
= GET_MODE_SIZE (GET_MODE (x
));
18788 if (regno
== ARG_POINTER_REGNUM
18789 || regno
== FRAME_POINTER_REGNUM
18790 || regno
== FPSR_REG
18791 || regno
== FPCR_REG
)
18793 output_operand_lossage
18794 ("invalid use of register '%s'", reg_names
[regno
]);
18797 else if (regno
== FLAGS_REG
)
18799 output_operand_lossage ("invalid use of asm flag output");
18803 duplicated
= code
== 'd' && TARGET_AVX
;
18810 if (GENERAL_REGNO_P (regno
) && msize
> GET_MODE_SIZE (word_mode
))
18811 warning (0, "unsupported size for integer register");
18814 if (LEGACY_INT_REGNO_P (regno
))
18815 putc (msize
> 4 && TARGET_64BIT
? 'r' : 'e', file
);
18819 reg
= hi_reg_name
[regno
];
18822 if (regno
>= ARRAY_SIZE (qi_reg_name
))
18824 if (!ANY_QI_REGNO_P (regno
))
18825 error ("unsupported size for integer register");
18826 reg
= qi_reg_name
[regno
];
18829 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
18831 reg
= qi_high_reg_name
[regno
];
18835 if (SSE_REGNO_P (regno
))
18837 gcc_assert (!duplicated
);
18838 putc (msize
== 32 ? 'y' : 'z', file
);
18839 reg
= hi_reg_name
[regno
] + 1;
18844 gcc_unreachable ();
/* Irritatingly, AMD extended registers use
   different naming convention: "r%d[bwd]"  */
18851 if (REX_INT_REGNO_P (regno
))
18853 gcc_assert (TARGET_64BIT
);
18857 error ("extended registers have no high halves");
18872 error ("unsupported operand size for extended register");
18880 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18881 fprintf (file
, ", %%%s", reg
);
18883 fprintf (file
, ", %s", reg
);
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print embedded rounding and sae.
   r -- print only sae.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
   ! -- print MPX prefix for jxx/call/ret instructions if required.
 */
18930 ix86_print_operand (FILE *file
, rtx x
, int code
)
18937 switch (ASSEMBLER_DIALECT
)
/* Intel syntax.  For absolute addresses, registers should not
   be surrounded by braces.  */
18949 ix86_print_operand (file
, x
, 0);
18956 gcc_unreachable ();
18959 ix86_print_operand (file
, x
, 0);
18963 /* Wrap address in an UNSPEC to declare special handling. */
18965 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
18967 output_address (VOIDmode
, x
);
18971 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18976 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18981 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18986 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18991 if (ASSEMBLER_DIALECT
== ASM_ATT
)
18996 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19001 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
19002 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
19005 switch (GET_MODE_SIZE (GET_MODE (x
)))
19020 output_operand_lossage ("invalid operand size for operand "
19030 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
19032 /* Opcodes don't get size suffixes if using Intel opcodes. */
19033 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
19036 switch (GET_MODE_SIZE (GET_MODE (x
)))
19055 output_operand_lossage ("invalid operand size for operand "
19061 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
19062 warning (0, "non-integer operand used with operand code 'z'");
19066 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
19067 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
19070 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
19072 switch (GET_MODE_SIZE (GET_MODE (x
)))
19075 #ifdef HAVE_AS_IX86_FILDS
19085 #ifdef HAVE_AS_IX86_FILDQ
19088 fputs ("ll", file
);
19096 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
19098 /* 387 opcodes don't get size suffixes
19099 if the operands are registers. */
19100 if (STACK_REG_P (x
))
19103 switch (GET_MODE_SIZE (GET_MODE (x
)))
19124 output_operand_lossage ("invalid operand type used with "
19125 "operand code 'Z'");
19129 output_operand_lossage ("invalid operand size for operand code 'Z'");
19148 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
19150 ix86_print_operand (file
, x
, 0);
19151 fputs (", ", file
);
19156 switch (GET_CODE (x
))
19159 fputs ("neq", file
);
19162 fputs ("eq", file
);
19166 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
19170 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
19174 fputs ("le", file
);
19178 fputs ("lt", file
);
19181 fputs ("unord", file
);
19184 fputs ("ord", file
);
19187 fputs ("ueq", file
);
19190 fputs ("nlt", file
);
19193 fputs ("nle", file
);
19196 fputs ("ule", file
);
19199 fputs ("ult", file
);
19202 fputs ("une", file
);
19205 output_operand_lossage ("operand is not a condition code, "
19206 "invalid operand code 'Y'");
/* Little bit of braindamage here.  The SSE compare instructions
   use completely different names for the comparisons than the
   fp conditional moves do.  */
19215 switch (GET_CODE (x
))
19220 fputs ("eq_us", file
);
19225 fputs ("eq", file
);
19230 fputs ("nge", file
);
19235 fputs ("lt", file
);
19240 fputs ("ngt", file
);
19245 fputs ("le", file
);
19248 fputs ("unord", file
);
19253 fputs ("neq_oq", file
);
19258 fputs ("neq", file
);
19263 fputs ("ge", file
);
19268 fputs ("nlt", file
);
19273 fputs ("gt", file
);
19278 fputs ("nle", file
);
19281 fputs ("ord", file
);
19284 output_operand_lossage ("operand is not a condition code, "
19285 "invalid operand code 'D'");
19292 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
19293 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19295 gcc_fallthrough ();
19300 if (!COMPARISON_P (x
))
19302 output_operand_lossage ("operand is not a condition code, "
19303 "invalid operand code '%c'", code
);
19306 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
19307 code
== 'c' || code
== 'f',
19308 code
== 'F' || code
== 'f',
19313 if (!offsettable_memref_p (x
))
19315 output_operand_lossage ("operand is not an offsettable memory "
19316 "reference, invalid operand code 'H'");
/* It doesn't actually matter what mode we use here, as we're
   only going to use this for printing.  */
19321 x
= adjust_address_nv (x
, DImode
, 8);
19322 /* Output 'qword ptr' for intel assembler dialect. */
19323 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
19328 if (!CONST_INT_P (x
))
19330 output_operand_lossage ("operand is not an integer, invalid "
19331 "operand code 'K'");
19335 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
19336 #ifdef HAVE_AS_IX86_HLE
19337 fputs ("xacquire ", file
);
19339 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
19341 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
19342 #ifdef HAVE_AS_IX86_HLE
19343 fputs ("xrelease ", file
);
19345 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
19347 /* We do not want to print value of the operand. */
19351 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
19352 fputs ("{z}", file
);
19356 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
19358 output_operand_lossage ("operand is not a specific integer, "
19359 "invalid operand code 'r'");
19363 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
19364 fputs (", ", file
);
19366 fputs ("{sae}", file
);
19368 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19369 fputs (", ", file
);
19374 if (!CONST_INT_P (x
))
19376 output_operand_lossage ("operand is not an integer, invalid "
19377 "operand code 'R'");
19381 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
19382 fputs (", ", file
);
19384 switch (INTVAL (x
))
19386 case ROUND_NEAREST_INT
| ROUND_SAE
:
19387 fputs ("{rn-sae}", file
);
19389 case ROUND_NEG_INF
| ROUND_SAE
:
19390 fputs ("{rd-sae}", file
);
19392 case ROUND_POS_INF
| ROUND_SAE
:
19393 fputs ("{ru-sae}", file
);
19395 case ROUND_ZERO
| ROUND_SAE
:
19396 fputs ("{rz-sae}", file
);
19399 output_operand_lossage ("operand is not a specific integer, "
19400 "invalid operand code 'R'");
19403 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19404 fputs (", ", file
);
19409 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19415 const char *name
= get_some_local_dynamic_name ();
19417 output_operand_lossage ("'%%&' used without any "
19418 "local dynamic TLS references");
19420 assemble_name (file
, name
);
19429 || optimize_function_for_size_p (cfun
)
19430 || !TARGET_BRANCH_PREDICTION_HINTS
)
19433 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
19436 int pred_val
= profile_probability::from_reg_br_prob_note
19437 (XINT (x
, 0)).to_reg_br_prob_base ();
19439 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
19440 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
19442 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
19444 = final_forward_branch_p (current_output_insn
) == 0;
19446 /* Emit hints only in the case default branch prediction
19447 heuristics would fail. */
19448 if (taken
!= cputaken
)
19450 /* We use 3e (DS) prefix for taken branches and
19451 2e (CS) prefix for not taken branches. */
19453 fputs ("ds ; ", file
);
19455 fputs ("cs ; ", file
);
19463 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
19469 putc (TARGET_AVX2
? 'i' : 'f', file
);
19473 if (TARGET_64BIT
&& Pmode
!= word_mode
)
19474 fputs ("addr32 ", file
);
19478 if (ix86_bnd_prefixed_insn_p (current_output_insn
))
19479 fputs ("bnd ", file
);
19483 output_operand_lossage ("invalid operand code '%c'", code
);
19488 print_reg (x
, code
, file
);
19490 else if (MEM_P (x
))
19492 rtx addr
= XEXP (x
, 0);
19494 /* No `byte ptr' prefix for call instructions ... */
19495 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
19497 machine_mode mode
= GET_MODE (x
);
19500 /* Check for explicit size override codes. */
19503 else if (code
== 'w')
19505 else if (code
== 'k')
19507 else if (code
== 'q')
19509 else if (code
== 'x')
19511 else if (code
== 't')
19513 else if (code
== 'g')
19515 else if (mode
== BLKmode
)
19516 /* ... or BLKmode operands, when not overridden. */
19519 switch (GET_MODE_SIZE (mode
))
19521 case 1: size
= "BYTE"; break;
19522 case 2: size
= "WORD"; break;
19523 case 4: size
= "DWORD"; break;
19524 case 8: size
= "QWORD"; break;
19525 case 12: size
= "TBYTE"; break;
19527 if (mode
== XFmode
)
19532 case 32: size
= "YMMWORD"; break;
19533 case 64: size
= "ZMMWORD"; break;
19535 gcc_unreachable ();
19539 fputs (size
, file
);
19540 fputs (" PTR ", file
);
19544 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
19545 output_operand_lossage ("invalid constraints for operand");
19547 ix86_print_operand_address_as
19548 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
19551 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
19555 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
19557 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19559 /* Sign extend 32bit SFmode immediate to 8 bytes. */
19561 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
19562 (unsigned long long) (int) l
);
19564 fprintf (file
, "0x%08x", (unsigned int) l
);
19567 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
19571 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
19573 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19575 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
19578 /* These float cases don't actually occur as immediate operands. */
19579 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
19583 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
19584 fputs (dstr
, file
);
/* We have patterns that allow zero sets of memory, for instance.
   In 64-bit mode, we should probably support all 8-byte vectors,
   since we can in fact encode that into an immediate.  */
19592 if (GET_CODE (x
) == CONST_VECTOR
)
19594 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
19598 if (code
!= 'P' && code
!= 'p')
19600 if (CONST_INT_P (x
))
19602 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19605 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
19606 || GET_CODE (x
) == LABEL_REF
)
19608 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19611 fputs ("OFFSET FLAT:", file
);
19614 if (CONST_INT_P (x
))
19615 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
19616 else if (flag_pic
|| MACHOPIC_INDIRECT
)
19617 output_pic_addr_const (file
, x
, code
);
19619 output_addr_const (file
, x
);
19624 ix86_print_operand_punct_valid_p (unsigned char code
)
19626 return (code
== '*' || code
== '+' || code
== '&' || code
== ';'
19627 || code
== '~' || code
== '^' || code
== '!');
19630 /* Print a memory operand whose address is ADDR. */
19633 ix86_print_operand_address_as (FILE *file
, rtx addr
,
19634 addr_space_t as
, bool no_rip
)
19636 struct ix86_address parts
;
19637 rtx base
, index
, disp
;
19643 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
19645 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
19646 gcc_assert (parts
.index
== NULL_RTX
);
19647 parts
.index
= XVECEXP (addr
, 0, 1);
19648 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
19649 addr
= XVECEXP (addr
, 0, 0);
19652 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
19654 gcc_assert (TARGET_64BIT
);
19655 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
19658 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDMK_ADDR
)
19660 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 1), &parts
);
19661 gcc_assert (parts
.base
== NULL_RTX
|| parts
.index
== NULL_RTX
);
19662 if (parts
.base
!= NULL_RTX
)
19664 parts
.index
= parts
.base
;
19667 parts
.base
= XVECEXP (addr
, 0, 0);
19668 addr
= XVECEXP (addr
, 0, 0);
19670 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_BNDLDX_ADDR
)
19672 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
19673 gcc_assert (parts
.index
== NULL_RTX
);
19674 parts
.index
= XVECEXP (addr
, 0, 1);
19675 addr
= XVECEXP (addr
, 0, 0);
19678 ok
= ix86_decompose_address (addr
, &parts
);
19683 index
= parts
.index
;
19685 scale
= parts
.scale
;
19687 if (ADDR_SPACE_GENERIC_P (as
))
19690 gcc_assert (ADDR_SPACE_GENERIC_P (parts
.seg
));
19692 if (!ADDR_SPACE_GENERIC_P (as
))
19694 const char *string
;
19696 if (as
== ADDR_SPACE_SEG_FS
)
19697 string
= (ASSEMBLER_DIALECT
== ASM_ATT
? "%fs:" : "fs:");
19698 else if (as
== ADDR_SPACE_SEG_GS
)
19699 string
= (ASSEMBLER_DIALECT
== ASM_ATT
? "%gs:" : "gs:");
19701 gcc_unreachable ();
19702 fputs (string
, file
);
19705 /* Use one byte shorter RIP relative addressing for 64bit mode. */
19706 if (TARGET_64BIT
&& !base
&& !index
&& !no_rip
)
19710 if (GET_CODE (disp
) == CONST
19711 && GET_CODE (XEXP (disp
, 0)) == PLUS
19712 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
19713 symbol
= XEXP (XEXP (disp
, 0), 0);
19715 if (GET_CODE (symbol
) == LABEL_REF
19716 || (GET_CODE (symbol
) == SYMBOL_REF
19717 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
19721 if (!base
&& !index
)
19723 /* Displacement only requires special attention. */
19724 if (CONST_INT_P (disp
))
19726 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& ADDR_SPACE_GENERIC_P (as
))
19727 fputs ("ds:", file
);
19728 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
19730 /* Load the external function address via the GOT slot to avoid PLT. */
19731 else if (GET_CODE (disp
) == CONST
19732 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
19733 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTPCREL
19734 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
)
19735 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
19736 output_pic_addr_const (file
, disp
, 0);
19738 output_pic_addr_const (file
, disp
, 0);
19740 output_addr_const (file
, disp
);
19744 /* Print SImode register names to force addr32 prefix. */
19745 if (SImode_address_operand (addr
, VOIDmode
))
19749 gcc_assert (TARGET_64BIT
);
19750 switch (GET_CODE (addr
))
19753 gcc_assert (GET_MODE (addr
) == SImode
);
19754 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
19758 gcc_assert (GET_MODE (addr
) == DImode
);
19761 gcc_unreachable ();
19764 gcc_assert (!code
);
19770 && CONST_INT_P (disp
)
19771 && INTVAL (disp
) < -16*1024*1024)
/* X32 runs in 64-bit mode, where displacement, DISP, in
   address DISP(%r64), is encoded as 32-bit immediate sign-
   extended from 32-bit to 64-bit.  For -0x40000300(%r64),
   address is %r64 + 0xffffffffbffffd00.  When %r64 <
   0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
   which is invalid for x32.  The correct address is %r64
   - 0x40000300 == 0xf7ffdd64.  To properly encode
   -0x40000300(%r64) for x32, we zero-extend negative
   displacement by forcing addr32 prefix which truncates
   0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
   zero-extend all negative displacements, including -1(%rsp).
   However, for small negative displacements, sign-extension
   won't cause overflow.  We only zero-extend negative
   displacements if they < -16*1024*1024, which is also used
   to check legitimate address displacements for PIC.  */
19791 if (ASSEMBLER_DIALECT
== ASM_ATT
)
19796 output_pic_addr_const (file
, disp
, 0);
19797 else if (GET_CODE (disp
) == LABEL_REF
)
19798 output_asm_label (disp
);
19800 output_addr_const (file
, disp
);
19805 print_reg (base
, code
, file
);
19809 print_reg (index
, vsib
? 0 : code
, file
);
19810 if (scale
!= 1 || vsib
)
19811 fprintf (file
, ",%d", scale
);
19817 rtx offset
= NULL_RTX
;
19821 /* Pull out the offset of a symbol; print any symbol itself. */
19822 if (GET_CODE (disp
) == CONST
19823 && GET_CODE (XEXP (disp
, 0)) == PLUS
19824 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
19826 offset
= XEXP (XEXP (disp
, 0), 1);
19827 disp
= gen_rtx_CONST (VOIDmode
,
19828 XEXP (XEXP (disp
, 0), 0));
19832 output_pic_addr_const (file
, disp
, 0);
19833 else if (GET_CODE (disp
) == LABEL_REF
)
19834 output_asm_label (disp
);
19835 else if (CONST_INT_P (disp
))
19838 output_addr_const (file
, disp
);
19844 print_reg (base
, code
, file
);
19847 if (INTVAL (offset
) >= 0)
19849 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
19853 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
19860 print_reg (index
, vsib
? 0 : code
, file
);
19861 if (scale
!= 1 || vsib
)
19862 fprintf (file
, "*%d", scale
);
19870 ix86_print_operand_address (FILE *file
, machine_mode
/*mode*/, rtx addr
)
19872 ix86_print_operand_address_as (file
, addr
, ADDR_SPACE_GENERIC
, false);
19875 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
19878 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
19882 if (GET_CODE (x
) != UNSPEC
)
19885 op
= XVECEXP (x
, 0, 0);
19886 switch (XINT (x
, 1))
19888 case UNSPEC_GOTTPOFF
:
19889 output_addr_const (file
, op
);
19890 /* FIXME: This might be @TPOFF in Sun ld. */
19891 fputs ("@gottpoff", file
);
19894 output_addr_const (file
, op
);
19895 fputs ("@tpoff", file
);
19897 case UNSPEC_NTPOFF
:
19898 output_addr_const (file
, op
);
19900 fputs ("@tpoff", file
);
19902 fputs ("@ntpoff", file
);
19904 case UNSPEC_DTPOFF
:
19905 output_addr_const (file
, op
);
19906 fputs ("@dtpoff", file
);
19908 case UNSPEC_GOTNTPOFF
:
19909 output_addr_const (file
, op
);
19911 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
19912 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
19914 fputs ("@gotntpoff", file
);
19916 case UNSPEC_INDNTPOFF
:
19917 output_addr_const (file
, op
);
19918 fputs ("@indntpoff", file
);
19921 case UNSPEC_MACHOPIC_OFFSET
:
19922 output_addr_const (file
, op
);
19924 machopic_output_function_base_name (file
);
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
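/* Conceptually (an illustrative sketch added here, not original GCC code;
   the helper name is hypothetical): for a constant operand the split
   amounts to the arithmetic below, while REGs and MEMs are handled with
   simplify_gen_subreg and adjust_address instead.  */
#if 0
static void
example_split_double (unsigned long long v,
		      unsigned int *lo, unsigned int *hi)
{
  *lo = (unsigned int) (v & 0xffffffffu);	/* low half  */
  *hi = (unsigned int) (v >> 32);		/* high half */
}
#endif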
19942 split_double_mode (machine_mode mode
, rtx operands
[],
19943 int num
, rtx lo_half
[], rtx hi_half
[])
19945 machine_mode half_mode
;
19951 half_mode
= DImode
;
19954 half_mode
= SImode
;
19957 gcc_unreachable ();
19960 byte
= GET_MODE_SIZE (half_mode
);
19964 rtx op
= operands
[num
];
/* simplify_subreg refuses to split volatile memory addresses,
   but we still have to handle it.  */
19970 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
19971 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
19975 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
19976 GET_MODE (op
) == VOIDmode
19977 ? mode
: GET_MODE (op
), 0);
19978 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
19979 GET_MODE (op
) == VOIDmode
19980 ? mode
: GET_MODE (op
), byte
);
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */
19993 #ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
20003 output_387_binary_op (rtx_insn
*insn
, rtx
*operands
)
20005 static char buf
[40];
20008 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
/* Even if we do not want to check the inputs, this documents the input
   constraints, which helps in understanding the following code.  */
20014 if (STACK_REG_P (operands
[0])
20015 && ((REG_P (operands
[1])
20016 && REGNO (operands
[0]) == REGNO (operands
[1])
20017 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
20018 || (REG_P (operands
[2])
20019 && REGNO (operands
[0]) == REGNO (operands
[2])
20020 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
20021 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
20024 gcc_assert (is_sse
);
20027 switch (GET_CODE (operands
[3]))
20030 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
20031 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
20039 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
20040 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
20048 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
20049 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
20057 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
20058 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
20066 gcc_unreachable ();
20073 strcpy (buf
, ssep
);
20074 if (GET_MODE (operands
[0]) == SFmode
)
20075 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
20077 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
20081 strcpy (buf
, ssep
+ 1);
20082 if (GET_MODE (operands
[0]) == SFmode
)
20083 strcat (buf
, "ss\t{%2, %0|%0, %2}");
20085 strcat (buf
, "sd\t{%2, %0|%0, %2}");
20091 switch (GET_CODE (operands
[3]))
20095 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
20096 std::swap (operands
[1], operands
[2]);
20098 /* know operands[0] == operands[1]. */
20100 if (MEM_P (operands
[2]))
20106 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
20108 if (STACK_TOP_P (operands
[0]))
/* How is it that we are storing to a dead operand[2]?
   Well, presumably operands[1] is dead too.  We can't
   store the result to st(0) as st(0) gets popped on this
   instruction.  Instead store to operands[2] (which I
   think has to be st(1)).  st(1) will be popped later.
   gcc <= 2.8.1 didn't have this check and generated
   assembly code that the Unixware assembler rejected.  */
20116 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
20118 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
20122 if (STACK_TOP_P (operands
[0]))
20123 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
20125 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
20130 if (MEM_P (operands
[1]))
20136 if (MEM_P (operands
[2]))
20142 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
/* The SystemV/386 SVR3.2 assembler, and probably all AT&T
   derived assemblers, confusingly reverse the direction of
   the operation for fsub{r} and fdiv{r} when the
   destination register is not st(0).  The Intel assembler
   doesn't have this brain damage.  Read !SYSV386_COMPAT to
   figure out what the hardware really does.  */
20151 if (STACK_TOP_P (operands
[0]))
20152 p
= "{p\t%0, %2|rp\t%2, %0}";
20154 p
= "{rp\t%2, %0|p\t%0, %2}";
20156 if (STACK_TOP_P (operands
[0]))
20157 /* As above for fmul/fadd, we can't store to st(0). */
20158 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
20160 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
20165 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20168 if (STACK_TOP_P (operands
[0]))
20169 p
= "{rp\t%0, %1|p\t%1, %0}";
20171 p
= "{p\t%1, %0|rp\t%0, %1}";
20173 if (STACK_TOP_P (operands
[0]))
20174 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
20176 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
20181 if (STACK_TOP_P (operands
[0]))
20183 if (STACK_TOP_P (operands
[1]))
20184 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
20186 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
20189 else if (STACK_TOP_P (operands
[1]))
20192 p
= "{\t%1, %0|r\t%0, %1}";
20194 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
20200 p
= "{r\t%2, %0|\t%0, %2}";
20202 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
20208 gcc_unreachable ();
/* Return needed mode for entity in optimize_mode_switching pass.  */
20218 ix86_dirflag_mode_needed (rtx_insn
*insn
)
20222 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
20223 return X86_DIRFLAG_ANY
;
20225 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
20226 return TARGET_CLD
? X86_DIRFLAG_ANY
: X86_DIRFLAG_RESET
;
20229 if (recog_memoized (insn
) < 0)
20230 return X86_DIRFLAG_ANY
;
20232 if (get_attr_type (insn
) == TYPE_STR
)
20234 /* Emit cld instruction if stringops are used in the function. */
20235 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
20236 return TARGET_CLD
? X86_DIRFLAG_RESET
: X86_DIRFLAG_ANY
;
20238 return X86_DIRFLAG_RESET
;
20241 return X86_DIRFLAG_ANY
;
/* Check if a 256bit AVX register is referenced inside of EXP.  */

static bool
ix86_check_avx256_register (const_rtx exp)
{
  if (SUBREG_P (exp))
    exp = SUBREG_REG (exp);

  return (REG_P (exp)
	  && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx256_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
     changes state only when a 256bit register is written to, but we need
     to prevent the compiler from moving optimal insertion point above
     eventual read from 256bit register.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (ix86_check_avx256_register (*iter))
      return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx_insn *insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Return mode that entity must be switched into
   prior to the execution of insn.  */

static int
ix86_mode_needed (int entity, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_needed (insn);
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}
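
/* Editorial note, not part of the original source: three kinds of
   mode-switching entities are dispatched above and in the hooks that
   follow.  X86_DIRFLAG tracks whether the direction flag is known to be
   clear for string instructions, AVX_U128 tracks whether the upper halves
   of the YMM registers are clean or dirty and drives vzeroupper insertion,
   and the I387_* entities track the rounding-control contents of the x87
   control word.  The optimize_mode_switching pass queries the
   *_mode_needed, *_mode_after, *_mode_entry and *_mode_exit hooks and
   emits the actual transitions through ix86_emit_mode_set.  */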
/* Check if a 256bit AVX register is referenced in stores.  */

static void
ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
{
  if (ix86_check_avx256_register (dest))
    {
      bool *used = (bool *) data;
      *used = true;
    }
}
/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_operation (pat, VOIDmode)
      || vzeroall_operation (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_reg256_found = false;
      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);

      return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit registers, the mode was already changed
     to DIRTY from MODE_NEEDED.  */
  return mode;
}
/* Return the mode that an insn results in.  */

static int
ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return mode;
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return mode;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_dirflag_mode_entry (void)
{
  /* For TARGET_CLD or in the interrupt handler we can't assume
     direction flag state at function entry.  */
  if (TARGET_CLD
      || cfun->machine->func_type != TYPE_NORMAL)
    return X86_DIRFLAG_ANY;

  return X86_DIRFLAG_RESET;
}
static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx256_register (incoming))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_entry ();
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are
     256bit modes used in the function return register.  */
  if (reg && ix86_check_avx256_register (reg))
    return AVX_U128_DIRTY;

  return AVX_U128_CLEAN;
}
/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return X86_DIRFLAG_ANY;
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}
static int
ix86_mode_priority (int, int n)
{
  return n;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_insn_for_size_p ())
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case I387_CW_TRUNC:
	  /* round toward zero (truncate) */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
	  slot = SLOT_CW_TRUNC;
	  break;

	case I387_CW_FLOOR:
	  /* round down toward -oo */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
	  slot = SLOT_CW_FLOOR;
	  break;

	case I387_CW_CEIL:
	  /* round up toward +oo */
	  emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
	  slot = SLOT_CW_CEIL;
	  break;

	case I387_CW_MASK_PM:
	  /* mask precision exception for nearbyint() */
	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	  slot = SLOT_CW_MASK_PM;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
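
/* Editorial note, not part of the original source: the bit masks used in
   emit_i387_cw_initialization operate on the rounding-control field in
   bits 10-11 of the x87 control word, hence the 0x0c00 mask: 00 selects
   round to nearest, 01 (0x0400) round down, 10 (0x0800) round up and
   11 (0x0c00) truncation toward zero.  Bit 5 (0x0020) is the
   precision-exception mask bit set for nearbyint.  */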
/* Emit vzeroupper.  */

static void
ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
{
  int i;

  /* Cancel automatic vzeroupper insertion if there are
     live call-saved SSE registers at the insertion point.  */

  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
      return;

  if (TARGET_64BIT)
    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
	return;

  emit_insn (gen_avx_vzeroupper ());
}
/* Generate one or more insns to set ENTITY to MODE.  HARD_REG_LIVE
   is the set of hard registers live at the point where the insn(s)
   are to be inserted.  */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	ix86_avx_emit_vzeroupper (regs_live);
      break;
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
    case I387_MASK_PM:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
      else
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
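
/* Editorial note, not part of the original source: a rough sketch of the
   sequence output_fix_trunc produces for a DImode result whose x87 stack
   top is still live (no REG_DEAD note), assuming a non-default rounding
   mode is required:

       fld     %st(0)              # duplicate st(0), since fistp pops it
       fldcw   <new control word>
       fistp   <mem>               # popping store of the integer result
       fldcw   <saved control word>

   On SSE3 targets the fisttp form truncates regardless of the control
   word, so the two fldcw instructions are not emitted.  */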
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands, bool eflags_p,
		   bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
	if (unordered_p)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	else
	  return "%vcomiss\t{%1, %0|%0, %1}";
      else
	if (unordered_p)
	  return "%vucomisd\t{%1, %0|%0, %1}";
	else
	  return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
	{
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	}
      else
	return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare */

      if (eflags_p)
	{
	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	  if (unordered_p)
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	  else
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	}
      else
	{
	  if (unordered_p)
	    return "fucompp\n\tfnstsw\t%0";
	  else
	    return "fcompp\n\tfnstsw\t%0";
	}
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
	"fcom%Z2\t%y2\n\tfnstsw\t%0",
	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
	"fucom%Z2\t%y2\n\tfnstsw\t%0",
	"fucomp%Z2\t%y2\n\tfnstsw\t%0",

	"ficom%Z2\t%y2\n\tfnstsw\t%0",
	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
	NULL,
	NULL,

	"fcomi\t{%y1, %0|%0, %y1}",
	"fcomip\t{%y1, %0|%0, %y1}",
	"fucomi\t{%y1, %0|%0, %y1}",
	"fucomip\t{%y1, %0|%0, %y1}",

	NULL,
	NULL,
	NULL,
	NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
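
/* Editorial note, not part of the original source: a worked example of the
   mask encoding used above.  For an fcomi-style compare (eflags_p = 1) of
   two FP stack registers (so cmp_op1 is not MODE_INT), using the unordered
   variant (unordered_p = 1), where the stack top dies (stack_top_dies = 1):

       mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11

   and alt[11] is "fucomip\t{%y1, %0|%0, %y1}", the popping unordered
   compare.  */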
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_LP64)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (dest, const0_rtx);

  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
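
/* Editorial note, not part of the original source: on the xor path the
   insn emitted by ix86_expand_clear has the shape

       (parallel [(set (reg:SI r) (const_int 0))
		  (clobber (reg:CC flags))])

   because xor clobbers the flags, while the "mov $0" form used for
   TARGET_USE_MOV0 when not optimizing for size is a plain set that leaves
   the flags untouched.  */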
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
20942 ix86_expand_move (machine_mode mode
, rtx operands
[])
20945 rtx tmp
, addend
= NULL_RTX
;
20946 enum tls_model model
;
20951 switch (GET_CODE (op1
))
20954 tmp
= XEXP (op1
, 0);
20956 if (GET_CODE (tmp
) != PLUS
20957 || GET_CODE (XEXP (tmp
, 0)) != SYMBOL_REF
)
20960 op1
= XEXP (tmp
, 0);
20961 addend
= XEXP (tmp
, 1);
20965 model
= SYMBOL_REF_TLS_MODEL (op1
);
20968 op1
= legitimize_tls_address (op1
, model
, true);
20969 else if (ix86_force_load_from_GOT_p (op1
))
20971 /* Load the external function address via GOT slot to avoid PLT. */
20972 op1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op1
),
20976 op1
= gen_rtx_CONST (Pmode
, op1
);
20977 op1
= gen_const_mem (Pmode
, op1
);
20978 set_mem_alias_set (op1
, ix86_GOT_alias_set ());
20982 tmp
= legitimize_pe_coff_symbol (op1
, addend
!= NULL_RTX
);
20998 op1
= force_operand (op1
, NULL_RTX
);
20999 op1
= expand_simple_binop (Pmode
, PLUS
, op1
, addend
,
21000 op0
, 1, OPTAB_DIRECT
);
21003 op1
= force_operand (op1
, op0
);
21008 op1
= convert_to_mode (mode
, op1
, 1);
21014 if ((flag_pic
|| MACHOPIC_INDIRECT
)
21015 && symbolic_operand (op1
, mode
))
21017 if (TARGET_MACHO
&& !TARGET_64BIT
)
21020 /* dynamic-no-pic */
21021 if (MACHOPIC_INDIRECT
)
21023 rtx temp
= (op0
&& REG_P (op0
) && mode
== Pmode
)
21024 ? op0
: gen_reg_rtx (Pmode
);
21025 op1
= machopic_indirect_data_reference (op1
, temp
);
21027 op1
= machopic_legitimize_pic_address (op1
, mode
,
21028 temp
== op1
? 0 : temp
);
21030 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
21032 rtx insn
= gen_rtx_SET (op0
, op1
);
21036 if (GET_CODE (op0
) == MEM
)
21037 op1
= force_reg (Pmode
, op1
);
21041 if (GET_CODE (temp
) != REG
)
21042 temp
= gen_reg_rtx (Pmode
);
21043 temp
= legitimize_pic_address (op1
, temp
);
21048 /* dynamic-no-pic */
21054 op1
= force_reg (mode
, op1
);
21055 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
21057 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
21058 op1
= legitimize_pic_address (op1
, reg
);
21061 op1
= convert_to_mode (mode
, op1
, 1);
21068 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
21069 || !push_operand (op0
, mode
))
21071 op1
= force_reg (mode
, op1
);
21073 if (push_operand (op0
, mode
)
21074 && ! general_no_elim_operand (op1
, mode
))
21075 op1
= copy_to_mode_reg (mode
, op1
);
21077 /* Force large constants in 64bit compilation into register
21078 to get them CSEed. */
21079 if (can_create_pseudo_p ()
21080 && (mode
== DImode
) && TARGET_64BIT
21081 && immediate_operand (op1
, mode
)
21082 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
21083 && !register_operand (op0
, mode
)
21085 op1
= copy_to_mode_reg (mode
, op1
);
21087 if (can_create_pseudo_p ()
21088 && CONST_DOUBLE_P (op1
))
21090 /* If we are loading a floating point constant to a register,
21091 force the value to memory now, since we'll get better code
21092 out the back end. */
21094 op1
= validize_mem (force_const_mem (mode
, op1
));
21095 if (!register_operand (op0
, mode
))
21097 rtx temp
= gen_reg_rtx (mode
);
21098 emit_insn (gen_rtx_SET (temp
, op1
));
21099 emit_move_insn (op0
, temp
);
21105 emit_insn (gen_rtx_SET (op0
, op1
));
21109 ix86_expand_vector_move (machine_mode mode
, rtx operands
[])
21111 rtx op0
= operands
[0], op1
= operands
[1];
21112 /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU
21113 psABI since the biggest alignment is 4 byte for IA MCU psABI. */
21114 unsigned int align
= (TARGET_IAMCU
21115 ? GET_MODE_BITSIZE (mode
)
21116 : GET_MODE_ALIGNMENT (mode
));
21118 if (push_operand (op0
, VOIDmode
))
21119 op0
= emit_move_resolve_push (mode
, op0
);
21121 /* Force constants other than zero into memory. We do not know how
21122 the instructions used to build constants modify the upper 64 bits
21123 of the register, once we have that information we may be able
21124 to handle some of them more efficiently. */
21125 if (can_create_pseudo_p ()
21126 && (CONSTANT_P (op1
)
21128 && CONSTANT_P (SUBREG_REG (op1
))))
21129 && ((register_operand (op0
, mode
)
21130 && !standard_sse_constant_p (op1
, mode
))
21131 /* ix86_expand_vector_move_misalign() does not like constants. */
21132 || (SSE_REG_MODE_P (mode
)
21134 && MEM_ALIGN (op0
) < align
)))
21136 if (SUBREG_P (op1
))
21138 machine_mode imode
= GET_MODE (SUBREG_REG (op1
));
21139 rtx r
= force_const_mem (imode
, SUBREG_REG (op1
));
21141 r
= validize_mem (r
);
21143 r
= force_reg (imode
, SUBREG_REG (op1
));
21144 op1
= simplify_gen_subreg (mode
, r
, imode
, SUBREG_BYTE (op1
));
21147 op1
= validize_mem (force_const_mem (mode
, op1
));
21150 /* We need to check memory alignment for SSE mode since attribute
21151 can make operands unaligned. */
21152 if (can_create_pseudo_p ()
21153 && SSE_REG_MODE_P (mode
)
21154 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
21155 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
21159 /* ix86_expand_vector_move_misalign() does not like both
21160 arguments in memory. */
21161 if (!register_operand (op0
, mode
)
21162 && !register_operand (op1
, mode
))
21163 op1
= force_reg (mode
, op1
);
21165 tmp
[0] = op0
; tmp
[1] = op1
;
21166 ix86_expand_vector_move_misalign (mode
, tmp
);
21170 /* Make operand1 a register if it isn't already. */
21171 if (can_create_pseudo_p ()
21172 && !register_operand (op0
, mode
)
21173 && !register_operand (op1
, mode
))
21175 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
21179 emit_insn (gen_rtx_SET (op0
, op1
));
21182 /* Split 32-byte AVX unaligned load and store if needed. */
21185 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
21188 rtx (*extract
) (rtx
, rtx
, rtx
);
21191 if ((MEM_P (op1
) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
21192 || (MEM_P (op0
) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE
))
21194 emit_insn (gen_rtx_SET (op0
, op1
));
21198 rtx orig_op0
= NULL_RTX
;
21199 mode
= GET_MODE (op0
);
21200 switch (GET_MODE_CLASS (mode
))
21202 case MODE_VECTOR_INT
:
21204 if (mode
!= V32QImode
)
21209 op0
= gen_reg_rtx (V32QImode
);
21212 op0
= gen_lowpart (V32QImode
, op0
);
21213 op1
= gen_lowpart (V32QImode
, op1
);
21217 case MODE_VECTOR_FLOAT
:
21220 gcc_unreachable ();
21226 gcc_unreachable ();
21228 extract
= gen_avx_vextractf128v32qi
;
21232 extract
= gen_avx_vextractf128v8sf
;
21236 extract
= gen_avx_vextractf128v4df
;
21243 rtx r
= gen_reg_rtx (mode
);
21244 m
= adjust_address (op1
, mode
, 0);
21245 emit_move_insn (r
, m
);
21246 m
= adjust_address (op1
, mode
, 16);
21247 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
21248 emit_move_insn (op0
, r
);
21250 else if (MEM_P (op0
))
21252 m
= adjust_address (op0
, mode
, 0);
21253 emit_insn (extract (m
, op1
, const0_rtx
));
21254 m
= adjust_address (op0
, mode
, 16);
21255 emit_insn (extract (m
, copy_rtx (op1
), const1_rtx
));
21258 gcc_unreachable ();
21261 emit_move_insn (orig_op0
, gen_lowpart (GET_MODE (orig_op0
), op0
));
21264 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
21265 straight to ix86_expand_vector_move. */
21266 /* Code generation for scalar reg-reg moves of single and double precision data:
21267 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
21271 if (x86_sse_partial_reg_dependency == true)
21276 Code generation for scalar loads of double precision data:
21277 if (x86_sse_split_regs == true)
21278 movlpd mem, reg (gas syntax)
21282 Code generation for unaligned packed loads of single precision data
21283 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
21284 if (x86_sse_unaligned_move_optimal)
21287 if (x86_sse_partial_reg_dependency == true)
21299 Code generation for unaligned packed loads of double precision data
21300 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
21301 if (x86_sse_unaligned_move_optimal)
21304 if (x86_sse_split_regs == true)
21317 ix86_expand_vector_move_misalign (machine_mode mode
, rtx operands
[])
21324 /* Use unaligned load/store for AVX512 or when optimizing for size. */
21325 if (GET_MODE_SIZE (mode
) == 64 || optimize_insn_for_size_p ())
21327 emit_insn (gen_rtx_SET (op0
, op1
));
21333 if (GET_MODE_SIZE (mode
) == 32)
21334 ix86_avx256_split_vector_move_misalign (op0
, op1
);
21336 /* Always use 128-bit mov<mode>_internal pattern for AVX. */
21337 emit_insn (gen_rtx_SET (op0
, op1
));
21341 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
21342 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
)
21344 emit_insn (gen_rtx_SET (op0
, op1
));
21348 /* ??? If we have typed data, then it would appear that using
21349 movdqu is the only way to get unaligned data loaded with
21351 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
21353 emit_insn (gen_rtx_SET (op0
, op1
));
21359 if (TARGET_SSE2
&& mode
== V2DFmode
)
21363 /* When SSE registers are split into halves, we can avoid
21364 writing to the top half twice. */
21365 if (TARGET_SSE_SPLIT_REGS
)
21367 emit_clobber (op0
);
21372 /* ??? Not sure about the best option for the Intel chips.
21373 The following would seem to satisfy; the register is
21374 entirely cleared, breaking the dependency chain. We
21375 then store to the upper half, with a dependency depth
21376 of one. A rumor has it that Intel recommends two movsd
21377 followed by an unpacklpd, but this is unconfirmed. And
21378 given that the dependency depth of the unpacklpd would
21379 still be one, I'm not sure why this would be better. */
21380 zero
= CONST0_RTX (V2DFmode
);
21383 m
= adjust_address (op1
, DFmode
, 0);
21384 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
21385 m
= adjust_address (op1
, DFmode
, 8);
21386 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
21392 if (mode
!= V4SFmode
)
21393 t
= gen_reg_rtx (V4SFmode
);
21397 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
21398 emit_move_insn (t
, CONST0_RTX (V4SFmode
));
21402 m
= adjust_address (op1
, V2SFmode
, 0);
21403 emit_insn (gen_sse_loadlps (t
, t
, m
));
21404 m
= adjust_address (op1
, V2SFmode
, 8);
21405 emit_insn (gen_sse_loadhps (t
, t
, m
));
21406 if (mode
!= V4SFmode
)
21407 emit_move_insn (op0
, gen_lowpart (mode
, t
));
21410 else if (MEM_P (op0
))
21412 if (TARGET_SSE2
&& mode
== V2DFmode
)
21414 m
= adjust_address (op0
, DFmode
, 0);
21415 emit_insn (gen_sse2_storelpd (m
, op1
));
21416 m
= adjust_address (op0
, DFmode
, 8);
21417 emit_insn (gen_sse2_storehpd (m
, op1
));
21421 if (mode
!= V4SFmode
)
21422 op1
= gen_lowpart (V4SFmode
, op1
);
21424 m
= adjust_address (op0
, V2SFmode
, 0);
21425 emit_insn (gen_sse_storelps (m
, op1
));
21426 m
= adjust_address (op0
, V2SFmode
, 8);
21427 emit_insn (gen_sse_storehps (m
, copy_rtx (op1
)));
21431 gcc_unreachable ();
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
			     rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
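
/* Editorial note, not part of the original source: for a commutative
   operation such as (plus a b) the priorities above mean the operands are
   swapped when the destination already matches operands[2], so the insn
   can use the two-address "dst op= src" form, and otherwise immediates and
   memory references are pushed into operands[2], where the matching
   constraints expect them.  */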
21471 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
21472 destination to use for the operation. If different from the true
21473 destination in operands[0], a copy operation will be required. */
21476 ix86_fixup_binary_operands (enum rtx_code code
, machine_mode mode
,
21479 rtx dst
= operands
[0];
21480 rtx src1
= operands
[1];
21481 rtx src2
= operands
[2];
21483 /* Canonicalize operand order. */
21484 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
21486 /* It is invalid to swap operands of different modes. */
21487 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
21489 std::swap (src1
, src2
);
21492 /* Both source operands cannot be in memory. */
21493 if (MEM_P (src1
) && MEM_P (src2
))
21495 /* Optimization: Only read from memory once. */
21496 if (rtx_equal_p (src1
, src2
))
21498 src2
= force_reg (mode
, src2
);
21501 else if (rtx_equal_p (dst
, src1
))
21502 src2
= force_reg (mode
, src2
);
21504 src1
= force_reg (mode
, src1
);
21507 /* If the destination is memory, and we do not have matching source
21508 operands, do things in registers. */
21509 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
21510 dst
= gen_reg_rtx (mode
);
21512 /* Source 1 cannot be a constant. */
21513 if (CONSTANT_P (src1
))
21514 src1
= force_reg (mode
, src1
);
21516 /* Source 1 cannot be a non-matching memory. */
21517 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
21518 src1
= force_reg (mode
, src1
);
21520 /* Improve address combine. */
21522 && GET_MODE_CLASS (mode
) == MODE_INT
21524 src2
= force_reg (mode
, src2
);
21526 operands
[1] = src1
;
21527 operands
[2] = src2
;
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
21542 /* Attempt to expand a binary operator. Make the expansion closer to the
21543 actual machine, then just general_operand, which will allow 3 separate
21544 memory references (one output, two input) in a single insn. */
21547 ix86_expand_binary_operator (enum rtx_code code
, machine_mode mode
,
21550 rtx src1
, src2
, dst
, op
, clob
;
21552 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
21553 src1
= operands
[1];
21554 src2
= operands
[2];
21556 /* Emit the instruction. */
21558 op
= gen_rtx_SET (dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
21560 if (reload_completed
21562 && !rtx_equal_p (dst
, src1
))
21564 /* This is going to be an LEA; avoid splitting it later. */
21569 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
21570 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
21573 /* Fix up the destination if needed. */
21574 if (dst
!= operands
[0])
21575 emit_move_insn (operands
[0], dst
);
21578 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
21579 the given OPERANDS. */
21582 ix86_expand_vector_logical_operator (enum rtx_code code
, machine_mode mode
,
21585 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
21586 if (SUBREG_P (operands
[1]))
21591 else if (SUBREG_P (operands
[2]))
21596 /* Optimize (__m128i) d | (__m128i) e and similar code
21597 when d and e are float vectors into float vector logical
21598 insn. In C/C++ without using intrinsics there is no other way
21599 to express vector logical operation on float vectors than
21600 to cast them temporarily to integer vectors. */
21602 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
21603 && (SUBREG_P (op2
) || GET_CODE (op2
) == CONST_VECTOR
)
21604 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
21605 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
21606 && SUBREG_BYTE (op1
) == 0
21607 && (GET_CODE (op2
) == CONST_VECTOR
21608 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
21609 && SUBREG_BYTE (op2
) == 0))
21610 && can_create_pseudo_p ())
21613 switch (GET_MODE (SUBREG_REG (op1
)))
21621 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
21622 if (GET_CODE (op2
) == CONST_VECTOR
)
21624 op2
= gen_lowpart (GET_MODE (dst
), op2
);
21625 op2
= force_reg (GET_MODE (dst
), op2
);
21630 op2
= SUBREG_REG (operands
[2]);
21631 if (!vector_operand (op2
, GET_MODE (dst
)))
21632 op2
= force_reg (GET_MODE (dst
), op2
);
21634 op1
= SUBREG_REG (op1
);
21635 if (!vector_operand (op1
, GET_MODE (dst
)))
21636 op1
= force_reg (GET_MODE (dst
), op1
);
21637 emit_insn (gen_rtx_SET (dst
,
21638 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
21640 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
21646 if (!vector_operand (operands
[1], mode
))
21647 operands
[1] = force_reg (mode
, operands
[1]);
21648 if (!vector_operand (operands
[2], mode
))
21649 operands
[2] = force_reg (mode
, operands
[2]);
21650 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
21651 emit_insn (gen_rtx_SET (operands
[0],
21652 gen_rtx_fmt_ee (code
, mode
, operands
[1],
21656 /* Return TRUE or FALSE depending on whether the binary operator meets the
21657 appropriate constraints. */
21660 ix86_binary_operator_ok (enum rtx_code code
, machine_mode mode
,
21663 rtx dst
= operands
[0];
21664 rtx src1
= operands
[1];
21665 rtx src2
= operands
[2];
21667 /* Both source operands cannot be in memory. */
21668 if (MEM_P (src1
) && MEM_P (src2
))
21671 /* Canonicalize operand order for commutative operators. */
21672 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
21673 std::swap (src1
, src2
);
21675 /* If the destination is memory, we must have a matching source operand. */
21676 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
21679 /* Source 1 cannot be a constant. */
21680 if (CONSTANT_P (src1
))
21683 /* Source 1 cannot be a non-matching memory. */
21684 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
21685 /* Support "andhi/andsi/anddi" as a zero-extending move. */
21686 return (code
== AND
21689 || (TARGET_64BIT
&& mode
== DImode
))
21690 && satisfies_constraint_L (src2
));
21695 /* Attempt to expand a unary operator. Make the expansion closer to the
21696 actual machine, then just general_operand, which will allow 2 separate
21697 memory references (one output, one input) in a single insn. */
21700 ix86_expand_unary_operator (enum rtx_code code
, machine_mode mode
,
21703 bool matching_memory
= false;
21704 rtx src
, dst
, op
, clob
;
21709 /* If the destination is memory, and we do not have matching source
21710 operands, do things in registers. */
21713 if (rtx_equal_p (dst
, src
))
21714 matching_memory
= true;
21716 dst
= gen_reg_rtx (mode
);
21719 /* When source operand is memory, destination must match. */
21720 if (MEM_P (src
) && !matching_memory
)
21721 src
= force_reg (mode
, src
);
21723 /* Emit the instruction. */
21725 op
= gen_rtx_SET (dst
, gen_rtx_fmt_e (code
, mode
, src
));
21731 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
21732 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
21735 /* Fix up the destination if needed. */
21736 if (dst
!= operands
[0])
21737 emit_move_insn (operands
[0], dst
);
21740 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
21741 divisor are within the range [0-255]. */
21744 ix86_split_idivmod (machine_mode mode
, rtx operands
[],
21747 rtx_code_label
*end_label
, *qimode_label
;
21750 rtx scratch
, tmp0
, tmp1
, tmp2
;
21751 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
21752 rtx (*gen_zero_extend
) (rtx
, rtx
);
21753 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
21758 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
21759 gen_test_ccno_1
= gen_testsi_ccno_1
;
21760 gen_zero_extend
= gen_zero_extendqisi2
;
21763 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
21764 gen_test_ccno_1
= gen_testdi_ccno_1
;
21765 gen_zero_extend
= gen_zero_extendqidi2
;
21768 gcc_unreachable ();
21771 end_label
= gen_label_rtx ();
21772 qimode_label
= gen_label_rtx ();
21774 scratch
= gen_reg_rtx (mode
);
21776 /* Use 8bit unsigned divimod if dividend and divisor are within
21777 the range [0-255]. */
21778 emit_move_insn (scratch
, operands
[2]);
21779 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
21780 scratch
, 1, OPTAB_DIRECT
);
21781 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
21782 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
21783 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
21784 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
21785 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
21787 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp0
));
21788 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21789 JUMP_LABEL (insn
) = qimode_label
;
21791 /* Generate original signed/unsigned divimod. */
21792 div
= gen_divmod4_1 (operands
[0], operands
[1],
21793 operands
[2], operands
[3]);
21796 /* Branch to the end. */
21797 emit_jump_insn (gen_jump (end_label
));
21800 /* Generate 8bit unsigned divide. */
21801 emit_label (qimode_label
);
21802 /* Don't use operands[0] for result of 8bit divide since not all
21803 registers support QImode ZERO_EXTRACT. */
21804 tmp0
= lowpart_subreg (HImode
, scratch
, mode
);
21805 tmp1
= lowpart_subreg (HImode
, operands
[2], mode
);
21806 tmp2
= lowpart_subreg (QImode
, operands
[3], mode
);
21807 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
21811 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
21812 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
21816 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
21817 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
21820 /* Extract remainder from AH. */
21821 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
21822 if (REG_P (operands
[1]))
21823 insn
= emit_move_insn (operands
[1], tmp1
);
21826 /* Need a new scratch register since the old one has result
21828 scratch
= gen_reg_rtx (mode
);
21829 emit_move_insn (scratch
, tmp1
);
21830 insn
= emit_move_insn (operands
[1], scratch
);
21832 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
21834 /* Zero extend quotient from AL. */
21835 tmp1
= gen_lowpart (QImode
, tmp0
);
21836 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
21837 set_unique_reg_note (insn
, REG_EQUAL
, div
);
21839 emit_label (end_label
);
21842 #define LEA_MAX_STALL (3)
21843 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
21845 /* Increase given DISTANCE in half-cycles according to
21846 dependencies between PREV and NEXT instructions.
21847 Add 1 half-cycle if there is no dependency and
21848 go to next cycle if there is some dependecy. */
21850 static unsigned int
21851 increase_distance (rtx_insn
*prev
, rtx_insn
*next
, unsigned int distance
)
21855 if (!prev
|| !next
)
21856 return distance
+ (distance
& 1) + 2;
21858 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
21859 return distance
+ 1;
21861 FOR_EACH_INSN_USE (use
, next
)
21862 FOR_EACH_INSN_DEF (def
, prev
)
21863 if (!DF_REF_IS_ARTIFICIAL (def
)
21864 && DF_REF_REGNO (use
) == DF_REF_REGNO (def
))
21865 return distance
+ (distance
& 1) + 2;
21867 return distance
+ 1;
21870 /* Function checks if instruction INSN defines register number
21871 REGNO1 or REGNO2. */
21874 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
21879 FOR_EACH_INSN_DEF (def
, insn
)
21880 if (DF_REF_REG_DEF_P (def
)
21881 && !DF_REF_IS_ARTIFICIAL (def
)
21882 && (regno1
== DF_REF_REGNO (def
)
21883 || regno2
== DF_REF_REGNO (def
)))
21889 /* Function checks if instruction INSN uses register number
21890 REGNO as a part of address expression. */
21893 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
21897 FOR_EACH_INSN_USE (use
, insn
)
21898 if (DF_REF_REG_MEM_P (use
) && regno
== DF_REF_REGNO (use
))
21904 /* Search backward for non-agu definition of register number REGNO1
21905 or register number REGNO2 in basic block starting from instruction
21906 START up to head of basic block or instruction INSN.
21908 Function puts true value into *FOUND var if definition was found
21909 and false otherwise.
21911 Distance in half-cycles between START and found instruction or head
21912 of BB is added to DISTANCE and returned. */
21915 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
21916 rtx_insn
*insn
, int distance
,
21917 rtx_insn
*start
, bool *found
)
21919 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
21920 rtx_insn
*prev
= start
;
21921 rtx_insn
*next
= NULL
;
21927 && distance
< LEA_SEARCH_THRESHOLD
)
21929 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
21931 distance
= increase_distance (prev
, next
, distance
);
21932 if (insn_defines_reg (regno1
, regno2
, prev
))
21934 if (recog_memoized (prev
) < 0
21935 || get_attr_type (prev
) != TYPE_LEA
)
21944 if (prev
== BB_HEAD (bb
))
21947 prev
= PREV_INSN (prev
);
21953 /* Search backward for non-agu definition of register number REGNO1
21954 or register number REGNO2 in INSN's basic block until
21955 1. Pass LEA_SEARCH_THRESHOLD instructions, or
21956 2. Reach neighbor BBs boundary, or
21957 3. Reach agu definition.
21958 Returns the distance between the non-agu definition point and INSN.
21959 If no definition point, returns -1. */
21962 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
21965 basic_block bb
= BLOCK_FOR_INSN (insn
);
21967 bool found
= false;
21969 if (insn
!= BB_HEAD (bb
))
21970 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
21971 distance
, PREV_INSN (insn
),
21974 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
21978 bool simple_loop
= false;
21980 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21983 simple_loop
= true;
21988 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
21990 BB_END (bb
), &found
);
21993 int shortest_dist
= -1;
21994 bool found_in_bb
= false;
21996 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
21999 = distance_non_agu_define_in_bb (regno1
, regno2
,
22005 if (shortest_dist
< 0)
22006 shortest_dist
= bb_dist
;
22007 else if (bb_dist
> 0)
22008 shortest_dist
= MIN (bb_dist
, shortest_dist
);
22014 distance
= shortest_dist
;
22018 /* get_attr_type may modify recog data. We want to make sure
22019 that recog data is valid for instruction INSN, on which
22020 distance_non_agu_define is called. INSN is unchanged here. */
22021 extract_insn_cached (insn
);
22026 return distance
>> 1;
22029 /* Return the distance in half-cycles between INSN and the next
22030 insn that uses register number REGNO in memory address added
22031 to DISTANCE. Return -1 if REGNO0 is set.
22033 Put true value into *FOUND if register usage was found and
22035 Put true value into *REDEFINED if register redefinition was
22036 found and false otherwise. */
22039 distance_agu_use_in_bb (unsigned int regno
,
22040 rtx_insn
*insn
, int distance
, rtx_insn
*start
,
22041 bool *found
, bool *redefined
)
22043 basic_block bb
= NULL
;
22044 rtx_insn
*next
= start
;
22045 rtx_insn
*prev
= NULL
;
22048 *redefined
= false;
22050 if (start
!= NULL_RTX
)
22052 bb
= BLOCK_FOR_INSN (start
);
22053 if (start
!= BB_HEAD (bb
))
22054 /* If insn and start belong to the same bb, set prev to insn,
22055 so the call to increase_distance will increase the distance
22056 between insns by 1. */
22062 && distance
< LEA_SEARCH_THRESHOLD
)
22064 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
22066 distance
= increase_distance(prev
, next
, distance
);
22067 if (insn_uses_reg_mem (regno
, next
))
22069 /* Return DISTANCE if OP0 is used in memory
22070 address in NEXT. */
22075 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
22077 /* Return -1 if OP0 is set in NEXT. */
22085 if (next
== BB_END (bb
))
22088 next
= NEXT_INSN (next
);
22094 /* Return the distance between INSN and the next insn that uses
22095 register number REGNO0 in memory address. Return -1 if no such
22096 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
22099 distance_agu_use (unsigned int regno0
, rtx_insn
*insn
)
22101 basic_block bb
= BLOCK_FOR_INSN (insn
);
22103 bool found
= false;
22104 bool redefined
= false;
22106 if (insn
!= BB_END (bb
))
22107 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
22109 &found
, &redefined
);
22111 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
22115 bool simple_loop
= false;
22117 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
22120 simple_loop
= true;
22125 distance
= distance_agu_use_in_bb (regno0
, insn
,
22126 distance
, BB_HEAD (bb
),
22127 &found
, &redefined
);
22130 int shortest_dist
= -1;
22131 bool found_in_bb
= false;
22132 bool redefined_in_bb
= false;
22134 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
22137 = distance_agu_use_in_bb (regno0
, insn
,
22138 distance
, BB_HEAD (e
->dest
),
22139 &found_in_bb
, &redefined_in_bb
);
22142 if (shortest_dist
< 0)
22143 shortest_dist
= bb_dist
;
22144 else if (bb_dist
> 0)
22145 shortest_dist
= MIN (bb_dist
, shortest_dist
);
22151 distance
= shortest_dist
;
22155 if (!found
|| redefined
)
22158 return distance
>> 1;
22161 /* Define this macro to tune LEA priority vs ADD, it take effect when
22162 there is a dilemma of choicing LEA or ADD
22163 Negative value: ADD is more preferred than LEA
22165 Positive value: LEA is more preferred than ADD*/
22166 #define IX86_LEA_PRIORITY 0
22168 /* Return true if usage of lea INSN has performance advantage
22169 over a sequence of instructions. Instructions sequence has
22170 SPLIT_COST cycles higher latency than lea latency. */
22173 ix86_lea_outperforms (rtx_insn
*insn
, unsigned int regno0
, unsigned int regno1
,
22174 unsigned int regno2
, int split_cost
, bool has_scale
)
22176 int dist_define
, dist_use
;
22178 /* For Silvermont if using a 2-source or 3-source LEA for
22179 non-destructive destination purposes, or due to wanting
22180 ability to use SCALE, the use of LEA is justified. */
22181 if (TARGET_SILVERMONT
|| TARGET_INTEL
)
22185 if (split_cost
< 1)
22187 if (regno0
== regno1
|| regno0
== regno2
)
22192 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
22193 dist_use
= distance_agu_use (regno0
, insn
);
22195 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
22197 /* If there is no non AGU operand definition, no AGU
22198 operand usage and split cost is 0 then both lea
22199 and non lea variants have same priority. Currently
22200 we prefer lea for 64 bit code and non lea on 32 bit
22202 if (dist_use
< 0 && split_cost
== 0)
22203 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
22208 /* With longer definitions distance lea is more preferable.
22209 Here we change it to take into account splitting cost and
22211 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
22213 /* If there is no use in memory addess then we just check
22214 that split cost exceeds AGU stall. */
22216 return dist_define
> LEA_MAX_STALL
;
22218 /* If this insn has both backward non-agu dependence and forward
22219 agu dependence, the one with short distance takes effect. */
22220 return dist_define
>= dist_use
;
22223 /* Return true if it is legal to clobber flags by INSN and
22224 false otherwise. */
22227 ix86_ok_to_clobber_flags (rtx_insn
*insn
)
22229 basic_block bb
= BLOCK_FOR_INSN (insn
);
22235 if (NONDEBUG_INSN_P (insn
))
22237 FOR_EACH_INSN_USE (use
, insn
)
22238 if (DF_REF_REG_USE_P (use
) && DF_REF_REGNO (use
) == FLAGS_REG
)
22241 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
22245 if (insn
== BB_END (bb
))
22248 insn
= NEXT_INSN (insn
);
22251 live
= df_get_live_out(bb
);
22252 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
22255 /* Return true if we need to split op0 = op1 + op2 into a sequence of
22256 move and add to avoid AGU stalls. */
22259 ix86_avoid_lea_for_add (rtx_insn
*insn
, rtx operands
[])
22261 unsigned int regno0
, regno1
, regno2
;
22263 /* Check if we need to optimize. */
22264 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
22267 /* Check it is correct to split here. */
22268 if (!ix86_ok_to_clobber_flags(insn
))
22271 regno0
= true_regnum (operands
[0]);
22272 regno1
= true_regnum (operands
[1]);
22273 regno2
= true_regnum (operands
[2]);
22275 /* We need to split only adds with non destructive
22276 destination operand. */
22277 if (regno0
== regno1
|| regno0
== regno2
)
22280 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
22283 /* Return true if we should emit lea instruction instead of mov
22287 ix86_use_lea_for_mov (rtx_insn
*insn
, rtx operands
[])
22289 unsigned int regno0
, regno1
;
22291 /* Check if we need to optimize. */
22292 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
22295 /* Use lea for reg to reg moves only. */
22296 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
22299 regno0
= true_regnum (operands
[0]);
22300 regno1
= true_regnum (operands
[1]);
22302 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
22305 /* Return true if we need to split lea into a sequence of
22306 instructions to avoid AGU stalls. */
22309 ix86_avoid_lea_for_addr (rtx_insn
*insn
, rtx operands
[])
22311 unsigned int regno0
, regno1
, regno2
;
22313 struct ix86_address parts
;
22316 /* Check we need to optimize. */
22317 if (!TARGET_AVOID_LEA_FOR_ADDR
|| optimize_function_for_size_p (cfun
))
22320 /* The "at least two components" test below might not catch simple
22321 move or zero extension insns if parts.base is non-NULL and parts.disp
22322 is const0_rtx as the only components in the address, e.g. if the
22323 register is %rbp or %r13. As this test is much cheaper and moves or
22324 zero extensions are the common case, do this check first. */
22325 if (REG_P (operands
[1])
22326 || (SImode_address_operand (operands
[1], VOIDmode
)
22327 && REG_P (XEXP (operands
[1], 0))))
22330 /* Check if it is OK to split here. */
22331 if (!ix86_ok_to_clobber_flags (insn
))
22334 ok
= ix86_decompose_address (operands
[1], &parts
);
22337 /* There should be at least two components in the address. */
22338 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
22339 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
22342 /* We should not split into add if non legitimate pic
22343 operand is used as displacement. */
22344 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
22347 regno0
= true_regnum (operands
[0]) ;
22348 regno1
= INVALID_REGNUM
;
22349 regno2
= INVALID_REGNUM
;
22352 regno1
= true_regnum (parts
.base
);
22354 regno2
= true_regnum (parts
.index
);
22358 /* Compute how many cycles we will add to execution time
22359 if split lea into a sequence of instructions. */
22360 if (parts
.base
|| parts
.index
)
22362 /* Have to use mov instruction if non desctructive
22363 destination form is used. */
22364 if (regno1
!= regno0
&& regno2
!= regno0
)
22367 /* Have to add index to base if both exist. */
22368 if (parts
.base
&& parts
.index
)
22371 /* Have to use shift and adds if scale is 2 or greater. */
22372 if (parts
.scale
> 1)
22374 if (regno0
!= regno1
)
22376 else if (regno2
== regno0
)
22379 split_cost
+= parts
.scale
;
22382 /* Have to use add instruction with immediate if
22383 disp is non zero. */
22384 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
22387 /* Subtract the price of lea. */
22391 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
22410 /* Return true if regno1 def is nearest to the insn. */
22413 find_nearest_reg_def (rtx_insn
*insn
, int regno1
, int regno2
)
22415 rtx_insn
*prev
= insn
;
22416 rtx_insn
*start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
22420 while (prev
&& prev
!= start
)
22422 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
22424 prev
= PREV_INSN (prev
);
22427 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
22429 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
22431 prev
= PREV_INSN (prev
);
22434 /* None of the regs is defined in the bb. */
22438 /* Split lea instructions into a sequence of instructions
22439 which are executed on ALU to avoid AGU stalls.
22440 It is assumed that it is allowed to clobber flags register
22441 at lea position. */
22444 ix86_split_lea_for_addr (rtx_insn
*insn
, rtx operands
[], machine_mode mode
)
22446 unsigned int regno0
, regno1
, regno2
;
22447 struct ix86_address parts
;
22451 ok
= ix86_decompose_address (operands
[1], &parts
);
22454 target
= gen_lowpart (mode
, operands
[0]);
22456 regno0
= true_regnum (target
);
22457 regno1
= INVALID_REGNUM
;
22458 regno2
= INVALID_REGNUM
;
22462 parts
.base
= gen_lowpart (mode
, parts
.base
);
22463 regno1
= true_regnum (parts
.base
);
22468 parts
.index
= gen_lowpart (mode
, parts
.index
);
22469 regno2
= true_regnum (parts
.index
);
22473 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
22475 if (parts
.scale
> 1)
22477 /* Case r1 = r1 + ... */
22478 if (regno1
== regno0
)
22480 /* If we have a case r1 = r1 + C * r2 then we
22481 should use multiplication which is very
22482 expensive. Assume cost model is wrong if we
22483 have such case here. */
22484 gcc_assert (regno2
!= regno0
);
22486 for (adds
= parts
.scale
; adds
> 0; adds
--)
22487 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
22491 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
22492 if (regno0
!= regno2
)
22493 emit_insn (gen_rtx_SET (target
, parts
.index
));
22495 /* Use shift for scaling. */
22496 ix86_emit_binop (ASHIFT
, mode
, target
,
22497 GEN_INT (exact_log2 (parts
.scale
)));
22500 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
22502 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
22503 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
22506 else if (!parts
.base
&& !parts
.index
)
22508 gcc_assert(parts
.disp
);
22509 emit_insn (gen_rtx_SET (target
, parts
.disp
));
22515 if (regno0
!= regno2
)
22516 emit_insn (gen_rtx_SET (target
, parts
.index
));
22518 else if (!parts
.index
)
22520 if (regno0
!= regno1
)
22521 emit_insn (gen_rtx_SET (target
, parts
.base
));
22525 if (regno0
== regno1
)
22527 else if (regno0
== regno2
)
22533 /* Find better operand for SET instruction, depending
22534 on which definition is farther from the insn. */
22535 if (find_nearest_reg_def (insn
, regno1
, regno2
))
22536 tmp
= parts
.index
, tmp1
= parts
.base
;
22538 tmp
= parts
.base
, tmp1
= parts
.index
;
22540 emit_insn (gen_rtx_SET (target
, tmp
));
22542 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
22543 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
22545 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
22549 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
22552 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
22553 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumation.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
22580 /* Return true if destination reg of SET_BODY is shift count of
22584 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
22590 /* Retrieve destination of SET_BODY. */
22591 switch (GET_CODE (set_body
))
22594 set_dest
= SET_DEST (set_body
);
22595 if (!set_dest
|| !REG_P (set_dest
))
22599 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
22600 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
22608 /* Retrieve shift count of USE_BODY. */
22609 switch (GET_CODE (use_body
))
22612 shift_rtx
= XEXP (use_body
, 1);
22615 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
22616 if (ix86_dep_by_shift_count_body (set_body
,
22617 XVECEXP (use_body
, 0, i
)))
22625 && (GET_CODE (shift_rtx
) == ASHIFT
22626 || GET_CODE (shift_rtx
) == LSHIFTRT
22627 || GET_CODE (shift_rtx
) == ASHIFTRT
22628 || GET_CODE (shift_rtx
) == ROTATE
22629 || GET_CODE (shift_rtx
) == ROTATERT
))
22631 rtx shift_count
= XEXP (shift_rtx
, 1);
22633 /* Return true if shift count is dest of SET_BODY. */
22634 if (REG_P (shift_count
))
22636 /* Add check since it can be invoked before register
22637 allocation in pre-reload schedule. */
22638 if (reload_completed
22639 && true_regnum (set_dest
) == true_regnum (shift_count
))
22641 else if (REGNO(set_dest
) == REGNO(shift_count
))
22649 /* Return true if destination reg of SET_INSN is shift count of
22653 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
22655 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
22656 PATTERN (use_insn
));
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code,
			machine_mode,
			rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
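/* Illustrative sketch (not part of GCC): the exponent-bias trick above,
   written out in scalar C with a hypothetical helper name.  lo.d equals
   0x1.0p52 + low32(v), hi.d equals 0x1.0p84 + high32(v) * 2**32, both
   bias subtractions are exact, and the only rounding happens in the
   final addition.

     double u64_to_double_sketch (unsigned long long v)
     {
       union { unsigned long long u; double d; } lo, hi;

       lo.u = (0x43300000ULL << 32) | (v & 0xffffffffULL);
       hi.u = (0x45300000ULL << 32) | (v >> 32);

       return (lo.d - 0x1.0p52) + (hi.d - 0x1.0p84);
     }
*/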
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx, rtx)
{
  gcc_unreachable ();
}

/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
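/* Illustrative sketch (not part of GCC): the bias trick above in scalar C,
   with a made-up helper name.  Flipping the sign bit turns the unsigned
   value into a signed one offset by -2**31; the signed conversion is then
   exact, and adding 2**31 back is exact as well, so there is no double
   rounding.  (The cast relies on the usual two's-complement wraparound,
   as on x86.)

     double u32_to_double_sketch (unsigned int v)
     {
       return (double) (int) (v ^ 0x80000000u) + 0x1.0p31;
     }
*/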
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
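/* Illustrative sketch (not part of GCC): the same 16-bit split in scalar C,
   with a made-up helper name.  Each half converts to float exactly,
   multiplying by 0x1.0p16f only changes the exponent, and the single
   rounding happens in the final addition.

     float u32_to_float_sketch (unsigned int v)
     {
       return (float) (v >> 16) * 0x1.0p16f + (float) (v & 0xffff);
     }
*/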
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  machine_mode intmode = GET_MODE (val);
  machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  machine_mode mode = GET_MODE (val);
  machine_mode scalarmode = GET_MODE_INNER (mode);
  machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
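/* Illustrative sketch (not part of GCC): the scalar analogue of the
   adjustment above, with a made-up helper name, for converting one float
   to an unsigned 32-bit integer using only the signed truncating
   instruction:

     unsigned int float_to_u32_sketch (float f)
     {
       if (f < 0x1.0p31f)
	 return (unsigned int) (int) f;
       return (unsigned int) (int) (f - 0x1.0p31f) | 0x80000000u;
     }

   Values at or above 2**31 are brought into signed range first and the
   stripped 2**31 is ORed (equivalently XORed) back in afterwards, which
   is exactly the job of the mask returned through *XORP.  */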
22987 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
22988 then replicate the value for all elements of the vector
22992 ix86_build_const_vector (machine_mode mode
, bool vect
, rtx value
)
22996 machine_mode scalar_mode
;
23020 n_elt
= GET_MODE_NUNITS (mode
);
23021 v
= rtvec_alloc (n_elt
);
23022 scalar_mode
= GET_MODE_INNER (mode
);
23024 RTVEC_ELT (v
, 0) = value
;
23026 for (i
= 1; i
< n_elt
; ++i
)
23027 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
23029 return gen_rtx_CONST_VECTOR (mode
, v
);
23032 gcc_unreachable ();
23036 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
23037 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
23038 for an SSE register. If VECT is true, then replicate the mask for
23039 all elements of the vector register. If INVERT is true, then create
23040 a mask excluding the sign bit. */
23043 ix86_build_signbit_mask (machine_mode mode
, bool vect
, bool invert
)
23045 machine_mode vec_mode
, imode
;
23073 vec_mode
= VOIDmode
;
23078 gcc_unreachable ();
23081 machine_mode inner_mode
= GET_MODE_INNER (mode
);
23082 w
= wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode
) - 1,
23083 GET_MODE_BITSIZE (inner_mode
));
23085 w
= wi::bit_not (w
);
23087 /* Force this value into the low part of a fp vector constant. */
23088 mask
= immed_wide_int_const (w
, imode
);
23089 mask
= gen_lowpart (inner_mode
, mask
);
23091 if (vec_mode
== VOIDmode
)
23092 return force_reg (inner_mode
, mask
);
23094 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
23095 return force_reg (vec_mode
, v
);
23098 /* Generate code for floating point ABS or NEG. */
23101 ix86_expand_fp_absneg_operator (enum rtx_code code
, machine_mode mode
,
23104 rtx mask
, set
, dst
, src
;
23105 bool use_sse
= false;
23106 bool vector_mode
= VECTOR_MODE_P (mode
);
23107 machine_mode vmode
= mode
;
23111 else if (mode
== TFmode
)
23113 else if (TARGET_SSE_MATH
)
23115 use_sse
= SSE_FLOAT_MODE_P (mode
);
23116 if (mode
== SFmode
)
23118 else if (mode
== DFmode
)
23122 /* NEG and ABS performed with SSE use bitwise mask operations.
23123 Create the appropriate mask now. */
23125 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
23132 set
= gen_rtx_fmt_e (code
, mode
, src
);
23133 set
= gen_rtx_SET (dst
, set
);
23140 use
= gen_rtx_USE (VOIDmode
, mask
);
23142 par
= gen_rtvec (2, set
, use
);
23145 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
23146 par
= gen_rtvec (3, set
, use
, clob
);
23148 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
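/* Illustrative sketch (not part of GCC): the scalar analogue of the masks
   built by ix86_build_signbit_mask and used by the ABS/NEG expander above.
   ABS is an AND with the inverted sign-bit mask (andps/andpd), NEG an XOR
   with the sign-bit mask (xorps/xorpd); the vector forms simply replicate
   the same mask into every lane.  Helper names are made up.

     double fabs_sketch (double x)
     {
       union { double d; unsigned long long u; } v = { x };
       v.u &= ~(1ULL << 63);
       return v.d;
     }

     double neg_sketch (double x)
     {
       union { double d; unsigned long long u; } v = { x };
       v.u ^= 1ULL << 63;
       return v.d;
     }
*/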
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (CONST_DOUBLE_P (op0))
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = lowpart_subreg (vmode, dest, mode);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (dest, x));
    }
}
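/* Illustrative sketch (not part of GCC): the mask arithmetic the copysign
   splitters perform on XMM registers, written for a scalar double with a
   made-up helper name:

     double copysign_sketch (double mag, double sgn)
     {
       union { double d; unsigned long long u; } m = { mag }, s = { sgn };
       unsigned long long signbit = 1ULL << 63;

       m.u = (m.u & ~signbit) | (s.u & signbit);
       return m.d;
     }

   When the magnitude is a constant (the _const splitter) its sign bit is
   already cleared, so only the AND that extracts the sign and the final
   IOR remain.  */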
23252 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
23253 so we have to do two masks. */
23256 ix86_split_copysign_var (rtx operands
[])
23258 machine_mode mode
, vmode
;
23259 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
23261 dest
= operands
[0];
23262 scratch
= operands
[1];
23265 nmask
= operands
[4];
23266 mask
= operands
[5];
23268 mode
= GET_MODE (dest
);
23269 vmode
= GET_MODE (mask
);
23271 if (rtx_equal_p (op0
, op1
))
23273 /* Shouldn't happen often (it's useless, obviously), but when it does
23274 we'd generate incorrect code if we continue below. */
23275 emit_move_insn (dest
, op0
);
23279 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
23281 gcc_assert (REGNO (op1
) == REGNO (scratch
));
23283 x
= gen_rtx_AND (vmode
, scratch
, mask
);
23284 emit_insn (gen_rtx_SET (scratch
, x
));
23287 op0
= lowpart_subreg (vmode
, op0
, mode
);
23288 x
= gen_rtx_NOT (vmode
, dest
);
23289 x
= gen_rtx_AND (vmode
, x
, op0
);
23290 emit_insn (gen_rtx_SET (dest
, x
));
23294 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
23296 x
= gen_rtx_AND (vmode
, scratch
, mask
);
23298 else /* alternative 2,4 */
23300 gcc_assert (REGNO (mask
) == REGNO (scratch
));
23301 op1
= lowpart_subreg (vmode
, op1
, mode
);
23302 x
= gen_rtx_AND (vmode
, scratch
, op1
);
23304 emit_insn (gen_rtx_SET (scratch
, x
));
23306 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
23308 dest
= lowpart_subreg (vmode
, op0
, mode
);
23309 x
= gen_rtx_AND (vmode
, dest
, nmask
);
23311 else /* alternative 3,4 */
23313 gcc_assert (REGNO (nmask
) == REGNO (dest
));
23315 op0
= lowpart_subreg (vmode
, op0
, mode
);
23316 x
= gen_rtx_AND (vmode
, dest
, op0
);
23318 emit_insn (gen_rtx_SET (dest
, x
));
23321 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
23322 emit_insn (gen_rtx_SET (dest
, x
));
23325 /* Return TRUE or FALSE depending on whether the first SET in INSN
23326 has source and destination with matching CC modes, and that the
23327 CC mode is at least as constrained as REQ_MODE. */
23330 ix86_match_ccmode (rtx insn
, machine_mode req_mode
)
23333 machine_mode set_mode
;
23335 set
= PATTERN (insn
);
23336 if (GET_CODE (set
) == PARALLEL
)
23337 set
= XVECEXP (set
, 0, 0);
23338 gcc_assert (GET_CODE (set
) == SET
);
23339 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
23341 set_mode
= GET_MODE (SET_DEST (set
));
23345 if (req_mode
!= CCNOmode
23346 && (req_mode
!= CCmode
23347 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
23351 if (req_mode
== CCGCmode
)
23355 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
23359 if (req_mode
== CCZmode
)
23370 if (set_mode
!= req_mode
)
23375 gcc_unreachable ();
23378 return GET_MODE (SET_SRC (set
)) == set_mode
;
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

static machine_mode
ix86_fp_compare_mode (enum rtx_code)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
23417 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
23419 machine_mode mode
= GET_MODE (op0
);
23421 if (SCALAR_FLOAT_MODE_P (mode
))
23423 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
23424 return ix86_fp_compare_mode (code
);
23429 /* Only zero flag is needed. */
23430 case EQ
: /* ZF=0 */
23431 case NE
: /* ZF!=0 */
23433 /* Codes needing carry flag. */
23434 case GEU
: /* CF=0 */
23435 case LTU
: /* CF=1 */
23436 /* Detect overflow checks. They need just the carry flag. */
23437 if (GET_CODE (op0
) == PLUS
23438 && (rtx_equal_p (op1
, XEXP (op0
, 0))
23439 || rtx_equal_p (op1
, XEXP (op0
, 1))))
23443 case GTU
: /* CF=0 & ZF=0 */
23444 case LEU
: /* CF=1 | ZF=1 */
23446 /* Codes possibly doable only with sign flag when
23447 comparing against zero. */
23448 case GE
: /* SF=OF or SF=0 */
23449 case LT
: /* SF<>OF or SF=1 */
23450 if (op1
== const0_rtx
)
23453 /* For other cases Carry flag is not required. */
23455 /* Codes doable only with sign flag when comparing
23456 against zero, but we miss jump instruction for it
23457 so we need to use relational tests against overflow
23458 that thus needs to be zero. */
23459 case GT
: /* ZF=0 & SF=OF */
23460 case LE
: /* ZF=1 | SF<>OF */
23461 if (op1
== const0_rtx
)
23465 /* strcmp pattern do (use flags) and combine may ask us for proper
23470 gcc_unreachable ();
23474 /* Return the fixed registers used for condition codes. */
23477 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
23484 /* If two condition code modes are compatible, return a condition code
23485 mode which is compatible with both. Otherwise, return
23488 static machine_mode
23489 ix86_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
23494 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
23497 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
23498 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
23501 if ((m1
== CCNOmode
&& m2
== CCGOCmode
)
23502 || (m1
== CCGOCmode
&& m2
== CCNOmode
))
23506 && (m2
== CCGCmode
|| m2
== CCGOCmode
|| m2
== CCNOmode
))
23508 else if (m2
== CCZmode
23509 && (m1
== CCGCmode
|| m1
== CCGOCmode
|| m1
== CCNOmode
))
23515 gcc_unreachable ();
23547 /* These are only compatible with themselves, which we already
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
23577 /* Return cost of comparison CODE using the best strategy for performance.
23578 All following functions do use number of instructions as a cost metrics.
23579 In future this should be tweaked to compute bytes for optimize_size and
23580 take into account performance of various instructions on various CPUs. */
23583 ix86_fp_comparison_cost (enum rtx_code code
)
23587 /* The cost of code using bit-twiddling on %ah. */
23604 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
23608 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
23611 gcc_unreachable ();
23614 switch (ix86_fp_comparison_strategy (code
))
23616 case IX86_FPCMP_COMI
:
23617 return arith_cost
> 4 ? 3 : 2;
23618 case IX86_FPCMP_SAHF
:
23619 return arith_cost
> 4 ? 4 : 3;
23625 /* Return strategy to use for floating-point. We assume that fcomi is always
23626 preferrable where available, since that is also true when looking at size
23627 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
23629 enum ix86_fpcmp_strategy
23630 ix86_fp_comparison_strategy (enum rtx_code
)
23632 /* Do fcomi/sahf based test when profitable. */
23635 return IX86_FPCMP_COMI
;
23637 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
23638 return IX86_FPCMP_SAHF
;
23640 return IX86_FPCMP_ARITH
;
23643 /* Swap, force into registers, or otherwise massage the two operands
23644 to a fp comparison. The operands are updated in place; the new
23645 comparison code is returned. */
23647 static enum rtx_code
23648 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
23650 machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
23651 rtx op0
= *pop0
, op1
= *pop1
;
23652 machine_mode op_mode
= GET_MODE (op0
);
23653 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
23655 /* All of the unordered compare instructions only work on registers.
23656 The same is true of the fcomi compare instructions. The XFmode
23657 compare instructions require registers except when comparing
23658 against zero or when converting operand 1 from fixed point to
23662 && (fpcmp_mode
== CCFPUmode
23663 || (op_mode
== XFmode
23664 && ! (standard_80387_constant_p (op0
) == 1
23665 || standard_80387_constant_p (op1
) == 1)
23666 && GET_CODE (op1
) != FLOAT
)
23667 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
23669 op0
= force_reg (op_mode
, op0
);
23670 op1
= force_reg (op_mode
, op1
);
23674 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
23675 things around if they appear profitable, otherwise force op0
23676 into a register. */
23678 if (standard_80387_constant_p (op0
) == 0
23680 && ! (standard_80387_constant_p (op1
) == 0
23683 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
23684 if (new_code
!= UNKNOWN
)
23686 std::swap (op0
, op1
);
23692 op0
= force_reg (op_mode
, op0
);
23694 if (CONSTANT_P (op1
))
23696 int tmp
= standard_80387_constant_p (op1
);
23698 op1
= validize_mem (force_const_mem (op_mode
, op1
));
23702 op1
= force_reg (op_mode
, op1
);
23705 op1
= force_reg (op_mode
, op1
);
23709 /* Try to rearrange the comparison to make it cheaper. */
23710 if (ix86_fp_comparison_cost (code
)
23711 > ix86_fp_comparison_cost (swap_condition (code
))
23712 && (REG_P (op1
) || can_create_pseudo_p ()))
23714 std::swap (op0
, op1
);
23715 code
= swap_condition (code
);
23717 op0
= force_reg (op_mode
, op0
);
23725 /* Convert comparison codes we use to represent FP comparison to integer
23726 code that will result in proper branch. Return UNKNOWN if no such code
23730 ix86_fp_compare_code_to_integer (enum rtx_code code
)
23754 /* Generate insn patterns to do a floating point compare of OPERANDS. */
23757 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
23759 machine_mode fpcmp_mode
, intcmp_mode
;
23762 fpcmp_mode
= ix86_fp_compare_mode (code
);
23763 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
23765 /* Do fcomi/sahf based test when profitable. */
23766 switch (ix86_fp_comparison_strategy (code
))
23768 case IX86_FPCMP_COMI
:
23769 intcmp_mode
= fpcmp_mode
;
23770 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
23771 tmp
= gen_rtx_SET (gen_rtx_REG (fpcmp_mode
, FLAGS_REG
), tmp
);
23775 case IX86_FPCMP_SAHF
:
23776 intcmp_mode
= fpcmp_mode
;
23777 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
23778 tmp
= gen_rtx_SET (gen_rtx_REG (fpcmp_mode
, FLAGS_REG
), tmp
);
23781 scratch
= gen_reg_rtx (HImode
);
23782 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
23783 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
23786 case IX86_FPCMP_ARITH
:
23787 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
23788 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
23789 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
23791 scratch
= gen_reg_rtx (HImode
);
23792 emit_insn (gen_rtx_SET (scratch
, tmp2
));
23794 /* In the unordered case, we have to check C2 for NaN's, which
23795 doesn't happen to work out to anything nice combination-wise.
23796 So do some bit twiddling on the value we've got in AH to come
23797 up with an appropriate set of condition codes. */
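      /* For reference (a sketch, not generated code): after fnstsw the
	 x87 condition codes land in %ah as C0 = 0x01, C2 = 0x04 and
	 C3 = 0x40, so "test $0x45, %ah" checks C0|C2|C3 in one go.
	 A compare sets C0 for "less than", C3 for "equal", and
	 C0 = C2 = C3 = 1 for unordered, which is what the constants
	 0x45, 0x44, 0x40, 0x05, 0x04 and 0x01 below are picking apart.  */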
23799 intcmp_mode
= CCNOmode
;
23804 if (code
== GT
|| !TARGET_IEEE_FP
)
23806 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x45)));
23811 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23812 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
23813 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
23814 intcmp_mode
= CCmode
;
23820 if (code
== LT
&& TARGET_IEEE_FP
)
23822 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23823 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
23824 intcmp_mode
= CCmode
;
23829 emit_insn (gen_testqi_ext_1_ccno (scratch
, const1_rtx
));
23835 if (code
== GE
|| !TARGET_IEEE_FP
)
23837 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x05)));
23842 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23843 emit_insn (gen_xorqi_ext_1_cc (scratch
, scratch
, const1_rtx
));
23849 if (code
== LE
&& TARGET_IEEE_FP
)
23851 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23852 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
23853 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
23854 intcmp_mode
= CCmode
;
23859 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x45)));
23865 if (code
== EQ
&& TARGET_IEEE_FP
)
23867 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23868 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
23869 intcmp_mode
= CCmode
;
23874 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x40)));
23880 if (code
== NE
&& TARGET_IEEE_FP
)
23882 emit_insn (gen_andqi_ext_1 (scratch
, scratch
, GEN_INT (0x45)));
23883 emit_insn (gen_xorqi_ext_1_cc (scratch
, scratch
,
23889 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x40)));
23895 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x04)));
23899 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x04)));
23904 gcc_unreachable ();
23912 /* Return the test that should be put into the flags user, i.e.
23913 the bcc, scc, or cmov instruction. */
23914 return gen_rtx_fmt_ee (code
, VOIDmode
,
23915 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
23920 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
23924 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
23925 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
23927 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
23929 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
23930 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
23933 ret
= ix86_expand_int_compare (code
, op0
, op1
);
23939 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
23941 machine_mode mode
= GET_MODE (op0
);
23944 /* Handle special case - vector comparsion with boolean result, transform
23945 it using ptest instruction. */
23946 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
23948 rtx flag
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
23949 machine_mode p_mode
= GET_MODE_SIZE (mode
) == 32 ? V4DImode
: V2DImode
;
23951 gcc_assert (code
== EQ
|| code
== NE
);
23952 /* Generate XOR since we can't check that one operand is zero vector. */
23953 tmp
= gen_reg_rtx (mode
);
23954 emit_insn (gen_rtx_SET (tmp
, gen_rtx_XOR (mode
, op0
, op1
)));
23955 tmp
= gen_lowpart (p_mode
, tmp
);
23956 emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode
, FLAGS_REG
),
23957 gen_rtx_UNSPEC (CCmode
,
23958 gen_rtvec (2, tmp
, tmp
),
23960 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, flag
, const0_rtx
);
23961 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23962 gen_rtx_LABEL_REF (VOIDmode
, label
),
23964 emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp
));
23977 tmp
= ix86_expand_compare (code
, op0
, op1
);
23978 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23979 gen_rtx_LABEL_REF (VOIDmode
, label
),
23981 emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp
));
23987 /* For 32-bit target DI comparison may be performed on
23988 SSE registers. To allow this we should avoid split
23989 to SI mode which is achieved by doing xor in DI mode
23990 and then comparing with zero (which is recognized by
23991 STV pass). We don't compare using xor when optimizing
23993 if (!optimize_insn_for_size_p ()
23995 && (code
== EQ
|| code
== NE
))
23997 op0
= force_reg (mode
, gen_rtx_XOR (mode
, op0
, op1
));
24002 /* Expand DImode branch into multiple compare+branch. */
24005 rtx_code_label
*label2
;
24006 enum rtx_code code1
, code2
, code3
;
24007 machine_mode submode
;
24009 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
24011 std::swap (op0
, op1
);
24012 code
= swap_condition (code
);
24015 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
24016 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
24018 submode
= mode
== DImode
? SImode
: DImode
;
24020 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
24021 avoid two branches. This costs one extra insn, so disable when
24022 optimizing for size. */
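      /* Illustrative sketch (not part of GCC): the identity being used is

	   a == b  <=>  ((hi (a) ^ hi (b)) | (lo (a) ^ lo (b))) == 0

	 so one OR feeding a single compare-with-zero replaces a second
	 compare-and-branch on the low halves.  */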
24024 if ((code
== EQ
|| code
== NE
)
24025 && (!optimize_insn_for_size_p ()
24026 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
24031 if (hi
[1] != const0_rtx
)
24032 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
24033 NULL_RTX
, 0, OPTAB_WIDEN
);
24036 if (lo
[1] != const0_rtx
)
24037 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
24038 NULL_RTX
, 0, OPTAB_WIDEN
);
24040 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
24041 NULL_RTX
, 0, OPTAB_WIDEN
);
24043 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
24047 /* Otherwise, if we are doing less-than or greater-or-equal-than,
24048 op1 is a constant and the low word is zero, then we can just
24049 examine the high word. Similarly for low word -1 and
24050 less-or-equal-than or greater-than. */
24052 if (CONST_INT_P (hi
[1]))
24055 case LT
: case LTU
: case GE
: case GEU
:
24056 if (lo
[1] == const0_rtx
)
24058 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
24062 case LE
: case LEU
: case GT
: case GTU
:
24063 if (lo
[1] == constm1_rtx
)
24065 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
24073 /* Otherwise, we need two or three jumps. */
24075 label2
= gen_label_rtx ();
24078 code2
= swap_condition (code
);
24079 code3
= unsigned_condition (code
);
24083 case LT
: case GT
: case LTU
: case GTU
:
24086 case LE
: code1
= LT
; code2
= GT
; break;
24087 case GE
: code1
= GT
; code2
= LT
; break;
24088 case LEU
: code1
= LTU
; code2
= GTU
; break;
24089 case GEU
: code1
= GTU
; code2
= LTU
; break;
24091 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
24092 case NE
: code2
= UNKNOWN
; break;
24095 gcc_unreachable ();
24100 * if (hi(a) < hi(b)) goto true;
24101 * if (hi(a) > hi(b)) goto false;
24102 * if (lo(a) < lo(b)) goto true;
24106 if (code1
!= UNKNOWN
)
24107 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
24108 if (code2
!= UNKNOWN
)
24109 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
24111 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
24113 if (code2
!= UNKNOWN
)
24114 emit_label (label2
);
24119 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp)
{
  rtx condition;
  rtx_insn *i;

  if (target2 != pc_rtx)
    {
      std::swap (target1, target2);
      code = reverse_condition_maybe_unordered (code);
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  i = emit_jump_insn (gen_rtx_SET
		      (pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability.initialized_p ())
    add_reg_br_prob_note (i, split_branch_probability);
}

void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (dest, ret));
}
24161 /* Expand comparison setting or clearing carry flag. Return true when
24162 successful and set pop for the operation. */
24164 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
24166 machine_mode mode
=
24167 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
24169 /* Do not handle double-mode compares that go through special path. */
24170 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
24173 if (SCALAR_FLOAT_MODE_P (mode
))
24176 rtx_insn
*compare_seq
;
24178 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
24180 /* Shortcut: following common codes never translate
24181 into carry flag compares. */
24182 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
24183 || code
== ORDERED
|| code
== UNORDERED
)
24186 /* These comparisons require zero flag; swap operands so they won't. */
24187 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
24188 && !TARGET_IEEE_FP
)
24190 std::swap (op0
, op1
);
24191 code
= swap_condition (code
);
24194 /* Try to expand the comparison and verify that we end up with
24195 carry flag based comparison. This fails to be true only when
24196 we decide to expand comparison using arithmetic that is not
24197 too common scenario. */
24199 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
24200 compare_seq
= get_insns ();
24203 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
24204 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
24205 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
24207 code
= GET_CODE (compare_op
);
24209 if (code
!= LTU
&& code
!= GEU
)
24212 emit_insn (compare_seq
);
24217 if (!INTEGRAL_MODE_P (mode
))
24226 /* Convert a==0 into (unsigned)a<1. */
24229 if (op1
!= const0_rtx
)
24232 code
= (code
== EQ
? LTU
: GEU
);
24235 /* Convert a>b into b<a or a>=b-1. */
24238 if (CONST_INT_P (op1
))
24240 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
24241 /* Bail out on overflow. We still can swap operands but that
24242 would force loading of the constant into register. */
24243 if (op1
== const0_rtx
24244 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
24246 code
= (code
== GTU
? GEU
: LTU
);
24250 std::swap (op0
, op1
);
24251 code
= (code
== GTU
? LTU
: GEU
);
24255 /* Convert a>=0 into (unsigned)a<0x80000000. */
24258 if (mode
== DImode
|| op1
!= const0_rtx
)
24260 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
24261 code
= (code
== LT
? GEU
: LTU
);
24265 if (mode
== DImode
|| op1
!= constm1_rtx
)
24267 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
24268 code
= (code
== LE
? GEU
: LTU
);
24274 /* Swapping operands may cause constant to appear as first operand. */
24275 if (!nonimmediate_operand (op0
, VOIDmode
))
24277 if (!can_create_pseudo_p ())
24279 op0
= force_reg (mode
, op0
);
24281 *pop
= ix86_expand_compare (code
, op0
, op1
);
24282 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
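/* Illustrative sketch (not part of GCC): the rewrites performed above, in
   scalar C.  Each one turns the test into an unsigned LTU/GEU whose result
   lives entirely in the carry flag, so a single sbb/adc can consume it:

     a == 0    becomes   (unsigned) a < 1
     a >  b    becomes   b < a   (or a >= b + 1 when b is a constant)
     a >= 0    becomes   (unsigned) a < 0x80000000
     a <= -1   becomes   (unsigned) a >= 0x80000000

   On x86 "cmp b, a ; sbb mask, mask" then materializes
   mask = ((unsigned) a < (unsigned) b) ? -1 : 0 without a branch.  */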
24287 ix86_expand_int_movcc (rtx operands
[])
24289 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
24290 rtx_insn
*compare_seq
;
24292 machine_mode mode
= GET_MODE (operands
[0]);
24293 bool sign_bit_compare_p
= false;
24294 rtx op0
= XEXP (operands
[1], 0);
24295 rtx op1
= XEXP (operands
[1], 1);
24297 if (GET_MODE (op0
) == TImode
24298 || (GET_MODE (op0
) == DImode
24303 compare_op
= ix86_expand_compare (code
, op0
, op1
);
24304 compare_seq
= get_insns ();
24307 compare_code
= GET_CODE (compare_op
);
24309 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
24310 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
24311 sign_bit_compare_p
= true;
24313 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
24314 HImode insns, we'd be swallowed in word prefix ops. */
24316 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
24317 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
24318 && CONST_INT_P (operands
[2])
24319 && CONST_INT_P (operands
[3]))
24321 rtx out
= operands
[0];
24322 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
24323 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
24324 HOST_WIDE_INT diff
;
24327 /* Sign bit compares are better done using shifts than we do by using
24329 if (sign_bit_compare_p
24330 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
24332 /* Detect overlap between destination and compare sources. */
24335 if (!sign_bit_compare_p
)
24338 bool fpcmp
= false;
24340 compare_code
= GET_CODE (compare_op
);
24342 flags
= XEXP (compare_op
, 0);
24344 if (GET_MODE (flags
) == CCFPmode
24345 || GET_MODE (flags
) == CCFPUmode
)
24349 = ix86_fp_compare_code_to_integer (compare_code
);
24352 /* To simplify rest of code, restrict to the GEU case. */
24353 if (compare_code
== LTU
)
24355 std::swap (ct
, cf
);
24356 compare_code
= reverse_condition (compare_code
);
24357 code
= reverse_condition (code
);
24362 PUT_CODE (compare_op
,
24363 reverse_condition_maybe_unordered
24364 (GET_CODE (compare_op
)));
24366 PUT_CODE (compare_op
,
24367 reverse_condition (GET_CODE (compare_op
)));
24371 if (reg_overlap_mentioned_p (out
, op0
)
24372 || reg_overlap_mentioned_p (out
, op1
))
24373 tmp
= gen_reg_rtx (mode
);
24375 if (mode
== DImode
)
24376 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
24378 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
24379 flags
, compare_op
));
24383 if (code
== GT
|| code
== GE
)
24384 code
= reverse_condition (code
);
24387 std::swap (ct
, cf
);
24390 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
24403 tmp
= expand_simple_binop (mode
, PLUS
,
24405 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
24416 tmp
= expand_simple_binop (mode
, IOR
,
24418 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
24420 else if (diff
== -1 && ct
)
24430 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
24432 tmp
= expand_simple_binop (mode
, PLUS
,
24433 copy_rtx (tmp
), GEN_INT (cf
),
24434 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
24442 * andl cf - ct, dest
24452 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
24455 tmp
= expand_simple_binop (mode
, AND
,
24457 gen_int_mode (cf
- ct
, mode
),
24458 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
24460 tmp
= expand_simple_binop (mode
, PLUS
,
24461 copy_rtx (tmp
), GEN_INT (ct
),
24462 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
24465 if (!rtx_equal_p (tmp
, out
))
24466 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
24473 machine_mode cmp_mode
= GET_MODE (op0
);
24474 enum rtx_code new_code
;
24476 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
24478 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
24480 /* We may be reversing unordered compare to normal compare, that
24481 is not valid in general (we may convert non-trapping condition
24482 to trapping one), however on i386 we currently emit all
24483 comparisons unordered. */
24484 new_code
= reverse_condition_maybe_unordered (code
);
24487 new_code
= ix86_reverse_condition (code
, cmp_mode
);
24488 if (new_code
!= UNKNOWN
)
24490 std::swap (ct
, cf
);
24496 compare_code
= UNKNOWN
;
24497 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
24498 && CONST_INT_P (op1
))
24500 if (op1
== const0_rtx
24501 && (code
== LT
|| code
== GE
))
24502 compare_code
= code
;
24503 else if (op1
== constm1_rtx
)
24507 else if (code
== GT
)
24512 /* Optimize dest = (op0 < 0) ? -1 : cf. */
24513 if (compare_code
!= UNKNOWN
24514 && GET_MODE (op0
) == GET_MODE (out
)
24515 && (cf
== -1 || ct
== -1))
24517 /* If lea code below could be used, only optimize
24518 if it results in a 2 insn sequence. */
24520 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
24521 || diff
== 3 || diff
== 5 || diff
== 9)
24522 || (compare_code
== LT
&& ct
== -1)
24523 || (compare_code
== GE
&& cf
== -1))
24526 * notl op1 (if necessary)
24534 code
= reverse_condition (code
);
24537 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
24539 out
= expand_simple_binop (mode
, IOR
,
24541 out
, 1, OPTAB_DIRECT
);
24542 if (out
!= operands
[0])
24543 emit_move_insn (operands
[0], out
);
24550 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
24551 || diff
== 3 || diff
== 5 || diff
== 9)
24552 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
24554 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
24560 * lea cf(dest*(ct-cf)),dest
24564 * This also catches the degenerate setcc-only case.
24570 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
24573 /* On x86_64 the lea instruction operates on Pmode, so we need
24574 to get arithmetics done in proper mode to match. */
24576 tmp
= copy_rtx (out
);
24580 out1
= copy_rtx (out
);
24581 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
24585 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
24591 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
24594 if (!rtx_equal_p (tmp
, out
))
24597 out
= force_operand (tmp
, copy_rtx (out
));
24599 emit_insn (gen_rtx_SET (copy_rtx (out
), copy_rtx (tmp
)));
24601 if (!rtx_equal_p (out
, operands
[0]))
24602 emit_move_insn (operands
[0], copy_rtx (out
));
24608 * General case: Jumpful:
24609 * xorl dest,dest cmpl op1, op2
24610 * cmpl op1, op2 movl ct, dest
24611 * setcc dest jcc 1f
24612 * decl dest movl cf, dest
24613 * andl (cf-ct),dest 1:
24616 * Size 20. Size 14.
24618 * This is reasonably steep, but branch mispredict costs are
24619 * high on modern cpus, so consider failing only if optimizing
24623 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
24624 && BRANCH_COST (optimize_insn_for_speed_p (),
24629 machine_mode cmp_mode
= GET_MODE (op0
);
24630 enum rtx_code new_code
;
24632 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
24634 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
24636 /* We may be reversing unordered compare to normal compare,
24637 that is not valid in general (we may convert non-trapping
24638 condition to trapping one), however on i386 we currently
24639 emit all comparisons unordered. */
24640 new_code
= reverse_condition_maybe_unordered (code
);
24644 new_code
= ix86_reverse_condition (code
, cmp_mode
);
24645 if (compare_code
!= UNKNOWN
&& new_code
!= UNKNOWN
)
24646 compare_code
= reverse_condition (compare_code
);
24649 if (new_code
!= UNKNOWN
)
24657 if (compare_code
!= UNKNOWN
)
24659 /* notl op1 (if needed)
24664 For x < 0 (resp. x <= -1) there will be no notl,
24665 so if possible swap the constants to get rid of the
24667 True/false will be -1/0 while code below (store flag
24668 followed by decrement) is 0/-1, so the constants need
24669 to be exchanged once more. */
24671 if (compare_code
== GE
|| !cf
)
24673 code
= reverse_condition (code
);
24677 std::swap (ct
, cf
);
24679 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
24683 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
24685 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
24687 copy_rtx (out
), 1, OPTAB_DIRECT
);
24690 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
24691 gen_int_mode (cf
- ct
, mode
),
24692 copy_rtx (out
), 1, OPTAB_DIRECT
);
24694 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
24695 copy_rtx (out
), 1, OPTAB_DIRECT
);
24696 if (!rtx_equal_p (out
, operands
[0]))
24697 emit_move_insn (operands
[0], copy_rtx (out
));
24703 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
24705 /* Try a few things more with specific constants and a variable. */
24708 rtx var
, orig_out
, out
, tmp
;
24710 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
24713 /* If one of the two operands is an interesting constant, load a
24714 constant with the above and mask it in with a logical operation. */
24716 if (CONST_INT_P (operands
[2]))
24719 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
24720 operands
[3] = constm1_rtx
, op
= and_optab
;
24721 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
24722 operands
[3] = const0_rtx
, op
= ior_optab
;
24726 else if (CONST_INT_P (operands
[3]))
24729 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
24730 operands
[2] = constm1_rtx
, op
= and_optab
;
24731 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
24732 operands
[2] = const0_rtx
, op
= ior_optab
;
24739 orig_out
= operands
[0];
24740 tmp
= gen_reg_rtx (mode
);
24743 /* Recurse to get the constant loaded. */
24744 if (!ix86_expand_int_movcc (operands
))
24747 /* Mask in the interesting variable. */
24748 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
24750 if (!rtx_equal_p (out
, orig_out
))
24751 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
24757 * For comparison with above,
24767 if (! nonimmediate_operand (operands
[2], mode
))
24768 operands
[2] = force_reg (mode
, operands
[2]);
24769 if (! nonimmediate_operand (operands
[3], mode
))
24770 operands
[3] = force_reg (mode
, operands
[3]);
24772 if (! register_operand (operands
[2], VOIDmode
)
24774 || ! register_operand (operands
[3], VOIDmode
)))
24775 operands
[2] = force_reg (mode
, operands
[2]);
24778 && ! register_operand (operands
[3], VOIDmode
))
24779 operands
[3] = force_reg (mode
, operands
[3]);
24781 emit_insn (compare_seq
);
24782 emit_insn (gen_rtx_SET (operands
[0],
24783 gen_rtx_IF_THEN_ELSE (mode
,
24784 compare_op
, operands
[2],
24789 /* Swap, force into registers, or otherwise massage the two operands
24790 to an sse comparison with a mask result. Thus we differ a bit from
24791 ix86_prepare_fp_compare_args which expects to produce a flags result.
24793 The DEST operand exists to help determine whether to commute commutative
24794 operators. The POP0/POP1 operands are updated in place. The new
24795 comparison code is returned, or UNKNOWN if not implementable. */
24797 static enum rtx_code
24798 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
24799 rtx
*pop0
, rtx
*pop1
)
24805 /* AVX supports all the needed comparisons. */
24808 /* We have no LTGT as an operator. We could implement it with
24809 NE & ORDERED, but this requires an extra temporary. It's
24810 not clear that it's worth it. */
24817 /* These are supported directly. */
24824 /* AVX has 3 operand comparisons, no need to swap anything. */
24827 /* For commutative operators, try to canonicalize the destination
24828 operand to be first in the comparison - this helps reload to
24829 avoid extra moves. */
24830 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
24838 /* These are not supported directly before AVX, and furthermore
24839 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
24840 comparison operands to transform into something that is
24842 std::swap (*pop0
, *pop1
);
24843 code
= swap_condition (code
);
24847 gcc_unreachable ();
24853 /* Detect conditional moves that exactly match min/max operational
24854 semantics. Note that this is IEEE safe, as long as we don't
24855 interchange the operands.
24857 Returns FALSE if this conditional move doesn't match a MIN/MAX,
24858 and TRUE if the operation is successful and instructions are emitted. */
24861 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
24862 rtx cmp_op1
, rtx if_true
, rtx if_false
)
24870 else if (code
== UNGE
)
24871 std::swap (if_true
, if_false
);
24875 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
24877 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
24882 mode
= GET_MODE (dest
);
24884 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
24885 but MODE may be a vector mode and thus not appropriate. */
24886 if (!flag_finite_math_only
|| flag_signed_zeros
)
24888 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
24891 if_true
= force_reg (mode
, if_true
);
24892 v
= gen_rtvec (2, if_true
, if_false
);
24893 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
24897 code
= is_min
? SMIN
: SMAX
;
24898 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
24901 emit_insn (gen_rtx_SET (dest
, tmp
));
24905 /* Expand an sse vector comparison. Return the register with the result. */
24908 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
24909 rtx op_true
, rtx op_false
)
24911 machine_mode mode
= GET_MODE (dest
);
24912 machine_mode cmp_ops_mode
= GET_MODE (cmp_op0
);
24914 /* In general case result of comparison can differ from operands' type. */
24915 machine_mode cmp_mode
;
24917 /* In AVX512F the result of comparison is an integer mask. */
24918 bool maskcmp
= false;
24921 if (GET_MODE_SIZE (cmp_ops_mode
) == 64)
24923 unsigned int nbits
= GET_MODE_NUNITS (cmp_ops_mode
);
24924 cmp_mode
= int_mode_for_size (nbits
, 0).require ();
24928 cmp_mode
= cmp_ops_mode
;
24931 cmp_op0
= force_reg (cmp_ops_mode
, cmp_op0
);
24932 if (!nonimmediate_operand (cmp_op1
, cmp_ops_mode
))
24933 cmp_op1
= force_reg (cmp_ops_mode
, cmp_op1
);
24936 || (maskcmp
&& cmp_mode
!= mode
)
24937 || (op_true
&& reg_overlap_mentioned_p (dest
, op_true
))
24938 || (op_false
&& reg_overlap_mentioned_p (dest
, op_false
)))
24939 dest
= gen_reg_rtx (maskcmp
? cmp_mode
: mode
);
24941 /* Compare patterns for int modes are unspec in AVX512F only. */
24942 if (maskcmp
&& (code
== GT
|| code
== EQ
))
24944 rtx (*gen
)(rtx
, rtx
, rtx
);
24946 switch (cmp_ops_mode
)
24949 gcc_assert (TARGET_AVX512BW
);
24950 gen
= code
== GT
? gen_avx512bw_gtv64qi3
: gen_avx512bw_eqv64qi3_1
;
24953 gcc_assert (TARGET_AVX512BW
);
24954 gen
= code
== GT
? gen_avx512bw_gtv32hi3
: gen_avx512bw_eqv32hi3_1
;
24957 gen
= code
== GT
? gen_avx512f_gtv16si3
: gen_avx512f_eqv16si3_1
;
24960 gen
= code
== GT
? gen_avx512f_gtv8di3
: gen_avx512f_eqv8di3_1
;
24968 emit_insn (gen (dest
, cmp_op0
, cmp_op1
));
24972 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
24974 if (cmp_mode
!= mode
&& !maskcmp
)
24976 x
= force_reg (cmp_ops_mode
, x
);
24977 convert_move (dest
, x
, false);
24980 emit_insn (gen_rtx_SET (dest
, x
));
24985 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
24986 operations. This is used for both scalar and vector conditional moves. */
24989 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
24991 machine_mode mode
= GET_MODE (dest
);
24992 machine_mode cmpmode
= GET_MODE (cmp
);
24994 /* In AVX512F the result of comparison is an integer mask. */
24995 bool maskcmp
= (mode
!= cmpmode
&& TARGET_AVX512F
);
24999 /* If we have an integer mask and FP value then we need
25000 to cast mask to FP mode. */
25001 if (mode
!= cmpmode
&& VECTOR_MODE_P (cmpmode
))
25003 cmp
= force_reg (cmpmode
, cmp
);
25004 cmp
= gen_rtx_SUBREG (mode
, cmp
, 0);
25007 if (vector_all_ones_operand (op_true
, mode
)
25008 && rtx_equal_p (op_false
, CONST0_RTX (mode
))
25011 emit_insn (gen_rtx_SET (dest
, cmp
));
25013 else if (op_false
== CONST0_RTX (mode
)
25016 op_true
= force_reg (mode
, op_true
);
25017 x
= gen_rtx_AND (mode
, cmp
, op_true
);
25018 emit_insn (gen_rtx_SET (dest
, x
));
25020 else if (op_true
== CONST0_RTX (mode
)
25023 op_false
= force_reg (mode
, op_false
);
25024 x
= gen_rtx_NOT (mode
, cmp
);
25025 x
= gen_rtx_AND (mode
, x
, op_false
);
25026 emit_insn (gen_rtx_SET (dest
, x
));
25028 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
)
25031 op_false
= force_reg (mode
, op_false
);
25032 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
25033 emit_insn (gen_rtx_SET (dest
, x
));
25035 else if (TARGET_XOP
25038 op_true
= force_reg (mode
, op_true
);
25040 if (!nonimmediate_operand (op_false
, mode
))
25041 op_false
= force_reg (mode
, op_false
);
25043 emit_insn (gen_rtx_SET (dest
, gen_rtx_IF_THEN_ELSE (mode
, cmp
,
25049 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
25052 if (!nonimmediate_operand (op_true
, mode
))
25053 op_true
= force_reg (mode
, op_true
);
25055 op_false
= force_reg (mode
, op_false
);
25061 gen
= gen_sse4_1_blendvps
;
25065 gen
= gen_sse4_1_blendvpd
;
25073 gen
= gen_sse4_1_pblendvb
;
25074 if (mode
!= V16QImode
)
25075 d
= gen_reg_rtx (V16QImode
);
25076 op_false
= gen_lowpart (V16QImode
, op_false
);
25077 op_true
= gen_lowpart (V16QImode
, op_true
);
25078 cmp
= gen_lowpart (V16QImode
, cmp
);
25083 gen
= gen_avx_blendvps256
;
25087 gen
= gen_avx_blendvpd256
;
25095 gen
= gen_avx2_pblendvb
;
25096 if (mode
!= V32QImode
)
25097 d
= gen_reg_rtx (V32QImode
);
25098 op_false
= gen_lowpart (V32QImode
, op_false
);
25099 op_true
= gen_lowpart (V32QImode
, op_true
);
25100 cmp
= gen_lowpart (V32QImode
, cmp
);
25105 gen
= gen_avx512bw_blendmv64qi
;
25108 gen
= gen_avx512bw_blendmv32hi
;
25111 gen
= gen_avx512f_blendmv16si
;
25114 gen
= gen_avx512f_blendmv8di
;
25117 gen
= gen_avx512f_blendmv8df
;
25120 gen
= gen_avx512f_blendmv16sf
;
25129 emit_insn (gen (d
, op_false
, op_true
, cmp
));
25131 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
25135 op_true
= force_reg (mode
, op_true
);
25137 t2
= gen_reg_rtx (mode
);
25139 t3
= gen_reg_rtx (mode
);
25143 x
= gen_rtx_AND (mode
, op_true
, cmp
);
25144 emit_insn (gen_rtx_SET (t2
, x
));
25146 x
= gen_rtx_NOT (mode
, cmp
);
25147 x
= gen_rtx_AND (mode
, x
, op_false
);
25148 emit_insn (gen_rtx_SET (t3
, x
));
25150 x
= gen_rtx_IOR (mode
, t3
, t2
);
25151 emit_insn (gen_rtx_SET (dest
, x
));
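/* Illustrative sketch (not part of GCC): the three-logical fallback above
   (pand / pandn / por) in scalar form, given a 0 / all-ones mask; the
   helper name is made up:

     unsigned int blend_sketch (unsigned int mask, unsigned int t, unsigned int f)
     {
       return (t & mask) | (f & ~mask);
     }
*/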
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}
/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes.  */

static int
ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code)
{
  switch (code)
    {
    /* ...  */
    default:
      gcc_unreachable ();
    }
}

/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes.  */

static int
ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code)
{
  switch (code)
    {
    /* ...  */
    default:
      gcc_unreachable ();
    }
}

/* Return immediate value to be used in UNSPEC_PCMP
   for comparison CODE in MODE.  */

static int
ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode)
{
  if (FLOAT_MODE_P (mode))
    return ix86_fp_cmp_code_to_pcmp_immediate (code);
  return ix86_int_cmp_code_to_pcmp_immediate (code);
}
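/* Note (informal): the value returned above is the predicate selector
   that ends up in the immediate byte of the AVX-512 vpcmp/vcmp
   instructions; the elided switch bodies simply map each rtx comparison
   code (EQ, LT, LE, ...) to the encoding the hardware defines for it.  */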
/* Expand AVX-512 vector comparison.  */

bool
ix86_expand_mask_vec_cmp (rtx operands[])
{
  machine_mode mask_mode = GET_MODE (operands[0]);
  machine_mode cmp_mode = GET_MODE (operands[2]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode));
  int unspec_code;
  rtx unspec;

  switch (code)
    {
    case LTU:
    case GTU:
    case GEU:
    case LEU:
      unspec_code = UNSPEC_UNSIGNED_PCMP;
      break;

    default:
      unspec_code = UNSPEC_PCMP;
    }

  unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2],
						 operands[3], imm),
			   unspec_code);
  emit_insn (gen_rtx_SET (operands[0], unspec));

  return true;
}
/* Expand fp vector comparison.  */

bool
ix86_expand_fp_vec_cmp (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[2], &operands[3]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[1]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2],
				      operands[3], NULL, NULL);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2],
				     operands[3], NULL, NULL);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2],
				      operands[3], NULL, NULL);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2],
				     operands[3], NULL, NULL);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
    }
  else
    cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3],
			       operands[1], operands[2]);

  if (operands[0] != cmp)
    emit_move_insn (operands[0], cmp);

  return true;
}
static rtx
ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
			 rtx op_true, rtx op_false, bool *negate)
{
  machine_mode data_mode = GET_MODE (dest);
  machine_mode mode = GET_MODE (cop0);
  rtx x;

  *negate = false;

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  *negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  *negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  std::swap (cop0, cop1);
	  code = swap_condition (code);
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return NULL;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return NULL;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case E_V16SImode: case E_V8DImode: case E_V8SImode:
	    case E_V4DImode: case E_V4SImode: case E_V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case E_V16SImode: gen_sub3 = gen_subv16si3; break;
		  case E_V8DImode: gen_sub3 = gen_subv8di3; break;
		  case E_V8SImode: gen_sub3 = gen_subv8si3; break;
		  case E_V4DImode: gen_sub3 = gen_subv4di3; break;
		  case E_V4SImode: gen_sub3 = gen_subv4si3; break;
		  case E_V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case E_V64QImode: case E_V32HImode: case E_V32QImode:
	    case E_V16HImode: case E_V16QImode: case E_V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0,
							   cop1)));
	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      *negate = !*negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  if (*negate)
    std::swap (op_true, op_false);

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (dest, code, cop0, cop1,
			       op_true, op_false);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
			       op_true, op_false);
      if (GET_MODE (x) == mode)
	x = gen_lowpart (data_mode, x);
    }

  return x;
}
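/* Worked example for the unsigned-compare trick above (illustrative
   values only): on unsigned 32-bit lanes, 0xffffffff >u 0x00000001.
   Subtracting the sign-bit mask 0x80000000 from both lanes gives
   0x7fffffff and 0x80000001, and as signed values 0x7fffffff is greater
   than 0x80000001, so the signed GT compare reproduces the original
   unsigned GTU result.  */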
/* Expand integer vector comparison.  */

bool
ix86_expand_int_vec_cmp (rtx operands[])
{
  rtx_code code = GET_CODE (operands[1]);
  bool negate = false;
  rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2],
				     operands[3], NULL, NULL, &negate);

  if (!cmp)
    return false;

  if (negate)
    cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
				   CONST0_RTX (GET_MODE (cmp)),
				   NULL, NULL, &negate);

  gcc_assert (!negate);

  if (operands[0] != cmp)
    emit_move_insn (operands[0], cmp);

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  machine_mode data_mode = GET_MODE (operands[0]);
  machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_UNIT_SIZE (data_mode) > 1
      && GET_MODE_UNIT_SIZE (data_mode) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1], operands[2], &negate);

  if (!x)
    return false;

  ix86_expand_sse_movcc (operands[0], x, operands[1 + negate],
			 operands[2 - negate]);

  return true;
}
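/* Example of the shift special case handled above (illustrative): for
   V4SImode, "x < 0 ? -1 : 0" is just the per-lane arithmetic shift
   x >> 31 (psrad $31), and "x < 0 ? 1 : 0" is the logical shift
   x >> 31 (psrld $31), so no compare or blend is needed at all.  */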
/* AVX512F does support 64-byte integer vector operations,
   thus the longest vector we are faced with is V64QImode.  */
#define MAX_VECT_LEN	64

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};
static bool
ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
			      struct expand_vec_perm_d *d)
{
  /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
     expander, so args are either in d, or in op0, op1 etc.  */
  machine_mode mode = GET_MODE (d ? d->op0 : op0);
  machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_V8HImode:
      if (TARGET_AVX512VL && TARGET_AVX512BW) gen = gen_avx512vl_vpermi2varv8hi3;
      break;
    case E_V16HImode:
      if (TARGET_AVX512VL && TARGET_AVX512BW) gen = gen_avx512vl_vpermi2varv16hi3;
      break;
    case E_V64QImode:
      if (TARGET_AVX512VBMI) gen = gen_avx512bw_vpermi2varv64qi3;
      break;
    case E_V32HImode:
      if (TARGET_AVX512BW) gen = gen_avx512bw_vpermi2varv32hi3;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL) gen = gen_avx512vl_vpermi2varv4si3;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL) gen = gen_avx512vl_vpermi2varv8si3;
      break;
    case E_V16SImode:
      if (TARGET_AVX512F) gen = gen_avx512f_vpermi2varv16si3;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
	{ gen = gen_avx512vl_vpermi2varv4sf3; maskmode = V4SImode; }
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
	{ gen = gen_avx512vl_vpermi2varv8sf3; maskmode = V8SImode; }
      break;
    case E_V16SFmode:
      if (TARGET_AVX512F)
	{ gen = gen_avx512f_vpermi2varv16sf3; maskmode = V16SImode; }
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL) gen = gen_avx512vl_vpermi2varv2di3;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL) gen = gen_avx512vl_vpermi2varv4di3;
      break;
    case E_V8DImode:
      if (TARGET_AVX512F) gen = gen_avx512f_vpermi2varv8di3;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
	{ gen = gen_avx512vl_vpermi2varv2df3; maskmode = V2DImode; }
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
	{ gen = gen_avx512vl_vpermi2varv4df3; maskmode = V4DImode; }
      break;
    case E_V8DFmode:
      if (TARGET_AVX512F)
	{ gen = gen_avx512f_vpermi2varv8df3; maskmode = V8DImode; }
      break;
    default:
      break;
    }

  if (gen == NULL)
    return false;

  /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
     expander, so args are either in d, or in op0, op1 etc.  */
  if (d)
    {
      rtx vec[64];
      target = d->target;
      op0 = d->op0;
      op1 = d->op1;
      for (int i = 0; i < d->nelt; ++i)
	vec[i] = GEN_INT (d->perm[i]);
      mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
    }

  emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
  return true;
}
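/* Informal reminder (not a spec quote): the vpermi2var patterns used
   above select each result element from the concatenation of the two
   input vectors according to the corresponding index element, so a
   single instruction performs a full two-operand variable permute.  */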
25807 /* Expand a variable vector permutation. */
25810 ix86_expand_vec_perm (rtx operands
[])
25812 rtx target
= operands
[0];
25813 rtx op0
= operands
[1];
25814 rtx op1
= operands
[2];
25815 rtx mask
= operands
[3];
25816 rtx t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
, vt
, vt2
, vec
[32];
25817 machine_mode mode
= GET_MODE (op0
);
25818 machine_mode maskmode
= GET_MODE (mask
);
25820 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
25822 /* Number of elements in the vector. */
25823 w
= GET_MODE_NUNITS (mode
);
25824 e
= GET_MODE_UNIT_SIZE (mode
);
25825 gcc_assert (w
<= 64);
25827 if (TARGET_AVX512F
&& one_operand_shuffle
)
25829 rtx (*gen
) (rtx
, rtx
, rtx
) = NULL
;
25833 gen
=gen_avx512f_permvarv16si
;
25836 gen
= gen_avx512f_permvarv16sf
;
25839 gen
= gen_avx512f_permvarv8di
;
25842 gen
= gen_avx512f_permvarv8df
;
25849 emit_insn (gen (target
, op0
, mask
));
25854 if (ix86_expand_vec_perm_vpermi2 (target
, op0
, mask
, op1
, NULL
))
25859 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
25861 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
25862 an constant shuffle operand. With a tiny bit of effort we can
25863 use VPERMD instead. A re-interpretation stall for V4DFmode is
25864 unfortunate but there's no avoiding it.
25865 Similarly for V16HImode we don't have instructions for variable
25866 shuffling, while for V32QImode we can use after preparing suitable
25867 masks vpshufb; vpshufb; vpermq; vpor. */
25869 if (mode
== V16HImode
)
25871 maskmode
= mode
= V32QImode
;
25877 maskmode
= mode
= V8SImode
;
25881 t1
= gen_reg_rtx (maskmode
);
25883 /* Replicate the low bits of the V4DImode mask into V8SImode:
25885 t1 = { A A B B C C D D }. */
25886 for (i
= 0; i
< w
/ 2; ++i
)
25887 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
25888 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
25889 vt
= force_reg (maskmode
, vt
);
25890 mask
= gen_lowpart (maskmode
, mask
);
25891 if (maskmode
== V8SImode
)
25892 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
25894 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
25896 /* Multiply the shuffle indicies by two. */
25897 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
25900 /* Add one to the odd shuffle indicies:
25901 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
25902 for (i
= 0; i
< w
/ 2; ++i
)
25904 vec
[i
* 2] = const0_rtx
;
25905 vec
[i
* 2 + 1] = const1_rtx
;
25907 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
25908 vt
= validize_mem (force_const_mem (maskmode
, vt
));
25909 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
25912 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
25913 operands
[3] = mask
= t1
;
25914 target
= gen_reg_rtx (mode
);
25915 op0
= gen_lowpart (mode
, op0
);
25916 op1
= gen_lowpart (mode
, op1
);
25922 /* The VPERMD and VPERMPS instructions already properly ignore
25923 the high bits of the shuffle elements. No need for us to
25924 perform an AND ourselves. */
25925 if (one_operand_shuffle
)
25927 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
25928 if (target
!= operands
[0])
25929 emit_move_insn (operands
[0],
25930 gen_lowpart (GET_MODE (operands
[0]), target
));
25934 t1
= gen_reg_rtx (V8SImode
);
25935 t2
= gen_reg_rtx (V8SImode
);
25936 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
25937 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
25943 mask
= gen_lowpart (V8SImode
, mask
);
25944 if (one_operand_shuffle
)
25945 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
25948 t1
= gen_reg_rtx (V8SFmode
);
25949 t2
= gen_reg_rtx (V8SFmode
);
25950 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
25951 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
25957 /* By combining the two 128-bit input vectors into one 256-bit
25958 input vector, we can use VPERMD and VPERMPS for the full
25959 two-operand shuffle. */
25960 t1
= gen_reg_rtx (V8SImode
);
25961 t2
= gen_reg_rtx (V8SImode
);
25962 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
25963 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
25964 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
25965 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
25969 t1
= gen_reg_rtx (V8SFmode
);
25970 t2
= gen_reg_rtx (V8SImode
);
25971 mask
= gen_lowpart (V4SImode
, mask
);
25972 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
25973 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
25974 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
25975 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
25979 t1
= gen_reg_rtx (V32QImode
);
25980 t2
= gen_reg_rtx (V32QImode
);
25981 t3
= gen_reg_rtx (V32QImode
);
25982 vt2
= GEN_INT (-128);
25983 for (i
= 0; i
< 32; i
++)
25985 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
25986 vt
= force_reg (V32QImode
, vt
);
25987 for (i
= 0; i
< 32; i
++)
25988 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
25989 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
25990 vt2
= force_reg (V32QImode
, vt2
);
25991 /* From mask create two adjusted masks, which contain the same
25992 bits as mask in the low 7 bits of each vector element.
25993 The first mask will have the most significant bit clear
25994 if it requests element from the same 128-bit lane
25995 and MSB set if it requests element from the other 128-bit lane.
25996 The second mask will have the opposite values of the MSB,
25997 and additionally will have its 128-bit lanes swapped.
25998 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
25999 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
26000 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
26001 stands for other 12 bytes. */
26002 /* The bit whether element is from the same lane or the other
26003 lane is bit 4, so shift it up by 3 to the MSB position. */
26004 t5
= gen_reg_rtx (V4DImode
);
26005 emit_insn (gen_ashlv4di3 (t5
, gen_lowpart (V4DImode
, mask
),
26007 /* Clear MSB bits from the mask just in case it had them set. */
26008 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
26009 /* After this t1 will have MSB set for elements from other lane. */
26010 emit_insn (gen_xorv32qi3 (t1
, gen_lowpart (V32QImode
, t5
), vt2
));
26011 /* Clear bits other than MSB. */
26012 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
26013 /* Or in the lower bits from mask into t3. */
26014 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
26015 /* And invert MSB bits in t1, so MSB is set for elements from the same
26017 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
26018 /* Swap 128-bit lanes in t3. */
26019 t6
= gen_reg_rtx (V4DImode
);
26020 emit_insn (gen_avx2_permv4di_1 (t6
, gen_lowpart (V4DImode
, t3
),
26021 const2_rtx
, GEN_INT (3),
26022 const0_rtx
, const1_rtx
));
26023 /* And or in the lower bits from mask into t1. */
26024 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
26025 if (one_operand_shuffle
)
26027 /* Each of these shuffles will put 0s in places where
26028 element from the other 128-bit lane is needed, otherwise
26029 will shuffle in the requested value. */
26030 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
,
26031 gen_lowpart (V32QImode
, t6
)));
26032 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
26033 /* For t3 the 128-bit lanes are swapped again. */
26034 t7
= gen_reg_rtx (V4DImode
);
26035 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t3
),
26036 const2_rtx
, GEN_INT (3),
26037 const0_rtx
, const1_rtx
));
26038 /* And oring both together leads to the result. */
26039 emit_insn (gen_iorv32qi3 (target
, t1
,
26040 gen_lowpart (V32QImode
, t7
)));
26041 if (target
!= operands
[0])
26042 emit_move_insn (operands
[0],
26043 gen_lowpart (GET_MODE (operands
[0]), target
));
26047 t4
= gen_reg_rtx (V32QImode
);
26048 /* Similarly to the above one_operand_shuffle code,
26049 just for repeated twice for each operand. merge_two:
26050 code will merge the two results together. */
26051 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
,
26052 gen_lowpart (V32QImode
, t6
)));
26053 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
,
26054 gen_lowpart (V32QImode
, t6
)));
26055 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
26056 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
26057 t7
= gen_reg_rtx (V4DImode
);
26058 emit_insn (gen_avx2_permv4di_1 (t7
, gen_lowpart (V4DImode
, t4
),
26059 const2_rtx
, GEN_INT (3),
26060 const0_rtx
, const1_rtx
));
26061 t8
= gen_reg_rtx (V4DImode
);
26062 emit_insn (gen_avx2_permv4di_1 (t8
, gen_lowpart (V4DImode
, t3
),
26063 const2_rtx
, GEN_INT (3),
26064 const0_rtx
, const1_rtx
));
26065 emit_insn (gen_iorv32qi3 (t4
, t2
, gen_lowpart (V32QImode
, t7
)));
26066 emit_insn (gen_iorv32qi3 (t3
, t1
, gen_lowpart (V32QImode
, t8
)));
26072 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
26079 /* The XOP VPPERM insn supports three inputs. By ignoring the
26080 one_operand_shuffle special case, we avoid creating another
26081 set of constant vectors in memory. */
26082 one_operand_shuffle
= false;
26084 /* mask = mask & {2*w-1, ...} */
26085 vt
= GEN_INT (2*w
- 1);
26089 /* mask = mask & {w-1, ...} */
26090 vt
= GEN_INT (w
- 1);
26093 for (i
= 0; i
< w
; i
++)
26095 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
26096 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
26097 NULL_RTX
, 0, OPTAB_DIRECT
);
26099 /* For non-QImode operations, convert the word permutation control
26100 into a byte permutation control. */
26101 if (mode
!= V16QImode
)
26103 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
26104 GEN_INT (exact_log2 (e
)),
26105 NULL_RTX
, 0, OPTAB_DIRECT
);
26107 /* Convert mask to vector of chars. */
26108 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
26110 /* Replicate each of the input bytes into byte positions:
26111 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
26112 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
26113 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
26114 for (i
= 0; i
< 16; ++i
)
26115 vec
[i
] = GEN_INT (i
/e
* e
);
26116 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
26117 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
26119 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
26121 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
26123 /* Convert it into the byte positions by doing
26124 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
26125 for (i
= 0; i
< 16; ++i
)
26126 vec
[i
] = GEN_INT (i
% e
);
26127 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
26128 vt
= validize_mem (force_const_mem (V16QImode
, vt
));
26129 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
26132 /* The actual shuffle operations all operate on V16QImode. */
26133 op0
= gen_lowpart (V16QImode
, op0
);
26134 op1
= gen_lowpart (V16QImode
, op1
);
26138 if (GET_MODE (target
) != V16QImode
)
26139 target
= gen_reg_rtx (V16QImode
);
26140 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
26141 if (target
!= operands
[0])
26142 emit_move_insn (operands
[0],
26143 gen_lowpart (GET_MODE (operands
[0]), target
));
26145 else if (one_operand_shuffle
)
26147 if (GET_MODE (target
) != V16QImode
)
26148 target
= gen_reg_rtx (V16QImode
);
26149 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
26150 if (target
!= operands
[0])
26151 emit_move_insn (operands
[0],
26152 gen_lowpart (GET_MODE (operands
[0]), target
));
26159 /* Shuffle the two input vectors independently. */
26160 t1
= gen_reg_rtx (V16QImode
);
26161 t2
= gen_reg_rtx (V16QImode
);
26162 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
26163 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
26166 /* Then merge them together. The key is whether any given control
26167 element contained a bit set that indicates the second word. */
26168 mask
= operands
[3];
26170 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
26172 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
26173 more shuffle to convert the V2DI input mask into a V4SI
26174 input mask. At which point the masking that expand_int_vcond
26175 will work as desired. */
26176 rtx t3
= gen_reg_rtx (V4SImode
);
26177 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
26178 const0_rtx
, const0_rtx
,
26179 const2_rtx
, const2_rtx
));
26181 maskmode
= V4SImode
;
26185 for (i
= 0; i
< w
; i
++)
26187 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
26188 vt
= force_reg (maskmode
, vt
);
26189 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
26190 NULL_RTX
, 0, OPTAB_DIRECT
);
26192 if (GET_MODE (target
) != mode
)
26193 target
= gen_reg_rtx (mode
);
26195 xops
[1] = gen_lowpart (mode
, t2
);
26196 xops
[2] = gen_lowpart (mode
, t1
);
26197 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
26200 ok
= ix86_expand_int_vcond (xops
);
26202 if (target
!= operands
[0])
26203 emit_move_insn (operands
[0],
26204 gen_lowpart (GET_MODE (operands
[0]), target
));
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case E_V64QImode:
	  unpack = unsigned_p ? gen_avx512bw_zero_extendv32qiv32hi2
			      : gen_avx512bw_sign_extendv32qiv32hi2;
	  halfmode = V32QImode;
	  extract = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
	  break;
	case E_V32QImode:
	  unpack = unsigned_p ? gen_avx2_zero_extendv16qiv16hi2
			      : gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case E_V32HImode:
	  unpack = unsigned_p ? gen_avx512f_zero_extendv16hiv16si2
			      : gen_avx512f_sign_extendv16hiv16si2;
	  halfmode = V16HImode;
	  extract = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
	  break;
	case E_V16HImode:
	  unpack = unsigned_p ? gen_avx2_zero_extendv8hiv8si2
			      : gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case E_V16SImode:
	  unpack = unsigned_p ? gen_avx512f_zero_extendv8siv8di2
			      : gen_avx512f_sign_extendv8siv8di2;
	  halfmode = V8SImode;
	  extract = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
	  break;
	case E_V8SImode:
	  unpack = unsigned_p ? gen_avx2_zero_extendv4siv4di2
			      : gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case E_V16QImode:
	  unpack = unsigned_p ? gen_sse4_1_zero_extendv8qiv8hi2
			      : gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case E_V8HImode:
	  unpack = unsigned_p ? gen_sse4_1_zero_extendv4hiv4si2
			      : gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case E_V4SImode:
	  unpack = unsigned_p ? gen_sse4_1_zero_extendv2siv2di2
			      : gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) >= 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (V1TImode);
	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	  tmp = gen_lowpart (imode, tmp);
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case E_V16QImode:
	  unpack = high_p ? gen_vec_interleave_highv16qi
			  : gen_vec_interleave_lowv16qi;
	  break;
	case E_V8HImode:
	  unpack = high_p ? gen_vec_interleave_highv8hi
			  : gen_vec_interleave_lowv8hi;
	  break;
	case E_V4SImode:
	  unpack = high_p ? gen_vec_interleave_highv4si
			  : gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      rtx tmp2 = gen_reg_rtx (imode);
      emit_insn (unpack (tmp2, src, tmp));
      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
    }
}
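/* Illustrative example for the pre-SSE4.1 path above: to sign-extend
   V8HImode lanes, TMP is computed as (0 > src), i.e. all-ones for
   negative lanes, and interleaving src with TMP yields the wider lanes;
   e.g. the halfword 0xfff4 (-12) interleaved with 0xffff gives the word
   0xfffffff4 (-12).  Zero extension interleaves with zero instead.  */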
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case E_QImode: insn = gen_subqi3_carry; break;
	case E_HImode: insn = gen_subhi3_carry; break;
	case E_SImode: insn = gen_subsi3_carry; break;
	case E_DImode: insn = gen_subdi3_carry; break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case E_QImode: insn = gen_addqi3_carry; break;
	case E_HImode: insn = gen_addhi3_carry; break;
	case E_SImode: insn = gen_addsi3_carry; break;
	case E_DImode: insn = gen_adddi3_carry; break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
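/* Schematic example of what the expansion above produces (operand order
   shown loosely, AT&T syntax): for "r = x + (a < b)" with an unsigned
   comparison, the result is roughly
       cmp  b, a        ; sets the carry flag when a < b
       adc  $0, x       ; folds the carry into x
   and the decrement case uses sbb in the same way.  */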
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and nonoffsetable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      scalar_int_mode imode = int_mode_for_mode (mode).require ();
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (CONST_DOUBLE_P (operand))
	    {
	      const REAL_VALUE_TYPE *r;
	      long l[4];

	      r = CONST_DOUBLE_REAL_VALUE (operand);
	      switch (mode)
		{
		case E_TFmode:
		  real_to_target (l, r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case E_XFmode:
		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
		     long double may not be 80-bit.  */
		  real_to_target (l, r, mode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case E_DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (*r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  machine_mode upper_mode = mode == XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (CONST_DOUBLE_P (operand))
	    {
	      long l[4];

	      real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode);

	      /* real_to_target puts 32-bit pieces in each long.  */
	      parts[0]
		= gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff))
				| ((l[1] & HOST_WIDE_INT_C (0xffffffff))
				   << 32), DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else
		parts[1]
		  = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff))
				  | ((l[3] & HOST_WIDE_INT_C (0xffffffff))
				     << 32), DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
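/* Example of the splitting above (illustrative): on a 32-bit target a
   DImode constant 0x1122334455667788 is returned as two SImode parts,
   parts[0] = 0x55667788 (low word) and parts[1] = 0x11223344 (high
   word); an offsettable MEM is split into the same pieces at offsets
   0 and 4.  */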
26602 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
26603 Return false when normal moves are needed; true when all required
26604 insns have been emitted. Operands 2-4 contain the input values
26605 int the correct order; operands 5-7 contain the output values. */
26608 ix86_split_long_move (rtx operands
[])
26613 int collisions
= 0;
26614 machine_mode mode
= GET_MODE (operands
[0]);
26615 bool collisionparts
[4];
26617 /* The DFmode expanders may ask us to move double.
26618 For 64bit target this is single move. By hiding the fact
26619 here we simplify i386.md splitters. */
26620 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
26622 /* Optimize constant pool reference to immediates. This is used by
26623 fp moves, that force all constants to memory to allow combining. */
26625 if (MEM_P (operands
[1])
26626 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
26627 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
26628 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
26629 if (push_operand (operands
[0], VOIDmode
))
26631 operands
[0] = copy_rtx (operands
[0]);
26632 PUT_MODE (operands
[0], word_mode
);
26635 operands
[0] = gen_lowpart (DImode
, operands
[0]);
26636 operands
[1] = gen_lowpart (DImode
, operands
[1]);
26637 emit_move_insn (operands
[0], operands
[1]);
26641 /* The only non-offsettable memory we handle is push. */
26642 if (push_operand (operands
[0], VOIDmode
))
26645 gcc_assert (!MEM_P (operands
[0])
26646 || offsettable_memref_p (operands
[0]));
26648 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
26649 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
26651 /* When emitting push, take care for source operands on the stack. */
26652 if (push
&& MEM_P (operands
[1])
26653 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
26655 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
26657 /* Compensate for the stack decrement by 4. */
26658 if (!TARGET_64BIT
&& nparts
== 3
26659 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
26660 src_base
= plus_constant (Pmode
, src_base
, 4);
26662 /* src_base refers to the stack pointer and is
26663 automatically decreased by emitted push. */
26664 for (i
= 0; i
< nparts
; i
++)
26665 part
[1][i
] = change_address (part
[1][i
],
26666 GET_MODE (part
[1][i
]), src_base
);
26669 /* We need to do copy in the right order in case an address register
26670 of the source overlaps the destination. */
26671 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
26675 for (i
= 0; i
< nparts
; i
++)
26678 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
26679 if (collisionparts
[i
])
26683 /* Collision in the middle part can be handled by reordering. */
26684 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
26686 std::swap (part
[0][1], part
[0][2]);
26687 std::swap (part
[1][1], part
[1][2]);
26689 else if (collisions
== 1
26691 && (collisionparts
[1] || collisionparts
[2]))
26693 if (collisionparts
[1])
26695 std::swap (part
[0][1], part
[0][2]);
26696 std::swap (part
[1][1], part
[1][2]);
26700 std::swap (part
[0][2], part
[0][3]);
26701 std::swap (part
[1][2], part
[1][3]);
26705 /* If there are more collisions, we can't handle it by reordering.
26706 Do an lea to the last part and use only one colliding move. */
26707 else if (collisions
> 1)
26709 rtx base
, addr
, tls_base
= NULL_RTX
;
26713 base
= part
[0][nparts
- 1];
26715 /* Handle the case when the last part isn't valid for lea.
26716 Happens in 64-bit mode storing the 12-byte XFmode. */
26717 if (GET_MODE (base
) != Pmode
)
26718 base
= gen_rtx_REG (Pmode
, REGNO (base
));
26720 addr
= XEXP (part
[1][0], 0);
26721 if (TARGET_TLS_DIRECT_SEG_REFS
)
26723 struct ix86_address parts
;
26724 int ok
= ix86_decompose_address (addr
, &parts
);
26726 if (parts
.seg
== DEFAULT_TLS_SEG_REG
)
26728 /* It is not valid to use %gs: or %fs: in
26729 lea though, so we need to remove it from the
26730 address used for lea and add it to each individual
26731 memory loads instead. */
26732 addr
= copy_rtx (addr
);
26734 while (GET_CODE (*x
) == PLUS
)
26736 for (i
= 0; i
< 2; i
++)
26738 rtx u
= XEXP (*x
, i
);
26739 if (GET_CODE (u
) == ZERO_EXTEND
)
26741 if (GET_CODE (u
) == UNSPEC
26742 && XINT (u
, 1) == UNSPEC_TP
)
26744 tls_base
= XEXP (*x
, i
);
26745 *x
= XEXP (*x
, 1 - i
);
26753 gcc_assert (tls_base
);
26756 emit_insn (gen_rtx_SET (base
, addr
));
26758 base
= gen_rtx_PLUS (GET_MODE (base
), base
, tls_base
);
26759 part
[1][0] = replace_equiv_address (part
[1][0], base
);
26760 for (i
= 1; i
< nparts
; i
++)
26763 base
= copy_rtx (base
);
26764 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
26765 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
26776 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
26777 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
26778 stack_pointer_rtx
, GEN_INT (-4)));
26779 emit_move_insn (part
[0][2], part
[1][2]);
26781 else if (nparts
== 4)
26783 emit_move_insn (part
[0][3], part
[1][3]);
26784 emit_move_insn (part
[0][2], part
[1][2]);
26789 /* In 64bit mode we don't have 32bit push available. In case this is
26790 register, it is OK - we will just use larger counterpart. We also
26791 retype memory - these comes from attempt to avoid REX prefix on
26792 moving of second half of TFmode value. */
26793 if (GET_MODE (part
[1][1]) == SImode
)
26795 switch (GET_CODE (part
[1][1]))
26798 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
26802 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
26806 gcc_unreachable ();
26809 if (GET_MODE (part
[1][0]) == SImode
)
26810 part
[1][0] = part
[1][1];
26813 emit_move_insn (part
[0][1], part
[1][1]);
26814 emit_move_insn (part
[0][0], part
[1][0]);
26818 /* Choose correct order to not overwrite the source before it is copied. */
26819 if ((REG_P (part
[0][0])
26820 && REG_P (part
[1][1])
26821 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
26823 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
26825 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
26827 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
26829 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
26831 operands
[2 + i
] = part
[0][j
];
26832 operands
[6 + i
] = part
[1][j
];
26837 for (i
= 0; i
< nparts
; i
++)
26839 operands
[2 + i
] = part
[0][i
];
26840 operands
[6 + i
] = part
[1][i
];
26844 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
26845 if (optimize_insn_for_size_p ())
26847 for (j
= 0; j
< nparts
- 1; j
++)
26848 if (CONST_INT_P (operands
[6 + j
])
26849 && operands
[6 + j
] != const0_rtx
26850 && REG_P (operands
[2 + j
]))
26851 for (i
= j
; i
< nparts
- 1; i
++)
26852 if (CONST_INT_P (operands
[7 + i
])
26853 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
26854 operands
[7 + i
] = operands
[2 + j
];
26857 for (i
= 0; i
< nparts
; i
++)
26858 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
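/* Example for the helper above (illustrative): a left shift by 2 can be
   emitted as two self-additions, operand += operand twice, which is
   preferred when two adds are cheaper than one shift by a constant on
   the tuned-for CPU and we are not optimizing for size.  */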
26888 ix86_split_ashl (rtx
*operands
, rtx scratch
, machine_mode mode
)
26890 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
26891 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
26892 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
26894 rtx low
[2], high
[2];
26897 if (CONST_INT_P (operands
[2]))
26899 split_double_mode (mode
, operands
, 2, low
, high
);
26900 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
26902 if (count
>= half_width
)
26904 emit_move_insn (high
[0], low
[1]);
26905 emit_move_insn (low
[0], const0_rtx
);
26907 if (count
> half_width
)
26908 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
26912 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
26914 if (!rtx_equal_p (operands
[0], operands
[1]))
26915 emit_move_insn (operands
[0], operands
[1]);
26917 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
26918 ix86_expand_ashl_const (low
[0], count
, mode
);
26923 split_double_mode (mode
, operands
, 1, low
, high
);
26925 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
26927 if (operands
[1] == const1_rtx
)
26929 /* Assuming we've chosen a QImode capable registers, then 1 << N
26930 can be done with two 32/64-bit shifts, no branches, no cmoves. */
26931 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
26933 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
26935 ix86_expand_clear (low
[0]);
26936 ix86_expand_clear (high
[0]);
26937 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
26939 d
= gen_lowpart (QImode
, low
[0]);
26940 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
26941 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
26942 emit_insn (gen_rtx_SET (d
, s
));
26944 d
= gen_lowpart (QImode
, high
[0]);
26945 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
26946 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
26947 emit_insn (gen_rtx_SET (d
, s
));
26950 /* Otherwise, we can get the same results by manually performing
26951 a bit extract operation on bit 5/6, and then performing the two
26952 shifts. The two methods of getting 0/1 into low/high are exactly
26953 the same size. Avoiding the shift in the bit extract case helps
26954 pentium4 a bit; no one else seems to care much either way. */
26957 machine_mode half_mode
;
26958 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
26959 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
26960 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
26961 HOST_WIDE_INT bits
;
26964 if (mode
== DImode
)
26966 half_mode
= SImode
;
26967 gen_lshr3
= gen_lshrsi3
;
26968 gen_and3
= gen_andsi3
;
26969 gen_xor3
= gen_xorsi3
;
26974 half_mode
= DImode
;
26975 gen_lshr3
= gen_lshrdi3
;
26976 gen_and3
= gen_anddi3
;
26977 gen_xor3
= gen_xordi3
;
26981 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
26982 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
26984 x
= gen_lowpart (half_mode
, operands
[2]);
26985 emit_insn (gen_rtx_SET (high
[0], x
));
26987 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
26988 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
26989 emit_move_insn (low
[0], high
[0]);
26990 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
26993 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
26994 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
26998 if (operands
[1] == constm1_rtx
)
27000 /* For -1 << N, we can avoid the shld instruction, because we
27001 know that we're shifting 0...31/63 ones into a -1. */
27002 emit_move_insn (low
[0], constm1_rtx
);
27003 if (optimize_insn_for_size_p ())
27004 emit_move_insn (high
[0], low
[0]);
27006 emit_move_insn (high
[0], constm1_rtx
);
27010 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
27012 if (!rtx_equal_p (operands
[0], operands
[1]))
27013 emit_move_insn (operands
[0], operands
[1]);
27015 split_double_mode (mode
, operands
, 1, low
, high
);
27016 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
27019 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
27021 if (TARGET_CMOVE
&& scratch
)
27023 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
27024 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
27026 ix86_expand_clear (scratch
);
27027 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
27031 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
27032 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
27034 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
27039 ix86_split_ashr (rtx
*operands
, rtx scratch
, machine_mode mode
)
27041 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
27042 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
27043 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
27044 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
27046 rtx low
[2], high
[2];
27049 if (CONST_INT_P (operands
[2]))
27051 split_double_mode (mode
, operands
, 2, low
, high
);
27052 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
27054 if (count
== GET_MODE_BITSIZE (mode
) - 1)
27056 emit_move_insn (high
[0], high
[1]);
27057 emit_insn (gen_ashr3 (high
[0], high
[0],
27058 GEN_INT (half_width
- 1)));
27059 emit_move_insn (low
[0], high
[0]);
27062 else if (count
>= half_width
)
27064 emit_move_insn (low
[0], high
[1]);
27065 emit_move_insn (high
[0], low
[0]);
27066 emit_insn (gen_ashr3 (high
[0], high
[0],
27067 GEN_INT (half_width
- 1)));
27069 if (count
> half_width
)
27070 emit_insn (gen_ashr3 (low
[0], low
[0],
27071 GEN_INT (count
- half_width
)));
27075 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
27077 if (!rtx_equal_p (operands
[0], operands
[1]))
27078 emit_move_insn (operands
[0], operands
[1]);
27080 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
27081 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
27086 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
27088 if (!rtx_equal_p (operands
[0], operands
[1]))
27089 emit_move_insn (operands
[0], operands
[1]);
27091 split_double_mode (mode
, operands
, 1, low
, high
);
27093 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
27094 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
27096 if (TARGET_CMOVE
&& scratch
)
27098 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
27099 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
27101 emit_move_insn (scratch
, high
[0]);
27102 emit_insn (gen_ashr3 (scratch
, scratch
,
27103 GEN_INT (half_width
- 1)));
27104 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
27109 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
27110 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
27112 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
27118 ix86_split_lshr (rtx
*operands
, rtx scratch
, machine_mode mode
)
27120 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
27121 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
27122 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
27123 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
27125 rtx low
[2], high
[2];
27128 if (CONST_INT_P (operands
[2]))
27130 split_double_mode (mode
, operands
, 2, low
, high
);
27131 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
27133 if (count
>= half_width
)
27135 emit_move_insn (low
[0], high
[1]);
27136 ix86_expand_clear (high
[0]);
27138 if (count
> half_width
)
27139 emit_insn (gen_lshr3 (low
[0], low
[0],
27140 GEN_INT (count
- half_width
)));
27144 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
27146 if (!rtx_equal_p (operands
[0], operands
[1]))
27147 emit_move_insn (operands
[0], operands
[1]);
27149 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
27150 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
27155 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
27157 if (!rtx_equal_p (operands
[0], operands
[1]))
27158 emit_move_insn (operands
[0], operands
[1]);
27160 split_double_mode (mode
, operands
, 1, low
, high
);
27162 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
27163 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
27165 if (TARGET_CMOVE
&& scratch
)
27167 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
27168 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
27170 ix86_expand_clear (scratch
);
27171 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
27176 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
27177 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
27179 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx_insn *insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_br_prob_note (insn,
			profile_probability::from_reg_br_prob_base (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx_code_label *
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx_code_label *label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
27215 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
27217 rtx (*gen_add
)(rtx
, rtx
, rtx
)
27218 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
27220 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
27223 /* Zero extend possibly SImode EXP to Pmode register. */
27225 ix86_zero_extend_to_Pmode (rtx exp
)
27227 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
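/* Example (illustrative): for a memcpy expanded with SImode "rep movs",
   scale_counter turns a byte count of 64 into 16 iterations, either by
   folding the constant or by emitting a logical shift right by
   exact_log2 (4) = 2 on the count register.  */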
/* Copy the address to a Pmode register.  This is used for x32 to
   truncate DImode TLS address to a SImode register.  */

static rtx
ix86_copy_addr_to_reg (rtx addr)
{
  rtx reg;
  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
    {
      reg = copy_addr_to_reg (addr);
      REG_POINTER (reg) = 1;
      return reg;
    }
  else
    {
      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
      reg = copy_to_mode_reg (DImode, addr);
      REG_POINTER (reg) = 1;
      return gen_rtx_SUBREG (SImode, reg, 0);
    }
}
27285 /* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR
27286 to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT
27287 specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set
27288 memory by VALUE (supposed to be in MODE).
27290 The size is rounded down to whole number of chunk size moved at once.
27291 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
27295 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
27296 rtx destptr
, rtx srcptr
, rtx value
,
27297 rtx count
, machine_mode mode
, int unroll
,
27298 int expected_size
, bool issetmem
)
27300 rtx_code_label
*out_label
, *top_label
;
27302 machine_mode iter_mode
= counter_mode (count
);
27303 int piece_size_n
= GET_MODE_SIZE (mode
) * unroll
;
27304 rtx piece_size
= GEN_INT (piece_size_n
);
27305 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
27309 top_label
= gen_label_rtx ();
27310 out_label
= gen_label_rtx ();
27311 iter
= gen_reg_rtx (iter_mode
);
27313 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
27314 NULL
, 1, OPTAB_DIRECT
);
27315 /* Those two should combine. */
27316 if (piece_size
== const1_rtx
)
27318 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
27320 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
27322 emit_move_insn (iter
, const0_rtx
);
27324 emit_label (top_label
);
27326 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
27328 /* This assert could be relaxed - in this case we'll need to compute
27329 smallest power of two, containing in PIECE_SIZE_N and pass it to
27331 gcc_assert ((piece_size_n
& (piece_size_n
- 1)) == 0);
27332 destmem
= offset_address (destmem
, tmp
, piece_size_n
);
27333 destmem
= adjust_address (destmem
, mode
, 0);
27337 srcmem
= offset_address (srcmem
, copy_rtx (tmp
), piece_size_n
);
27338 srcmem
= adjust_address (srcmem
, mode
, 0);
27340 /* When unrolling for chips that reorder memory reads and writes,
27341 we can save registers by using single temporary.
27342 Also using 4 temporaries is overkill in 32bit mode. */
27343 if (!TARGET_64BIT
&& 0)
27345 for (i
= 0; i
< unroll
; i
++)
27350 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
27352 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
27354 emit_move_insn (destmem
, srcmem
);
27360 gcc_assert (unroll
<= 4);
27361 for (i
= 0; i
< unroll
; i
++)
27363 tmpreg
[i
] = gen_reg_rtx (mode
);
27367 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
27369 emit_move_insn (tmpreg
[i
], srcmem
);
27371 for (i
= 0; i
< unroll
; i
++)
27376 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
27378 emit_move_insn (destmem
, tmpreg
[i
]);
27383 for (i
= 0; i
< unroll
; i
++)
27387 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
27388 emit_move_insn (destmem
, value
);
27391 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
27392 true, OPTAB_LIB_WIDEN
);
27394 emit_move_insn (iter
, tmp
);
27396 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
27398 if (expected_size
!= -1)
27400 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
27401 if (expected_size
== 0)
27403 else if (expected_size
> REG_BR_PROB_BASE
)
27404 predict_jump (REG_BR_PROB_BASE
- 1);
27406 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
27409 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
27410 iter
= ix86_zero_extend_to_Pmode (iter
);
27411 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
27412 true, OPTAB_LIB_WIDEN
);
27413 if (tmp
!= destptr
)
27414 emit_move_insn (destptr
, tmp
);
27417 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
27418 true, OPTAB_LIB_WIDEN
);
27420 emit_move_insn (srcptr
, tmp
);
27422 emit_label (out_label
);
27425 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
27426 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
27427 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
27428 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
27429 ORIG_VALUE is the original value passed to memset to fill the memory with.
27430 Other arguments have same meaning as for previous function. */
27433 expand_set_or_movmem_via_rep (rtx destmem
, rtx srcmem
,
27434 rtx destptr
, rtx srcptr
, rtx value
, rtx orig_value
,
27436 machine_mode mode
, bool issetmem
)
27441 HOST_WIDE_INT rounded_count
;
27443 /* If possible, it is shorter to use rep movs.
27444 TODO: Maybe it is better to move this logic to decide_alg. */
27445 if (mode
== QImode
&& CONST_INT_P (count
) && !(INTVAL (count
) & 3)
27446 && (!issetmem
|| orig_value
== const0_rtx
))
27449 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
27450 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
27452 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
,
27453 GET_MODE_SIZE (mode
)));
27454 if (mode
!= QImode
)
27456 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
27457 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
27458 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
27461 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
27462 if ((!issetmem
|| orig_value
== const0_rtx
) && CONST_INT_P (count
))
27465 = ROUND_DOWN (INTVAL (count
), (HOST_WIDE_INT
) GET_MODE_SIZE (mode
));
27466 destmem
= shallow_copy_rtx (destmem
);
27467 set_mem_size (destmem
, rounded_count
);
27469 else if (MEM_SIZE_KNOWN_P (destmem
))
27470 clear_mem_size (destmem
);
27474 value
= force_reg (mode
, gen_lowpart (mode
, value
));
27475 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
27479 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
27480 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
27481 if (mode
!= QImode
)
27483 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
27484 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
27485 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
27488 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
27489 if (CONST_INT_P (count
))
27492 = ROUND_DOWN (INTVAL (count
), (HOST_WIDE_INT
) GET_MODE_SIZE (mode
));
27493 srcmem
= shallow_copy_rtx (srcmem
);
27494 set_mem_size (srcmem
, rounded_count
);
27498 if (MEM_SIZE_KNOWN_P (srcmem
))
27499 clear_mem_size (srcmem
);
27501 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRCMEM is passed by pointer so it can be updated on return.
   Return value is the updated DESTMEM.  */
27511 emit_memmov (rtx destmem
, rtx
*srcmem
, rtx destptr
, rtx srcptr
,
27512 HOST_WIDE_INT size_to_move
)
27514 rtx dst
= destmem
, src
= *srcmem
, adjust
, tempreg
;
27515 enum insn_code code
;
27516 machine_mode move_mode
;
  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 less than SIZE_TO_MOVE and halve
     it until a move of such size is supported.  */
27522 piece_size
= 1 << floor_log2 (size_to_move
);
27523 while (!int_mode_for_size (piece_size
* BITS_PER_UNIT
, 0).exists (&move_mode
)
27524 || (code
= optab_handler (mov_optab
, move_mode
)) == CODE_FOR_nothing
)
27526 gcc_assert (piece_size
> 1);
27530 /* Find the corresponding vector mode with the same size as MOVE_MODE.
27531 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
27532 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
27534 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
27535 if (!mode_for_vector (word_mode
, nunits
).exists (&move_mode
)
27536 || (code
= optab_handler (mov_optab
, move_mode
)) == CODE_FOR_nothing
)
27538 move_mode
= word_mode
;
27539 piece_size
= GET_MODE_SIZE (move_mode
);
27540 code
= optab_handler (mov_optab
, move_mode
);
27543 gcc_assert (code
!= CODE_FOR_nothing
);
27545 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
27546 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
, 0);
27548 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
27549 gcc_assert (size_to_move
% piece_size
== 0);
27550 adjust
= GEN_INT (piece_size
);
27551 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
27553 /* We move from memory to memory, so we'll need to do it via
27554 a temporary register. */
27555 tempreg
= gen_reg_rtx (move_mode
);
27556 emit_insn (GEN_FCN (code
) (tempreg
, src
));
27557 emit_insn (GEN_FCN (code
) (dst
, tempreg
));
27559 emit_move_insn (destptr
,
27560 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
27561 emit_move_insn (srcptr
,
27562 gen_rtx_PLUS (Pmode
, copy_rtx (srcptr
), adjust
));
27564 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
27566 src
= adjust_automodify_address_nv (src
, move_mode
, srcptr
,
27570 /* Update DST and SRC rtx. */
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
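/* Illustration (not part of the compiler, excluded from compilation): a
   host-side C sketch of the constant-count strategy used below -- the
   remaining count & (max_size - 1) bytes are copied by testing each
   power-of-two bit of the residue, so at most log2 (max_size) moves are
   needed.  The helper name copy_tail_example is hypothetical.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
copy_tail_example (unsigned char *dst, const unsigned char *src,
		   size_t count, size_t max_size)
{
  size_t tail = count & (max_size - 1);	 /* what the main loop left over */
  size_t off = 0;
  for (size_t piece = max_size >> 1; piece != 0; piece >>= 1)
    if (tail & piece)
      {
	memcpy (dst + off, src + off, piece);  /* one move of PIECE bytes */
	off += piece;
      }
}
#endif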
27577 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
27578 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
27581 if (CONST_INT_P (count
))
27583 HOST_WIDE_INT countval
= INTVAL (count
);
27584 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
27591 for (i
= max_size
; i
>= 1; i
>>= 1)
27593 if (epilogue_size
& i
)
27594 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
27600 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
27601 count
, 1, OPTAB_DIRECT
);
27602 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
27603 count
, QImode
, 1, 4, false);
  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
27611 if (TARGET_SINGLE_STRINGOP
)
27615 rtx_code_label
*label
= ix86_expand_aligntest (count
, 4, true);
27616 src
= change_address (srcmem
, SImode
, srcptr
);
27617 dest
= change_address (destmem
, SImode
, destptr
);
27618 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
27619 emit_label (label
);
27620 LABEL_NUSES (label
) = 1;
27624 rtx_code_label
*label
= ix86_expand_aligntest (count
, 2, true);
27625 src
= change_address (srcmem
, HImode
, srcptr
);
27626 dest
= change_address (destmem
, HImode
, destptr
);
27627 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
27628 emit_label (label
);
27629 LABEL_NUSES (label
) = 1;
27633 rtx_code_label
*label
= ix86_expand_aligntest (count
, 1, true);
27634 src
= change_address (srcmem
, QImode
, srcptr
);
27635 dest
= change_address (destmem
, QImode
, destptr
);
27636 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
27637 emit_label (label
);
27638 LABEL_NUSES (label
) = 1;
27643 rtx offset
= force_reg (Pmode
, const0_rtx
);
27648 rtx_code_label
*label
= ix86_expand_aligntest (count
, 4, true);
27649 src
= change_address (srcmem
, SImode
, srcptr
);
27650 dest
= change_address (destmem
, SImode
, destptr
);
27651 emit_move_insn (dest
, src
);
27652 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
27653 true, OPTAB_LIB_WIDEN
);
27655 emit_move_insn (offset
, tmp
);
27656 emit_label (label
);
27657 LABEL_NUSES (label
) = 1;
27661 rtx_code_label
*label
= ix86_expand_aligntest (count
, 2, true);
27662 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
27663 src
= change_address (srcmem
, HImode
, tmp
);
27664 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
27665 dest
= change_address (destmem
, HImode
, tmp
);
27666 emit_move_insn (dest
, src
);
27667 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
27668 true, OPTAB_LIB_WIDEN
);
27670 emit_move_insn (offset
, tmp
);
27671 emit_label (label
);
27672 LABEL_NUSES (label
) = 1;
27676 rtx_code_label
*label
= ix86_expand_aligntest (count
, 1, true);
27677 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
27678 src
= change_address (srcmem
, QImode
, tmp
);
27679 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
27680 dest
= change_address (destmem
, QImode
, tmp
);
27681 emit_move_insn (dest
, src
);
27682 emit_label (label
);
27683 LABEL_NUSES (label
) = 1;
/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
   with value PROMOTED_VAL.
   Return value is the updated DST.  */
27693 emit_memset (rtx destmem
, rtx destptr
, rtx promoted_val
,
27694 HOST_WIDE_INT size_to_move
)
27696 rtx dst
= destmem
, adjust
;
27697 enum insn_code code
;
27698 machine_mode move_mode
;
  /* Pick the mode in which to perform the stores: normally the mode of
     PROMOTED_VAL, reduced when SIZE_TO_MOVE is smaller than that mode.  */
27704 move_mode
= GET_MODE (promoted_val
);
27705 if (move_mode
== VOIDmode
)
27706 move_mode
= QImode
;
27707 if (size_to_move
< GET_MODE_SIZE (move_mode
))
27709 unsigned int move_bits
= size_to_move
* BITS_PER_UNIT
;
27710 move_mode
= int_mode_for_size (move_bits
, 0).require ();
27711 promoted_val
= gen_lowpart (move_mode
, promoted_val
);
27713 piece_size
= GET_MODE_SIZE (move_mode
);
27714 code
= optab_handler (mov_optab
, move_mode
);
27715 gcc_assert (code
!= CODE_FOR_nothing
&& promoted_val
!= NULL_RTX
);
27717 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
, 0);
27719 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
27720 gcc_assert (size_to_move
% piece_size
== 0);
27721 adjust
= GEN_INT (piece_size
);
27722 for (i
= 0; i
< size_to_move
; i
+= piece_size
)
27724 if (piece_size
<= GET_MODE_SIZE (word_mode
))
27726 emit_insn (gen_strset (destptr
, dst
, promoted_val
));
27727 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
27732 emit_insn (GEN_FCN (code
) (dst
, promoted_val
));
27734 emit_move_insn (destptr
,
27735 gen_rtx_PLUS (Pmode
, copy_rtx (destptr
), adjust
));
27737 dst
= adjust_automodify_address_nv (dst
, move_mode
, destptr
,
27741 /* Update DST rtx. */
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
27746 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
27747 rtx count
, int max_size
)
27750 expand_simple_binop (counter_mode (count
), AND
, count
,
27751 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
27752 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
27753 gen_lowpart (QImode
, value
), count
, QImode
,
27754 1, max_size
/ 2, true);
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
27759 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx vec_value
,
27760 rtx count
, int max_size
)
27764 if (CONST_INT_P (count
))
27766 HOST_WIDE_INT countval
= INTVAL (count
);
27767 HOST_WIDE_INT epilogue_size
= countval
% max_size
;
      /* For now MAX_SIZE should be a power of 2.  This assert could be
	 relaxed, but it'll require a bit more complicated epilogue
	 expanding.  */
      gcc_assert ((max_size & (max_size - 1)) == 0);
27774 for (i
= max_size
; i
>= 1; i
>>= 1)
27776 if (epilogue_size
& i
)
27778 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
27779 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
27781 destmem
= emit_memset (destmem
, destptr
, value
, i
);
27788 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
27793 rtx_code_label
*label
= ix86_expand_aligntest (count
, 16, true);
27796 dest
= change_address (destmem
, DImode
, destptr
);
27797 emit_insn (gen_strset (destptr
, dest
, value
));
27798 dest
= adjust_automodify_address_nv (dest
, DImode
, destptr
, 8);
27799 emit_insn (gen_strset (destptr
, dest
, value
));
27803 dest
= change_address (destmem
, SImode
, destptr
);
27804 emit_insn (gen_strset (destptr
, dest
, value
));
27805 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 4);
27806 emit_insn (gen_strset (destptr
, dest
, value
));
27807 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 8);
27808 emit_insn (gen_strset (destptr
, dest
, value
));
27809 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 12);
27810 emit_insn (gen_strset (destptr
, dest
, value
));
27812 emit_label (label
);
27813 LABEL_NUSES (label
) = 1;
27817 rtx_code_label
*label
= ix86_expand_aligntest (count
, 8, true);
27820 dest
= change_address (destmem
, DImode
, destptr
);
27821 emit_insn (gen_strset (destptr
, dest
, value
));
27825 dest
= change_address (destmem
, SImode
, destptr
);
27826 emit_insn (gen_strset (destptr
, dest
, value
));
27827 dest
= adjust_automodify_address_nv (dest
, SImode
, destptr
, 4);
27828 emit_insn (gen_strset (destptr
, dest
, value
));
27830 emit_label (label
);
27831 LABEL_NUSES (label
) = 1;
27835 rtx_code_label
*label
= ix86_expand_aligntest (count
, 4, true);
27836 dest
= change_address (destmem
, SImode
, destptr
);
27837 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
27838 emit_label (label
);
27839 LABEL_NUSES (label
) = 1;
27843 rtx_code_label
*label
= ix86_expand_aligntest (count
, 2, true);
27844 dest
= change_address (destmem
, HImode
, destptr
);
27845 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
27846 emit_label (label
);
27847 LABEL_NUSES (label
) = 1;
27851 rtx_code_label
*label
= ix86_expand_aligntest (count
, 1, true);
27852 dest
= change_address (destmem
, QImode
, destptr
);
27853 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
27854 emit_label (label
);
27855 LABEL_NUSES (label
) = 1;
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
   DESTMEM to align it to DESIRED_ALIGNMENT.  Original alignment is ALIGN.
   Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
   ignored.
   Return value is updated DESTMEM.  */
27865 expand_set_or_movmem_prologue (rtx destmem
, rtx srcmem
,
27866 rtx destptr
, rtx srcptr
, rtx value
,
27867 rtx vec_value
, rtx count
, int align
,
27868 int desired_alignment
, bool issetmem
)
27871 for (i
= 1; i
< desired_alignment
; i
<<= 1)
27875 rtx_code_label
*label
= ix86_expand_aligntest (destptr
, i
, false);
27878 if (vec_value
&& i
> GET_MODE_SIZE (GET_MODE (value
)))
27879 destmem
= emit_memset (destmem
, destptr
, vec_value
, i
);
27881 destmem
= emit_memset (destmem
, destptr
, value
, i
);
27884 destmem
= emit_memmov (destmem
, &srcmem
, destptr
, srcptr
, i
);
27885 ix86_adjust_counter (count
, i
);
27886 emit_label (label
);
27887 LABEL_NUSES (label
) = 1;
27888 set_mem_align (destmem
, i
* 2 * BITS_PER_UNIT
);
/* Test if COUNT&SIZE is nonzero and if so, expand movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jump to DONE_LABEL.  */
27898 expand_small_movmem_or_setmem (rtx destmem
, rtx srcmem
,
27899 rtx destptr
, rtx srcptr
,
27900 rtx value
, rtx vec_value
,
27901 rtx count
, int size
,
27902 rtx done_label
, bool issetmem
)
27904 rtx_code_label
*label
= ix86_expand_aligntest (count
, size
, false);
27905 machine_mode mode
= int_mode_for_size (size
* BITS_PER_UNIT
, 1).else_blk ();
27909 /* If we do not have vector value to copy, we must reduce size. */
27914 if (GET_MODE (value
) == VOIDmode
&& size
> 8)
27916 else if (GET_MODE_SIZE (mode
) > GET_MODE_SIZE (GET_MODE (value
)))
27917 mode
= GET_MODE (value
);
27920 mode
= GET_MODE (vec_value
), value
= vec_value
;
27924 /* Choose appropriate vector mode. */
27926 mode
= TARGET_AVX
? V32QImode
: TARGET_SSE
? V16QImode
: DImode
;
27927 else if (size
>= 16)
27928 mode
= TARGET_SSE
? V16QImode
: DImode
;
27929 srcmem
= change_address (srcmem
, mode
, srcptr
);
27931 destmem
= change_address (destmem
, mode
, destptr
);
27932 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
27933 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
27934 for (n
= 0; n
* GET_MODE_SIZE (mode
) < size
; n
++)
27937 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
27940 emit_move_insn (destmem
, srcmem
);
27941 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
27943 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
27946 destmem
= offset_address (destmem
, count
, 1);
27947 destmem
= offset_address (destmem
, GEN_INT (-2 * size
),
27948 GET_MODE_SIZE (mode
));
27951 srcmem
= offset_address (srcmem
, count
, 1);
27952 srcmem
= offset_address (srcmem
, GEN_INT (-2 * size
),
27953 GET_MODE_SIZE (mode
));
27955 for (n
= 0; n
* GET_MODE_SIZE (mode
) < size
; n
++)
27958 emit_move_insn (destmem
, gen_lowpart (mode
, value
));
27961 emit_move_insn (destmem
, srcmem
);
27962 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
27964 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
27966 emit_jump_insn (gen_jump (done_label
));
27969 emit_label (label
);
27970 LABEL_NUSES (label
) = 1;
/* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
   and get ready for the main memcpy loop by copying the initial
   DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
   DESTPTR/SRCPTR/COUNT in a way that lets us proceed with a loop copying SIZE
   bytes at once.  Do moves in MODE.
   DONE_LABEL is a label after the whole copying sequence.  The label is created
   on demand if *DONE_LABEL is NULL.
   MIN_SIZE is minimal size of block copied.  This value gets adjusted for new
   bounds after the initial copies.

   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
   we will dispatch to a library call for large blocks.

   In pseudocode we do:

   if (COUNT < SIZE)
     {
       Assume that SIZE is 4.  Bigger sizes are handled analogously.
       if (COUNT & 4)
	 {
	   copy 4 bytes from SRCPTR to DESTPTR
	   copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
	   goto done_label
	 }
       if (!COUNT)
	 goto done_label;
       copy 1 byte from SRCPTR to DESTPTR
       if (COUNT & 2)
	 {
	   copy 2 bytes from SRCPTR to DESTPTR
	   copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
	 }
     }
   else
     {
       copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
       copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT - SIZE

       OLD_DESTPTR = DESTPTR;
       Align DESTPTR up to DESIRED_ALIGN
       SRCPTR += DESTPTR - OLD_DESTPTR
       COUNT -= DESTPTR - OLD_DESTPTR
       if (DYNAMIC_CHECK)
	 Round COUNT down to multiple of SIZE
       << optional caller supplied zero size guard is here >>
       << optional caller supplied dynamic check is here >>
       << caller supplied main copy loop is here >>
     }
   done_label:
  */
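/* Illustration (not part of the compiler, excluded from compilation): the
   key trick of the small-block path described above.  A block of N bytes
   with SIZE <= N < 2*SIZE is handled by just two possibly overlapping
   SIZE-byte moves, one starting at the beginning and one ending exactly at
   the end of the block.  The helper name copy_small_example is hypothetical
   and assumes non-overlapping SRC and DST, as memcpy does.  */
#if 0
#include <stddef.h>
#include <string.h>

static void
copy_small_example (unsigned char *dst, const unsigned char *src,
		    size_t n, size_t size)
{
  /* Precondition: size <= n && n < 2 * size.  */
  memcpy (dst, src, size);			  /* first SIZE bytes */
  memcpy (dst + n - size, src + n - size, size);  /* last SIZE bytes */
}
#endif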
28024 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem
, rtx srcmem
,
28025 rtx
*destptr
, rtx
*srcptr
,
28027 rtx value
, rtx vec_value
,
28029 rtx_code_label
**done_label
,
28033 unsigned HOST_WIDE_INT
*min_size
,
28034 bool dynamic_check
,
28037 rtx_code_label
*loop_label
= NULL
, *label
;
28040 int prolog_size
= 0;
  /* Choose proper value to copy.  */
28044 if (issetmem
&& VECTOR_MODE_P (mode
))
28045 mode_value
= vec_value
;
28047 mode_value
= value
;
28048 gcc_assert (GET_MODE_SIZE (mode
) <= size
);
28050 /* See if block is big or small, handle small blocks. */
28051 if (!CONST_INT_P (*count
) && *min_size
< (unsigned HOST_WIDE_INT
)size
)
28054 loop_label
= gen_label_rtx ();
28057 *done_label
= gen_label_rtx ();
28059 emit_cmp_and_jump_insns (*count
, GEN_INT (size2
), GE
, 0, GET_MODE (*count
),
28063 /* Handle sizes > 3. */
28064 for (;size2
> 2; size2
>>= 1)
28065 expand_small_movmem_or_setmem (destmem
, srcmem
,
28069 size2
, *done_label
, issetmem
);
28070 /* Nothing to copy? Jump to DONE_LABEL if so */
28071 emit_cmp_and_jump_insns (*count
, const0_rtx
, EQ
, 0, GET_MODE (*count
),
28074 /* Do a byte copy. */
28075 destmem
= change_address (destmem
, QImode
, *destptr
);
28077 emit_move_insn (destmem
, gen_lowpart (QImode
, value
));
28080 srcmem
= change_address (srcmem
, QImode
, *srcptr
);
28081 emit_move_insn (destmem
, srcmem
);
28084 /* Handle sizes 2 and 3. */
28085 label
= ix86_expand_aligntest (*count
, 2, false);
28086 destmem
= change_address (destmem
, HImode
, *destptr
);
28087 destmem
= offset_address (destmem
, *count
, 1);
28088 destmem
= offset_address (destmem
, GEN_INT (-2), 2);
28090 emit_move_insn (destmem
, gen_lowpart (HImode
, value
));
28093 srcmem
= change_address (srcmem
, HImode
, *srcptr
);
28094 srcmem
= offset_address (srcmem
, *count
, 1);
28095 srcmem
= offset_address (srcmem
, GEN_INT (-2), 2);
28096 emit_move_insn (destmem
, srcmem
);
28099 emit_label (label
);
28100 LABEL_NUSES (label
) = 1;
28101 emit_jump_insn (gen_jump (*done_label
));
28105 gcc_assert (*min_size
>= (unsigned HOST_WIDE_INT
)size
28106 || UINTVAL (*count
) >= (unsigned HOST_WIDE_INT
)size
);
28108 /* Start memcpy for COUNT >= SIZE. */
28111 emit_label (loop_label
);
28112 LABEL_NUSES (loop_label
) = 1;
28115 /* Copy first desired_align bytes. */
28117 srcmem
= change_address (srcmem
, mode
, *srcptr
);
28118 destmem
= change_address (destmem
, mode
, *destptr
);
28119 modesize
= GEN_INT (GET_MODE_SIZE (mode
));
28120 for (n
= 0; prolog_size
< desired_align
- align
; n
++)
28123 emit_move_insn (destmem
, mode_value
);
28126 emit_move_insn (destmem
, srcmem
);
28127 srcmem
= offset_address (srcmem
, modesize
, GET_MODE_SIZE (mode
));
28129 destmem
= offset_address (destmem
, modesize
, GET_MODE_SIZE (mode
));
28130 prolog_size
+= GET_MODE_SIZE (mode
);
28134 /* Copy last SIZE bytes. */
28135 destmem
= offset_address (destmem
, *count
, 1);
28136 destmem
= offset_address (destmem
,
28137 GEN_INT (-size
- prolog_size
),
28140 emit_move_insn (destmem
, mode_value
);
28143 srcmem
= offset_address (srcmem
, *count
, 1);
28144 srcmem
= offset_address (srcmem
,
28145 GEN_INT (-size
- prolog_size
),
28147 emit_move_insn (destmem
, srcmem
);
28149 for (n
= 1; n
* GET_MODE_SIZE (mode
) < size
; n
++)
28151 destmem
= offset_address (destmem
, modesize
, 1);
28153 emit_move_insn (destmem
, mode_value
);
28156 srcmem
= offset_address (srcmem
, modesize
, 1);
28157 emit_move_insn (destmem
, srcmem
);
28161 /* Align destination. */
28162 if (desired_align
> 1 && desired_align
> align
)
28164 rtx saveddest
= *destptr
;
28166 gcc_assert (desired_align
<= size
);
28167 /* Align destptr up, place it to new register. */
28168 *destptr
= expand_simple_binop (GET_MODE (*destptr
), PLUS
, *destptr
,
28169 GEN_INT (prolog_size
),
28170 NULL_RTX
, 1, OPTAB_DIRECT
);
28171 if (REG_P (*destptr
) && REG_P (saveddest
) && REG_POINTER (saveddest
))
28172 REG_POINTER (*destptr
) = 1;
28173 *destptr
= expand_simple_binop (GET_MODE (*destptr
), AND
, *destptr
,
28174 GEN_INT (-desired_align
),
28175 *destptr
, 1, OPTAB_DIRECT
);
28176 /* See how many bytes we skipped. */
28177 saveddest
= expand_simple_binop (GET_MODE (*destptr
), MINUS
, saveddest
,
28179 saveddest
, 1, OPTAB_DIRECT
);
28180 /* Adjust srcptr and count. */
28182 *srcptr
= expand_simple_binop (GET_MODE (*srcptr
), MINUS
, *srcptr
,
28183 saveddest
, *srcptr
, 1, OPTAB_DIRECT
);
28184 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
28185 saveddest
, *count
, 1, OPTAB_DIRECT
);
28186 /* We copied at most size + prolog_size. */
28187 if (*min_size
> (unsigned HOST_WIDE_INT
)(size
+ prolog_size
))
28189 = ROUND_DOWN (*min_size
- size
, (unsigned HOST_WIDE_INT
)size
);
      /* Our loops always round down the block size, but for dispatch to
	 the library we need the precise value.  */
28196 *count
= expand_simple_binop (GET_MODE (*count
), AND
, *count
,
28197 GEN_INT (-size
), *count
, 1, OPTAB_DIRECT
);
28201 gcc_assert (prolog_size
== 0);
28202 /* Decrease count, so we won't end up copying last word twice. */
28203 if (!CONST_INT_P (*count
))
28204 *count
= expand_simple_binop (GET_MODE (*count
), PLUS
, *count
,
28205 constm1_rtx
, *count
, 1, OPTAB_DIRECT
);
28207 *count
= GEN_INT (ROUND_DOWN (UINTVAL (*count
) - 1,
28208 (unsigned HOST_WIDE_INT
)size
));
28210 *min_size
= ROUND_DOWN (*min_size
- 1, (unsigned HOST_WIDE_INT
)size
);
/* This function is like the previous one, except here we know how many bytes
   need to be copied.  That allows us to update alignment not only of DST, which
   is returned, but also of SRC, which is passed as a pointer for that
   reason.  */
28220 expand_set_or_movmem_constant_prologue (rtx dst
, rtx
*srcp
, rtx destreg
,
28221 rtx srcreg
, rtx value
, rtx vec_value
,
28222 int desired_align
, int align_bytes
,
28226 rtx orig_dst
= dst
;
28227 rtx orig_src
= NULL
;
28228 int piece_size
= 1;
28229 int copied_bytes
= 0;
28233 gcc_assert (srcp
!= NULL
);
28238 for (piece_size
= 1;
28239 piece_size
<= desired_align
&& copied_bytes
< align_bytes
;
28242 if (align_bytes
& piece_size
)
28246 if (vec_value
&& piece_size
> GET_MODE_SIZE (GET_MODE (value
)))
28247 dst
= emit_memset (dst
, destreg
, vec_value
, piece_size
);
28249 dst
= emit_memset (dst
, destreg
, value
, piece_size
);
28252 dst
= emit_memmov (dst
, &src
, destreg
, srcreg
, piece_size
);
28253 copied_bytes
+= piece_size
;
28256 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
28257 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
28258 if (MEM_SIZE_KNOWN_P (orig_dst
))
28259 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
28263 int src_align_bytes
= get_mem_align_offset (src
, desired_align
28265 if (src_align_bytes
>= 0)
28266 src_align_bytes
= desired_align
- src_align_bytes
;
28267 if (src_align_bytes
>= 0)
28269 unsigned int src_align
;
28270 for (src_align
= desired_align
; src_align
>= 2; src_align
>>= 1)
28272 if ((src_align_bytes
& (src_align
- 1))
28273 == (align_bytes
& (src_align
- 1)))
28276 if (src_align
> (unsigned int) desired_align
)
28277 src_align
= desired_align
;
28278 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
28279 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
28281 if (MEM_SIZE_KNOWN_P (orig_src
))
28282 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
28289 /* Return true if ALG can be used in current context.
28290 Assume we expand memset if MEMSET is true. */
28292 alg_usable_p (enum stringop_alg alg
, bool memset
, bool have_as
)
28294 if (alg
== no_stringop
)
28296 if (alg
== vector_loop
)
28297 return TARGET_SSE
|| TARGET_AVX
;
28298 /* Algorithms using the rep prefix want at least edi and ecx;
28299 additionally, memset wants eax and memcpy wants esi. Don't
28300 consider such algorithms if the user has appropriated those
28301 registers for their own purposes, or if we have a non-default
28302 address space, since some string insns cannot override the segment. */
28303 if (alg
== rep_prefix_1_byte
28304 || alg
== rep_prefix_4_byte
28305 || alg
== rep_prefix_8_byte
)
28309 if (fixed_regs
[CX_REG
]
28310 || fixed_regs
[DI_REG
]
28311 || (memset
? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]))
28317 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
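/* Illustration (not part of the compiler, excluded from compilation): a
   simplified stand-in for how the per-CPU stringop tables drive the choice
   made below.  Entries are ordered by growing maximal block size; the first
   entry whose MAX covers the expected size (or whose MAX is -1, meaning "no
   limit") supplies the algorithm.  The names in this sketch are hypothetical
   and do not mirror the real stringop_algs layout exactly.  */
#if 0
enum alg_example { alg_libcall, alg_loop, alg_rep_byte, alg_rep_word };

struct alg_entry_example { long max; enum alg_example alg; };

static enum alg_example
pick_alg_example (const struct alg_entry_example *table, int n,
		  long expected_size)
{
  for (int i = 0; i < n; i++)
    if (table[i].max == -1 || table[i].max >= expected_size)
      return table[i].alg;
  return alg_libcall;	/* too large to be worth inlining */
}
#endif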
28318 static enum stringop_alg
28319 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
,
28320 unsigned HOST_WIDE_INT min_size
, unsigned HOST_WIDE_INT max_size
,
28321 bool memset
, bool zero_memset
, bool have_as
,
28322 int *dynamic_check
, bool *noalign
, bool recur
)
28324 const struct stringop_algs
*algs
;
28325 bool optimize_for_speed
;
28327 const struct processor_costs
*cost
;
28329 bool any_alg_usable_p
= false;
28332 *dynamic_check
= -1;
28334 /* Even if the string operation call is cold, we still might spend a lot
28335 of time processing large blocks. */
28336 if (optimize_function_for_size_p (cfun
)
28337 || (optimize_insn_for_size_p ()
28339 || (expected_size
!= -1 && expected_size
< 256))))
28340 optimize_for_speed
= false;
28342 optimize_for_speed
= true;
28344 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
28346 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
28348 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
28350 /* See maximal size for user defined algorithm. */
28351 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
28353 enum stringop_alg candidate
= algs
->size
[i
].alg
;
28354 bool usable
= alg_usable_p (candidate
, memset
, have_as
);
28355 any_alg_usable_p
|= usable
;
28357 if (candidate
!= libcall
&& candidate
&& usable
)
28358 max
= algs
->size
[i
].max
;
  /* If expected size is not known but max size is small enough
     so inline version is a win, set expected size into
     the range.  */
28364 if (((max
> 1 && (unsigned HOST_WIDE_INT
) max
>= max_size
) || max
== -1)
28365 && expected_size
== -1)
28366 expected_size
= min_size
/ 2 + max_size
/ 2;
28368 /* If user specified the algorithm, honor it if possible. */
28369 if (ix86_stringop_alg
!= no_stringop
28370 && alg_usable_p (ix86_stringop_alg
, memset
, have_as
))
28371 return ix86_stringop_alg
;
28372 /* rep; movq or rep; movl is the smallest variant. */
28373 else if (!optimize_for_speed
)
28376 if (!count
|| (count
& 3) || (memset
&& !zero_memset
))
28377 return alg_usable_p (rep_prefix_1_byte
, memset
, have_as
)
28378 ? rep_prefix_1_byte
: loop_1_byte
;
28380 return alg_usable_p (rep_prefix_4_byte
, memset
, have_as
)
28381 ? rep_prefix_4_byte
: loop
;
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
28385 else if (expected_size
!= -1 && expected_size
< 4)
28386 return loop_1_byte
;
28387 else if (expected_size
!= -1)
28389 enum stringop_alg alg
= libcall
;
28390 bool alg_noalign
= false;
28391 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
28393 /* We get here if the algorithms that were not libcall-based
28394 were rep-prefix based and we are unable to use rep prefixes
28395 based on global register usage. Break out of the loop and
28396 use the heuristic below. */
28397 if (algs
->size
[i
].max
== 0)
28399 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
28401 enum stringop_alg candidate
= algs
->size
[i
].alg
;
28403 if (candidate
!= libcall
28404 && alg_usable_p (candidate
, memset
, have_as
))
28407 alg_noalign
= algs
->size
[i
].noalign
;
28409 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
28410 last non-libcall inline algorithm. */
28411 if (TARGET_INLINE_ALL_STRINGOPS
)
28413 /* When the current size is best to be copied by a libcall,
28414 but we are still forced to inline, run the heuristic below
28415 that will pick code for medium sized blocks. */
28416 if (alg
!= libcall
)
28418 *noalign
= alg_noalign
;
28421 else if (!any_alg_usable_p
)
28424 else if (alg_usable_p (candidate
, memset
, have_as
))
28426 *noalign
= algs
->size
[i
].noalign
;
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand and
     take blocks of at most that size, guessing that the average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
28439 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
28440 && (algs
->unknown_size
== libcall
28441 || !alg_usable_p (algs
->unknown_size
, memset
, have_as
)))
28443 enum stringop_alg alg
;
28444 HOST_WIDE_INT new_expected_size
= (max
> 0 ? max
: 4096) / 2;
28446 /* If there aren't any usable algorithms or if recursing already,
28447 then recursing on smaller sizes or same size isn't going to
28448 find anything. Just return the simple byte-at-a-time copy loop. */
28449 if (!any_alg_usable_p
|| recur
)
28451 /* Pick something reasonable. */
28452 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
&& !recur
)
28453 *dynamic_check
= 128;
28454 return loop_1_byte
;
28456 alg
= decide_alg (count
, new_expected_size
, min_size
, max_size
, memset
,
28457 zero_memset
, have_as
, dynamic_check
, noalign
, true);
28458 gcc_assert (*dynamic_check
== -1);
28459 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
28460 *dynamic_check
= max
;
28462 gcc_assert (alg
!= libcall
);
28465 return (alg_usable_p (algs
->unknown_size
, memset
, have_as
)
28466 ? algs
->unknown_size
: libcall
);
28469 /* Decide on alignment. We know that the operand is already aligned to ALIGN
28470 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
28472 decide_alignment (int align
,
28473 enum stringop_alg alg
,
28475 machine_mode move_mode
)
28477 int desired_align
= 0;
28479 gcc_assert (alg
!= no_stringop
);
28481 if (alg
== libcall
)
28483 if (move_mode
== VOIDmode
)
28486 desired_align
= GET_MODE_SIZE (move_mode
);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying whole cacheline at once.  */
28489 if (TARGET_PENTIUMPRO
28490 && (alg
== rep_prefix_4_byte
|| alg
== rep_prefix_1_byte
))
28495 if (desired_align
< align
)
28496 desired_align
= align
;
28497 if (expected_size
!= -1 && expected_size
< 4)
28498 desired_align
= align
;
28500 return desired_align
;
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   VAL * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
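/* Illustration (not part of the compiler, excluded from compilation): the
   value the promotion below computes.  Broadcasting a byte 0xXY across a
   word can be done either with one multiply by 0x01010101 (or the DImode
   equivalent) or, on CPUs with slow multiply, with the shift-and-or ladder
   modelled here.  The helper name broadcast_byte_example is hypothetical.  */
#if 0
#include <stdint.h>

static uint64_t
broadcast_byte_example (uint8_t v, int bytes /* 4 or 8 */)
{
  uint64_t x = v;
  x |= x << 8;			/* 0x000000000000XYXY */
  x |= x << 16;			/* 0x00000000XYXYXYXY */
  if (bytes == 8)
    x |= x << 32;		/* 0xXYXYXYXYXYXYXYXY */
  return x;
}
#endif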
28510 promote_duplicated_reg (machine_mode mode
, rtx val
)
28512 machine_mode valmode
= GET_MODE (val
);
28514 int nops
= mode
== DImode
? 3 : 2;
28516 gcc_assert (mode
== SImode
|| mode
== DImode
|| val
== const0_rtx
);
28517 if (val
== const0_rtx
)
28518 return copy_to_mode_reg (mode
, CONST0_RTX (mode
));
28519 if (CONST_INT_P (val
))
28521 HOST_WIDE_INT v
= INTVAL (val
) & 255;
28525 if (mode
== DImode
)
28526 v
|= (v
<< 16) << 16;
28527 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
28530 if (valmode
== VOIDmode
)
28532 if (valmode
!= QImode
)
28533 val
= gen_lowpart (QImode
, val
);
28534 if (mode
== QImode
)
28536 if (!TARGET_PARTIAL_REG_STALL
)
28538 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
28539 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
28540 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
28541 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
28543 rtx reg
= convert_modes (mode
, QImode
, val
, true);
28544 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
28545 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
28550 rtx reg
= convert_modes (mode
, QImode
, val
, true);
28552 if (!TARGET_PARTIAL_REG_STALL
)
28553 if (mode
== SImode
)
28554 emit_insn (gen_insvsi_1 (reg
, reg
));
28556 emit_insn (gen_insvdi_1 (reg
, reg
));
28559 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
28560 NULL
, 1, OPTAB_DIRECT
);
28562 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
28564 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
28565 NULL
, 1, OPTAB_DIRECT
);
28566 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
28567 if (mode
== SImode
)
28569 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
28570 NULL
, 1, OPTAB_DIRECT
);
28571 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
28576 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
28577 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
28578 alignment from ALIGN to DESIRED_ALIGN. */
28580 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
,
28586 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
28587 promoted_val
= promote_duplicated_reg (DImode
, val
);
28588 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
28589 promoted_val
= promote_duplicated_reg (SImode
, val
);
28590 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
28591 promoted_val
= promote_duplicated_reg (HImode
, val
);
28593 promoted_val
= val
;
28595 return promoted_val
;
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
   operations when profitable.  The code depends upon architecture, block size
   and alignment, but always has one of the following overall structures:

   Aligned move sequence:

     1) Prologue guard: Conditional that jumps up to epilogues for small
	blocks that can be handled by the epilogue alone.  This is faster
	but also needed for correctness, since the prologue assumes the block
	is larger than the desired alignment.

	Optional dynamic check for size and libcall for large
	blocks is emitted here too, with -minline-stringops-dynamically.

     2) Prologue: copy first few bytes in order to get destination
	aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
	than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
	copied.  We emit either a jump tree on power of two sized
	blocks, or a byte loop.

     3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
	with specified algorithm.

     4) Epilogue: code copying tail of the block that is too small to be
	handled by main body (or up to size guarded by prologue guard).

   Misaligned move sequence:

     1) Misaligned move prologue/epilogue containing:
	a) Prologue handling small memory blocks and jumping to done_label
	   (skipped if blocks are known to be large enough)
	b) Single move copying first DESIRED_ALIGN-ALIGN bytes if alignment is
	   needed by single possibly misaligned move
	   (skipped if alignment is not needed)
	c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves

     2) Zero size guard dispatching to done_label, if needed

     3) Dispatch to library call, if needed,

     4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
	with specified algorithm.  */
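/* Illustration (not part of the compiler, excluded from compilation): the
   aligned-sequence shape described above, written as a host-side C sketch.
   The function name memcpy_shape_example is hypothetical; the numbered
   comments correspond to the steps listed in the comment above.  */
#if 0
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
memcpy_shape_example (unsigned char *dst, const unsigned char *src,
		      size_t count, size_t size_needed, size_t desired_align)
{
  if (count < size_needed)		/* 1) prologue guard */
    goto epilogue;

  /* 2) prologue: peel single bytes until DST reaches DESIRED_ALIGN.  */
  while (count > 0 && (uintptr_t) dst % desired_align != 0)
    {
      *dst++ = *src++;
      count--;
    }

  /* 3) main body: copy in SIZE_NEEDED chunks.  */
  while (count >= size_needed)
    {
      memcpy (dst, src, size_needed);
      dst += size_needed;
      src += size_needed;
      count -= size_needed;
    }

 epilogue:
  /* 4) epilogue: fewer than SIZE_NEEDED bytes remain.  */
  while (count > 0)
    {
      *dst++ = *src++;
      count--;
    }
}
#endif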
28641 ix86_expand_set_or_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx val_exp
,
28642 rtx align_exp
, rtx expected_align_exp
,
28643 rtx expected_size_exp
, rtx min_size_exp
,
28644 rtx max_size_exp
, rtx probable_max_size_exp
,
28649 rtx_code_label
*label
= NULL
;
28651 rtx_code_label
*jump_around_label
= NULL
;
28652 HOST_WIDE_INT align
= 1;
28653 unsigned HOST_WIDE_INT count
= 0;
28654 HOST_WIDE_INT expected_size
= -1;
28655 int size_needed
= 0, epilogue_size_needed
;
28656 int desired_align
= 0, align_bytes
= 0;
28657 enum stringop_alg alg
;
28658 rtx promoted_val
= NULL
;
28659 rtx vec_promoted_val
= NULL
;
28660 bool force_loopy_epilogue
= false;
28662 bool need_zero_guard
= false;
28664 machine_mode move_mode
= VOIDmode
;
28665 machine_mode wider_mode
;
28666 int unroll_factor
= 1;
28667 /* TODO: Once value ranges are available, fill in proper data. */
28668 unsigned HOST_WIDE_INT min_size
= 0;
28669 unsigned HOST_WIDE_INT max_size
= -1;
28670 unsigned HOST_WIDE_INT probable_max_size
= -1;
28671 bool misaligned_prologue_used
= false;
28674 if (CONST_INT_P (align_exp
))
28675 align
= INTVAL (align_exp
);
  /* i386 can do misaligned access at a reasonably increased cost.  */
28677 if (CONST_INT_P (expected_align_exp
)
28678 && INTVAL (expected_align_exp
) > align
)
28679 align
= INTVAL (expected_align_exp
);
28680 /* ALIGN is the minimum of destination and source alignment, but we care here
28681 just about destination alignment. */
28683 && MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
28684 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
28686 if (CONST_INT_P (count_exp
))
28688 min_size
= max_size
= probable_max_size
= count
= expected_size
28689 = INTVAL (count_exp
);
28690 /* When COUNT is 0, there is nothing to do. */
28697 min_size
= INTVAL (min_size_exp
);
28699 max_size
= INTVAL (max_size_exp
);
28700 if (probable_max_size_exp
)
28701 probable_max_size
= INTVAL (probable_max_size_exp
);
28702 if (CONST_INT_P (expected_size_exp
))
28703 expected_size
= INTVAL (expected_size_exp
);
28706 /* Make sure we don't need to care about overflow later on. */
28707 if (count
> (HOST_WIDE_INT_1U
<< 30))
28710 have_as
= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst
));
28712 have_as
|= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src
));
28714 /* Step 0: Decide on preferred algorithm, desired alignment and
28715 size of chunks to be copied by main loop. */
28716 alg
= decide_alg (count
, expected_size
, min_size
, probable_max_size
,
28718 issetmem
&& val_exp
== const0_rtx
, have_as
,
28719 &dynamic_check
, &noalign
, false);
28720 if (alg
== libcall
)
28722 gcc_assert (alg
!= no_stringop
);
28724 /* For now vector-version of memset is generated only for memory zeroing, as
28725 creating of promoted vector value is very cheap in this case. */
28726 if (issetmem
&& alg
== vector_loop
&& val_exp
!= const0_rtx
)
28727 alg
= unrolled_loop
;
28730 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
28731 destreg
= ix86_copy_addr_to_reg (XEXP (dst
, 0));
28733 srcreg
= ix86_copy_addr_to_reg (XEXP (src
, 0));
28736 move_mode
= word_mode
;
28742 gcc_unreachable ();
28744 need_zero_guard
= true;
28745 move_mode
= QImode
;
28748 need_zero_guard
= true;
28750 case unrolled_loop
:
28751 need_zero_guard
= true;
28752 unroll_factor
= (TARGET_64BIT
? 4 : 2);
28755 need_zero_guard
= true;
28757 /* Find the widest supported mode. */
28758 move_mode
= word_mode
;
28759 while (GET_MODE_WIDER_MODE (move_mode
).exists (&wider_mode
)
28760 && optab_handler (mov_optab
, wider_mode
) != CODE_FOR_nothing
)
28761 move_mode
= wider_mode
;
28763 /* Find the corresponding vector mode with the same size as MOVE_MODE.
28764 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
28765 if (GET_MODE_SIZE (move_mode
) > GET_MODE_SIZE (word_mode
))
28767 int nunits
= GET_MODE_SIZE (move_mode
) / GET_MODE_SIZE (word_mode
);
28768 if (!mode_for_vector (word_mode
, nunits
).exists (&move_mode
)
28769 || optab_handler (mov_optab
, move_mode
) == CODE_FOR_nothing
)
28770 move_mode
= word_mode
;
28772 gcc_assert (optab_handler (mov_optab
, move_mode
) != CODE_FOR_nothing
);
28774 case rep_prefix_8_byte
:
28775 move_mode
= DImode
;
28777 case rep_prefix_4_byte
:
28778 move_mode
= SImode
;
28780 case rep_prefix_1_byte
:
28781 move_mode
= QImode
;
28784 size_needed
= GET_MODE_SIZE (move_mode
) * unroll_factor
;
28785 epilogue_size_needed
= size_needed
;
  /* If we are going to call any library calls conditionally, make sure any
     pending stack adjustments happen before the first conditional branch,
     otherwise they will be emitted before the library call only and won't
     happen from the other branches.  */
28791 if (dynamic_check
!= -1)
28792 do_pending_stack_adjust ();
28794 desired_align
= decide_alignment (align
, alg
, expected_size
, move_mode
);
28795 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
28796 align
= desired_align
;
28798 /* Step 1: Prologue guard. */
28800 /* Alignment code needs count to be in register. */
28801 if (CONST_INT_P (count_exp
) && desired_align
> align
)
28803 if (INTVAL (count_exp
) > desired_align
28804 && INTVAL (count_exp
) > size_needed
)
28807 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
28808 if (align_bytes
<= 0)
28811 align_bytes
= desired_align
- align_bytes
;
28813 if (align_bytes
== 0)
28814 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
28816 gcc_assert (desired_align
>= 1 && align
>= 1);
  /* Misaligned move sequences handle both prologue and epilogue at once.
     Default code generation results in smaller code for large alignments
     and also avoids redundant work when sizes are known precisely.  */
28821 misaligned_prologue_used
28822 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
28823 && MAX (desired_align
, epilogue_size_needed
) <= 32
28824 && desired_align
<= epilogue_size_needed
28825 && ((desired_align
> align
&& !align_bytes
)
28826 || (!count
&& epilogue_size_needed
> 1)));
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     beginning).
     For now the misaligned move sequences do not have fast path
     without broadcasting.  */
28833 if (issetmem
&& ((CONST_INT_P (val_exp
) || misaligned_prologue_used
)))
28835 if (alg
== vector_loop
)
28837 gcc_assert (val_exp
== const0_rtx
);
28838 vec_promoted_val
= promote_duplicated_reg (move_mode
, val_exp
);
28839 promoted_val
= promote_duplicated_reg_to_size (val_exp
,
28840 GET_MODE_SIZE (word_mode
),
28841 desired_align
, align
);
28845 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
28846 desired_align
, align
);
  /* Misaligned move sequences handle both prologues and epilogues at once.
     Default code generation results in smaller code for large alignments and
     also avoids redundant work when sizes are known precisely.  */
28852 if (misaligned_prologue_used
)
28854 /* Misaligned move prologue handled small blocks by itself. */
28855 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
28856 (dst
, src
, &destreg
, &srcreg
,
28857 move_mode
, promoted_val
, vec_promoted_val
,
28859 &jump_around_label
,
28860 desired_align
< align
28861 ? MAX (desired_align
, epilogue_size_needed
) : epilogue_size_needed
,
28862 desired_align
, align
, &min_size
, dynamic_check
, issetmem
);
28864 src
= change_address (src
, BLKmode
, srcreg
);
28865 dst
= change_address (dst
, BLKmode
, destreg
);
28866 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
28867 epilogue_size_needed
= 0;
28868 if (need_zero_guard
28869 && min_size
< (unsigned HOST_WIDE_INT
) size_needed
)
      /* It is possible that we copied enough so the main loop will not
	 execute.  */
      gcc_assert (size_needed > 1);
28874 if (jump_around_label
== NULL_RTX
)
28875 jump_around_label
= gen_label_rtx ();
28876 emit_cmp_and_jump_insns (count_exp
,
28877 GEN_INT (size_needed
),
28878 LTU
, 0, counter_mode (count_exp
), 1, jump_around_label
);
28879 if (expected_size
== -1
28880 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
28881 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
28883 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
28886 /* Ensure that alignment prologue won't copy past end of block. */
28887 else if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
28889 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
28890 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
28891 Make sure it is power of 2. */
28892 epilogue_size_needed
= 1 << (floor_log2 (epilogue_size_needed
) + 1);
      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop instead.  */
28898 if (issetmem
&& epilogue_size_needed
> 2 && !promoted_val
)
28899 force_loopy_epilogue
= true;
28900 if ((count
&& count
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
28901 || max_size
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
28903 /* If main algorithm works on QImode, no epilogue is needed.
28904 For small sizes just don't align anything. */
28905 if (size_needed
== 1)
28906 desired_align
= align
;
28911 && min_size
< (unsigned HOST_WIDE_INT
) epilogue_size_needed
)
28913 label
= gen_label_rtx ();
28914 emit_cmp_and_jump_insns (count_exp
,
28915 GEN_INT (epilogue_size_needed
),
28916 LTU
, 0, counter_mode (count_exp
), 1, label
);
28917 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
28918 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
28920 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
28926 if (dynamic_check
!= -1)
28928 if (!issetmem
&& CONST_INT_P (count_exp
))
28930 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
28932 emit_block_copy_via_libcall (dst
, src
, count_exp
);
28933 count_exp
= const0_rtx
;
28939 rtx_code_label
*hot_label
= gen_label_rtx ();
28940 if (jump_around_label
== NULL_RTX
)
28941 jump_around_label
= gen_label_rtx ();
28942 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
28943 LEU
, 0, counter_mode (count_exp
),
28945 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
28947 set_storage_via_libcall (dst
, count_exp
, val_exp
);
28949 emit_block_copy_via_libcall (dst
, src
, count_exp
);
28950 emit_jump (jump_around_label
);
28951 emit_label (hot_label
);
28955 /* Step 2: Alignment prologue. */
28956 /* Do the expensive promotion once we branched off the small blocks. */
28957 if (issetmem
&& !promoted_val
)
28958 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
28959 desired_align
, align
);
28961 if (desired_align
> align
&& !misaligned_prologue_used
)
28963 if (align_bytes
== 0)
	  /* Except for the first move in prologue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
28969 dst
= change_address (dst
, BLKmode
, destreg
);
28971 src
= change_address (src
, BLKmode
, srcreg
);
28972 dst
= expand_set_or_movmem_prologue (dst
, src
, destreg
, srcreg
,
28973 promoted_val
, vec_promoted_val
,
28974 count_exp
, align
, desired_align
,
28976 /* At most desired_align - align bytes are copied. */
28977 if (min_size
< (unsigned)(desired_align
- align
))
28980 min_size
-= desired_align
- align
;
28984 /* If we know how many bytes need to be stored before dst is
28985 sufficiently aligned, maintain aliasing info accurately. */
28986 dst
= expand_set_or_movmem_constant_prologue (dst
, &src
, destreg
,
28994 count_exp
= plus_constant (counter_mode (count_exp
),
28995 count_exp
, -align_bytes
);
28996 count
-= align_bytes
;
28997 min_size
-= align_bytes
;
28998 max_size
-= align_bytes
;
29000 if (need_zero_guard
29001 && min_size
< (unsigned HOST_WIDE_INT
) size_needed
29002 && (count
< (unsigned HOST_WIDE_INT
) size_needed
29003 || (align_bytes
== 0
29004 && count
< ((unsigned HOST_WIDE_INT
) size_needed
29005 + desired_align
- align
))))
      /* It is possible that we copied enough so the main loop will not
	 execute.  */
      gcc_assert (size_needed > 1);
29010 if (label
== NULL_RTX
)
29011 label
= gen_label_rtx ();
29012 emit_cmp_and_jump_insns (count_exp
,
29013 GEN_INT (size_needed
),
29014 LTU
, 0, counter_mode (count_exp
), 1, label
);
29015 if (expected_size
== -1
29016 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
29017 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
29019 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
29022 if (label
&& size_needed
== 1)
29024 emit_label (label
);
29025 LABEL_NUSES (label
) = 1;
29027 epilogue_size_needed
= 1;
29029 promoted_val
= val_exp
;
29031 else if (label
== NULL_RTX
&& !misaligned_prologue_used
)
29032 epilogue_size_needed
= size_needed
;
29034 /* Step 3: Main loop. */
29041 gcc_unreachable ();
29044 case unrolled_loop
:
29045 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, promoted_val
,
29046 count_exp
, move_mode
, unroll_factor
,
29047 expected_size
, issetmem
);
29050 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
,
29051 vec_promoted_val
, count_exp
, move_mode
,
29052 unroll_factor
, expected_size
, issetmem
);
29054 case rep_prefix_8_byte
:
29055 case rep_prefix_4_byte
:
29056 case rep_prefix_1_byte
:
29057 expand_set_or_movmem_via_rep (dst
, src
, destreg
, srcreg
, promoted_val
,
29058 val_exp
, count_exp
, move_mode
, issetmem
);
29061 /* Adjust properly the offset of src and dest memory for aliasing. */
29062 if (CONST_INT_P (count_exp
))
29065 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
29066 (count
/ size_needed
) * size_needed
);
29067 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
29068 (count
/ size_needed
) * size_needed
);
29073 src
= change_address (src
, BLKmode
, srcreg
);
29074 dst
= change_address (dst
, BLKmode
, destreg
);
29077 /* Step 4: Epilogue to copy the remaining bytes. */
29081 /* When the main loop is done, COUNT_EXP might hold original count,
29082 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
29083 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
29084 bytes. Compensate if needed. */
29086 if (size_needed
< epilogue_size_needed
)
29089 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
29090 GEN_INT (size_needed
- 1), count_exp
, 1,
29092 if (tmp
!= count_exp
)
29093 emit_move_insn (count_exp
, tmp
);
29095 emit_label (label
);
29096 LABEL_NUSES (label
) = 1;
29099 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
29101 if (force_loopy_epilogue
)
29102 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
29103 epilogue_size_needed
);
29107 expand_setmem_epilogue (dst
, destreg
, promoted_val
,
29108 vec_promoted_val
, count_exp
,
29109 epilogue_size_needed
);
29111 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
29112 epilogue_size_needed
);
29115 if (jump_around_label
)
29116 emit_label (jump_around_label
);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
29133 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
29137 rtx_code_label
*align_2_label
= NULL
;
29138 rtx_code_label
*align_3_label
= NULL
;
29139 rtx_code_label
*align_4_label
= gen_label_rtx ();
29140 rtx_code_label
*end_0_label
= gen_label_rtx ();
29142 rtx tmpreg
= gen_reg_rtx (SImode
);
29143 rtx scratch
= gen_reg_rtx (SImode
);
29147 if (CONST_INT_P (align_rtx
))
29148 align
= INTVAL (align_rtx
);
29150 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
29152 /* Is there a known alignment and is it less than 4? */
29155 rtx scratch1
= gen_reg_rtx (Pmode
);
29156 emit_move_insn (scratch1
, out
);
29157 /* Is there a known alignment and is it not 2? */
29160 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
29161 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
29163 /* Leave just the 3 lower bits. */
29164 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
29165 NULL_RTX
, 0, OPTAB_WIDEN
);
29167 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
29168 Pmode
, 1, align_4_label
);
29169 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
29170 Pmode
, 1, align_2_label
);
29171 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
29172 Pmode
, 1, align_3_label
);
      /* Since the alignment is 2, we have to check 2 or 0 bytes;
	 check if it is aligned to 4 bytes.  */
29179 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
29180 NULL_RTX
, 0, OPTAB_WIDEN
);
29182 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
29183 Pmode
, 1, align_4_label
);
29186 mem
= change_address (src
, QImode
, out
);
29188 /* Now compare the bytes. */
  /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
29191 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
29192 QImode
, 1, end_0_label
);
29194 /* Increment the address. */
29195 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
29197 /* Not needed with an alignment of 2 */
29200 emit_label (align_2_label
);
29202 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
29205 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
29207 emit_label (align_3_label
);
29210 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
29213 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
29219 emit_label (align_4_label
);
29221 mem
= change_address (src
, SImode
, out
);
29222 emit_move_insn (scratch
, mem
);
29223 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
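  /* Illustration (not part of the compiler, excluded from compilation): the
     word-at-a-time zero-byte test that the RTL emitted below implements.
     For a 32-bit word X the expression is nonzero iff at least one byte of
     X is zero.  The helper name has_zero_byte_example is hypothetical.  */
#if 0
#include <stdint.h>

static int
has_zero_byte_example (uint32_t x)
{
  /* x - 0x01010101 : borrows into the high bit of every byte that was zero;
     & ~x           : discards "borrows" that are just bytes >= 0x80;
     & 0x80808080   : keeps only the per-byte flags.  */
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif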
29228 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
29229 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
29230 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
29231 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
29232 gen_int_mode (0x80808080, SImode
)));
29233 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
29238 rtx reg
= gen_reg_rtx (SImode
);
29239 rtx reg2
= gen_reg_rtx (Pmode
);
29240 emit_move_insn (reg
, tmpreg
);
29241 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
29243 /* If zero is not in the first two bytes, move two bytes forward. */
29244 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
29245 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
29246 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
29247 emit_insn (gen_rtx_SET (tmpreg
,
29248 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
29251 /* Emit lea manually to avoid clobbering of flags. */
29252 emit_insn (gen_rtx_SET (reg2
, gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
29254 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
29255 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
29256 emit_insn (gen_rtx_SET (out
,
29257 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
29263 rtx_code_label
*end_2_label
= gen_label_rtx ();
29264 /* Is zero in the first two bytes? */
29266 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
29267 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
29268 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
29269 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
29270 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
29272 tmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp
));
29273 JUMP_LABEL (tmp
) = end_2_label
;
29275 /* Not in the first two. Move two bytes forward. */
29276 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
29277 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
29279 emit_label (end_2_label
);
29283 /* Avoid branch in fixing the byte. */
29284 tmpreg
= gen_lowpart (QImode
, tmpreg
);
29285 emit_insn (gen_addqi3_cconly_overflow (tmpreg
, tmpreg
));
29286 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
29287 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
29288 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
29290 emit_label (end_0_label
);
29293 /* Expand strlen. */
29296 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
29298 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
29303 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
29304 && !TARGET_INLINE_ALL_STRINGOPS
29305 && !optimize_insn_for_size_p ()
29306 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
29309 addr
= force_reg (Pmode
, XEXP (src
, 0));
29310 scratch1
= gen_reg_rtx (Pmode
);
29312 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
29313 && !optimize_insn_for_size_p ())
29315 /* Well it seems that some optimizer does not combine a call like
29316 foo(strlen(bar), strlen(bar));
29317 when the move and the subtraction is done here. It does calculate
29318 the length just once when these instructions are done inside of
29319 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
29320 often used and I use one fewer register for the lifetime of
29321 output_strlen_unroll() this is better. */
29323 emit_move_insn (out
, addr
);
29325 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
29327 /* strlensi_unroll_1 returns the address of the zero at the end of
29328 the string, like memchr(), so compute the length by subtracting
29329 the start address. */
29330 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
29336 /* Can't use this if the user has appropriated eax, ecx, or edi. */
29337 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
29339 /* Can't use this for non-default address spaces. */
29340 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src
)))
29343 scratch2
= gen_reg_rtx (Pmode
);
29344 scratch3
= gen_reg_rtx (Pmode
);
29345 scratch4
= force_reg (Pmode
, constm1_rtx
);
29347 emit_move_insn (scratch3
, addr
);
29348 eoschar
= force_reg (QImode
, eoschar
);
29350 src
= replace_equiv_address_nv (src
, scratch3
);
29352 /* If .md starts supporting :P, this can be done in .md. */
29353 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
29354 scratch4
), UNSPEC_SCAS
);
29355 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
29356 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
29357 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
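/* Illustration only (not part of the original source): a scalar model of the
   repne-scasb expansion above, assuming the usual x86 semantics.  SCRATCH4
   preloads the count register with -1; the scan decrements it once per byte
   examined, including the terminator, so the final count is -(len + 2).
   SCRATCH2 = ~count = len + 1, and OUT = SCRATCH2 + (-1) = len.  */
#if 0
static long
scasb_strlen_model (const char *s)
{
  long count = -1;              /* scratch4 = constm1_rtx */
  const char *p = s;
  do
    count--;                    /* one decrement per byte scanned */
  while (*p++ != '\0');
  return ~count - 1;            /* one_cmpl, then add constm1_rtx */
}
#endif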
/* For a given symbol (function) construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */

static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
29382 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
29384 rtx pop
, bool sibcall
)
29387 rtx use
= NULL
, call
;
29388 unsigned int vec_len
= 0;
29391 if (GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
29393 fndecl
= SYMBOL_REF_DECL (XEXP (fnaddr
, 0));
29395 && (lookup_attribute ("interrupt",
29396 TYPE_ATTRIBUTES (TREE_TYPE (fndecl
)))))
29397 error ("interrupt service routine can't be called directly");
29400 fndecl
= NULL_TREE
;
29402 if (pop
== const0_rtx
)
29404 gcc_assert (!TARGET_64BIT
|| !pop
);
29406 if (TARGET_MACHO
&& !TARGET_64BIT
)
29409 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
29410 fnaddr
= machopic_indirect_call_target (fnaddr
);
29415 /* Static functions and indirect calls don't need the pic register. Also,
29416 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
29417 it an indirect call. */
29418 rtx addr
= XEXP (fnaddr
, 0);
29420 && GET_CODE (addr
) == SYMBOL_REF
29421 && !SYMBOL_REF_LOCAL_P (addr
))
29424 && (SYMBOL_REF_DECL (addr
) == NULL_TREE
29425 || !lookup_attribute ("noplt",
29426 DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr
)))))
29429 || (ix86_cmodel
== CM_LARGE_PIC
29430 && DEFAULT_ABI
!= MS_ABI
))
29432 use_reg (&use
, gen_rtx_REG (Pmode
,
29433 REAL_PIC_OFFSET_TABLE_REGNUM
));
29434 if (ix86_use_pseudo_pic_reg ())
29435 emit_move_insn (gen_rtx_REG (Pmode
,
29436 REAL_PIC_OFFSET_TABLE_REGNUM
),
29437 pic_offset_table_rtx
);
29440 else if (!TARGET_PECOFF
&& !TARGET_MACHO
)
29444 fnaddr
= gen_rtx_UNSPEC (Pmode
,
29445 gen_rtvec (1, addr
),
29447 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
29451 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
29453 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
29454 fnaddr
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
29457 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
29458 /* Pmode may not be the same as word_mode for x32, which
29459 doesn't support indirect branch via 32-bit memory slot.
29460 Since x32 GOT slot is 64 bit with zero upper 32 bits,
29461 indirect branch via x32 GOT slot is OK. */
29462 if (GET_MODE (fnaddr
) != word_mode
)
29463 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
29464 fnaddr
= gen_rtx_MEM (QImode
, fnaddr
);
29469 /* Skip setting up RAX register for -mskip-rax-setup when there are no
29470 parameters passed in vector registers. */
29472 && (INTVAL (callarg2
) > 0
29473 || (INTVAL (callarg2
) == 0
29474 && (TARGET_SSE
|| !flag_skip_rax_setup
))))
29476 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
29477 emit_move_insn (al
, callarg2
);
29478 use_reg (&use
, al
);
29481 if (ix86_cmodel
== CM_LARGE_PIC
29484 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
29485 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
29486 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
29487 /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
29488 branch via x32 GOT slot is OK. */
29489 else if (!(TARGET_X32
29491 && GET_CODE (XEXP (fnaddr
, 0)) == ZERO_EXTEND
29492 && GOT_memory_operand (XEXP (XEXP (fnaddr
, 0), 0), Pmode
))
29494 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
29495 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
)))
29497 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
29498 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
29501 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
/* We should add bounds as the destination register in case a
   pointer with bounds may be returned.  */
29507 if (TARGET_MPX
&& SCALAR_INT_MODE_P (GET_MODE (retval
)))
29509 rtx b0
= gen_rtx_REG (BND64mode
, FIRST_BND_REG
);
29510 rtx b1
= gen_rtx_REG (BND64mode
, FIRST_BND_REG
+ 1);
29511 if (GET_CODE (retval
) == PARALLEL
)
29513 b0
= gen_rtx_EXPR_LIST (VOIDmode
, b0
, const0_rtx
);
29514 b1
= gen_rtx_EXPR_LIST (VOIDmode
, b1
, const0_rtx
);
29515 rtx par
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, b0
, b1
));
29516 retval
= chkp_join_splitted_slot (retval
, par
);
29520 retval
= gen_rtx_PARALLEL (VOIDmode
,
29521 gen_rtvec (3, retval
, b0
, b1
));
29522 chkp_put_regs_to_expr_list (retval
);
29526 call
= gen_rtx_SET (retval
, call
);
29528 vec
[vec_len
++] = call
;
29532 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
29533 pop
= gen_rtx_SET (stack_pointer_rtx
, pop
);
29534 vec
[vec_len
++] = pop
;
29537 if (cfun
->machine
->no_caller_saved_registers
29539 || (!TREE_THIS_VOLATILE (fndecl
)
29540 && !lookup_attribute ("no_caller_saved_registers",
29541 TYPE_ATTRIBUTES (TREE_TYPE (fndecl
))))))
29543 static const char ix86_call_used_regs
[] = CALL_USED_REGISTERS
;
29544 bool is_64bit_ms_abi
= (TARGET_64BIT
29545 && ix86_function_abi (fndecl
) == MS_ABI
);
29546 char c_mask
= CALL_USED_REGISTERS_MASK (is_64bit_ms_abi
);
29548 /* If there are no caller-saved registers, add all registers
29549 that are clobbered by the call which returns. */
29550 for (int i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
29552 && (ix86_call_used_regs
[i
] == 1
29553 || (ix86_call_used_regs
[i
] & c_mask
))
29554 && !STACK_REGNO_P (i
)
29555 && !MMX_REGNO_P (i
))
29557 gen_rtx_REG (GET_MODE (regno_reg_rtx
[i
]), i
));
29559 else if (TARGET_64BIT_MS_ABI
29560 && (!callarg2
|| INTVAL (callarg2
) != -2))
29564 for (i
= 0; i
< NUM_X86_64_MS_CLOBBERED_REGS
; i
++)
29566 int regno
= x86_64_ms_sysv_extra_clobbered_registers
[i
];
29567 machine_mode mode
= SSE_REGNO_P (regno
) ? TImode
: DImode
;
29569 clobber_reg (&use
, gen_rtx_REG (mode
, regno
));
29572 /* Set here, but it may get cleared later. */
29573 if (TARGET_CALL_MS2SYSV_XLOGUES
)
29578 /* Don't break hot-patched functions. */
29579 else if (ix86_function_ms_hook_prologue (current_function_decl
))
29582 /* TODO: Cases not yet examined. */
29583 else if (flag_split_stack
)
29584 warn_once_call_ms2sysv_xlogues ("-fsplit-stack");
29588 gcc_assert (!reload_completed
);
29589 cfun
->machine
->call_ms2sysv
= true;
29595 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
29596 call
= emit_call_insn (call
);
29598 CALL_INSN_FUNCTION_USAGE (call
) = use
;
/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.  */

static bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  if (!flag_plt
      || (symbol_decl != NULL_TREE
          && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}
29629 /* Output the assembly for a call instruction. */
29632 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
29634 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
29635 bool seh_nop_p
= false;
29638 if (SIBLING_CALL_P (insn
))
29642 if (ix86_nopic_noplt_attribute_p (call_op
))
29645 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29647 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29650 xasm
= "%!jmp\t%P0";
29652 /* SEH epilogue detection requires the indirect branch case
29653 to include REX.W. */
29654 else if (TARGET_SEH
)
29655 xasm
= "%!rex.W jmp\t%A0";
29657 xasm
= "%!jmp\t%A0";
29659 output_asm_insn (xasm
, &call_op
);
29663 /* SEH unwinding can require an extra nop to be emitted in several
29664 circumstances. Determine if we have one of those. */
29669 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
29671 /* If we get to another real insn, we don't need the nop. */
29675 /* If we get to the epilogue note, prevent a catch region from
29676 being adjacent to the standard epilogue sequence. If non-
29677 call-exceptions, we'll have done this during epilogue emission. */
29678 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
29679 && !flag_non_call_exceptions
29680 && !can_throw_internal (insn
))
29687 /* If we didn't find a real insn following the call, prevent the
29688 unwinder from looking into the next function. */
29695 if (ix86_nopic_noplt_attribute_p (call_op
))
29698 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
29700 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
29703 xasm
= "%!call\t%P0";
29706 xasm
= "%!call\t%A0";
29708 output_asm_insn (xasm
, &call_op
);
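/* Illustration only: with the templates above, a 64-bit call to an external
   function `foo' that satisfies ix86_nopic_noplt_attribute_p comes out as an
   indirect call through the GOT,

       call *foo@GOTPCREL(%rip)

   while the ordinary direct case is simply

       call foo

   (the %! sequence adds the bnd prefix when MPX branch protection is in
   effect, and the {att|intel} braces select the output dialect).  */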
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_cleared_alloc<machine_function> ();
  f->call_abi = ix86_abi;

  return f;
}

/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}

static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Return the number used for encoding REG, in the range 0..7.  */

static int
reg_encoded_number (rtx reg)
{
  unsigned regno = REGNO (reg);
  switch (regno)
    {
    case AX_REG:
      return 0;
    case CX_REG:
      return 1;
    case DX_REG:
      return 2;
    case BX_REG:
      return 3;
    case SP_REG:
      return 4;
    case BP_REG:
      return 5;
    case SI_REG:
      return 6;
    case DI_REG:
      return 7;
    default:
      break;
    }
  if (IN_RANGE (regno, FIRST_STACK_REG, LAST_STACK_REG))
    return regno - FIRST_STACK_REG;
  if (IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG))
    return regno - FIRST_SSE_REG;
  if (IN_RANGE (regno, FIRST_MMX_REG, LAST_MMX_REG))
    return regno - FIRST_MMX_REG;
  if (IN_RANGE (regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
    return regno - FIRST_REX_SSE_REG;
  if (IN_RANGE (regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
    return regno - FIRST_REX_INT_REG;
  if (IN_RANGE (regno, FIRST_MASK_REG, LAST_MASK_REG))
    return regno - FIRST_MASK_REG;
  if (IN_RANGE (regno, FIRST_BND_REG, LAST_BND_REG))
    return regno - FIRST_BND_REG;
  return -1;
}
29812 /* Given an insn INSN with NOPERANDS OPERANDS, return the modr/m byte used
29813 in its encoding if it could be relevant for ROP mitigation, otherwise
29814 return -1. If POPNO0 and POPNO1 are nonnull, store the operand numbers
29815 used for calculating it into them. */
29818 ix86_get_modrm_for_rop (rtx_insn
*insn
, rtx
*operands
, int noperands
,
29819 int *popno0
= 0, int *popno1
= 0)
29821 if (asm_noperands (PATTERN (insn
)) >= 0)
29823 int has_modrm
= get_attr_modrm (insn
);
29826 enum attr_modrm_class cls
= get_attr_modrm_class (insn
);
29830 case MODRM_CLASS_OP02
:
29831 gcc_assert (noperands
>= 3);
29840 case MODRM_CLASS_OP01
:
29841 gcc_assert (noperands
>= 2);
29853 if (REG_P (op0
) && REG_P (op1
))
29855 int enc0
= reg_encoded_number (op0
);
29856 int enc1
= reg_encoded_number (op1
);
29857 return 0xc0 + (enc1
<< 3) + enc0
;
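/* Illustration only (not part of the original source): the value built above
   is the standard ModRM byte for a register-register form, mod = 0b11 in bits
   7..6, the second operand's encoding in the reg field (bits 5..3) and the
   first operand's in the r/m field (bits 2..0).  */
#if 0
static int
modrm_reg_reg (int reg_field, int rm_field)
{
  return 0xc0 | ((reg_field & 7) << 3) | (rm_field & 7);
}
#endif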
/* Check whether x86 address PARTS is a pc-relative address.  */

static bool
rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) == LABEL_REF
	      || (GET_CODE (symbol) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
	      || (GET_CODE (symbol) == UNSPEC
		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
		      || XINT (symbol, 1) == UNSPEC_PCREL
		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
	    return true;
	}
    }
  return false;
}
29898 /* Calculate the length of the memory address in the instruction encoding.
29899 Includes addr32 prefix, does not include the one-byte modrm, opcode,
29900 or other prefixes. We never generate addr32 prefix for LEA insn. */
29903 memory_address_length (rtx addr
, bool lea
)
29905 struct ix86_address parts
;
29906 rtx base
, index
, disp
;
29910 if (GET_CODE (addr
) == PRE_DEC
29911 || GET_CODE (addr
) == POST_INC
29912 || GET_CODE (addr
) == PRE_MODIFY
29913 || GET_CODE (addr
) == POST_MODIFY
)
29916 ok
= ix86_decompose_address (addr
, &parts
);
29919 len
= (parts
.seg
== ADDR_SPACE_GENERIC
) ? 0 : 1;
29921 /* If this is not LEA instruction, add the length of addr32 prefix. */
29922 if (TARGET_64BIT
&& !lea
29923 && (SImode_address_operand (addr
, VOIDmode
)
29924 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
29925 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
29929 index
= parts
.index
;
29932 if (base
&& SUBREG_P (base
))
29933 base
= SUBREG_REG (base
);
29934 if (index
&& SUBREG_P (index
))
29935 index
= SUBREG_REG (index
);
29937 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
29938 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
29941 - esp as the base always wants an index,
29942 - ebp as the base always wants a displacement,
29943 - r12 as the base always wants an index,
29944 - r13 as the base always wants a displacement. */
29946 /* Register Indirect. */
29947 if (base
&& !index
&& !disp
)
29949 /* esp (for its index) and ebp (for its displacement) need
29950 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
29952 if (base
== arg_pointer_rtx
29953 || base
== frame_pointer_rtx
29954 || REGNO (base
) == SP_REG
29955 || REGNO (base
) == BP_REG
29956 || REGNO (base
) == R12_REG
29957 || REGNO (base
) == R13_REG
)
29961 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
29962 is not disp32, but disp32(%rip), so for disp32
29963 SIB byte is needed, unless print_operand_address
29964 optimizes it into disp32(%rip) or (%rip) is implied
29966 else if (disp
&& !base
&& !index
)
29969 if (!rip_relative_addr_p (&parts
))
29974 /* Find the length of the displacement constant. */
29977 if (base
&& satisfies_constraint_K (disp
))
29982 /* ebp always wants a displacement. Similarly r13. */
29983 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
29986 /* An index requires the two-byte modrm form.... */
29988 /* ...like esp (or r12), which always wants an index. */
29989 || base
== arg_pointer_rtx
29990 || base
== frame_pointer_rtx
29991 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
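/* Illustration only: with the rules above, the returned value roughly counts
   the segment-override and addr32 prefixes plus any SIB and displacement
   bytes, but not the ModRM byte itself.  E.g. (%rax) costs 0 extra bytes,
   (%rsp) and (%rax,%rbx) need a SIB byte, (%rbp) needs a disp8, a
   displacement that does not fit in 8 bits needs 4 bytes, and a bare
   symbol(%rip) reference counts its 4-byte displacement.  */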
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
30001 ix86_attr_length_immediate_default (rtx_insn
*insn
, bool shortform
)
30005 extract_insn_cached (insn
);
30006 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
30007 if (CONSTANT_P (recog_data
.operand
[i
]))
30009 enum attr_mode mode
= get_attr_mode (insn
);
30012 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
30014 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
30021 ival
= trunc_int_for_mode (ival
, HImode
);
30024 ival
= trunc_int_for_mode (ival
, SImode
);
30029 if (IN_RANGE (ival
, -128, 127))
30046 /* Immediates for DImode instructions are encoded
30047 as 32bit sign extended values. */
30052 fatal_insn ("unknown insn mode", insn
);
30058 /* Compute default value for "length_address" attribute. */
30060 ix86_attr_length_address_default (rtx_insn
*insn
)
30064 if (get_attr_type (insn
) == TYPE_LEA
)
30066 rtx set
= PATTERN (insn
), addr
;
30068 if (GET_CODE (set
) == PARALLEL
)
30069 set
= XVECEXP (set
, 0, 0);
30071 gcc_assert (GET_CODE (set
) == SET
);
30073 addr
= SET_SRC (set
);
30075 return memory_address_length (addr
, true);
30078 extract_insn_cached (insn
);
30079 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
30081 rtx op
= recog_data
.operand
[i
];
30084 constrain_operands_cached (insn
, reload_completed
);
30085 if (which_alternative
!= -1)
30087 const char *constraints
= recog_data
.constraints
[i
];
30088 int alt
= which_alternative
;
30090 while (*constraints
== '=' || *constraints
== '+')
30093 while (*constraints
++ != ',')
30095 /* Skip ignored operands. */
30096 if (*constraints
== 'X')
30100 int len
= memory_address_length (XEXP (op
, 0), false);
30102 /* Account for segment prefix for non-default addr spaces. */
30103 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op
)))
30112 /* Compute default value for "length_vex" attribute. It includes
30113 2 or 3 byte VEX prefix and 1 opcode byte. */
30116 ix86_attr_length_vex_default (rtx_insn
*insn
, bool has_0f_opcode
,
30121 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
30122 byte VEX prefix. */
30123 if (!has_0f_opcode
|| has_vex_w
)
30126 /* We can always use 2 byte VEX prefix in 32bit. */
30130 extract_insn_cached (insn
);
30132 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
30133 if (REG_P (recog_data
.operand
[i
]))
30135 /* REX.W bit uses 3 byte VEX prefix. */
30136 if (GET_MODE (recog_data
.operand
[i
]) == DImode
30137 && GENERAL_REG_P (recog_data
.operand
[i
]))
30142 /* REX.X or REX.B bits use 3 byte VEX prefix. */
30143 if (MEM_P (recog_data
.operand
[i
])
30144 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
30151 /* Return the maximum number of instructions a cpu can issue. */
30154 ix86_issue_rate (void)
30158 case PROCESSOR_PENTIUM
:
30159 case PROCESSOR_LAKEMONT
:
30160 case PROCESSOR_BONNELL
:
30161 case PROCESSOR_SILVERMONT
:
30162 case PROCESSOR_KNL
:
30163 case PROCESSOR_INTEL
:
30165 case PROCESSOR_BTVER2
:
30166 case PROCESSOR_PENTIUM4
:
30167 case PROCESSOR_NOCONA
:
30170 case PROCESSOR_PENTIUMPRO
:
30171 case PROCESSOR_ATHLON
:
30173 case PROCESSOR_AMDFAM10
:
30174 case PROCESSOR_GENERIC
:
30175 case PROCESSOR_BTVER1
:
30178 case PROCESSOR_BDVER1
:
30179 case PROCESSOR_BDVER2
:
30180 case PROCESSOR_BDVER3
:
30181 case PROCESSOR_BDVER4
:
30182 case PROCESSOR_ZNVER1
:
30183 case PROCESSOR_CORE2
:
30184 case PROCESSOR_NEHALEM
:
30185 case PROCESSOR_SANDYBRIDGE
:
30186 case PROCESSOR_HASWELL
:
30194 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
30195 by DEP_INSN and nothing set by DEP_INSN. */
30198 ix86_flags_dependent (rtx_insn
*insn
, rtx_insn
*dep_insn
, enum attr_type insn_type
)
30202 /* Simplify the test for uninteresting insns. */
30203 if (insn_type
!= TYPE_SETCC
30204 && insn_type
!= TYPE_ICMOV
30205 && insn_type
!= TYPE_FCMOV
30206 && insn_type
!= TYPE_IBR
)
30209 if ((set
= single_set (dep_insn
)) != 0)
30211 set
= SET_DEST (set
);
30214 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
30215 && XVECLEN (PATTERN (dep_insn
), 0) == 2
30216 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
30217 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
30219 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
30220 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
30225 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
30228 /* This test is true if the dependent insn reads the flags but
30229 not any other potentially set register. */
30230 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
30233 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
30239 /* Return true iff USE_INSN has a memory address with operands set by
30243 ix86_agi_dependent (rtx_insn
*set_insn
, rtx_insn
*use_insn
)
30246 extract_insn_cached (use_insn
);
30247 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
30248 if (MEM_P (recog_data
.operand
[i
]))
30250 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
30251 if (modified_in_p (addr
, set_insn
) != 0)
30253 /* No AGI stall if SET_INSN is a push or pop and USE_INSN
30254 has SP based memory (unless index reg is modified in a pop). */
30255 rtx set
= single_set (set_insn
);
30257 && (push_operand (SET_DEST (set
), GET_MODE (SET_DEST (set
)))
30258 || pop_operand (SET_SRC (set
), GET_MODE (SET_SRC (set
)))))
30260 struct ix86_address parts
;
30261 if (ix86_decompose_address (addr
, &parts
)
30262 && parts
.base
== stack_pointer_rtx
30263 && (parts
.index
== NULL_RTX
30264 || MEM_P (SET_DEST (set
))
30265 || !modified_in_p (parts
.index
, set_insn
)))
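/* Illustration only: the classic AGI case this catches on Pentium/Lakemont is

       mov  %eax, %ebx        # SET_INSN writes %ebx
       mov  (%ebx), %ecx      # USE_INSN's address uses %ebx -> extra cycle

   whereas a push/pop SET_INSN followed by an %esp-based access is exempted,
   because the stack pointer update is resolved specially.  */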
30275 /* Helper function for exact_store_load_dependency.
30276 Return true if addr is found in insn. */
30278 exact_dependency_1 (rtx addr
, rtx insn
)
30280 enum rtx_code code
;
30281 const char *format_ptr
;
30284 code
= GET_CODE (insn
);
30288 if (rtx_equal_p (addr
, insn
))
30303 format_ptr
= GET_RTX_FORMAT (code
);
30304 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
30306 switch (*format_ptr
++)
30309 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
30313 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
30314 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
/* Return true if there exists an exact dependency between the store and the
   load, i.e. the same memory address is used in both.  */
static bool
exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
{
  rtx set1, set2;

  set1 = single_set (store);
  if (!set1)
    return false;
  if (!MEM_P (SET_DEST (set1)))
    return false;
  set2 = single_set (load);
  if (!set2)
    return false;
  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
    return true;
  return false;
}
30343 ix86_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
30346 enum attr_type insn_type
, dep_insn_type
;
30347 enum attr_memory memory
;
30349 int dep_insn_code_number
;
30351 /* Anti and output dependencies have zero cost on all CPUs. */
30355 dep_insn_code_number
= recog_memoized (dep_insn
);
30357 /* If we can't recognize the insns, we can't really do anything. */
30358 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
30361 insn_type
= get_attr_type (insn
);
30362 dep_insn_type
= get_attr_type (dep_insn
);
30366 case PROCESSOR_PENTIUM
:
30367 case PROCESSOR_LAKEMONT
:
30368 /* Address Generation Interlock adds a cycle of latency. */
30369 if (insn_type
== TYPE_LEA
)
30371 rtx addr
= PATTERN (insn
);
30373 if (GET_CODE (addr
) == PARALLEL
)
30374 addr
= XVECEXP (addr
, 0, 0);
30376 gcc_assert (GET_CODE (addr
) == SET
);
30378 addr
= SET_SRC (addr
);
30379 if (modified_in_p (addr
, dep_insn
))
30382 else if (ix86_agi_dependent (dep_insn
, insn
))
30385 /* ??? Compares pair with jump/setcc. */
30386 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
30389 /* Floating point stores require value to be ready one cycle earlier. */
30390 if (insn_type
== TYPE_FMOV
30391 && get_attr_memory (insn
) == MEMORY_STORE
30392 && !ix86_agi_dependent (dep_insn
, insn
))
30396 case PROCESSOR_PENTIUMPRO
:
30397 /* INT->FP conversion is expensive. */
30398 if (get_attr_fp_int_src (dep_insn
))
30401 /* There is one cycle extra latency between an FP op and a store. */
30402 if (insn_type
== TYPE_FMOV
30403 && (set
= single_set (dep_insn
)) != NULL_RTX
30404 && (set2
= single_set (insn
)) != NULL_RTX
30405 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
30406 && MEM_P (SET_DEST (set2
)))
30409 memory
= get_attr_memory (insn
);
30411 /* Show ability of reorder buffer to hide latency of load by executing
30412 in parallel with previous instruction in case
30413 previous instruction is not needed to compute the address. */
30414 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
30415 && !ix86_agi_dependent (dep_insn
, insn
))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
30419 if (dep_insn_type
== TYPE_IMOV
30420 || dep_insn_type
== TYPE_FMOV
)
30428 /* The esp dependency is resolved before
30429 the instruction is really finished. */
30430 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
30431 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
30434 /* INT->FP conversion is expensive. */
30435 if (get_attr_fp_int_src (dep_insn
))
30438 memory
= get_attr_memory (insn
);
30440 /* Show ability of reorder buffer to hide latency of load by executing
30441 in parallel with previous instruction in case
30442 previous instruction is not needed to compute the address. */
30443 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
30444 && !ix86_agi_dependent (dep_insn
, insn
))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
30448 if (dep_insn_type
== TYPE_IMOV
30449 || dep_insn_type
== TYPE_FMOV
)
30458 case PROCESSOR_AMDFAM10
:
30459 case PROCESSOR_BDVER1
:
30460 case PROCESSOR_BDVER2
:
30461 case PROCESSOR_BDVER3
:
30462 case PROCESSOR_BDVER4
:
30463 case PROCESSOR_ZNVER1
:
30464 case PROCESSOR_BTVER1
:
30465 case PROCESSOR_BTVER2
:
30466 case PROCESSOR_GENERIC
:
/* The stack engine allows push and pop instructions to execute in parallel.  */
30468 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
30469 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
30473 case PROCESSOR_ATHLON
:
30475 memory
= get_attr_memory (insn
);
30477 /* Show ability of reorder buffer to hide latency of load by executing
30478 in parallel with previous instruction in case
30479 previous instruction is not needed to compute the address. */
30480 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
30481 && !ix86_agi_dependent (dep_insn
, insn
))
30483 enum attr_unit unit
= get_attr_unit (insn
);
/* Because of the difference between the length of the integer and
   floating unit pipeline preparation stages, the memory operands
   for floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
30491 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
30494 loadcost
= TARGET_ATHLON
? 2 : 0;
30496 if (cost
>= loadcost
)
30503 case PROCESSOR_CORE2
:
30504 case PROCESSOR_NEHALEM
:
30505 case PROCESSOR_SANDYBRIDGE
:
30506 case PROCESSOR_HASWELL
:
/* The stack engine allows push and pop instructions to execute in parallel.  */
30508 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
30509 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
30512 memory
= get_attr_memory (insn
);
30514 /* Show ability of reorder buffer to hide latency of load by executing
30515 in parallel with previous instruction in case
30516 previous instruction is not needed to compute the address. */
30517 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
30518 && !ix86_agi_dependent (dep_insn
, insn
))
30527 case PROCESSOR_SILVERMONT
:
30528 case PROCESSOR_KNL
:
30529 case PROCESSOR_INTEL
:
30530 if (!reload_completed
)
30533 /* Increase cost of integer loads. */
30534 memory
= get_attr_memory (dep_insn
);
30535 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
30537 enum attr_unit unit
= get_attr_unit (dep_insn
);
30538 if (unit
== UNIT_INTEGER
&& cost
== 1)
30540 if (memory
== MEMORY_LOAD
)
30544 /* Increase cost of ld/st for short int types only
30545 because of store forwarding issue. */
30546 rtx set
= single_set (dep_insn
);
30547 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
30548 || GET_MODE (SET_DEST (set
)) == HImode
))
30550 /* Increase cost of store/load insn if exact
30551 dependence exists and it is load insn. */
30552 enum attr_memory insn_memory
= get_attr_memory (insn
);
30553 if (insn_memory
== MEMORY_LOAD
30554 && exact_store_load_dependency (dep_insn
, insn
))
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
30573 ia32_multipass_dfa_lookahead (void)
30577 case PROCESSOR_PENTIUM
:
30578 case PROCESSOR_LAKEMONT
:
30581 case PROCESSOR_PENTIUMPRO
:
30585 case PROCESSOR_BDVER1
:
30586 case PROCESSOR_BDVER2
:
30587 case PROCESSOR_BDVER3
:
30588 case PROCESSOR_BDVER4
:
30589 /* We use lookahead value 4 for BD both before and after reload
30590 schedules. Plan is to have value 8 included for O3. */
30593 case PROCESSOR_CORE2
:
30594 case PROCESSOR_NEHALEM
:
30595 case PROCESSOR_SANDYBRIDGE
:
30596 case PROCESSOR_HASWELL
:
30597 case PROCESSOR_BONNELL
:
30598 case PROCESSOR_SILVERMONT
:
30599 case PROCESSOR_KNL
:
30600 case PROCESSOR_INTEL
:
30601 /* Generally, we want haifa-sched:max_issue() to look ahead as far
30602 as many instructions can be executed on a cycle, i.e.,
30603 issue_rate. I wonder why tuning for many CPUs does not do this. */
30604 if (reload_completed
)
30605 return ix86_issue_rate ();
30606 /* Don't use lookahead for pre-reload schedule to save compile time. */
/* Return true if target platform supports macro-fusion.  */

static bool
ix86_macro_fusion_p ()
{
  return TARGET_FUSE_CMP_AND_BRANCH;
}
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to the
   "Intel Architectures Optimization Reference Manual".  */
30627 ix86_macro_fusion_pair_p (rtx_insn
*condgen
, rtx_insn
*condjmp
)
30630 enum rtx_code ccode
;
30631 rtx compare_set
= NULL_RTX
, test_if
, cond
;
30632 rtx alu_set
= NULL_RTX
, addr
= NULL_RTX
;
30634 if (!any_condjump_p (condjmp
))
30637 unsigned int condreg1
, condreg2
;
30639 ix86_fixed_condition_code_regs (&condreg1
, &condreg2
);
30640 cc_reg_1
= gen_rtx_REG (CCmode
, condreg1
);
30641 if (!reg_referenced_p (cc_reg_1
, PATTERN (condjmp
))
30643 || !modified_in_p (cc_reg_1
, condgen
))
30646 if (get_attr_type (condgen
) != TYPE_TEST
30647 && get_attr_type (condgen
) != TYPE_ICMP
30648 && get_attr_type (condgen
) != TYPE_INCDEC
30649 && get_attr_type (condgen
) != TYPE_ALU
)
30652 compare_set
= single_set (condgen
);
30653 if (compare_set
== NULL_RTX
30654 && !TARGET_FUSE_ALU_AND_BRANCH
)
30657 if (compare_set
== NULL_RTX
)
30660 rtx pat
= PATTERN (condgen
);
30661 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
30662 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
30664 rtx set_src
= SET_SRC (XVECEXP (pat
, 0, i
));
30665 if (GET_CODE (set_src
) == COMPARE
)
30666 compare_set
= XVECEXP (pat
, 0, i
);
30668 alu_set
= XVECEXP (pat
, 0, i
);
30671 if (compare_set
== NULL_RTX
)
30673 src
= SET_SRC (compare_set
);
30674 if (GET_CODE (src
) != COMPARE
)
/* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
   supported.  */
30679 if ((MEM_P (XEXP (src
, 0))
30680 && CONST_INT_P (XEXP (src
, 1)))
30681 || (MEM_P (XEXP (src
, 1))
30682 && CONST_INT_P (XEXP (src
, 0))))
30685 /* No fusion for RIP-relative address. */
30686 if (MEM_P (XEXP (src
, 0)))
30687 addr
= XEXP (XEXP (src
, 0), 0);
30688 else if (MEM_P (XEXP (src
, 1)))
30689 addr
= XEXP (XEXP (src
, 1), 0);
30692 ix86_address parts
;
30693 int ok
= ix86_decompose_address (addr
, &parts
);
30696 if (rip_relative_addr_p (&parts
))
30700 test_if
= SET_SRC (pc_set (condjmp
));
30701 cond
= XEXP (test_if
, 0);
30702 ccode
= GET_CODE (cond
);
30703 /* Check whether conditional jump use Sign or Overflow Flags. */
30704 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
30711 /* Return true for TYPE_TEST and TYPE_ICMP. */
30712 if (get_attr_type (condgen
) == TYPE_TEST
30713 || get_attr_type (condgen
) == TYPE_ICMP
)
/* What follows is the macro-fusion case for alu + jmp.  */
30717 if (!TARGET_FUSE_ALU_AND_BRANCH
|| !alu_set
)
30720 /* No fusion for alu op with memory destination operand. */
30721 dest
= SET_DEST (alu_set
);
/* Macro-fusion for inc/dec + unsigned conditional jump is not
   supported.  */
30727 if (get_attr_type (condgen
) == TYPE_INCDEC
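/* Illustration only: a pair that the checks above accept for macro-fusion is

       cmpl  $1, %eax
       jne   .Llabel

   while a compare with a RIP-relative or MEM-IMM operand, or an inc/dec
   paired with a carry-flag-reading (unsigned) jump such as ja/jb, is
   rejected.  */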
/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there exists exactly one producer of an independent IMUL instruction
       in the ready list.
   Return the index of the IMUL producer if it was found, and -1 otherwise.  */
30744 do_reorder_for_imul (rtx_insn
**ready
, int n_ready
)
30747 rtx set
, insn1
, insn2
;
30748 sd_iterator_def sd_it
;
30753 if (!TARGET_BONNELL
)
30756 /* Check that IMUL instruction is on the top of ready list. */
30757 insn
= ready
[n_ready
- 1];
30758 set
= single_set (insn
);
30761 if (!(GET_CODE (SET_SRC (set
)) == MULT
30762 && GET_MODE (SET_SRC (set
)) == SImode
))
30765 /* Search for producer of independent IMUL instruction. */
30766 for (i
= n_ready
- 2; i
>= 0; i
--)
30769 if (!NONDEBUG_INSN_P (insn
))
30771 /* Skip IMUL instruction. */
30772 insn2
= PATTERN (insn
);
30773 if (GET_CODE (insn2
) == PARALLEL
)
30774 insn2
= XVECEXP (insn2
, 0, 0);
30775 if (GET_CODE (insn2
) == SET
30776 && GET_CODE (SET_SRC (insn2
)) == MULT
30777 && GET_MODE (SET_SRC (insn2
)) == SImode
)
30780 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
30783 con
= DEP_CON (dep
);
30784 if (!NONDEBUG_INSN_P (con
))
30786 insn1
= PATTERN (con
);
30787 if (GET_CODE (insn1
) == PARALLEL
)
30788 insn1
= XVECEXP (insn1
, 0, 0);
30790 if (GET_CODE (insn1
) == SET
30791 && GET_CODE (SET_SRC (insn1
)) == MULT
30792 && GET_MODE (SET_SRC (insn1
)) == SImode
)
30794 sd_iterator_def sd_it1
;
30796 /* Check if there is no other dependee for IMUL. */
30798 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
30801 pro
= DEP_PRO (dep1
);
30802 if (!NONDEBUG_INSN_P (pro
))
30817 /* Try to find the best candidate on the top of ready list if two insns
30818 have the same priority - candidate is best if its dependees were
30819 scheduled earlier. Applied for Silvermont only.
30820 Return true if top 2 insns must be interchanged. */
30822 swap_top_of_ready_list (rtx_insn
**ready
, int n_ready
)
30824 rtx_insn
*top
= ready
[n_ready
- 1];
30825 rtx_insn
*next
= ready
[n_ready
- 2];
30827 sd_iterator_def sd_it
;
30831 #define INSN_TICK(INSN) (HID (INSN)->tick)
30833 if (!TARGET_SILVERMONT
&& !TARGET_INTEL
)
30836 if (!NONDEBUG_INSN_P (top
))
30838 if (!NONJUMP_INSN_P (top
))
30840 if (!NONDEBUG_INSN_P (next
))
30842 if (!NONJUMP_INSN_P (next
))
30844 set
= single_set (top
);
30847 set
= single_set (next
);
30851 if (INSN_PRIORITY_KNOWN (top
) && INSN_PRIORITY_KNOWN (next
))
30853 if (INSN_PRIORITY (top
) != INSN_PRIORITY (next
))
/* Determine the winner more precisely.  */
30856 FOR_EACH_DEP (top
, SD_LIST_RES_BACK
, sd_it
, dep
)
30859 pro
= DEP_PRO (dep
);
30860 if (!NONDEBUG_INSN_P (pro
))
30862 if (INSN_TICK (pro
) > clock1
)
30863 clock1
= INSN_TICK (pro
);
30865 FOR_EACH_DEP (next
, SD_LIST_RES_BACK
, sd_it
, dep
)
30868 pro
= DEP_PRO (dep
);
30869 if (!NONDEBUG_INSN_P (pro
))
30871 if (INSN_TICK (pro
) > clock2
)
30872 clock2
= INSN_TICK (pro
);
30875 if (clock1
== clock2
)
30877 /* Determine winner - load must win. */
30878 enum attr_memory memory1
, memory2
;
30879 memory1
= get_attr_memory (top
);
30880 memory2
= get_attr_memory (next
);
30881 if (memory2
== MEMORY_LOAD
&& memory1
!= MEMORY_LOAD
)
30884 return (bool) (clock2
< clock1
);
/* Perform possible reordering of the ready list for Atom/Silvermont only.
   Return the issue rate.  */
30893 ix86_sched_reorder (FILE *dump
, int sched_verbose
, rtx_insn
**ready
,
30894 int *pn_ready
, int clock_var
)
30896 int issue_rate
= -1;
30897 int n_ready
= *pn_ready
;
30902 /* Set up issue rate. */
30903 issue_rate
= ix86_issue_rate ();
/* Do reordering for BONNELL/SILVERMONT only.  */
30906 if (!TARGET_BONNELL
&& !TARGET_SILVERMONT
&& !TARGET_INTEL
)
30909 /* Nothing to do if ready list contains only 1 instruction. */
/* Do reordering for the post-reload scheduler only.  */
30914 if (!reload_completed
)
30917 if ((index
= do_reorder_for_imul (ready
, n_ready
)) >= 0)
30919 if (sched_verbose
> 1)
30920 fprintf (dump
, ";;\tatom sched_reorder: put %d insn on top\n",
30921 INSN_UID (ready
[index
]));
30923 /* Put IMUL producer (ready[index]) at the top of ready list. */
30924 insn
= ready
[index
];
30925 for (i
= index
; i
< n_ready
- 1; i
++)
30926 ready
[i
] = ready
[i
+ 1];
30927 ready
[n_ready
- 1] = insn
;
30931 /* Skip selective scheduling since HID is not populated in it. */
30934 && swap_top_of_ready_list (ready
, n_ready
))
30936 if (sched_verbose
> 1)
30937 fprintf (dump
, ";;\tslm sched_reorder: swap %d and %d insns\n",
30938 INSN_UID (ready
[n_ready
- 1]), INSN_UID (ready
[n_ready
- 2]));
30939 /* Swap 2 top elements of ready list. */
30940 insn
= ready
[n_ready
- 1];
30941 ready
[n_ready
- 1] = ready
[n_ready
- 2];
30942 ready
[n_ready
- 2] = insn
;
static bool ix86_class_likely_spilled_p (reg_class_t);

/* Return true if the LHS of INSN is a HW function argument register; set
   *IS_SPILLED to true if it is a likely-spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore them.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it a likely-spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely-spilled HW register.  Return the first
   argument if at least one dependence was added, or NULL otherwise.  */
30984 add_parameter_dependencies (rtx_insn
*call
, rtx_insn
*head
)
30987 rtx_insn
*last
= call
;
30988 rtx_insn
*first_arg
= NULL
;
30989 bool is_spilled
= false;
30991 head
= PREV_INSN (head
);
30993 /* Find nearest to call argument passing instruction. */
30996 last
= PREV_INSN (last
);
30999 if (!NONDEBUG_INSN_P (last
))
31001 if (insn_is_function_arg (last
, &is_spilled
))
31009 insn
= PREV_INSN (last
);
31010 if (!INSN_P (insn
))
31014 if (!NONDEBUG_INSN_P (insn
))
31019 if (insn_is_function_arg (insn
, &is_spilled
))
/* Add an output dependence between two function arguments if the chain
   of output arguments contains likely-spilled HW registers.  */
31024 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
31025 first_arg
= last
= insn
;
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
   code motion.  */
31038 avoid_func_arg_motion (rtx_insn
*first_arg
, rtx_insn
*insn
)
31043 /* Add anti dependencies for bounds stores. */
31045 && GET_CODE (PATTERN (insn
)) == PARALLEL
31046 && GET_CODE (XVECEXP (PATTERN (insn
), 0, 0)) == UNSPEC
31047 && XINT (XVECEXP (PATTERN (insn
), 0, 0), 1) == UNSPEC_BNDSTX
)
31049 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
31053 set
= single_set (insn
);
31056 tmp
= SET_DEST (set
);
31059 /* Add output dependency to the first function argument. */
31060 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
31063 /* Add anti dependency. */
31064 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
31070 add_dependee_for_func_arg (rtx_insn
*arg
, basic_block bb
)
31072 rtx_insn
*insn
= BB_END (bb
);
31076 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
31078 rtx set
= single_set (insn
);
31081 avoid_func_arg_motion (arg
, insn
);
31085 if (insn
== BB_HEAD (bb
))
31087 insn
= PREV_INSN (insn
);
31091 /* Hook for pre-reload schedule - avoid motion of function arguments
31092 passed in likely spilled HW registers. */
31094 ix86_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
31097 rtx_insn
*first_arg
= NULL
;
31098 if (reload_completed
)
31100 while (head
!= tail
&& DEBUG_INSN_P (head
))
31101 head
= NEXT_INSN (head
);
31102 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
31103 if (INSN_P (insn
) && CALL_P (insn
))
31105 first_arg
= add_parameter_dependencies (insn
, head
);
/* Add a dependee for the first argument to predecessors, but only if the
   region contains more than one block.  */
31110 basic_block bb
= BLOCK_FOR_INSN (insn
);
31111 int rgn
= CONTAINING_RGN (bb
->index
);
31112 int nr_blks
= RGN_NR_BLOCKS (rgn
);
31113 /* Skip trivial regions and region head blocks that can have
31114 predecessors outside of region. */
31115 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
31120 /* Regions are SCCs with the exception of selective
31121 scheduling with pipelining of outer blocks enabled.
31122 So also check that immediate predecessors of a non-head
31123 block are in the same region. */
31124 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
/* Avoid creating loop-carried dependencies by using the
   topological ordering in the region.  */
31128 if (rgn
== CONTAINING_RGN (e
->src
->index
)
31129 && BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
31130 add_dependee_for_func_arg (first_arg
, e
->src
);
31138 else if (first_arg
)
31139 avoid_func_arg_motion (first_arg
, insn
);
/* Hook for the pre-reload schedule - set the priority of moves from likely
   spilled HW registers to maximum, to schedule them as soon as possible.
   These are moves from function argument registers at the top of the function
   entry and moves from function return value registers after a call.  */
static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
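/* Illustration only (not part of the original source, hypothetical helper):
   the filter further below masks an insn out of the ready list when it cannot
   be decoded on the current cycle, roughly as follows.  */
#if 0
static bool
core2i7_insn_fits_decoder_p (int insn_size, int block_len, int block_n_insns,
			     bool first_cycle_insn_p)
{
  /* Only the first (D0) decoder handles long insns.  */
  if (!first_cycle_insn_p
      && insn_size > core2i7_secondary_decoder_max_insn_size)
    return false;
  /* The insn must fit in the 16-byte ifetch block ...  */
  if (block_len + insn_size > core2i7_ifetch_block_size)
    return false;
  /* ... and the decoders handle at most 6 insns per cycle.  */
  if (block_n_insns + 1 > core2i7_ifetch_block_max_insns)
    return false;
  return true;
}
#endif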
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx_insn *);
31225 /* Filter out insns from ready_try that the core will not be able to issue
31226 on current cycle due to decoder. */
31228 core2i7_first_cycle_multipass_filter_ready_try
31229 (const_ix86_first_cycle_multipass_data_t data
,
31230 signed char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
31237 if (ready_try
[n_ready
])
31240 insn
= get_ready_element (n_ready
);
31241 insn_size
= min_insn_size (insn
);
if (/* If this insn is too long for a secondary decoder ... */
31244 (!first_cycle_insn_p
31245 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
31246 /* ... or it would not fit into the ifetch block ... */
31247 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
31248 /* ... or the decoder is full already ... */
31249 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
31250 /* ... mask the insn out. */
31252 ready_try
[n_ready
] = 1;
31254 if (data
->ready_try_change
)
31255 bitmap_set_bit (data
->ready_try_change
, n_ready
);
31260 /* Prepare for a new round of multipass lookahead scheduling. */
31262 core2i7_first_cycle_multipass_begin (void *_data
,
31263 signed char *ready_try
, int n_ready
,
31264 bool first_cycle_insn_p
)
31266 ix86_first_cycle_multipass_data_t data
31267 = (ix86_first_cycle_multipass_data_t
) _data
;
31268 const_ix86_first_cycle_multipass_data_t prev_data
31269 = ix86_first_cycle_multipass_data
;
31271 /* Restore the state from the end of the previous round. */
31272 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
31273 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
31275 /* Filter instructions that cannot be issued on current cycle due to
31276 decoder restrictions. */
31277 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
31278 first_cycle_insn_p
);
31281 /* INSN is being issued in current solution. Account for its impact on
31282 the decoder model. */
31284 core2i7_first_cycle_multipass_issue (void *_data
,
31285 signed char *ready_try
, int n_ready
,
31286 rtx_insn
*insn
, const void *_prev_data
)
31288 ix86_first_cycle_multipass_data_t data
31289 = (ix86_first_cycle_multipass_data_t
) _data
;
31290 const_ix86_first_cycle_multipass_data_t prev_data
31291 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
31293 int insn_size
= min_insn_size (insn
);
31295 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
31296 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
31297 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
31298 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
31300 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
31301 if (!data
->ready_try_change
)
31303 data
->ready_try_change
= sbitmap_alloc (n_ready
);
31304 data
->ready_try_change_size
= n_ready
;
31306 else if (data
->ready_try_change_size
< n_ready
)
31308 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
31310 data
->ready_try_change_size
= n_ready
;
31312 bitmap_clear (data
->ready_try_change
);
31314 /* Filter out insns from ready_try that the core will not be able to issue
31315 on current cycle due to decoder. */
31316 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
31320 /* Revert the effect on ready_try. */
31322 core2i7_first_cycle_multipass_backtrack (const void *_data
,
31323 signed char *ready_try
,
31324 int n_ready ATTRIBUTE_UNUSED
)
31326 const_ix86_first_cycle_multipass_data_t data
31327 = (const_ix86_first_cycle_multipass_data_t
) _data
;
31328 unsigned int i
= 0;
31329 sbitmap_iterator sbi
;
31331 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
31332 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
31338 /* Save the result of multipass lookahead scheduling for the next round. */
31340 core2i7_first_cycle_multipass_end (const void *_data
)
31342 const_ix86_first_cycle_multipass_data_t data
31343 = (const_ix86_first_cycle_multipass_data_t
) _data
;
31344 ix86_first_cycle_multipass_data_t next_data
31345 = ix86_first_cycle_multipass_data
;
31349 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
31350 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
31354 /* Deallocate target data. */
31356 core2i7_first_cycle_multipass_fini (void *_data
)
31358 ix86_first_cycle_multipass_data_t data
31359 = (ix86_first_cycle_multipass_data_t
) _data
;
31361 if (data
->ready_try_change
)
31363 sbitmap_free (data
->ready_try_change
);
31364 data
->ready_try_change
= NULL
;
31365 data
->ready_try_change_size
= 0;
31369 /* Prepare for scheduling pass. */
31371 ix86_sched_init_global (FILE *, int, int)
31373 /* Install scheduling hooks for current CPU. Some of these hooks are used
31374 in time-critical parts of the scheduler, so we only set them up when
31375 they are actually used. */
31378 case PROCESSOR_CORE2
:
31379 case PROCESSOR_NEHALEM
:
31380 case PROCESSOR_SANDYBRIDGE
:
31381 case PROCESSOR_HASWELL
:
31382 /* Do not perform multipass scheduling for pre-reload schedule
31383 to save compile time. */
31384 if (reload_completed
)
31386 targetm
.sched
.dfa_post_advance_cycle
31387 = core2i7_dfa_post_advance_cycle
;
31388 targetm
.sched
.first_cycle_multipass_init
31389 = core2i7_first_cycle_multipass_init
;
31390 targetm
.sched
.first_cycle_multipass_begin
31391 = core2i7_first_cycle_multipass_begin
;
31392 targetm
.sched
.first_cycle_multipass_issue
31393 = core2i7_first_cycle_multipass_issue
;
31394 targetm
.sched
.first_cycle_multipass_backtrack
31395 = core2i7_first_cycle_multipass_backtrack
;
31396 targetm
.sched
.first_cycle_multipass_end
31397 = core2i7_first_cycle_multipass_end
;
31398 targetm
.sched
.first_cycle_multipass_fini
31399 = core2i7_first_cycle_multipass_fini
;
31401 /* Set decoder parameters. */
31402 core2i7_secondary_decoder_max_insn_size
= 8;
31403 core2i7_ifetch_block_size
= 16;
31404 core2i7_ifetch_block_max_insns
= 6;
31407 /* Fall through. */
31409 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
31410 targetm
.sched
.first_cycle_multipass_init
= NULL
;
31411 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
31412 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
31413 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
31414 targetm
.sched
.first_cycle_multipass_end
= NULL
;
31415 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a variable for the Intel MCU psABI.  TYPE is
   the data type, and ALIGN is the alignment that the object would
   ordinarily have.  */

static int
iamcu_alignment (tree type, int align)
{
  machine_mode mode;

  if (align < 32 || TYPE_USER_ALIGN (type))
    return align;

  /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
     bytes.  */
  mode = TYPE_MODE (strip_array_types (type));
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_INT:
    case MODE_COMPLEX_INT:
    case MODE_COMPLEX_FLOAT:
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      return 32;
    default:
      return align;
    }
}
31473 /* Compute the alignment for a static variable.
31474 TYPE is the data type, and ALIGN is the alignment that
31475 the object would ordinarily have. The value of this function is used
31476 instead of that alignment to align the object. */
31479 ix86_data_alignment (tree type
, int align
, bool opt
)
31481 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
31482 for symbols from other compilation units or symbols that don't need
31483 to bind locally. In order to preserve some ABI compatibility with
31484 those compilers, ensure we don't decrease alignment from what we
31487 int max_align_compat
= MIN (256, MAX_OFILE_ALIGNMENT
);
31489 /* A data structure, equal or greater than the size of a cache line
31490 (64 bytes in the Pentium 4 and other recent Intel processors, including
31491 processors based on Intel Core microarchitecture) should be aligned
31492 so that its base address is a multiple of a cache line size. */
31495 = MIN ((unsigned) ix86_tune_cost
->prefetch_block
* 8, MAX_OFILE_ALIGNMENT
);
31497 if (max_align
< BITS_PER_WORD
)
31498 max_align
= BITS_PER_WORD
;
31500 switch (ix86_align_data_type
)
31502 case ix86_align_data_type_abi
: opt
= false; break;
31503 case ix86_align_data_type_compat
: max_align
= BITS_PER_WORD
; break;
31504 case ix86_align_data_type_cacheline
: break;
31508 align
= iamcu_alignment (type
, align
);
31511 && AGGREGATE_TYPE_P (type
)
31512 && TYPE_SIZE (type
)
31513 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
31515 if (wi::geu_p (TYPE_SIZE (type
), max_align_compat
)
31516 && align
< max_align_compat
)
31517 align
= max_align_compat
;
31518 if (wi::geu_p (TYPE_SIZE (type
), max_align
)
31519 && align
< max_align
)
31523 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
31524 to 16byte boundary. */
31527 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
31528 && TYPE_SIZE (type
)
31529 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
31530 && wi::geu_p (TYPE_SIZE (type
), 128)
31538 if (TREE_CODE (type
) == ARRAY_TYPE
)
31540 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
31542 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
31545 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
31548 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
31550 if ((TYPE_MODE (type
) == XCmode
31551 || TYPE_MODE (type
) == TCmode
) && align
< 128)
31554 else if ((TREE_CODE (type
) == RECORD_TYPE
31555 || TREE_CODE (type
) == UNION_TYPE
31556 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
31557 && TYPE_FIELDS (type
))
31559 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
31561 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
31564 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
31565 || TREE_CODE (type
) == INTEGER_TYPE
)
31567 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
31569 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
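/* Illustration only (not part of the original source): the x86-64 ABI rule
   applied above means, for example, that

       static char big_buf[32];     gets at least 16-byte alignment
       static char small_buf[8];    keeps its natural alignment

   which is what makes aligned SSE accesses to such arrays safe.  */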
31576 /* Compute the alignment for a local variable or a stack slot.  EXP is
31577    the data type or decl itself, MODE is the widest mode available and
31578    ALIGN is the alignment that the object would ordinarily have.  The
31579    value of this macro is used instead of that alignment to align the
31583 ix86_local_alignment (tree exp, machine_mode mode,
31584                       unsigned int align)
31588   if (exp && DECL_P (exp))
31590       type = TREE_TYPE (exp);
31599   /* Don't do dynamic stack realignment for long long objects with
31600      -mpreferred-stack-boundary=2.  */
31603       && ix86_preferred_stack_boundary < 64
31604       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
31605       && (!type || !TYPE_USER_ALIGN (type))
31606       && (!decl || !DECL_USER_ALIGN (decl)))
31609   /* If TYPE is NULL, we are allocating a stack slot for caller-save
31610      register in MODE.  We will return the largest alignment of XF
31614   if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
31615     align = GET_MODE_ALIGNMENT (DFmode);
31619   /* Don't increase alignment for Intel MCU psABI.  */
31623   /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
31624      to 16byte boundary.  Exact wording is:
31626      An array uses the same alignment as its elements, except that a local or
31627      global array variable of length at least 16 bytes or
31628      a C99 variable-length array variable always has alignment of at least 16 bytes.
31630      This was added to allow use of aligned SSE instructions at arrays.  This
31631      rule is meant for static storage (where compiler can not do the analysis
31632      by itself).  We follow it for automatic variables only when convenient.
31633      We fully control everything in the function compiled and functions from
31634      other unit can not rely on the alignment.
31636      Exclude va_list type.  It is the common case of local array where
31637      we can not benefit from the alignment.
31639      TODO: Probably one should optimize for size only when var is not escaping.  */
31640   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
31643       if (AGGREGATE_TYPE_P (type)
31644           && (va_list_type_node == NULL_TREE
31645               || (TYPE_MAIN_VARIANT (type)
31646                   != TYPE_MAIN_VARIANT (va_list_type_node)))
31647           && TYPE_SIZE (type)
31648           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
31649           && wi::geu_p (TYPE_SIZE (type), 128)
31653           if (TREE_CODE (type) == ARRAY_TYPE)
31655               if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
31657               if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
31660           else if (TREE_CODE (type) == COMPLEX_TYPE)
31662               if (TYPE_MODE (type) == DCmode && align < 64)
31664               if ((TYPE_MODE (type) == XCmode
31665                    || TYPE_MODE (type) == TCmode) && align < 128)
31668           else if ((TREE_CODE (type) == RECORD_TYPE
31669                     || TREE_CODE (type) == UNION_TYPE
31670                     || TREE_CODE (type) == QUAL_UNION_TYPE)
31671                    && TYPE_FIELDS (type))
31673               if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
31675               if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
31678           else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
31679                    || TREE_CODE (type) == INTEGER_TYPE)
31682               if (TYPE_MODE (type) == DFmode && align < 64)
31684               if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
31690 /* Compute the minimum required alignment for dynamic stack realignment
31691    purposes for a local variable, parameter or a stack slot.  EXP is
31692    the data type or decl itself, MODE is its mode and ALIGN is the
31693    alignment that the object would ordinarily have.  */
31696 ix86_minimum_alignment (tree exp, machine_mode mode,
31697                         unsigned int align)
31701   if (exp && DECL_P (exp))
31703       type = TREE_TYPE (exp);
31712   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
31715   /* Don't do dynamic stack realignment for long long objects with
31716      -mpreferred-stack-boundary=2.  */
31717   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
31718       && (!type || !TYPE_USER_ALIGN (type))
31719       && (!decl || !DECL_USER_ALIGN (decl)))
31721       gcc_checking_assert (!TARGET_STV);
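/* Illustrative sketch (an assumption, not from the original i386.c): with
   -m32 -mpreferred-stack-boundary=2 the DImode local below is exactly the
   case the check above targets, so no dynamic stack realignment should be
   forced just on its account.  */
void
pref_stack_boundary_example (void)
{
  long long x = 0;                    /* DImode, no user-specified alignment */
  __asm__ volatile ("" : "+m" (x));   /* keep X in memory */
}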
31728 /* Find a location for the static chain incoming to a nested function.
31729    This is a register, unless all free registers are used by arguments.  */
31732 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
31736   /* While this function won't be called by the middle-end when a static
31737      chain isn't needed, it's also used throughout the backend so it's
31738      easiest to keep this check centralized.  */
31739   if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
31744   /* We always use R10 in 64-bit mode.  */
31749       const_tree fntype, fndecl;
31752       /* By default in 32-bit mode we use ECX to pass the static chain.  */
31755       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
31757           fntype = TREE_TYPE (fndecl_or_type);
31758           fndecl = fndecl_or_type;
31762           fntype = fndecl_or_type;
31766       ccvt = ix86_get_callcvt (fntype);
31767       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
31769           /* Fastcall functions use ecx/edx for arguments, which leaves
31770              us with EAX for the static chain.
31771              Thiscall functions use ecx for arguments, which also
31772              leaves us with EAX for the static chain.  */
31775       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
31777           /* Thiscall functions use ecx for arguments, which leaves
31778              us with EAX and EDX for the static chain.
31779              We are using for abi-compatibility EAX.  */
31782       else if (ix86_function_regparm (fntype, fndecl) == 3)
31784           /* For regparm 3, we have no free call-clobbered registers in
31785              which to store the static chain.  In order to implement this,
31786              we have the trampoline push the static chain to the stack.
31787              However, we can't push a value below the return address when
31788              we call the nested function directly, so we have to use an
31789              alternate entry point.  For this we use ESI, and have the
31790              alternate entry point push ESI, so that things appear the
31791              same once we're executing the nested function.  */
31794           if (fndecl == current_function_decl
31795               && !ix86_static_chain_on_stack)
31797               gcc_assert (!reload_completed);
31798               ix86_static_chain_on_stack = true;
31800           return gen_frame_mem (SImode,
31801                                 plus_constant (Pmode,
31802                                                arg_pointer_rtx, -8));
31808   return gen_rtx_REG (Pmode, regno);
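/* Illustrative sketch (not from the original i386.c): a GNU C nested
   function whose address escapes.  Its body reads a local of the parent
   through the static chain register chosen above, and taking its address
   forces the trampoline initialized by the next function below.  */
static int
nested_fn_example (int x)
{
  int add_x (int y) { return x + y; }  /* reads X via the static chain */
  int (*fp) (int) = add_x;             /* address taken => trampoline  */
  return fp (1);
}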
31811 /* Emit RTL insns to initialize the variable parts of a trampoline.
31812    FNDECL is the decl of the target address; M_TRAMP is a MEM for
31813    the trampoline, and CHAIN_VALUE is an RTX for the static chain
31814    to be passed to the target function.  */
31817 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
31823   fnaddr = XEXP (DECL_RTL (fndecl), 0);
31829       /* Load the function address to r11.  Try to load address using
31830          the shorter movl instead of movabs.  We may want to support
31831          movq for kernel mode, but kernel does not use trampolines at
31832          the moment.  FNADDR is a 32bit address and may not be in
31833          DImode when ptr_mode == SImode.  Always use movl in this
31835       if (ptr_mode == SImode
31836           || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
31838             fnaddr = copy_addr_to_reg (fnaddr);
31840           mem = adjust_address (m_tramp, HImode, offset);
31841           emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
31843           mem = adjust_address (m_tramp, SImode, offset + 2);
31844           emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
31849           mem = adjust_address (m_tramp, HImode, offset);
31850           emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
31852           mem = adjust_address (m_tramp, DImode, offset + 2);
31853           emit_move_insn (mem, fnaddr);
31857       /* Load static chain using movabs to r10.  Use the shorter movl
31858          instead of movabs when ptr_mode == SImode.  */
31859       if (ptr_mode == SImode)
31870       mem = adjust_address (m_tramp, HImode, offset);
31871       emit_move_insn (mem, gen_int_mode (opcode, HImode));
31873       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
31874       emit_move_insn (mem, chain_value);
31877       /* Jump to r11; the last (unused) byte is a nop, only there to
31878          pad the write out to a single 32-bit store.  */
31879       mem = adjust_address (m_tramp, SImode, offset);
31880       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
31887       /* Depending on the static chain location, either load a register
31888          with a constant, or push the constant to the stack.  All of the
31889          instructions are the same size.  */
31890       chain = ix86_static_chain (fndecl, true);
31893       switch (REGNO (chain))
31896           opcode = 0xb8; break;
31898           opcode = 0xb9; break;
31900           gcc_unreachable ();
31906           mem = adjust_address (m_tramp, QImode, offset);
31907           emit_move_insn (mem, gen_int_mode (opcode, QImode));
31909           mem = adjust_address (m_tramp, SImode, offset + 1);
31910           emit_move_insn (mem, chain_value);
31913           mem = adjust_address (m_tramp, QImode, offset);
31914           emit_move_insn (mem, gen_int_mode (0xe9, QImode));
31916           mem = adjust_address (m_tramp, SImode, offset + 1);
31918           /* Compute offset from the end of the jmp to the target function.
31919              In the case in which the trampoline stores the static chain on
31920              the stack, we need to skip the first insn which pushes the
31921              (call-saved) register static chain; this push is 1 byte.  */
31923           disp = expand_binop (SImode, sub_optab, fnaddr,
31924                                plus_constant (Pmode, XEXP (m_tramp, 0),
31925                                               offset - (MEM_P (chain) ? 1 : 0)),
31926                                NULL_RTX, 1, OPTAB_DIRECT);
31927           emit_move_insn (mem, disp);
31930   gcc_assert (offset <= TRAMPOLINE_SIZE);
31932 #ifdef HAVE_ENABLE_EXECUTE_STACK
31933 #ifdef CHECK_EXECUTE_STACK_ENABLED
31934   if (CHECK_EXECUTE_STACK_ENABLED)
31936     emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
31937                        LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
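/* Illustrative decoding (an interpretation added here, not text from the
   original i386.c) of the 64-bit trampoline words stored above, assuming
   the short movl form is usable for FNADDR:

     0xbb41      -> 41 bb imm32     movl   $fnaddr, %r11d
     0xbb49      -> 49 bb imm64     movabs $fnaddr, %r11    (wide form)
     0x90e3ff49  -> 49 ff e3 90     rex.W jmp *%r11 ; nop   (the nop pads the
                                     write to a single 32-bit store)

   The static chain value is written into %r10 the same way just before
   the jump.  */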
31942 ix86_allocate_stack_slots_for_args (void)
31944   /* Naked functions should not allocate stack slots for arguments.  */
31945   return !ix86_function_naked (current_function_decl);
31949 ix86_warn_func_return (tree decl)
31951   /* Naked functions are implemented entirely in assembly, including the
31952      return sequence, so suppress warnings about this.  */
31953   return !ix86_function_naked (decl);
31956 /* The following file contains several enumerations and data structures
31957    built from the definitions in i386-builtin-types.def.  */
31959 #include "i386-builtin-types.inc"
31961 /* Table for the ix86 builtin non-function types.  */
31962 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
31964 /* Retrieve an element from the above table, building some of
31965    the types lazily.  */
31968 ix86_get_builtin_type (enum ix86_builtin_type tcode)
31970   unsigned int index;
31973   gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
31975   type = ix86_builtin_type_tab[(int) tcode];
31979   gcc_assert (tcode > IX86_BT_LAST_PRIM);
31980   if (tcode <= IX86_BT_LAST_VECT)
31984       index = tcode - IX86_BT_LAST_PRIM - 1;
31985       itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
31986       mode = ix86_builtin_type_vect_mode[index];
31988       type = build_vector_type_for_mode (itype, mode);
31994       index = tcode - IX86_BT_LAST_VECT - 1;
31995       if (tcode <= IX86_BT_LAST_PTR)
31996         quals = TYPE_UNQUALIFIED;
31998         quals = TYPE_QUAL_CONST;
32000       itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
32001       if (quals != TYPE_UNQUALIFIED)
32002         itype = build_qualified_type (itype, quals);
32004       type = build_pointer_type (itype);
32007   ix86_builtin_type_tab[(int) tcode] = type;
32011 /* Table for the ix86 builtin function types.  */
32012 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
32014 /* Retrieve an element from the above table, building some of
32015    the types lazily.  */
32018 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
32022   gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
32024   type = ix86_builtin_func_type_tab[(int) tcode];
32028   if (tcode <= IX86_BT_LAST_FUNC)
32030       unsigned start = ix86_builtin_func_start[(int) tcode];
32031       unsigned after = ix86_builtin_func_start[(int) tcode + 1];
32032       tree rtype, atype, args = void_list_node;
32035       rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
32036       for (i = after - 1; i > start; --i)
32038           atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
32039           args = tree_cons (NULL, atype, args);
32042       type = build_function_type (rtype, args);
32046       unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
32047       enum ix86_builtin_func_type icode;
32049       icode = ix86_builtin_func_alias_base[index];
32050       type = ix86_get_builtin_func_type (icode);
32053   ix86_builtin_func_type_tab[(int) tcode] = type;
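/* Illustrative sketch (not from the original i386.c): the lazy memoization
   pattern shared by both getters above, reduced to its core.  CACHE_SIZE,
   my_cache and build_type are hypothetical stand-ins.  */
enum { CACHE_SIZE = 16 };
static tree my_cache[CACHE_SIZE];

static tree
get_cached_type (int code)
{
  tree t = my_cache[code];
  if (t == NULL_TREE)
    {
      t = build_type (code);    /* hypothetical constructor, run at most once */
      my_cache[code] = t;
    }
  return t;
}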
32058 /* Codes for all the SSE/MMX builtins. Builtins not mentioned in any
32059 bdesc_* arrays below should come first, then builtins for each bdesc_*
32060 array in ascending order, so that we can use direct array accesses. */
32063 IX86_BUILTIN_MASKMOVQ
,
32064 IX86_BUILTIN_LDMXCSR
,
32065 IX86_BUILTIN_STMXCSR
,
32066 IX86_BUILTIN_MASKMOVDQU
,
32067 IX86_BUILTIN_PSLLDQ128
,
32068 IX86_BUILTIN_CLFLUSH
,
32069 IX86_BUILTIN_MONITOR
,
32070 IX86_BUILTIN_MWAIT
,
32071 IX86_BUILTIN_CLZERO
,
32072 IX86_BUILTIN_VEC_INIT_V2SI
,
32073 IX86_BUILTIN_VEC_INIT_V4HI
,
32074 IX86_BUILTIN_VEC_INIT_V8QI
,
32075 IX86_BUILTIN_VEC_EXT_V2DF
,
32076 IX86_BUILTIN_VEC_EXT_V2DI
,
32077 IX86_BUILTIN_VEC_EXT_V4SF
,
32078 IX86_BUILTIN_VEC_EXT_V4SI
,
32079 IX86_BUILTIN_VEC_EXT_V8HI
,
32080 IX86_BUILTIN_VEC_EXT_V2SI
,
32081 IX86_BUILTIN_VEC_EXT_V4HI
,
32082 IX86_BUILTIN_VEC_EXT_V16QI
,
32083 IX86_BUILTIN_VEC_SET_V2DI
,
32084 IX86_BUILTIN_VEC_SET_V4SF
,
32085 IX86_BUILTIN_VEC_SET_V4SI
,
32086 IX86_BUILTIN_VEC_SET_V8HI
,
32087 IX86_BUILTIN_VEC_SET_V4HI
,
32088 IX86_BUILTIN_VEC_SET_V16QI
,
32089 IX86_BUILTIN_GATHERSIV2DF
,
32090 IX86_BUILTIN_GATHERSIV4DF
,
32091 IX86_BUILTIN_GATHERDIV2DF
,
32092 IX86_BUILTIN_GATHERDIV4DF
,
32093 IX86_BUILTIN_GATHERSIV4SF
,
32094 IX86_BUILTIN_GATHERSIV8SF
,
32095 IX86_BUILTIN_GATHERDIV4SF
,
32096 IX86_BUILTIN_GATHERDIV8SF
,
32097 IX86_BUILTIN_GATHERSIV2DI
,
32098 IX86_BUILTIN_GATHERSIV4DI
,
32099 IX86_BUILTIN_GATHERDIV2DI
,
32100 IX86_BUILTIN_GATHERDIV4DI
,
32101 IX86_BUILTIN_GATHERSIV4SI
,
32102 IX86_BUILTIN_GATHERSIV8SI
,
32103 IX86_BUILTIN_GATHERDIV4SI
,
32104 IX86_BUILTIN_GATHERDIV8SI
,
32105 IX86_BUILTIN_VFMSUBSD3_MASK3
,
32106 IX86_BUILTIN_VFMSUBSS3_MASK3
,
32107 IX86_BUILTIN_GATHER3SIV8SF
,
32108 IX86_BUILTIN_GATHER3SIV4SF
,
32109 IX86_BUILTIN_GATHER3SIV4DF
,
32110 IX86_BUILTIN_GATHER3SIV2DF
,
32111 IX86_BUILTIN_GATHER3DIV8SF
,
32112 IX86_BUILTIN_GATHER3DIV4SF
,
32113 IX86_BUILTIN_GATHER3DIV4DF
,
32114 IX86_BUILTIN_GATHER3DIV2DF
,
32115 IX86_BUILTIN_GATHER3SIV8SI
,
32116 IX86_BUILTIN_GATHER3SIV4SI
,
32117 IX86_BUILTIN_GATHER3SIV4DI
,
32118 IX86_BUILTIN_GATHER3SIV2DI
,
32119 IX86_BUILTIN_GATHER3DIV8SI
,
32120 IX86_BUILTIN_GATHER3DIV4SI
,
32121 IX86_BUILTIN_GATHER3DIV4DI
,
32122 IX86_BUILTIN_GATHER3DIV2DI
,
32123 IX86_BUILTIN_SCATTERSIV8SF
,
32124 IX86_BUILTIN_SCATTERSIV4SF
,
32125 IX86_BUILTIN_SCATTERSIV4DF
,
32126 IX86_BUILTIN_SCATTERSIV2DF
,
32127 IX86_BUILTIN_SCATTERDIV8SF
,
32128 IX86_BUILTIN_SCATTERDIV4SF
,
32129 IX86_BUILTIN_SCATTERDIV4DF
,
32130 IX86_BUILTIN_SCATTERDIV2DF
,
32131 IX86_BUILTIN_SCATTERSIV8SI
,
32132 IX86_BUILTIN_SCATTERSIV4SI
,
32133 IX86_BUILTIN_SCATTERSIV4DI
,
32134 IX86_BUILTIN_SCATTERSIV2DI
,
32135 IX86_BUILTIN_SCATTERDIV8SI
,
32136 IX86_BUILTIN_SCATTERDIV4SI
,
32137 IX86_BUILTIN_SCATTERDIV4DI
,
32138 IX86_BUILTIN_SCATTERDIV2DI
,
32139 /* Alternate 4 and 8 element gather/scatter for the vectorizer
32140 where all operands are 32-byte or 64-byte wide respectively. */
32141 IX86_BUILTIN_GATHERALTSIV4DF
,
32142 IX86_BUILTIN_GATHERALTDIV8SF
,
32143 IX86_BUILTIN_GATHERALTSIV4DI
,
32144 IX86_BUILTIN_GATHERALTDIV8SI
,
32145 IX86_BUILTIN_GATHER3ALTDIV16SF
,
32146 IX86_BUILTIN_GATHER3ALTDIV16SI
,
32147 IX86_BUILTIN_GATHER3ALTSIV4DF
,
32148 IX86_BUILTIN_GATHER3ALTDIV8SF
,
32149 IX86_BUILTIN_GATHER3ALTSIV4DI
,
32150 IX86_BUILTIN_GATHER3ALTDIV8SI
,
32151 IX86_BUILTIN_GATHER3ALTSIV8DF
,
32152 IX86_BUILTIN_GATHER3ALTSIV8DI
,
32153 IX86_BUILTIN_GATHER3DIV16SF
,
32154 IX86_BUILTIN_GATHER3DIV16SI
,
32155 IX86_BUILTIN_GATHER3DIV8DF
,
32156 IX86_BUILTIN_GATHER3DIV8DI
,
32157 IX86_BUILTIN_GATHER3SIV16SF
,
32158 IX86_BUILTIN_GATHER3SIV16SI
,
32159 IX86_BUILTIN_GATHER3SIV8DF
,
32160 IX86_BUILTIN_GATHER3SIV8DI
,
32161 IX86_BUILTIN_SCATTERALTSIV8DF
,
32162 IX86_BUILTIN_SCATTERALTDIV16SF
,
32163 IX86_BUILTIN_SCATTERALTSIV8DI
,
32164 IX86_BUILTIN_SCATTERALTDIV16SI
,
32165 IX86_BUILTIN_SCATTERDIV16SF
,
32166 IX86_BUILTIN_SCATTERDIV16SI
,
32167 IX86_BUILTIN_SCATTERDIV8DF
,
32168 IX86_BUILTIN_SCATTERDIV8DI
,
32169 IX86_BUILTIN_SCATTERSIV16SF
,
32170 IX86_BUILTIN_SCATTERSIV16SI
,
32171 IX86_BUILTIN_SCATTERSIV8DF
,
32172 IX86_BUILTIN_SCATTERSIV8DI
,
32173 IX86_BUILTIN_GATHERPFQPD
,
32174 IX86_BUILTIN_GATHERPFDPS
,
32175 IX86_BUILTIN_GATHERPFDPD
,
32176 IX86_BUILTIN_GATHERPFQPS
,
32177 IX86_BUILTIN_SCATTERPFDPD
,
32178 IX86_BUILTIN_SCATTERPFDPS
,
32179 IX86_BUILTIN_SCATTERPFQPD
,
32180 IX86_BUILTIN_SCATTERPFQPS
,
32182 IX86_BUILTIN_CLFLUSHOPT
,
32184 IX86_BUILTIN_HUGE_VALQ
,
32186 IX86_BUILTIN_NANSQ
,
32187 IX86_BUILTIN_XABORT
,
32188 IX86_BUILTIN_ADDCARRYX32
,
32189 IX86_BUILTIN_ADDCARRYX64
,
32190 IX86_BUILTIN_SBB32
,
32191 IX86_BUILTIN_SBB64
,
32192 IX86_BUILTIN_RDRAND16_STEP
,
32193 IX86_BUILTIN_RDRAND32_STEP
,
32194 IX86_BUILTIN_RDRAND64_STEP
,
32195 IX86_BUILTIN_RDSEED16_STEP
,
32196 IX86_BUILTIN_RDSEED32_STEP
,
32197 IX86_BUILTIN_RDSEED64_STEP
,
32198 IX86_BUILTIN_MONITORX
,
32199 IX86_BUILTIN_MWAITX
,
32200 IX86_BUILTIN_CFSTRING
,
32201 IX86_BUILTIN_CPU_INIT
,
32202 IX86_BUILTIN_CPU_IS
,
32203 IX86_BUILTIN_CPU_SUPPORTS
,
32204 IX86_BUILTIN_READ_FLAGS
,
32205 IX86_BUILTIN_WRITE_FLAGS
,
32207 /* All the remaining builtins are tracked in bdesc_* arrays in
32208 i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after
32210 #define BDESC(mask, icode, name, code, comparison, flag) \
32212 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
32214 IX86_BUILTIN__BDESC_##kindu##_FIRST = code,
32215 #define BDESC_END(kind, next_kind)
32217 #include "i386-builtin.def"
32225   IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX,
32227 /* Now just the aliases for bdesc_* start/end. */
32228 #define BDESC(mask, icode, name, code, comparison, flag)
32229 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag)
32230 #define BDESC_END(kind, next_kind) \
32231 IX86_BUILTIN__BDESC_##kind##_LAST \
32232 = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1,
32234 #include "i386-builtin.def"
32240 /* Just to make sure there is no comma after the last enumerator. */
32241   IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST
32244 /* Table for the ix86 builtin decls.  */
32245 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
32247 /* Table of all of the builtin functions that are possible with different ISA's
32248    but are waiting to be built until a function is declared to use that
32250 struct builtin_isa {
32251   HOST_WIDE_INT isa;            /* isa_flags this builtin is defined for */
32252   HOST_WIDE_INT isa2;           /* additional isa_flags this builtin is defined for */
32253   const char *name;             /* function name */
32254   enum ix86_builtin_func_type tcode; /* type to use in the declaration */
32255   unsigned char const_p:1;      /* true if the declaration is constant */
32256   unsigned char pure_p:1;       /* true if the declaration has pure attribute */
32257   bool leaf_p;                  /* true if the declaration has leaf attribute */
32258   bool nothrow_p;               /* true if the declaration has nothrow attribute */
32259   bool set_and_not_built_p;
32262 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
32264 /* Bits that can still enable any inclusion of a builtin.  */
32265 static HOST_WIDE_INT deferred_isa_values = 0;
32266 static HOST_WIDE_INT deferred_isa_values2 = 0;
32268 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
32269    of which isa_flags to use in the ix86_builtins_isa array.  Stores the
32270    function decl in the ix86_builtins array.  Returns the function decl or
32271    NULL_TREE, if the builtin was not added.
32273    If the front end has a special hook for builtin functions, delay adding
32274    builtin functions that aren't in the current ISA until the ISA is changed
32275    with function specific optimization.  Doing so can save about 300K for the
32276    default compiler.  When the builtin is expanded, check at that time whether
32279    If the front end doesn't have a special hook, record all builtins, even if
32280    it isn't an instruction set in the current ISA in case the user uses
32281    function specific options for a different ISA, so that we don't get scope
32282    errors if a builtin is added in the middle of a function scope.  */
32285 def_builtin (HOST_WIDE_INT mask, const char *name,
32286              enum ix86_builtin_func_type tcode,
32287              enum ix86_builtins code)
32289   tree decl = NULL_TREE;
32291   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
32293       ix86_builtins_isa[(int) code].isa = mask;
32295       /* OPTION_MASK_ISA_AVX512VL has a special meaning.  Unlike the generic case,
32296          where any set bit means the built-in is enabled, this bit must be *and-ed*
32297          with another one.  E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
32298          means that *both* cpuid bits must be set for the built-in to be available.
32299          Handle this here.  */
32300       if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
32301         mask &= ~OPTION_MASK_ISA_AVX512VL;
32303       mask &= ~OPTION_MASK_ISA_64BIT;
32305           || (mask & ix86_isa_flags) != 0
32306           || (lang_hooks.builtin_function
32307               == lang_hooks.builtin_function_ext_scope))
32310           tree type = ix86_get_builtin_func_type (tcode);
32311           decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32313           ix86_builtins[(int) code] = decl;
32314           ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32318           /* Just a MASK where set_and_not_built_p == true can potentially
32319              include a builtin.  */
32320           deferred_isa_values |= mask;
32321           ix86_builtins[(int) code] = NULL_TREE;
32322           ix86_builtins_isa[(int) code].tcode = tcode;
32323           ix86_builtins_isa[(int) code].name = name;
32324           ix86_builtins_isa[(int) code].leaf_p = false;
32325           ix86_builtins_isa[(int) code].nothrow_p = false;
32326           ix86_builtins_isa[(int) code].const_p = false;
32327           ix86_builtins_isa[(int) code].pure_p = false;
32328           ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32335 /* Like def_builtin, but also marks the function decl "const".  */
32338 def_builtin_const (HOST_WIDE_INT mask, const char *name,
32339                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32341   tree decl = def_builtin (mask, name, tcode, code);
32343     TREE_READONLY (decl) = 1;
32345     ix86_builtins_isa[(int) code].const_p = true;
32350 /* Like def_builtin, but also marks the function decl "pure".  */
32353 def_builtin_pure (HOST_WIDE_INT mask, const char *name,
32354                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32356   tree decl = def_builtin (mask, name, tcode, code);
32358     DECL_PURE_P (decl) = 1;
32360     ix86_builtins_isa[(int) code].pure_p = true;
32365 /* Like def_builtin, but for additional isa2 flags.  */
32368 def_builtin2 (HOST_WIDE_INT mask, const char *name,
32369               enum ix86_builtin_func_type tcode,
32370               enum ix86_builtins code)
32372   tree decl = NULL_TREE;
32374   ix86_builtins_isa[(int) code].isa2 = mask;
32377       || (mask & ix86_isa_flags2) != 0
32378       || (lang_hooks.builtin_function
32379           == lang_hooks.builtin_function_ext_scope))
32382       tree type = ix86_get_builtin_func_type (tcode);
32383       decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32385       ix86_builtins[(int) code] = decl;
32386       ix86_builtins_isa[(int) code].set_and_not_built_p = false;
32390       /* Just a MASK where set_and_not_built_p == true can potentially
32391          include a builtin.  */
32392       deferred_isa_values2 |= mask;
32393       ix86_builtins[(int) code] = NULL_TREE;
32394       ix86_builtins_isa[(int) code].tcode = tcode;
32395       ix86_builtins_isa[(int) code].name = name;
32396       ix86_builtins_isa[(int) code].leaf_p = false;
32397       ix86_builtins_isa[(int) code].nothrow_p = false;
32398       ix86_builtins_isa[(int) code].const_p = false;
32399       ix86_builtins_isa[(int) code].pure_p = false;
32400       ix86_builtins_isa[(int) code].set_and_not_built_p = true;
32406 /* Like def_builtin, but also marks the function decl "const".  */
32409 def_builtin_const2 (HOST_WIDE_INT mask, const char *name,
32410                     enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32412   tree decl = def_builtin2 (mask, name, tcode, code);
32414     TREE_READONLY (decl) = 1;
32416     ix86_builtins_isa[(int) code].const_p = true;
32421 /* Like def_builtin, but also marks the function decl "pure".  */
32424 def_builtin_pure2 (HOST_WIDE_INT mask, const char *name,
32425                    enum ix86_builtin_func_type tcode, enum ix86_builtins code)
32427   tree decl = def_builtin2 (mask, name, tcode, code);
32429     DECL_PURE_P (decl) = 1;
32431     ix86_builtins_isa[(int) code].pure_p = true;
32436 /* Add any new builtin functions for a given ISA that may not have been
32437    declared.  This saves a bit of space compared to adding all of the
32438    declarations to the tree, even if we didn't use them.  */
32441 ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2)
32443   if ((isa & deferred_isa_values) == 0
32444       && (isa2 & deferred_isa_values2) == 0)
32447   /* Bits in ISA value can be removed from potential isa values.  */
32448   deferred_isa_values &= ~isa;
32449   deferred_isa_values2 &= ~isa2;
32452   tree saved_current_target_pragma = current_target_pragma;
32453   current_target_pragma = NULL_TREE;
32455   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
32457       if (((ix86_builtins_isa[i].isa & isa) != 0
32458            || (ix86_builtins_isa[i].isa2 & isa2) != 0)
32459           && ix86_builtins_isa[i].set_and_not_built_p)
32463           /* Don't define the builtin again.  */
32464           ix86_builtins_isa[i].set_and_not_built_p = false;
32466           type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
32467           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
32468                                                  type, i, BUILT_IN_MD, NULL,
32471           ix86_builtins[i] = decl;
32472           if (ix86_builtins_isa[i].const_p)
32473             TREE_READONLY (decl) = 1;
32474           if (ix86_builtins_isa[i].pure_p)
32475             DECL_PURE_P (decl) = 1;
32476           if (ix86_builtins_isa[i].leaf_p)
32477             DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
32479           if (ix86_builtins_isa[i].nothrow_p)
32480             TREE_NOTHROW (decl) = 1;
32484   current_target_pragma = saved_current_target_pragma;
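/* Illustrative sketch (not from the original i386.c): the defer-then-declare
   handshake implemented by def_builtin and ix86_add_new_builtins, reduced to
   a self-contained toy.  Every identifier below is a hypothetical stand-in.  */
#include <stdio.h>
#include <stddef.h>

static unsigned toy_deferred_mask;          /* union of masks still waiting */
static unsigned toy_isa_of[8];
static const char *toy_name_of[8];

static void toy_declare (const char *name, int code)
{ printf ("declaring %s as builtin %d\n", name, code); }

static void
toy_def_builtin (unsigned mask, unsigned current_isa, int code, const char *name)
{
  if (mask & current_isa)
    toy_declare (name, code);               /* ISA already on: declare now */
  else
    {
      toy_isa_of[code] = mask;              /* record enough for later     */
      toy_name_of[code] = name;
      toy_deferred_mask |= mask;
    }
}

static void
toy_add_new_builtins (unsigned grown_isa)
{
  if ((grown_isa & toy_deferred_mask) == 0)
    return;                                 /* nothing waits on these bits */
  toy_deferred_mask &= ~grown_isa;
  for (int code = 0; code < 8; code++)
    if (toy_name_of[code] && (toy_isa_of[code] & grown_isa))
      {
        toy_declare (toy_name_of[code], code);
        toy_name_of[code] = NULL;           /* don't define it again       */
      }
}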
32487 /* Bits for builtin_description.flag. */
32489 /* Set when we don't support the comparison natively, and should
32490 swap_comparison in order to support it. */
32491 #define BUILTIN_DESC_SWAP_OPERANDS 1
32493 struct builtin_description
32495   const HOST_WIDE_INT mask;
32496   const enum insn_code icode;
32497   const char *const name;
32498   const enum ix86_builtins code;
32499   const enum rtx_code comparison;
32503 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
32504 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
32505 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
32506 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
32507 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
32508 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
32509 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
32510 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
32511 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
32512 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
32513 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
32514 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
32515 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
32516 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
32517 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
32518 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
32519 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
32520 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
32521 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
32522 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
32523 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
32524 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
32525 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
32526 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
32527 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
32528 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
32529 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
32530 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
32531 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
32532 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
32533 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
32534 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
32535 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
32536 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
32537 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
32538 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
32539 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
32540 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
32541 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
32542 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
32543 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
32544 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
32545 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
32546 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
32547 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
32548 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
32549 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
32550 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
32551 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
32552 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
32553 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
32554 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
32556 #define BDESC(mask, icode, name, code, comparison, flag) \
32557 { mask, icode, name, code, comparison, flag },
32558 #define BDESC_FIRST(kind, kindu, mask, icode, name, code, comparison, flag) \
32559 static const struct builtin_description bdesc_##kind[] = \
32561 BDESC (mask, icode, name, code, comparison, flag)
32562 #define BDESC_END(kind, next_kind) \
32565 #include "i386-builtin.def"
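/* Illustrative sketch (not from the original i386.c): a self-contained
   miniature of the re-include/X-macro trick used above.  The real code
   re-includes i386-builtin.def with different BDESC* definitions; this toy
   uses a macro list instead so it can stand alone.  All names hypothetical.  */
#include <stdio.h>

#define MINI_BUILTIN_LIST \
  X (FOO, "__builtin_mini_foo") \
  X (BAR, "__builtin_mini_bar")

/* First expansion: an enum of builtin codes.  */
#define X(code, name) MINI_##code,
enum mini_builtin { MINI_BUILTIN_LIST MINI_MAX };
#undef X

/* Second expansion: a parallel table indexable by that enum.  */
#define X(code, name) name,
static const char *const mini_builtin_name[] = { MINI_BUILTIN_LIST };
#undef X

int
main (void)
{
  for (int i = 0; i < (int) MINI_MAX; i++)
    printf ("%d -> %s\n", i, mini_builtin_name[i]);
  return 0;
}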
32571 /* TM vector builtins. */
32573 /* Reuse the existing x86-specific `struct builtin_description' cause
32574 we're lazy. Add casts to make them fit. */
32575 static const struct builtin_description bdesc_tm
[] =
32577 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
32578 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
32579 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
32580 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
32581 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
32582 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
32583 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
32585 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
32586 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
32587 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
32588 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
32589 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
32590 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
32591 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
32593 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
32594 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
32595 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
32596 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
32597 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
32598 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
32599 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
32601 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
32602 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
32603 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
32606 /* Initialize the transactional memory vector load/store builtins.  */
32609 ix86_init_tm_builtins (void)
32611   enum ix86_builtin_func_type ftype;
32612   const struct builtin_description *d;
32615   tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
32616   tree attrs_log, attrs_type_log;
32621   /* If there are no builtins defined, we must be compiling in a
32622      language without trans-mem support.  */
32623   if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
32626   /* Use whatever attributes a normal TM load has.  */
32627   decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
32628   attrs_load = DECL_ATTRIBUTES (decl);
32629   attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32630   /* Use whatever attributes a normal TM store has.  */
32631   decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
32632   attrs_store = DECL_ATTRIBUTES (decl);
32633   attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32634   /* Use whatever attributes a normal TM log has.  */
32635   decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
32636   attrs_log = DECL_ATTRIBUTES (decl);
32637   attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32639   for (i = 0, d = bdesc_tm;
32640        i < ARRAY_SIZE (bdesc_tm);
32643       if ((d->mask & ix86_isa_flags) != 0
32644           || (lang_hooks.builtin_function
32645               == lang_hooks.builtin_function_ext_scope))
32647           tree type, attrs, attrs_type;
32648           enum built_in_function code = (enum built_in_function) d->code;
32650           ftype = (enum ix86_builtin_func_type) d->flag;
32651           type = ix86_get_builtin_func_type (ftype);
32653           if (BUILTIN_TM_LOAD_P (code))
32655               attrs = attrs_load;
32656               attrs_type = attrs_type_load;
32658           else if (BUILTIN_TM_STORE_P (code))
32660               attrs = attrs_store;
32661               attrs_type = attrs_type_store;
32666               attrs_type = attrs_type_log;
32668           decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
32669                                        /* The builtin without the prefix for
32670                                           calling it directly.  */
32671                                        d->name + strlen ("__builtin_"),
32673           /* add_builtin_function() will set the DECL_ATTRIBUTES, now
32674              set the TYPE_ATTRIBUTES.  */
32675           decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
32677           set_builtin_decl (code, decl, false);
32682 /* Macros for verification of enum ix86_builtins order. */
32683 #define BDESC_VERIFY(x, y, z) \
32684 gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z)))
32685 #define BDESC_VERIFYS(x, y, z) \
32686 STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z)))
32688 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
32689                IX86_BUILTIN__BDESC_COMI_LAST, 1);
32690 BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
32691                IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1);
32692 BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
32693                IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1);
32694 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST,
32695                IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1);
32696 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
32697                IX86_BUILTIN__BDESC_ARGS_LAST, 1);
32698 BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS2_FIRST,
32699                IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1);
32700 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_FIRST,
32701                IX86_BUILTIN__BDESC_ARGS2_LAST, 1);
32702 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
32703                IX86_BUILTIN__BDESC_MPX_LAST, 1);
32704 BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
32705                IX86_BUILTIN__BDESC_MPX_CONST_LAST, 1);
32706 BDESC_VERIFYS (IX86_BUILTIN_MAX,
32707                IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1);
32709 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
32710    in the current target ISA to allow the user to compile particular modules
32711    with different target specific options that differ from the command line
32714 ix86_init_mmx_sse_builtins (void)
32716   const struct builtin_description * d;
32717   enum ix86_builtin_func_type ftype;
32720   /* Add all special builtins with variable number of operands.  */
32721   for (i = 0, d = bdesc_special_args;
32722        i < ARRAY_SIZE (bdesc_special_args);
32725       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i);
32729       ftype = (enum ix86_builtin_func_type) d->flag;
32730       def_builtin (d->mask, d->name, ftype, d->code);
32732   BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST,
32733                  IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST,
32734                  ARRAY_SIZE (bdesc_special_args) - 1);
32736   /* Add all builtins with variable number of operands.  */
32737   for (i = 0, d = bdesc_args;
32738        i < ARRAY_SIZE (bdesc_args);
32741       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i);
32745       ftype = (enum ix86_builtin_func_type) d->flag;
32746       def_builtin_const (d->mask, d->name, ftype, d->code);
32748   BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST,
32749                  IX86_BUILTIN__BDESC_ARGS_FIRST,
32750                  ARRAY_SIZE (bdesc_args) - 1);
32752   /* Add all builtins with variable number of operands.  */
32753   for (i = 0, d = bdesc_args2;
32754        i < ARRAY_SIZE (bdesc_args2);
32760       ftype = (enum ix86_builtin_func_type) d->flag;
32761       def_builtin_const2 (d->mask, d->name, ftype, d->code);
32764   /* Add all builtins with rounding.  */
32765   for (i = 0, d = bdesc_round_args;
32766        i < ARRAY_SIZE (bdesc_round_args);
32769       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i);
32773       ftype = (enum ix86_builtin_func_type) d->flag;
32774       def_builtin_const (d->mask, d->name, ftype, d->code);
32776   BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST,
32777                  IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST,
32778                  ARRAY_SIZE (bdesc_round_args) - 1);
32780   /* pcmpestr[im] insns.  */
32781   for (i = 0, d = bdesc_pcmpestr;
32782        i < ARRAY_SIZE (bdesc_pcmpestr);
32785       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i);
32786       if (d->code == IX86_BUILTIN_PCMPESTRM128)
32787         ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
32789         ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
32790       def_builtin_const (d->mask, d->name, ftype, d->code);
32792   BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST,
32793                  IX86_BUILTIN__BDESC_PCMPESTR_FIRST,
32794                  ARRAY_SIZE (bdesc_pcmpestr) - 1);
32796   /* pcmpistr[im] insns.  */
32797   for (i = 0, d = bdesc_pcmpistr;
32798        i < ARRAY_SIZE (bdesc_pcmpistr);
32801       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i);
32802       if (d->code == IX86_BUILTIN_PCMPISTRM128)
32803         ftype = V16QI_FTYPE_V16QI_V16QI_INT;
32805         ftype = INT_FTYPE_V16QI_V16QI_INT;
32806       def_builtin_const (d->mask, d->name, ftype, d->code);
32808   BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST,
32809                  IX86_BUILTIN__BDESC_PCMPISTR_FIRST,
32810                  ARRAY_SIZE (bdesc_pcmpistr) - 1);
32812   /* comi/ucomi insns.  */
32813   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
32815       BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i);
32816       if (d->mask == OPTION_MASK_ISA_SSE2)
32817         ftype = INT_FTYPE_V2DF_V2DF;
32819         ftype = INT_FTYPE_V4SF_V4SF;
32820       def_builtin_const (d->mask, d->name, ftype, d->code);
32822   BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST,
32823                  IX86_BUILTIN__BDESC_COMI_FIRST,
32824                  ARRAY_SIZE (bdesc_comi) - 1);
32827 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
32828 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
32829 def_builtin_pure (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
32830 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
32832 /* SSE or 3DNow!A */
32833 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
32834 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
32835 IX86_BUILTIN_MASKMOVQ
);
32838 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
32839 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
32841 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
32842 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
32843 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
32844 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
32847 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
32848 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
32849 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
32850 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
32853 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
32854 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
32855 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
32856 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
32857 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
32858 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
32859 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
32860 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
32861 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
32862 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
32863 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
32864 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
32867 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
32868 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
32871 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
32872 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
32873 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
32874 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
32875 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
32876 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
32877 IX86_BUILTIN_RDRAND64_STEP
);
32880 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
32881 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
32882 IX86_BUILTIN_GATHERSIV2DF
);
32884 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
32885 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
32886 IX86_BUILTIN_GATHERSIV4DF
);
32888 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
32889 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
32890 IX86_BUILTIN_GATHERDIV2DF
);
32892 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
32893 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
32894 IX86_BUILTIN_GATHERDIV4DF
);
32896 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
32897 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
32898 IX86_BUILTIN_GATHERSIV4SF
);
32900 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
32901 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
32902 IX86_BUILTIN_GATHERSIV8SF
);
32904 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
32905 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
32906 IX86_BUILTIN_GATHERDIV4SF
);
32908 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
32909 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
32910 IX86_BUILTIN_GATHERDIV8SF
);
32912 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
32913 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
32914 IX86_BUILTIN_GATHERSIV2DI
);
32916 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
32917 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
32918 IX86_BUILTIN_GATHERSIV4DI
);
32920 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
32921 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
32922 IX86_BUILTIN_GATHERDIV2DI
);
32924 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
32925 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
32926 IX86_BUILTIN_GATHERDIV4DI
);
32928 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
32929 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
32930 IX86_BUILTIN_GATHERSIV4SI
);
32932 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
32933 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
32934 IX86_BUILTIN_GATHERSIV8SI
);
32936 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
32937 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
32938 IX86_BUILTIN_GATHERDIV4SI
);
32940 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
32941 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
32942 IX86_BUILTIN_GATHERDIV8SI
);
32944 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
32945 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
32946 IX86_BUILTIN_GATHERALTSIV4DF
);
32948 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
32949 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
32950 IX86_BUILTIN_GATHERALTDIV8SF
);
32952 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
32953 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
32954 IX86_BUILTIN_GATHERALTSIV4DI
);
32956 def_builtin_pure (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
32957 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
32958 IX86_BUILTIN_GATHERALTDIV8SI
);
32961 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gathersiv16sf",
32962 V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT
,
32963 IX86_BUILTIN_GATHER3SIV16SF
);
32965 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gathersiv8df",
32966 V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT
,
32967 IX86_BUILTIN_GATHER3SIV8DF
);
32969 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatherdiv16sf",
32970 V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT
,
32971 IX86_BUILTIN_GATHER3DIV16SF
);
32973 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatherdiv8df",
32974 V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT
,
32975 IX86_BUILTIN_GATHER3DIV8DF
);
32977 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gathersiv16si",
32978 V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT
,
32979 IX86_BUILTIN_GATHER3SIV16SI
);
32981 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gathersiv8di",
32982 V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT
,
32983 IX86_BUILTIN_GATHER3SIV8DI
);
32985 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatherdiv16si",
32986 V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT
,
32987 IX86_BUILTIN_GATHER3DIV16SI
);
32989 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatherdiv8di",
32990 V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT
,
32991 IX86_BUILTIN_GATHER3DIV8DI
);
32993 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatheraltsiv8df ",
32994 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT
,
32995 IX86_BUILTIN_GATHER3ALTSIV8DF
);
32997 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatheraltdiv8sf ",
32998 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT
,
32999 IX86_BUILTIN_GATHER3ALTDIV16SF
);
33001 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatheraltsiv8di ",
33002 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT
,
33003 IX86_BUILTIN_GATHER3ALTSIV8DI
);
33005 def_builtin_pure (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_gatheraltdiv8si ",
33006 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT
,
33007 IX86_BUILTIN_GATHER3ALTDIV16SI
);
33009 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scattersiv16sf",
33010 VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT
,
33011 IX86_BUILTIN_SCATTERSIV16SF
);
33013 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scattersiv8df",
33014 VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT
,
33015 IX86_BUILTIN_SCATTERSIV8DF
);
33017 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatterdiv16sf",
33018 VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT
,
33019 IX86_BUILTIN_SCATTERDIV16SF
);
33021 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatterdiv8df",
33022 VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT
,
33023 IX86_BUILTIN_SCATTERDIV8DF
);
33025 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scattersiv16si",
33026 VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT
,
33027 IX86_BUILTIN_SCATTERSIV16SI
);
33029 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scattersiv8di",
33030 VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT
,
33031 IX86_BUILTIN_SCATTERSIV8DI
);
33033 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatterdiv16si",
33034 VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT
,
33035 IX86_BUILTIN_SCATTERDIV16SI
);
33037 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatterdiv8di",
33038 VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT
,
33039 IX86_BUILTIN_SCATTERDIV8DI
);
33042 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv2df",
33043 V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT
,
33044 IX86_BUILTIN_GATHER3SIV2DF
);
33046 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv4df",
33047 V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT
,
33048 IX86_BUILTIN_GATHER3SIV4DF
);
33050 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div2df",
33051 V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT
,
33052 IX86_BUILTIN_GATHER3DIV2DF
);
33054 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div4df",
33055 V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT
,
33056 IX86_BUILTIN_GATHER3DIV4DF
);
33058 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv4sf",
33059 V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT
,
33060 IX86_BUILTIN_GATHER3SIV4SF
);
33062 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv8sf",
33063 V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT
,
33064 IX86_BUILTIN_GATHER3SIV8SF
);
33066 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div4sf",
33067 V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT
,
33068 IX86_BUILTIN_GATHER3DIV4SF
);
33070 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div8sf",
33071 V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT
,
33072 IX86_BUILTIN_GATHER3DIV8SF
);
33074 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv2di",
33075 V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT
,
33076 IX86_BUILTIN_GATHER3SIV2DI
);
33078 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv4di",
33079 V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT
,
33080 IX86_BUILTIN_GATHER3SIV4DI
);
33082 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div2di",
33083 V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT
,
33084 IX86_BUILTIN_GATHER3DIV2DI
);
33086 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div4di",
33087 V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT
,
33088 IX86_BUILTIN_GATHER3DIV4DI
);
33090 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv4si",
33091 V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT
,
33092 IX86_BUILTIN_GATHER3SIV4SI
);
33094 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3siv8si",
33095 V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT
,
33096 IX86_BUILTIN_GATHER3SIV8SI
);
33098 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div4si",
33099 V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT
,
33100 IX86_BUILTIN_GATHER3DIV4SI
);
33102 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3div8si",
33103 V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT
,
33104 IX86_BUILTIN_GATHER3DIV8SI
);
33106 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3altsiv4df ",
33107 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT
,
33108 IX86_BUILTIN_GATHER3ALTSIV4DF
);
33110 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3altdiv8sf ",
33111 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT
,
33112 IX86_BUILTIN_GATHER3ALTDIV8SF
);
33114 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3altsiv4di ",
33115 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT
,
33116 IX86_BUILTIN_GATHER3ALTSIV4DI
);
33118 def_builtin_pure (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_gather3altdiv8si ",
33119 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT
,
33120 IX86_BUILTIN_GATHER3ALTDIV8SI
);
33122 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv8sf",
33123 VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT
,
33124 IX86_BUILTIN_SCATTERSIV8SF
);
33126 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv4sf",
33127 VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT
,
33128 IX86_BUILTIN_SCATTERSIV4SF
);
33130 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv4df",
33131 VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT
,
33132 IX86_BUILTIN_SCATTERSIV4DF
);
33134 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv2df",
33135 VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT
,
33136 IX86_BUILTIN_SCATTERSIV2DF
);
33138 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv8sf",
33139 VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT
,
33140 IX86_BUILTIN_SCATTERDIV8SF
);
33142 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv4sf",
33143 VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT
,
33144 IX86_BUILTIN_SCATTERDIV4SF
);
33146 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv4df",
33147 VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT
,
33148 IX86_BUILTIN_SCATTERDIV4DF
);
33150 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv2df",
33151 VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT
,
33152 IX86_BUILTIN_SCATTERDIV2DF
);
33154 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv8si",
33155 VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT
,
33156 IX86_BUILTIN_SCATTERSIV8SI
);
33158 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv4si",
33159 VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT
,
33160 IX86_BUILTIN_SCATTERSIV4SI
);
33162 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv4di",
33163 VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT
,
33164 IX86_BUILTIN_SCATTERSIV4DI
);
33166 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scattersiv2di",
33167 VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT
,
33168 IX86_BUILTIN_SCATTERSIV2DI
);
33170 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv8si",
33171 VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT
,
33172 IX86_BUILTIN_SCATTERDIV8SI
);
33174 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv4si",
33175 VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT
,
33176 IX86_BUILTIN_SCATTERDIV4SI
);
33178 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv4di",
33179 VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT
,
33180 IX86_BUILTIN_SCATTERDIV4DI
);
33182 def_builtin (OPTION_MASK_ISA_AVX512VL
, "__builtin_ia32_scatterdiv2di",
33183 VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT
,
33184 IX86_BUILTIN_SCATTERDIV2DI
);
33185 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatteraltsiv8df ",
33186 VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT
,
33187 IX86_BUILTIN_SCATTERALTSIV8DF
);
33189 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatteraltdiv8sf ",
33190 VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT
,
33191 IX86_BUILTIN_SCATTERALTDIV16SF
);
33193 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatteraltsiv8di ",
33194 VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT
,
33195 IX86_BUILTIN_SCATTERALTSIV8DI
);
33197 def_builtin (OPTION_MASK_ISA_AVX512F
, "__builtin_ia32_scatteraltdiv8si ",
33198 VOID_FTYPE_PINT_HI_V8DI_V16SI_INT
,
33199 IX86_BUILTIN_SCATTERALTDIV16SI
);
33202 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_gatherpfdpd",
33203 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT
,
33204 IX86_BUILTIN_GATHERPFDPD
);
33205 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_gatherpfdps",
33206 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT
,
33207 IX86_BUILTIN_GATHERPFDPS
);
33208 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_gatherpfqpd",
33209 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT
,
33210 IX86_BUILTIN_GATHERPFQPD
);
33211 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_gatherpfqps",
33212 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT
,
33213 IX86_BUILTIN_GATHERPFQPS
);
33214 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_scatterpfdpd",
33215 VOID_FTYPE_QI_V8SI_PCVOID_INT_INT
,
33216 IX86_BUILTIN_SCATTERPFDPD
);
33217 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_scatterpfdps",
33218 VOID_FTYPE_HI_V16SI_PCVOID_INT_INT
,
33219 IX86_BUILTIN_SCATTERPFDPS
);
33220 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_scatterpfqpd",
33221 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT
,
33222 IX86_BUILTIN_SCATTERPFQPD
);
33223 def_builtin (OPTION_MASK_ISA_AVX512PF
, "__builtin_ia32_scatterpfqps",
33224 VOID_FTYPE_QI_V8DI_PCVOID_INT_INT
,
33225 IX86_BUILTIN_SCATTERPFQPS
);
33228 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1msg1",
33229 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA1MSG1
);
33230 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1msg2",
33231 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA1MSG2
);
33232 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1nexte",
33233 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA1NEXTE
);
33234 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha1rnds4",
33235 V4SI_FTYPE_V4SI_V4SI_INT
, IX86_BUILTIN_SHA1RNDS4
);
33236 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256msg1",
33237 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA256MSG1
);
33238 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256msg2",
33239 V4SI_FTYPE_V4SI_V4SI
, IX86_BUILTIN_SHA256MSG2
);
33240 def_builtin_const (OPTION_MASK_ISA_SHA
, "__builtin_ia32_sha256rnds2",
33241 V4SI_FTYPE_V4SI_V4SI_V4SI
, IX86_BUILTIN_SHA256RNDS2
);
33244 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
33245 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdseed_di_step",
	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
  def_builtin (0, "__builtin_ia32_addcarryx_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_addcarryx_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_ADDCARRYX64);

  def_builtin (0, "__builtin_ia32_sbb_u32",
	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
  def_builtin (OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_sbb_u64",
	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
	       IX86_BUILTIN_SBB64);

  /* Read/write FLAGS.  */
  def_builtin (0, "__builtin_ia32_readeflags_u32",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
	       UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
  def_builtin (0, "__builtin_ia32_writeeflags_u32",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
	       VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);

  def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);

  def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);

  /* MONITORX and MWAITX.  */
  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);

  def_builtin (OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO);
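
  /* Illustrative note, not part of the original sources: registrations like
     the ones above are what the intrinsic headers expand to.  For example,
     adxintrin.h implements _addcarryx_u32 roughly as

	 extern __inline unsigned char
	 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
	 _addcarryx_u32 (unsigned char __CF, unsigned int __X,
			 unsigned int __Y, unsigned int *__P)
	 {
	   return __builtin_ia32_addcarryx_u32 (__CF, __X, __Y, __P);
	 }

     so the UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED signature registered here
     has to match that wrapper's prototype (the exact header wording is
     paraphrased from memory).  */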
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i);
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST,
		 IX86_BUILTIN__BDESC_MULTI_ARG_FIRST,
		 ARRAY_SIZE (bdesc_multi_arg) - 1);
}
static void
ix86_init_mpx_builtins ()
{
  const struct builtin_description *d;
  enum ix86_builtin_func_type ftype;
  tree decl;

  for (i = 0, d = bdesc_mpx;
       i < ARRAY_SIZE (bdesc_mpx);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_FIRST, i);

      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin (d->mask, d->name, ftype, d->code);

      /* With no leaf and nothrow flags for MPX builtins
	 abnormal edges may follow their calls when setjmp
	 is present in the function.  Since we may have a lot
	 of MPX builtin calls it causes lots of useless
	 edges and enormous PHI nodes.  To avoid this we mark
	 MPX builtins as leaf and nothrow.  */
      if (decl)
	{
	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						    NULL_TREE);
	  TREE_NOTHROW (decl) = 1;
	}
      else
	{
	  ix86_builtins_isa[(int)d->code].leaf_p = true;
	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
	}
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_LAST,
		 IX86_BUILTIN__BDESC_MPX_FIRST,
		 ARRAY_SIZE (bdesc_mpx) - 1);

  for (i = 0, d = bdesc_mpx_const;
       i < ARRAY_SIZE (bdesc_mpx_const);
       i++, d++)
    {
      BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MPX_CONST_FIRST, i);

      ftype = (enum ix86_builtin_func_type) d->flag;
      decl = def_builtin_const (d->mask, d->name, ftype, d->code);

      if (decl)
	{
	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
						    NULL_TREE);
	  TREE_NOTHROW (decl) = 1;
	}
      else
	{
	  ix86_builtins_isa[(int)d->code].leaf_p = true;
	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
	}
    }
  BDESC_VERIFYS (IX86_BUILTIN__BDESC_MPX_CONST_LAST,
		 IX86_BUILTIN__BDESC_MPX_CONST_FIRST,
		 ARRAY_SIZE (bdesc_mpx_const) - 1);
}

#undef BDESC_VERIFY
#undef BDESC_VERIFYS
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;

  tree cond_var, and_expr_var = NULL_TREE;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  e12 = split_block (bb1, if_else_stmt);
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
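
/* Illustrative sketch, not part of the original sources: for a version
   guarded by two predicates the statements built above amount to GIMPLE of
   roughly this shape (the predicate strings and version name are examples
   only):

       cond_1 = __builtin_cpu_is ("haswell");
       cond_2 = __builtin_cpu_supports ("avx2");
       and_var = MIN_EXPR <cond_2, cond_1>;
       if (and_var > 0)
	 return (void *) &foo.arch_haswell_avx2;

   MIN_EXPR acts as a logical AND here because every predicate call returns
   a non-negative integer, so the minimum is non-zero exactly when all
   predicates are non-zero.  */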
/* This parses the attribute arguments to target in DECL and determines
   the right builtin to use to match the platform specification.
   It returns the priority value for this version decl.  If PREDICATE_LIST
   is not NULL, it stores the list of cpu features that need to be checked
   before dispatching this function.  */

static unsigned int
get_builtin_code_for_version (tree decl, tree *predicate_list)
{
  tree attrs;
  struct cl_target_option cur_target;
  tree target_node;
  struct cl_target_option *new_target;
  const char *arg_str = NULL;
  const char *attrs_str = NULL;
  char *tok_str = NULL;
  char *token;

  /* Priority of i386 features, greater value is higher priority.  This is
     used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
  enum feature_priority priority = P_ZERO;

  /* These are the target attribute strings for which a dispatcher is
     available, from fold_builtin_cpu.  */
  static struct _feature_list
    {
      const char *const name;
      const enum feature_priority priority;
    }
  const feature_list[] =
    {
      {"sse4a", P_SSE4_A},
      {"ssse3", P_SSSE3},
      {"sse4.1", P_SSE4_1},
      {"sse4.2", P_SSE4_2},
      {"popcnt", P_POPCNT},
      {"pclmul", P_PCLMUL},
      {"avx512f", P_AVX512F}
    };

  static unsigned int NUM_FEATURES
    = sizeof (feature_list) / sizeof (struct _feature_list);

  unsigned int i;

  tree predicate_chain = NULL_TREE;
  tree predicate_decl, predicate_arg;

  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
  gcc_assert (attrs != NULL);

  attrs = TREE_VALUE (TREE_VALUE (attrs));

  gcc_assert (TREE_CODE (attrs) == STRING_CST);
  attrs_str = TREE_STRING_POINTER (attrs);

  /* Return priority zero for default function.  */
  if (strcmp (attrs_str, "default") == 0)
    return 0;

  /* Handle arch= if specified.  For priority, set it to be 1 more than
     the best instruction set the processor can handle.  For instance, if
     there is a version for atom and a version for ssse3 (the highest ISA
     priority for atom), the atom version must be checked for dispatch
     before the ssse3 version.  */
  if (strstr (attrs_str, "arch=") != NULL)
    {
      cl_target_option_save (&cur_target, &global_options);
      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
						      &global_options_set);

      gcc_assert (target_node);
      new_target = TREE_TARGET_OPTION (target_node);
      gcc_assert (new_target);

      if (new_target->arch_specified && new_target->arch > 0)
	switch (new_target->arch)
	  {
	  case PROCESSOR_CORE2:
	    arg_str = "core2";
	    priority = P_PROC_SSSE3;
	    break;
	  case PROCESSOR_NEHALEM:
	    if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
	      arg_str = "westmere";
	    else
	      /* We translate "arch=corei7" and "arch=nehalem" to
		 "corei7" so that it will be mapped to M_INTEL_COREI7
		 as cpu type to cover all M_INTEL_COREI7_XXXs.  */
	      arg_str = "corei7";
	    priority = P_PROC_SSE4_2;
	    break;
	  case PROCESSOR_SANDYBRIDGE:
	    if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
	      arg_str = "ivybridge";
	    else
	      arg_str = "sandybridge";
	    priority = P_PROC_AVX;
	    break;
	  case PROCESSOR_HASWELL:
	    if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
	      arg_str = "skylake-avx512";
	    else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_XSAVES)
	      arg_str = "skylake";
	    else if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
	      arg_str = "broadwell";
	    else
	      arg_str = "haswell";
	    priority = P_PROC_AVX2;
	    break;
	  case PROCESSOR_BONNELL:
	    arg_str = "bonnell";
	    priority = P_PROC_SSSE3;
	    break;
	  case PROCESSOR_KNL:
	    arg_str = "knl";
	    priority = P_PROC_AVX512F;
	    break;
	  case PROCESSOR_SILVERMONT:
	    arg_str = "silvermont";
	    priority = P_PROC_SSE4_2;
	    break;
	  case PROCESSOR_AMDFAM10:
	    arg_str = "amdfam10h";
	    priority = P_PROC_SSE4_A;
	    break;
	  case PROCESSOR_BTVER1:
	    arg_str = "btver1";
	    priority = P_PROC_SSE4_A;
	    break;
	  case PROCESSOR_BTVER2:
	    arg_str = "btver2";
	    priority = P_PROC_BMI;
	    break;
	  case PROCESSOR_BDVER1:
	    arg_str = "bdver1";
	    priority = P_PROC_XOP;
	    break;
	  case PROCESSOR_BDVER2:
	    arg_str = "bdver2";
	    priority = P_PROC_FMA;
	    break;
	  case PROCESSOR_BDVER3:
	    arg_str = "bdver3";
	    priority = P_PROC_FMA;
	    break;
	  case PROCESSOR_BDVER4:
	    arg_str = "bdver4";
	    priority = P_PROC_AVX2;
	    break;
	  case PROCESSOR_ZNVER1:
	    arg_str = "znver1";
	    priority = P_PROC_AVX2;
	    break;
	  }

      cl_target_option_restore (&global_options, &cur_target);

      if (predicate_list && arg_str == NULL)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for the versioning attributes");
	  return 0;
	}

      if (predicate_list)
	{
	  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
	  /* For a C string literal the length includes the trailing NULL.  */
	  predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
	  predicate_chain = tree_cons (predicate_decl, predicate_arg,
				       predicate_chain);
	}
    }

  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
  strcpy (tok_str, attrs_str);
  token = strtok (tok_str, ",");
  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];

  while (token != NULL)
    {
      /* Do not process "arch="  */
      if (strncmp (token, "arch=", 5) == 0)
	{
	  token = strtok (NULL, ",");
	  continue;
	}
      for (i = 0; i < NUM_FEATURES; ++i)
	{
	  if (strcmp (token, feature_list[i].name) == 0)
	    {
	      if (predicate_list)
		{
		  predicate_arg = build_string_literal (
				  strlen (feature_list[i].name) + 1,
				  feature_list[i].name);
		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
					       predicate_chain);
		}
	      /* Find the maximum priority feature.  */
	      if (feature_list[i].priority > priority)
		priority = feature_list[i].priority;

	      break;
	    }
	}
      if (predicate_list && i == NUM_FEATURES)
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "No dispatcher found for %s", token);
	  return 0;
	}
      token = strtok (NULL, ",");
    }
  free (tok_str);

  if (predicate_list && predicate_chain == NULL_TREE)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"No dispatcher found for the versioning attributes : %s",
		attrs_str);
      return 0;
    }
  else if (predicate_list)
    {
      predicate_chain = nreverse (predicate_chain);
      *predicate_list = predicate_chain;
    }

  return priority;
}
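
/* Illustrative example, not from the original sources: given

       __attribute__ ((target ("default")))      int f (void);
       __attribute__ ((target ("sse4.2")))       int f (void);
       __attribute__ ((target ("arch=haswell"))) int f (void);

   the function above returns 0 for the default version, P_SSE4_2 for the
   "sse4.2" version and P_PROC_AVX2 for the "arch=haswell" version, so the
   dispatcher tries the Haswell version first, then the SSE4.2 one, and
   falls back to the default last.  */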
/* This compares the priority of target features in function DECL1
   and DECL2.  It returns positive value if DECL1 is higher priority,
   negative value if DECL2 is higher priority and 0 if they are the
   same.  */

static int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (ix86_builtins [(int) IX86_BUILTIN_CPU_INIT],
			     vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);

  return 0;
}
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "Function versions cannot be marked as gnu_inline,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("Virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

static tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

static tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs ifunc which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
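
/* Usage sketch, for illustration only: the dispatcher declared here backs
   function multiversioning of the form

       __attribute__ ((target ("default"))) int foo (void) { return 0; }
       __attribute__ ((target ("avx2")))    int foo (void) { return 2; }

   The front end redirects calls to foo to the IFUNC created above, and the
   resolver built by make_resolver_func below picks a version when the
   dynamic loader resolves the IFUNC, using the data in __cpu_model.  The
   function bodies are placeholders for the example.  */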
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function,  DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  char *resolver_name;
  tree decl, type, decl_name, t;

  /* IFUNC's have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
  if (TREE_PUBLIC (default_decl) == 0)
    {
      char *ifunc_name = make_unique_name (default_decl, "ifunc", true);
      symtab->change_decl_assembler_name (ifunc_alias_decl,
					  get_identifier (ifunc_name));
      XDELETEVEC (ifunc_name);
    }

  resolver_name = make_unique_name (default_decl, "resolver", false);

  /* The resolver function should return a (void *). */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  decl_name = get_identifier (resolver_name);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  XDELETEVEC (resolver_name);
  return decl;
}
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

static tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("Virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  unsigned int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
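
/* For reference only (a sketch from the surrounding code, not copied from
   libgcc): the record built above mirrors the __cpu_model object that
   libgcc's cpuinfo.c exports, roughly

       struct __processor_model
       {
	 unsigned int __cpu_vendor;
	 unsigned int __cpu_type;
	 unsigned int __cpu_subtype;
	 unsigned int __cpu_features[1];
       } __cpu_model;

   so the COMPONENT_REFs created in fold_builtin_cpu below resolve to the
   fields that libgcc's CPU-detection code fills in at startup.  */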
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier(name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL_SILVERMONT,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2,
    M_AMDFAM15H_BDVER3,
    M_AMDFAM15H_BDVER4,
    M_AMDFAM17H_ZNVER1,
    M_INTEL_COREI7_IVYBRIDGE,
    M_INTEL_COREI7_HASWELL,
    M_INTEL_COREI7_BROADWELL,
    M_INTEL_COREI7_SKYLAKE,
    M_INTEL_COREI7_SKYLAKE_AVX512
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"intel", M_INTEL},
      {"atom", M_INTEL_BONNELL},
      {"slm", M_INTEL_SILVERMONT},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
      {"haswell", M_INTEL_COREI7_HASWELL},
      {"broadwell", M_INTEL_COREI7_BROADWELL},
      {"skylake", M_INTEL_COREI7_SKYLAKE},
      {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512},
      {"bonnell", M_INTEL_BONNELL},
      {"silvermont", M_INTEL_SILVERMONT},
      {"knl", M_INTEL_KNL},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"btver1", M_AMD_BTVER1},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
      {"bdver3", M_AMDFAM15H_BDVER3},
      {"bdver4", M_AMDFAM15H_BDVER4},
      {"btver2", M_AMD_BTVER2},
      {"znver1", M_AMDFAM17H_ZNVER1},
    };

  /* This is the order of bit-fields in __processor_features in cpuinfo.c */
  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"popcnt", F_POPCNT},
      {"ssse3", F_SSSE3},
      {"sse4a", F_SSE4_A},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx512f", F_AVX512F},
      {"pclmul", F_PCLMUL},
      {"avx512vl",F_AVX512VL},
      {"avx512bw",F_AVX512BW},
      {"avx512dq",F_AVX512DQ},
      {"avx512cd",F_AVX512CD},
      {"avx512er",F_AVX512ER},
      {"avx512pf",F_AVX512PF},
      {"avx512vbmi",F_AVX512VBMI},
      {"avx512ifma",F_AVX512IFMA},
      {"avx5124vnniw",F_AVX5124VNNIW},
      {"avx5124fmaps",F_AVX5124FMAPS},
      {"avx512vpopcntdq",F_AVX512VPOPCNTDQ}
    };

  tree __processor_model_type = build_processor_model_struct ();
  tree __cpu_model_var = make_var_decl (__processor_model_type,
					"__cpu_model");

  varpool_node::add (__cpu_model_var);

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRs leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN ( DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      final = build2 (EQ_EXPR, unsigned_type_node, ref,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		      build_int_cstu (unsigned_type_node, field_val));
      return build1 (CONVERT_EXPR, integer_type_node, final);
    }
  gcc_unreachable ();
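
/* Worked example, for illustration only: __builtin_cpu_is ("haswell")
   matches the arch_names_table entry {"haswell", M_INTEL_COREI7_HASWELL}.
   That value lies above M_CPU_SUBTYPE_START, so the call folds to roughly

       (int) (__cpu_model.__cpu_subtype
	      == (M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START))

   while __builtin_cpu_supports ("avx2") folds to a mask test of the
   corresponding F_* feature bit in __cpu_model.__cpu_features[0].  */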
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    break;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
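
/* Worked example, for illustration only: with BMI2 enabled,
   __builtin_ia32_bzhi_si (0xff, 4) reaches the IX86_BUILTIN_BZHI32 case
   above with both arguments constant.  idx is 4, so the result keeps only
   the low four bits of 0xff and the call folds to the constant 0xf at
   compile time.  */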
/* Fold a MD builtin (use ix86_fold_builtin for folding into
   constant) in GIMPLE.  */

bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
  tree decl = NULL_TREE;

  switch (fn_code)
    {
    case IX86_BUILTIN_TZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CTZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_TZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CLZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
      goto fold_tzcnt_lzcnt;

    fold_tzcnt_lzcnt:
      gcc_assert (n_args == 1);
      arg0 = gimple_call_arg (stmt, 0);
      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
	{
	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
	  /* If arg0 is provably non-zero, optimize into generic
	     __builtin_c[tl]z{,ll} function the middle-end handles
	     better.  */
	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
	    break;

	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_call (decl, 1, arg0);
	  gimple_set_location (g, loc);
	  tree lhs = make_ssa_name (integer_type_node);
	  gimple_call_set_lhs (g, lhs);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_BZHI32:
    case IX86_BUILTIN_BZHI64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
	{
	  unsigned int idx = tree_to_uhwi (arg1) & 0xff;
	  arg0 = gimple_call_arg (stmt, 0);
	  if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
	    break;
	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    case IX86_BUILTIN_PDEP32:
    case IX86_BUILTIN_PDEP64:
    case IX86_BUILTIN_PEXT32:
    case IX86_BUILTIN_PEXT64:
      gcc_assert (n_args == 2);
      arg1 = gimple_call_arg (stmt, 1);
      if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
	{
	  location_t loc = gimple_location (stmt);
	  arg0 = gimple_call_arg (stmt, 0);
	  gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
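
/* Illustrative example, not from the original sources: in code such as

       unsigned f (unsigned x)
       {
	 return x ? __builtin_ia32_tzcnt_u32 (x) : 32;
       }

   range information proves the argument non-zero on the taken branch, so
   the TZCNT case above rewrites the call into __builtin_ctz, which the
   middle-end optimizers understand far better than the target builtin.  */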
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}

/* Make builtins to get CPU type and features supported.  The created
   builtins are :

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
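
/* Usage sketch, for illustration only: once __builtin_cpu_init () has run
   (the IFUNC resolvers above call it explicitly, and it also runs from a
   constructor in libgcc), user code can test the host at run time:

       if (__builtin_cpu_is ("haswell") || __builtin_cpu_supports ("avx2"))
	 use_avx2_path ();
       else
	 use_scalar_path ();

   use_avx2_path and use_scalar_path are placeholder names here; the string
   arguments must be among the names accepted by fold_builtin_cpu above.  */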
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
static void
ix86_init_builtin_types (void)
{
  tree float80_type_node, const_string_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      if (float64x_type_node != NULL_TREE
	  && TYPE_MODE (float64x_type_node) == XFmode)
	float80_type_node = float64x_type_node;
      else
	{
	  /* The __float80 type.  */
	  float80_type_node = make_node (REAL_TYPE);

	  TYPE_PRECISION (float80_type_node) = 80;
	  layout_type (float80_type_node);
	}
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  The node has already been created as
     _Float128, so we only need to register the __float128 name for
     it.  */
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  const_string_type_node
    = build_pointer_type (build_qualified_type
			  (char_type_node, TYPE_QUAL_CONST));

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
35019 ix86_init_builtin_types ();
35021 /* Builtins to get CPU type and features. */
35022 ix86_init_platform_type_builtins ();
35024 /* TFmode support builtins. */
35025 def_builtin_const (0, "__builtin_infq",
35026 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
35027 def_builtin_const (0, "__builtin_huge_valq",
35028 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
35030 ftype
= ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING
);
35031 decl
= add_builtin_function ("__builtin_nanq", ftype
, IX86_BUILTIN_NANQ
,
35032 BUILT_IN_MD
, "nanq", NULL_TREE
);
35033 TREE_READONLY (decl
) = 1;
35034 ix86_builtins
[(int) IX86_BUILTIN_NANQ
] = decl
;
35036 decl
= add_builtin_function ("__builtin_nansq", ftype
, IX86_BUILTIN_NANSQ
,
35037 BUILT_IN_MD
, "nansq", NULL_TREE
);
35038 TREE_READONLY (decl
) = 1;
35039 ix86_builtins
[(int) IX86_BUILTIN_NANSQ
] = decl
;
35041 /* We will expand them to normal call if SSE isn't available since
35042 they are used by libgcc. */
35043 ftype
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
35044 decl
= add_builtin_function ("__builtin_fabsq", ftype
, IX86_BUILTIN_FABSQ
,
35045 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
35046 TREE_READONLY (decl
) = 1;
35047 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = decl
;
35049 ftype
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
35050 decl
= add_builtin_function ("__builtin_copysignq", ftype
,
35051 IX86_BUILTIN_COPYSIGNQ
, BUILT_IN_MD
,
35052 "__copysigntf3", NULL_TREE
);
35053 TREE_READONLY (decl
) = 1;
35054 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = decl
;
35056 ix86_init_tm_builtins ();
35057 ix86_init_mmx_sse_builtins ();
35058 ix86_init_mpx_builtins ();
35061 ix86_init_builtins_va_builtins_abi ();
35063 #ifdef SUBTARGET_INIT_BUILTINS
35064 SUBTARGET_INIT_BUILTINS
;
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}

/* Fixup modeless constants to fit required mode.  */

static rtx
fixup_modeless_constant (rtx x, machine_mode mode)
{
  if (GET_MODE (x) == VOIDmode)
    x = convert_to_mode (mode, x, 1);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      machine_mode mode;
    } args[4];

  machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);
  else if (memory_operand (target, tmode))
    num_memory++;

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data
[new_icode
].operand
[1].predicate
35306 == insn_data
[icode
].operand
[1].predicate
);
35312 gcc_unreachable ();
35319 if (VECTOR_MODE_P (mode
))
35320 op
= safe_vector_operand (op
, mode
);
35322 /* If we aren't optimizing, only allow one memory operand to be
35324 if (memory_operand (op
, mode
))
35327 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
35330 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
35332 op
= force_reg (mode
, op
);
35336 args
[i
].mode
= mode
;
35342 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
35347 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
35348 GEN_INT ((int)sub_code
));
35349 else if (! comparison_p
)
35350 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
35353 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
35357 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
35362 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
35366 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
35370 gcc_unreachable ();
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    std::swap (op0, op1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    std::swap (op0, op1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
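
/* Note on the SUBREG idiom above: the result is built as an SImode
   pseudo cleared to zero, and the comparison outcome is stored through a
   QImode SUBREG wrapped in STRICT_LOW_PART, so only the low byte is
   written while the upper bits stay zero.  Returning SUBREG_REG (target)
   hands back the full SImode value expected by the builtin's int return
   type.  The ptest, pcmpestr and pcmpistr expanders below use the same
   idiom.  */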
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
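
/* For the round expanders the builtin_description's comparison field is
   not used as an rtx_code at all: it carries the rounding-mode immediate,
   which is passed through as the final operand via GEN_INT above and in
   the vec_pack_sfix variant below.  */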
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  machine_mode tmode = insn_data[d->icode].operand[0].mode;
  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
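
/* The pcmpestr expander above serves three builtin flavours from one
   pattern: PCMPESTRI128 returns the index result (operand 0),
   PCMPESTRM128 returns the mask result (operand 1), and the
   flag-testing variants ignore both and instead materialize a single
   condition-code bit, chosen via d->flag, as a 0/1 value using the same
   STRICT_LOW_PART idiom as the comi expander.  The pcmpistr expander
   below follows the same scheme with two fewer operands.  */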
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
35824 /* Subroutine of ix86_expand_builtin to take care of insns with
35825 variable number of operands. */
35828 ix86_expand_args_builtin (const struct builtin_description
*d
,
35829 tree exp
, rtx target
)
35831 rtx pat
, real_target
;
35832 unsigned int i
, nargs
;
35833 unsigned int nargs_constant
= 0;
35834 unsigned int mask_pos
= 0;
35835 int num_memory
= 0;
35841 bool second_arg_count
= false;
35842 enum insn_code icode
= d
->icode
;
35843 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
35844 machine_mode tmode
= insn_p
->operand
[0].mode
;
35845 machine_mode rmode
= VOIDmode
;
35847 enum rtx_code comparison
= d
->comparison
;
35849 switch ((enum ix86_builtin_func_type
) d
->flag
)
35851 case V2DF_FTYPE_V2DF_ROUND
:
35852 case V4DF_FTYPE_V4DF_ROUND
:
35853 case V8DF_FTYPE_V8DF_ROUND
:
35854 case V4SF_FTYPE_V4SF_ROUND
:
35855 case V8SF_FTYPE_V8SF_ROUND
:
35856 case V16SF_FTYPE_V16SF_ROUND
:
35857 case V4SI_FTYPE_V4SF_ROUND
:
35858 case V8SI_FTYPE_V8SF_ROUND
:
35859 case V16SI_FTYPE_V16SF_ROUND
:
35860 return ix86_expand_sse_round (d
, exp
, target
);
35861 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
35862 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
35863 case V16SI_FTYPE_V8DF_V8DF_ROUND
:
35864 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
35865 case INT_FTYPE_V8SF_V8SF_PTEST
:
35866 case INT_FTYPE_V4DI_V4DI_PTEST
:
35867 case INT_FTYPE_V4DF_V4DF_PTEST
:
35868 case INT_FTYPE_V4SF_V4SF_PTEST
:
35869 case INT_FTYPE_V2DI_V2DI_PTEST
:
35870 case INT_FTYPE_V2DF_V2DF_PTEST
:
35871 return ix86_expand_sse_ptest (d
, exp
, target
);
35872 case FLOAT128_FTYPE_FLOAT128
:
35873 case FLOAT_FTYPE_FLOAT
:
35874 case INT_FTYPE_INT
:
35875 case UINT_FTYPE_UINT
:
35876 case UINT16_FTYPE_UINT16
:
35877 case UINT64_FTYPE_INT
:
35878 case UINT64_FTYPE_UINT64
:
35879 case INT64_FTYPE_INT64
:
35880 case INT64_FTYPE_V4SF
:
35881 case INT64_FTYPE_V2DF
:
35882 case INT_FTYPE_V16QI
:
35883 case INT_FTYPE_V8QI
:
35884 case INT_FTYPE_V8SF
:
35885 case INT_FTYPE_V4DF
:
35886 case INT_FTYPE_V4SF
:
35887 case INT_FTYPE_V2DF
:
35888 case INT_FTYPE_V32QI
:
35889 case V16QI_FTYPE_V16QI
:
35890 case V8SI_FTYPE_V8SF
:
35891 case V8SI_FTYPE_V4SI
:
35892 case V8HI_FTYPE_V8HI
:
35893 case V8HI_FTYPE_V16QI
:
35894 case V8QI_FTYPE_V8QI
:
35895 case V8SF_FTYPE_V8SF
:
35896 case V8SF_FTYPE_V8SI
:
35897 case V8SF_FTYPE_V4SF
:
35898 case V8SF_FTYPE_V8HI
:
35899 case V4SI_FTYPE_V4SI
:
35900 case V4SI_FTYPE_V16QI
:
35901 case V4SI_FTYPE_V4SF
:
35902 case V4SI_FTYPE_V8SI
:
35903 case V4SI_FTYPE_V8HI
:
35904 case V4SI_FTYPE_V4DF
:
35905 case V4SI_FTYPE_V2DF
:
35906 case V4HI_FTYPE_V4HI
:
35907 case V4DF_FTYPE_V4DF
:
35908 case V4DF_FTYPE_V4SI
:
35909 case V4DF_FTYPE_V4SF
:
35910 case V4DF_FTYPE_V2DF
:
35911 case V4SF_FTYPE_V4SF
:
35912 case V4SF_FTYPE_V4SI
:
35913 case V4SF_FTYPE_V8SF
:
35914 case V4SF_FTYPE_V4DF
:
35915 case V4SF_FTYPE_V8HI
:
35916 case V4SF_FTYPE_V2DF
:
35917 case V2DI_FTYPE_V2DI
:
35918 case V2DI_FTYPE_V16QI
:
35919 case V2DI_FTYPE_V8HI
:
35920 case V2DI_FTYPE_V4SI
:
35921 case V2DF_FTYPE_V2DF
:
35922 case V2DF_FTYPE_V4SI
:
35923 case V2DF_FTYPE_V4DF
:
35924 case V2DF_FTYPE_V4SF
:
35925 case V2DF_FTYPE_V2SI
:
35926 case V2SI_FTYPE_V2SI
:
35927 case V2SI_FTYPE_V4SF
:
35928 case V2SI_FTYPE_V2SF
:
35929 case V2SI_FTYPE_V2DF
:
35930 case V2SF_FTYPE_V2SF
:
35931 case V2SF_FTYPE_V2SI
:
35932 case V32QI_FTYPE_V32QI
:
35933 case V32QI_FTYPE_V16QI
:
35934 case V16HI_FTYPE_V16HI
:
35935 case V16HI_FTYPE_V8HI
:
35936 case V8SI_FTYPE_V8SI
:
35937 case V16HI_FTYPE_V16QI
:
35938 case V8SI_FTYPE_V16QI
:
35939 case V4DI_FTYPE_V16QI
:
35940 case V8SI_FTYPE_V8HI
:
35941 case V4DI_FTYPE_V8HI
:
35942 case V4DI_FTYPE_V4SI
:
35943 case V4DI_FTYPE_V2DI
:
35944 case UQI_FTYPE_UQI
:
35945 case UHI_FTYPE_UHI
:
35946 case USI_FTYPE_USI
:
35947 case USI_FTYPE_UQI
:
35948 case USI_FTYPE_UHI
:
35949 case UDI_FTYPE_UDI
:
35950 case UHI_FTYPE_V16QI
:
35951 case USI_FTYPE_V32QI
:
35952 case UDI_FTYPE_V64QI
:
35953 case V16QI_FTYPE_UHI
:
35954 case V32QI_FTYPE_USI
:
35955 case V64QI_FTYPE_UDI
:
35956 case V8HI_FTYPE_UQI
:
35957 case V16HI_FTYPE_UHI
:
35958 case V32HI_FTYPE_USI
:
35959 case V4SI_FTYPE_UQI
:
35960 case V8SI_FTYPE_UQI
:
35961 case V4SI_FTYPE_UHI
:
35962 case V8SI_FTYPE_UHI
:
35963 case UQI_FTYPE_V8HI
:
35964 case UHI_FTYPE_V16HI
:
35965 case USI_FTYPE_V32HI
:
35966 case UQI_FTYPE_V4SI
:
35967 case UQI_FTYPE_V8SI
:
35968 case UHI_FTYPE_V16SI
:
35969 case UQI_FTYPE_V2DI
:
35970 case UQI_FTYPE_V4DI
:
35971 case UQI_FTYPE_V8DI
:
35972 case V16SI_FTYPE_UHI
:
35973 case V2DI_FTYPE_UQI
:
35974 case V4DI_FTYPE_UQI
:
35975 case V16SI_FTYPE_INT
:
35976 case V16SF_FTYPE_V8SF
:
35977 case V16SI_FTYPE_V8SI
:
35978 case V16SF_FTYPE_V4SF
:
35979 case V16SI_FTYPE_V4SI
:
35980 case V16SI_FTYPE_V16SF
:
35981 case V16SI_FTYPE_V16SI
:
35982 case V16SF_FTYPE_V16SF
:
35983 case V8DI_FTYPE_UQI
:
35984 case V8DI_FTYPE_V8DI
:
35985 case V8DF_FTYPE_V4DF
:
35986 case V8DF_FTYPE_V2DF
:
35987 case V8DF_FTYPE_V8DF
:
35990 case V4SF_FTYPE_V4SF_VEC_MERGE
:
35991 case V2DF_FTYPE_V2DF_VEC_MERGE
:
35992 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
35993 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
35994 case V16QI_FTYPE_V16QI_V16QI
:
35995 case V16QI_FTYPE_V8HI_V8HI
:
35996 case V16SF_FTYPE_V16SF_V16SF
:
35997 case V8QI_FTYPE_V8QI_V8QI
:
35998 case V8QI_FTYPE_V4HI_V4HI
:
35999 case V8HI_FTYPE_V8HI_V8HI
:
36000 case V8HI_FTYPE_V16QI_V16QI
:
36001 case V8HI_FTYPE_V4SI_V4SI
:
36002 case V8SF_FTYPE_V8SF_V8SF
:
36003 case V8SF_FTYPE_V8SF_V8SI
:
36004 case V8DF_FTYPE_V8DF_V8DF
:
36005 case V4SI_FTYPE_V4SI_V4SI
:
36006 case V4SI_FTYPE_V8HI_V8HI
:
36007 case V4SI_FTYPE_V2DF_V2DF
:
36008 case V4HI_FTYPE_V4HI_V4HI
:
36009 case V4HI_FTYPE_V8QI_V8QI
:
36010 case V4HI_FTYPE_V2SI_V2SI
:
36011 case V4DF_FTYPE_V4DF_V4DF
:
36012 case V4DF_FTYPE_V4DF_V4DI
:
36013 case V4SF_FTYPE_V4SF_V4SF
:
36014 case V4SF_FTYPE_V4SF_V4SI
:
36015 case V4SF_FTYPE_V4SF_V2SI
:
36016 case V4SF_FTYPE_V4SF_V2DF
:
36017 case V4SF_FTYPE_V4SF_UINT
:
36018 case V4SF_FTYPE_V4SF_DI
:
36019 case V4SF_FTYPE_V4SF_SI
:
36020 case V2DI_FTYPE_V2DI_V2DI
:
36021 case V2DI_FTYPE_V16QI_V16QI
:
36022 case V2DI_FTYPE_V4SI_V4SI
:
36023 case V2DI_FTYPE_V2DI_V16QI
:
36024 case V2SI_FTYPE_V2SI_V2SI
:
36025 case V2SI_FTYPE_V4HI_V4HI
:
36026 case V2SI_FTYPE_V2SF_V2SF
:
36027 case V2DF_FTYPE_V2DF_V2DF
:
36028 case V2DF_FTYPE_V2DF_V4SF
:
36029 case V2DF_FTYPE_V2DF_V2DI
:
36030 case V2DF_FTYPE_V2DF_DI
:
36031 case V2DF_FTYPE_V2DF_SI
:
36032 case V2DF_FTYPE_V2DF_UINT
:
36033 case V2SF_FTYPE_V2SF_V2SF
:
36034 case V1DI_FTYPE_V1DI_V1DI
:
36035 case V1DI_FTYPE_V8QI_V8QI
:
36036 case V1DI_FTYPE_V2SI_V2SI
:
36037 case V32QI_FTYPE_V16HI_V16HI
:
36038 case V16HI_FTYPE_V8SI_V8SI
:
36039 case V32QI_FTYPE_V32QI_V32QI
:
36040 case V16HI_FTYPE_V32QI_V32QI
:
36041 case V16HI_FTYPE_V16HI_V16HI
:
36042 case V8SI_FTYPE_V4DF_V4DF
:
36043 case V8SI_FTYPE_V8SI_V8SI
:
36044 case V8SI_FTYPE_V16HI_V16HI
:
36045 case V4DI_FTYPE_V4DI_V4DI
:
36046 case V4DI_FTYPE_V8SI_V8SI
:
36047 case V8DI_FTYPE_V64QI_V64QI
:
36048 if (comparison
== UNKNOWN
)
36049 return ix86_expand_binop_builtin (icode
, exp
, target
);
36052 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
36053 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
36054 gcc_assert (comparison
!= UNKNOWN
);
36058 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
36059 case V16HI_FTYPE_V16HI_SI_COUNT
:
36060 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
36061 case V8SI_FTYPE_V8SI_SI_COUNT
:
36062 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
36063 case V4DI_FTYPE_V4DI_INT_COUNT
:
36064 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
36065 case V8HI_FTYPE_V8HI_SI_COUNT
:
36066 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
36067 case V4SI_FTYPE_V4SI_SI_COUNT
:
36068 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
36069 case V4HI_FTYPE_V4HI_SI_COUNT
:
36070 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
36071 case V2DI_FTYPE_V2DI_SI_COUNT
:
36072 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
36073 case V2SI_FTYPE_V2SI_SI_COUNT
:
36074 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
36075 case V1DI_FTYPE_V1DI_SI_COUNT
:
36077 second_arg_count
= true;
36079 case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT
:
36080 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT
:
36081 case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT
:
36082 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT
:
36083 case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT
:
36084 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT
:
36085 case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT
:
36086 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT
:
36087 case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT
:
36088 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT
:
36089 case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT
:
36090 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT
:
36091 case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT
:
36092 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT
:
36093 case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT
:
36094 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT
:
36095 case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT
:
36096 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT
:
36098 second_arg_count
= true;
36100 case UINT64_FTYPE_UINT64_UINT64
:
36101 case UINT_FTYPE_UINT_UINT
:
36102 case UINT_FTYPE_UINT_USHORT
:
36103 case UINT_FTYPE_UINT_UCHAR
:
36104 case UINT16_FTYPE_UINT16_INT
:
36105 case UINT8_FTYPE_UINT8_INT
:
36106 case UQI_FTYPE_UQI_UQI
:
36107 case UHI_FTYPE_UHI_UHI
:
36108 case USI_FTYPE_USI_USI
:
36109 case UDI_FTYPE_UDI_UDI
:
36110 case V16SI_FTYPE_V8DF_V8DF
:
36113 case V2DI_FTYPE_V2DI_INT_CONVERT
:
36116 nargs_constant
= 1;
36118 case V4DI_FTYPE_V4DI_INT_CONVERT
:
36121 nargs_constant
= 1;
36123 case V8DI_FTYPE_V8DI_INT_CONVERT
:
36126 nargs_constant
= 1;
36128 case V8HI_FTYPE_V8HI_INT
:
36129 case V8HI_FTYPE_V8SF_INT
:
36130 case V16HI_FTYPE_V16SF_INT
:
36131 case V8HI_FTYPE_V4SF_INT
:
36132 case V8SF_FTYPE_V8SF_INT
:
36133 case V4SF_FTYPE_V16SF_INT
:
36134 case V16SF_FTYPE_V16SF_INT
:
36135 case V4SI_FTYPE_V4SI_INT
:
36136 case V4SI_FTYPE_V8SI_INT
:
36137 case V4HI_FTYPE_V4HI_INT
:
36138 case V4DF_FTYPE_V4DF_INT
:
36139 case V4DF_FTYPE_V8DF_INT
:
36140 case V4SF_FTYPE_V4SF_INT
:
36141 case V4SF_FTYPE_V8SF_INT
:
36142 case V2DI_FTYPE_V2DI_INT
:
36143 case V2DF_FTYPE_V2DF_INT
:
36144 case V2DF_FTYPE_V4DF_INT
:
36145 case V16HI_FTYPE_V16HI_INT
:
36146 case V8SI_FTYPE_V8SI_INT
:
36147 case V16SI_FTYPE_V16SI_INT
:
36148 case V4SI_FTYPE_V16SI_INT
:
36149 case V4DI_FTYPE_V4DI_INT
:
36150 case V2DI_FTYPE_V4DI_INT
:
36151 case V4DI_FTYPE_V8DI_INT
:
36152 case QI_FTYPE_V4SF_INT
:
36153 case QI_FTYPE_V2DF_INT
:
36154 case UQI_FTYPE_UQI_UQI_CONST
:
36155 case UHI_FTYPE_UHI_UQI
:
36156 case USI_FTYPE_USI_UQI
:
36157 case UDI_FTYPE_UDI_UQI
:
36159 nargs_constant
= 1;
36161 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
36162 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
36163 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
36164 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
36165 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
36166 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
36167 case UHI_FTYPE_V16SI_V16SI_UHI
:
36168 case UQI_FTYPE_V8DI_V8DI_UQI
:
36169 case V16HI_FTYPE_V16SI_V16HI_UHI
:
36170 case V16QI_FTYPE_V16SI_V16QI_UHI
:
36171 case V16QI_FTYPE_V8DI_V16QI_UQI
:
36172 case V16SF_FTYPE_V16SF_V16SF_UHI
:
36173 case V16SF_FTYPE_V4SF_V16SF_UHI
:
36174 case V16SI_FTYPE_SI_V16SI_UHI
:
36175 case V16SI_FTYPE_V16HI_V16SI_UHI
:
36176 case V16SI_FTYPE_V16QI_V16SI_UHI
:
36177 case V8SF_FTYPE_V4SF_V8SF_UQI
:
36178 case V4DF_FTYPE_V2DF_V4DF_UQI
:
36179 case V8SI_FTYPE_V4SI_V8SI_UQI
:
36180 case V8SI_FTYPE_SI_V8SI_UQI
:
36181 case V4SI_FTYPE_V4SI_V4SI_UQI
:
36182 case V4SI_FTYPE_SI_V4SI_UQI
:
36183 case V4DI_FTYPE_V2DI_V4DI_UQI
:
36184 case V4DI_FTYPE_DI_V4DI_UQI
:
36185 case V2DI_FTYPE_V2DI_V2DI_UQI
:
36186 case V2DI_FTYPE_DI_V2DI_UQI
:
36187 case V64QI_FTYPE_V64QI_V64QI_UDI
:
36188 case V64QI_FTYPE_V16QI_V64QI_UDI
:
36189 case V64QI_FTYPE_QI_V64QI_UDI
:
36190 case V32QI_FTYPE_V32QI_V32QI_USI
:
36191 case V32QI_FTYPE_V16QI_V32QI_USI
:
36192 case V32QI_FTYPE_QI_V32QI_USI
:
36193 case V16QI_FTYPE_V16QI_V16QI_UHI
:
36194 case V16QI_FTYPE_QI_V16QI_UHI
:
36195 case V32HI_FTYPE_V8HI_V32HI_USI
:
36196 case V32HI_FTYPE_HI_V32HI_USI
:
36197 case V16HI_FTYPE_V8HI_V16HI_UHI
:
36198 case V16HI_FTYPE_HI_V16HI_UHI
:
36199 case V8HI_FTYPE_V8HI_V8HI_UQI
:
36200 case V8HI_FTYPE_HI_V8HI_UQI
:
36201 case V8SF_FTYPE_V8HI_V8SF_UQI
:
36202 case V4SF_FTYPE_V8HI_V4SF_UQI
:
36203 case V8SI_FTYPE_V8SF_V8SI_UQI
:
36204 case V4SI_FTYPE_V4SF_V4SI_UQI
:
36205 case V4DI_FTYPE_V4SF_V4DI_UQI
:
36206 case V2DI_FTYPE_V4SF_V2DI_UQI
:
36207 case V4SF_FTYPE_V4DI_V4SF_UQI
:
36208 case V4SF_FTYPE_V2DI_V4SF_UQI
:
36209 case V4DF_FTYPE_V4DI_V4DF_UQI
:
36210 case V2DF_FTYPE_V2DI_V2DF_UQI
:
36211 case V16QI_FTYPE_V8HI_V16QI_UQI
:
36212 case V16QI_FTYPE_V16HI_V16QI_UHI
:
36213 case V16QI_FTYPE_V4SI_V16QI_UQI
:
36214 case V16QI_FTYPE_V8SI_V16QI_UQI
:
36215 case V8HI_FTYPE_V4SI_V8HI_UQI
:
36216 case V8HI_FTYPE_V8SI_V8HI_UQI
:
36217 case V16QI_FTYPE_V2DI_V16QI_UQI
:
36218 case V16QI_FTYPE_V4DI_V16QI_UQI
:
36219 case V8HI_FTYPE_V2DI_V8HI_UQI
:
36220 case V8HI_FTYPE_V4DI_V8HI_UQI
:
36221 case V4SI_FTYPE_V2DI_V4SI_UQI
:
36222 case V4SI_FTYPE_V4DI_V4SI_UQI
:
36223 case V32QI_FTYPE_V32HI_V32QI_USI
:
36224 case UHI_FTYPE_V16QI_V16QI_UHI
:
36225 case USI_FTYPE_V32QI_V32QI_USI
:
36226 case UDI_FTYPE_V64QI_V64QI_UDI
:
36227 case UQI_FTYPE_V8HI_V8HI_UQI
:
36228 case UHI_FTYPE_V16HI_V16HI_UHI
:
36229 case USI_FTYPE_V32HI_V32HI_USI
:
36230 case UQI_FTYPE_V4SI_V4SI_UQI
:
36231 case UQI_FTYPE_V8SI_V8SI_UQI
:
36232 case UQI_FTYPE_V2DI_V2DI_UQI
:
36233 case UQI_FTYPE_V4DI_V4DI_UQI
:
36234 case V4SF_FTYPE_V2DF_V4SF_UQI
:
36235 case V4SF_FTYPE_V4DF_V4SF_UQI
:
36236 case V16SI_FTYPE_V16SI_V16SI_UHI
:
36237 case V16SI_FTYPE_V4SI_V16SI_UHI
:
36238 case V2DI_FTYPE_V4SI_V2DI_UQI
:
36239 case V2DI_FTYPE_V8HI_V2DI_UQI
:
36240 case V2DI_FTYPE_V16QI_V2DI_UQI
:
36241 case V4DI_FTYPE_V4DI_V4DI_UQI
:
36242 case V4DI_FTYPE_V4SI_V4DI_UQI
:
36243 case V4DI_FTYPE_V8HI_V4DI_UQI
:
36244 case V4DI_FTYPE_V16QI_V4DI_UQI
:
36245 case V4DI_FTYPE_V4DF_V4DI_UQI
:
36246 case V2DI_FTYPE_V2DF_V2DI_UQI
:
36247 case V4SI_FTYPE_V4DF_V4SI_UQI
:
36248 case V4SI_FTYPE_V2DF_V4SI_UQI
:
36249 case V4SI_FTYPE_V8HI_V4SI_UQI
:
36250 case V4SI_FTYPE_V16QI_V4SI_UQI
:
36251 case V4DI_FTYPE_V4DI_V4DI_V4DI
:
36252 case V8DF_FTYPE_V2DF_V8DF_UQI
:
36253 case V8DF_FTYPE_V4DF_V8DF_UQI
:
36254 case V8DF_FTYPE_V8DF_V8DF_UQI
:
36255 case V8SF_FTYPE_V8SF_V8SF_UQI
:
36256 case V8SF_FTYPE_V8SI_V8SF_UQI
:
36257 case V4DF_FTYPE_V4DF_V4DF_UQI
:
36258 case V4SF_FTYPE_V4SF_V4SF_UQI
:
36259 case V2DF_FTYPE_V2DF_V2DF_UQI
:
36260 case V2DF_FTYPE_V4SF_V2DF_UQI
:
36261 case V2DF_FTYPE_V4SI_V2DF_UQI
:
36262 case V4SF_FTYPE_V4SI_V4SF_UQI
:
36263 case V4DF_FTYPE_V4SF_V4DF_UQI
:
36264 case V4DF_FTYPE_V4SI_V4DF_UQI
:
36265 case V8SI_FTYPE_V8SI_V8SI_UQI
:
36266 case V8SI_FTYPE_V8HI_V8SI_UQI
:
36267 case V8SI_FTYPE_V16QI_V8SI_UQI
:
36268 case V8DF_FTYPE_V8SI_V8DF_UQI
:
36269 case V8DI_FTYPE_DI_V8DI_UQI
:
36270 case V16SF_FTYPE_V8SF_V16SF_UHI
:
36271 case V16SI_FTYPE_V8SI_V16SI_UHI
:
36272 case V16HI_FTYPE_V16HI_V16HI_UHI
:
36273 case V8HI_FTYPE_V16QI_V8HI_UQI
:
36274 case V16HI_FTYPE_V16QI_V16HI_UHI
:
36275 case V32HI_FTYPE_V32HI_V32HI_USI
:
36276 case V32HI_FTYPE_V32QI_V32HI_USI
:
36277 case V8DI_FTYPE_V16QI_V8DI_UQI
:
36278 case V8DI_FTYPE_V2DI_V8DI_UQI
:
36279 case V8DI_FTYPE_V4DI_V8DI_UQI
:
36280 case V8DI_FTYPE_V8DI_V8DI_UQI
:
36281 case V8DI_FTYPE_V8HI_V8DI_UQI
:
36282 case V8DI_FTYPE_V8SI_V8DI_UQI
:
36283 case V8HI_FTYPE_V8DI_V8HI_UQI
:
36284 case V8SI_FTYPE_V8DI_V8SI_UQI
:
36285 case V4SI_FTYPE_V4SI_V4SI_V4SI
:
36288 case V32QI_FTYPE_V32QI_V32QI_INT
:
36289 case V16HI_FTYPE_V16HI_V16HI_INT
:
36290 case V16QI_FTYPE_V16QI_V16QI_INT
:
36291 case V4DI_FTYPE_V4DI_V4DI_INT
:
36292 case V8HI_FTYPE_V8HI_V8HI_INT
:
36293 case V8SI_FTYPE_V8SI_V8SI_INT
:
36294 case V8SI_FTYPE_V8SI_V4SI_INT
:
36295 case V8SF_FTYPE_V8SF_V8SF_INT
:
36296 case V8SF_FTYPE_V8SF_V4SF_INT
:
36297 case V4SI_FTYPE_V4SI_V4SI_INT
:
36298 case V4DF_FTYPE_V4DF_V4DF_INT
:
36299 case V16SF_FTYPE_V16SF_V16SF_INT
:
36300 case V16SF_FTYPE_V16SF_V4SF_INT
:
36301 case V16SI_FTYPE_V16SI_V4SI_INT
:
36302 case V4DF_FTYPE_V4DF_V2DF_INT
:
36303 case V4SF_FTYPE_V4SF_V4SF_INT
:
36304 case V2DI_FTYPE_V2DI_V2DI_INT
:
36305 case V4DI_FTYPE_V4DI_V2DI_INT
:
36306 case V2DF_FTYPE_V2DF_V2DF_INT
:
36307 case UQI_FTYPE_V8DI_V8UDI_INT
:
36308 case UQI_FTYPE_V8DF_V8DF_INT
:
36309 case UQI_FTYPE_V2DF_V2DF_INT
:
36310 case UQI_FTYPE_V4SF_V4SF_INT
:
36311 case UHI_FTYPE_V16SI_V16SI_INT
:
36312 case UHI_FTYPE_V16SF_V16SF_INT
:
36314 nargs_constant
= 1;
36316 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
36319 nargs_constant
= 1;
36321 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
36324 nargs_constant
= 1;
36326 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
36329 nargs_constant
= 1;
36331 case V2DI_FTYPE_V2DI_UINT_UINT
:
36333 nargs_constant
= 2;
36335 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT
:
36338 nargs_constant
= 1;
36340 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT
:
36344 nargs_constant
= 1;
36346 case QI_FTYPE_V8DF_INT_UQI
:
36347 case QI_FTYPE_V4DF_INT_UQI
:
36348 case QI_FTYPE_V2DF_INT_UQI
:
36349 case HI_FTYPE_V16SF_INT_UHI
:
36350 case QI_FTYPE_V8SF_INT_UQI
:
36351 case QI_FTYPE_V4SF_INT_UQI
:
36354 nargs_constant
= 1;
36356 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT
:
36360 nargs_constant
= 1;
36362 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT
:
36366 nargs_constant
= 1;
36368 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI
:
36369 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI
:
36370 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI
:
36371 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI
:
36372 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI
:
36373 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI
:
36374 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI
:
36375 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI
:
36376 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI
:
36377 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI
:
36378 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI
:
36379 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI
:
36380 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI
:
36381 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI
:
36382 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI
:
36383 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI
:
36384 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI
:
36385 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI
:
36386 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI
:
36387 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI
:
36388 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI
:
36389 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI
:
36390 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI
:
36391 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI
:
36392 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI
:
36393 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI
:
36394 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI
:
36395 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI
:
36396 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI
:
36397 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI
:
36398 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI
:
36399 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI
:
36400 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI
:
36401 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI
:
36402 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI
:
36403 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI
:
36404 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI
:
36405 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI
:
36406 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI
:
36407 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI
:
36408 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI
:
36409 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI
:
36410 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI
:
36411 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI
:
36412 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI
:
36413 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI
:
36414 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI
:
36415 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI
:
36416 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI
:
36417 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI
:
36418 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI
:
36421 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
36422 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
36423 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
36424 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
36425 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT
:
36427 nargs_constant
= 1;
36429 case UQI_FTYPE_V4DI_V4DI_INT_UQI
:
36430 case UQI_FTYPE_V8SI_V8SI_INT_UQI
:
36431 case QI_FTYPE_V4DF_V4DF_INT_UQI
:
36432 case QI_FTYPE_V8SF_V8SF_INT_UQI
:
36433 case UQI_FTYPE_V2DI_V2DI_INT_UQI
:
36434 case UQI_FTYPE_V4SI_V4SI_INT_UQI
:
36435 case UQI_FTYPE_V2DF_V2DF_INT_UQI
:
36436 case UQI_FTYPE_V4SF_V4SF_INT_UQI
:
36437 case UDI_FTYPE_V64QI_V64QI_INT_UDI
:
36438 case USI_FTYPE_V32QI_V32QI_INT_USI
:
36439 case UHI_FTYPE_V16QI_V16QI_INT_UHI
:
36440 case USI_FTYPE_V32HI_V32HI_INT_USI
:
36441 case UHI_FTYPE_V16HI_V16HI_INT_UHI
:
36442 case UQI_FTYPE_V8HI_V8HI_INT_UQI
:
36445 nargs_constant
= 1;
36447 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
36449 nargs_constant
= 2;
36451 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
36452 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
36455 case UQI_FTYPE_V8DI_V8DI_INT_UQI
:
36456 case UHI_FTYPE_V16SI_V16SI_INT_UHI
:
36459 nargs_constant
= 1;
36461 case V8SF_FTYPE_V8SF_INT_V8SF_UQI
:
36462 case V4SF_FTYPE_V4SF_INT_V4SF_UQI
:
36463 case V2DF_FTYPE_V4DF_INT_V2DF_UQI
:
36464 case V2DI_FTYPE_V4DI_INT_V2DI_UQI
:
36465 case V8SF_FTYPE_V16SF_INT_V8SF_UQI
:
36466 case V8SI_FTYPE_V16SI_INT_V8SI_UQI
:
36467 case V2DF_FTYPE_V8DF_INT_V2DF_UQI
:
36468 case V2DI_FTYPE_V8DI_INT_V2DI_UQI
:
36469 case V4SF_FTYPE_V8SF_INT_V4SF_UQI
:
36470 case V4SI_FTYPE_V8SI_INT_V4SI_UQI
:
36471 case V8HI_FTYPE_V8SF_INT_V8HI_UQI
:
36472 case V8HI_FTYPE_V4SF_INT_V8HI_UQI
:
36473 case V32HI_FTYPE_V32HI_INT_V32HI_USI
:
36474 case V16HI_FTYPE_V16HI_INT_V16HI_UHI
:
36475 case V8HI_FTYPE_V8HI_INT_V8HI_UQI
:
36476 case V4DI_FTYPE_V4DI_INT_V4DI_UQI
:
36477 case V2DI_FTYPE_V2DI_INT_V2DI_UQI
:
36478 case V8SI_FTYPE_V8SI_INT_V8SI_UQI
:
36479 case V4SI_FTYPE_V4SI_INT_V4SI_UQI
:
36480 case V4DF_FTYPE_V4DF_INT_V4DF_UQI
:
36481 case V2DF_FTYPE_V2DF_INT_V2DF_UQI
:
36482 case V8DF_FTYPE_V8DF_INT_V8DF_UQI
:
36483 case V16SF_FTYPE_V16SF_INT_V16SF_UHI
:
36484 case V16HI_FTYPE_V16SF_INT_V16HI_UHI
:
36485 case V16SI_FTYPE_V16SI_INT_V16SI_UHI
:
36486 case V4SI_FTYPE_V16SI_INT_V4SI_UQI
:
36487 case V4DI_FTYPE_V8DI_INT_V4DI_UQI
:
36488 case V4DF_FTYPE_V8DF_INT_V4DF_UQI
:
36489 case V4SF_FTYPE_V16SF_INT_V4SF_UQI
:
36490 case V8DI_FTYPE_V8DI_INT_V8DI_UQI
:
36493 nargs_constant
= 1;
36495 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI
:
36496 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI
:
36497 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI
:
36498 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI
:
36499 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI
:
36500 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI
:
36501 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI
:
36502 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI
:
36503 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI
:
36504 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI
:
36505 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI
:
36506 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI
:
36507 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI
:
36508 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI
:
36509 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI
:
36510 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI
:
36511 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI
:
36512 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI
:
36513 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI
:
36514 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI
:
36515 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI
:
36516 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI
:
36517 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI
:
36518 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI
:
36519 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI
:
36520 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI
:
36521 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI
:
36524 nargs_constant
= 1;
36526 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI
:
36527 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI
:
36528 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI
:
36529 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI
:
36530 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI
:
36531 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI
:
36532 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI
:
36533 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI
:
36534 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI
:
36535 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI
:
36538 nargs_constant
= 1;
36542 gcc_unreachable ();
36545 gcc_assert (nargs
<= ARRAY_SIZE (args
));
36547 if (comparison
!= UNKNOWN
)
36549 gcc_assert (nargs
== 2);
36550 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
36553 if (rmode
== VOIDmode
|| rmode
== tmode
)
36557 || GET_MODE (target
) != tmode
36558 || !insn_p
->operand
[0].predicate (target
, tmode
))
36559 target
= gen_reg_rtx (tmode
);
36560 else if (memory_operand (target
, tmode
))
36562 real_target
= target
;
36566 real_target
= gen_reg_rtx (tmode
);
36567 target
= lowpart_subreg (rmode
, real_target
, tmode
);
36570 for (i
= 0; i
< nargs
; i
++)
36572 tree arg
= CALL_EXPR_ARG (exp
, i
);
36573 rtx op
= expand_normal (arg
);
36574 machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
36575 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
36577 if (second_arg_count
&& i
== 1)
36579 /* SIMD shift insns take either an 8-bit immediate or
36580 register as count. But builtin functions take int as
36581 count. If count doesn't match, we put it in register.
36582 The instructions are using 64-bit count, if op is just
36583 32-bit, zero-extend it, as negative shift counts
36584 are undefined behavior and zero-extension is more
36588 if (SCALAR_INT_MODE_P (GET_MODE (op
)))
36589 op
= convert_modes (mode
, GET_MODE (op
), op
, 1);
36591 op
= lowpart_subreg (mode
, op
, GET_MODE (op
));
36592 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
36593 op
= copy_to_reg (op
);
36596 else if ((mask_pos
&& (nargs
- i
- mask_pos
) == nargs_constant
) ||
36597 (!mask_pos
&& (nargs
- i
) <= nargs_constant
))
36602 case CODE_FOR_avx_vinsertf128v4di
:
36603 case CODE_FOR_avx_vextractf128v4di
:
36604 error ("the last argument must be an 1-bit immediate");
36607 case CODE_FOR_avx512f_cmpv8di3_mask
:
36608 case CODE_FOR_avx512f_cmpv16si3_mask
:
36609 case CODE_FOR_avx512f_ucmpv8di3_mask
:
36610 case CODE_FOR_avx512f_ucmpv16si3_mask
:
36611 case CODE_FOR_avx512vl_cmpv4di3_mask
:
36612 case CODE_FOR_avx512vl_cmpv8si3_mask
:
36613 case CODE_FOR_avx512vl_ucmpv4di3_mask
:
36614 case CODE_FOR_avx512vl_ucmpv8si3_mask
:
36615 case CODE_FOR_avx512vl_cmpv2di3_mask
:
36616 case CODE_FOR_avx512vl_cmpv4si3_mask
:
36617 case CODE_FOR_avx512vl_ucmpv2di3_mask
:
36618 case CODE_FOR_avx512vl_ucmpv4si3_mask
:
36619 error ("the last argument must be a 3-bit immediate");
36622 case CODE_FOR_sse4_1_roundsd
:
36623 case CODE_FOR_sse4_1_roundss
:
36625 case CODE_FOR_sse4_1_roundpd
:
36626 case CODE_FOR_sse4_1_roundps
:
36627 case CODE_FOR_avx_roundpd256
:
36628 case CODE_FOR_avx_roundps256
:
36630 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
36631 case CODE_FOR_sse4_1_roundps_sfix
:
36632 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
36633 case CODE_FOR_avx_roundps_sfix256
:
36635 case CODE_FOR_sse4_1_blendps
:
36636 case CODE_FOR_avx_blendpd256
:
36637 case CODE_FOR_avx_vpermilv4df
:
36638 case CODE_FOR_avx_vpermilv4df_mask
:
36639 case CODE_FOR_avx512f_getmantv8df_mask
:
36640 case CODE_FOR_avx512f_getmantv16sf_mask
:
36641 case CODE_FOR_avx512vl_getmantv8sf_mask
:
36642 case CODE_FOR_avx512vl_getmantv4df_mask
:
36643 case CODE_FOR_avx512vl_getmantv4sf_mask
:
36644 case CODE_FOR_avx512vl_getmantv2df_mask
:
36645 case CODE_FOR_avx512dq_rangepv8df_mask_round
:
36646 case CODE_FOR_avx512dq_rangepv16sf_mask_round
:
36647 case CODE_FOR_avx512dq_rangepv4df_mask
:
36648 case CODE_FOR_avx512dq_rangepv8sf_mask
:
36649 case CODE_FOR_avx512dq_rangepv2df_mask
:
36650 case CODE_FOR_avx512dq_rangepv4sf_mask
:
36651 case CODE_FOR_avx_shufpd256_mask
:
36652 error ("the last argument must be a 4-bit immediate");
36655 case CODE_FOR_sha1rnds4
:
36656 case CODE_FOR_sse4_1_blendpd
:
36657 case CODE_FOR_avx_vpermilv2df
:
36658 case CODE_FOR_avx_vpermilv2df_mask
:
36659 case CODE_FOR_xop_vpermil2v2df3
:
36660 case CODE_FOR_xop_vpermil2v4sf3
:
36661 case CODE_FOR_xop_vpermil2v4df3
:
36662 case CODE_FOR_xop_vpermil2v8sf3
:
36663 case CODE_FOR_avx512f_vinsertf32x4_mask
:
36664 case CODE_FOR_avx512f_vinserti32x4_mask
:
36665 case CODE_FOR_avx512f_vextractf32x4_mask
:
36666 case CODE_FOR_avx512f_vextracti32x4_mask
:
36667 case CODE_FOR_sse2_shufpd
:
36668 case CODE_FOR_sse2_shufpd_mask
:
36669 case CODE_FOR_avx512dq_shuf_f64x2_mask
:
36670 case CODE_FOR_avx512dq_shuf_i64x2_mask
:
36671 case CODE_FOR_avx512vl_shuf_i32x4_mask
:
36672 case CODE_FOR_avx512vl_shuf_f32x4_mask
:
36673 error ("the last argument must be a 2-bit immediate");
36676 case CODE_FOR_avx_vextractf128v4df
:
36677 case CODE_FOR_avx_vextractf128v8sf
:
36678 case CODE_FOR_avx_vextractf128v8si
:
36679 case CODE_FOR_avx_vinsertf128v4df
:
36680 case CODE_FOR_avx_vinsertf128v8sf
:
36681 case CODE_FOR_avx_vinsertf128v8si
:
36682 case CODE_FOR_avx512f_vinsertf64x4_mask
:
36683 case CODE_FOR_avx512f_vinserti64x4_mask
:
36684 case CODE_FOR_avx512f_vextractf64x4_mask
:
36685 case CODE_FOR_avx512f_vextracti64x4_mask
:
36686 case CODE_FOR_avx512dq_vinsertf32x8_mask
:
36687 case CODE_FOR_avx512dq_vinserti32x8_mask
:
36688 case CODE_FOR_avx512vl_vinsertv4df
:
36689 case CODE_FOR_avx512vl_vinsertv4di
:
36690 case CODE_FOR_avx512vl_vinsertv8sf
:
36691 case CODE_FOR_avx512vl_vinsertv8si
:
36692 error ("the last argument must be a 1-bit immediate");
36695 case CODE_FOR_avx_vmcmpv2df3
:
36696 case CODE_FOR_avx_vmcmpv4sf3
:
36697 case CODE_FOR_avx_cmpv2df3
:
36698 case CODE_FOR_avx_cmpv4sf3
:
36699 case CODE_FOR_avx_cmpv4df3
:
36700 case CODE_FOR_avx_cmpv8sf3
:
36701 case CODE_FOR_avx512f_cmpv8df3_mask
:
36702 case CODE_FOR_avx512f_cmpv16sf3_mask
:
36703 case CODE_FOR_avx512f_vmcmpv2df3_mask
:
36704 case CODE_FOR_avx512f_vmcmpv4sf3_mask
:
36705 error ("the last argument must be a 5-bit immediate");
36709 switch (nargs_constant
)
36712 if ((mask_pos
&& (nargs
- i
- mask_pos
) == nargs_constant
) ||
36713 (!mask_pos
&& (nargs
- i
) == nargs_constant
))
36715 error ("the next to last argument must be an 8-bit immediate");
36720 error ("the last argument must be an 8-bit immediate");
36723 gcc_unreachable ();
36730 if (VECTOR_MODE_P (mode
))
36731 op
= safe_vector_operand (op
, mode
);
36733 /* If we aren't optimizing, only allow one memory operand to
36735 if (memory_operand (op
, mode
))
36738 op
= fixup_modeless_constant (op
, mode
);
36740 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
36742 if (optimize
|| !match
|| num_memory
> 1)
36743 op
= copy_to_mode_reg (mode
, op
);
36747 op
= copy_to_reg (op
);
36748 op
= lowpart_subreg (mode
, op
, GET_MODE (op
));
36753 args
[i
].mode
= mode
;
36759 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
36762 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
36765 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
36769 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
36770 args
[2].op
, args
[3].op
);
36773 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
36774 args
[2].op
, args
[3].op
, args
[4].op
);
36777 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
36778 args
[2].op
, args
[3].op
, args
[4].op
,
36782 gcc_unreachable ();
/* Transform pattern of following layout:
     (set A
       (unspec [B C] UNSPEC_EMBEDDED_ROUNDING))
   into:
     (set (A B))  */

static rtx
ix86_erase_embedded_rounding (rtx pat)
{
  if (GET_CODE (pat) == INSN)
    pat = PATTERN (pat);

  gcc_assert (GET_CODE (pat) == SET);
  rtx src = SET_SRC (pat);
  gcc_assert (XVECLEN (src, 0) == 2);
  rtx p0 = XVECEXP (src, 0, 0);
  gcc_assert (GET_CODE (src) == UNSPEC
	      && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING);
  rtx res = gen_rtx_SET (SET_DEST (pat), p0);
  return res;
}
/* Subroutine of ix86_expand_round_builtin to take care of comi insns
   with embedded rounding.  */

static rtx
ix86_expand_sse_comi_round (const struct builtin_description *d,
			    tree exp, rtx target)
{
  rtx pat, set_dst;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode mode0 = insn_p->operand[0].mode;
  machine_mode mode1 = insn_p->operand[1].mode;
  enum rtx_code comparison = UNEQ;
  bool need_ucomi = false;

  /* See avxintrin.h for values.  */
  enum rtx_code comi_comparisons[32] =
    {
      UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
      UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
      UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
    };
  bool need_ucomi_values[32] =
    {
      true,  false, false, true,  true,  false, false, true,
      true,  false, false, true,  true,  false, false, true,
      false, true,  true,  false, false, true,  true,  false,
      false, true,  true,  false, false, true,  true,  false
    };

  if (!CONST_INT_P (op2))
    {
      error ("the third argument must be comparison constant");
      return const0_rtx;
    }
  if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
    {
      error ("incorrect comparison mode");
      return const0_rtx;
    }

  if (!insn_p->operand[2].predicate (op3, SImode))
    {
      error ("incorrect rounding operand");
      return const0_rtx;
    }

  comparison = comi_comparisons[INTVAL (op2)];
  need_ucomi = need_ucomi_values[INTVAL (op2)];

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_p->operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_p->operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  if (need_ucomi)
    icode = icode == CODE_FOR_sse_comi_round
		     ? CODE_FOR_sse_ucomi_round
		     : CODE_FOR_sse2_ucomi_round;

  pat = GEN_FCN (icode) (op0, op1, op3);
  if (! pat)
    return 0;

  /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
  if (INTVAL (op3) == NO_ROUND)
    {
      pat = ix86_erase_embedded_rounding (pat);
      if (! pat)
	return 0;

      set_dst = SET_DEST (pat);
    }
  else
    {
      gcc_assert (GET_CODE (pat) == SET);
      set_dst = SET_DEST (pat);
    }

  emit_insn (pat);
  emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  set_dst,
					  const0_rtx)));

  return SUBREG_REG (target);
}
36922 ix86_expand_round_builtin (const struct builtin_description
*d
,
36923 tree exp
, rtx target
)
36926 unsigned int i
, nargs
;
36932 enum insn_code icode
= d
->icode
;
36933 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
36934 machine_mode tmode
= insn_p
->operand
[0].mode
;
36935 unsigned int nargs_constant
= 0;
36936 unsigned int redundant_embed_rnd
= 0;
36938 switch ((enum ix86_builtin_func_type
) d
->flag
)
36940 case UINT64_FTYPE_V2DF_INT
:
36941 case UINT64_FTYPE_V4SF_INT
:
36942 case UINT_FTYPE_V2DF_INT
:
36943 case UINT_FTYPE_V4SF_INT
:
36944 case INT64_FTYPE_V2DF_INT
:
36945 case INT64_FTYPE_V4SF_INT
:
36946 case INT_FTYPE_V2DF_INT
:
36947 case INT_FTYPE_V4SF_INT
:
36950 case V4SF_FTYPE_V4SF_UINT_INT
:
36951 case V4SF_FTYPE_V4SF_UINT64_INT
:
36952 case V2DF_FTYPE_V2DF_UINT64_INT
:
36953 case V4SF_FTYPE_V4SF_INT_INT
:
36954 case V4SF_FTYPE_V4SF_INT64_INT
:
36955 case V2DF_FTYPE_V2DF_INT64_INT
:
36956 case V4SF_FTYPE_V4SF_V4SF_INT
:
36957 case V2DF_FTYPE_V2DF_V2DF_INT
:
36958 case V4SF_FTYPE_V4SF_V2DF_INT
:
36959 case V2DF_FTYPE_V2DF_V4SF_INT
:
36962 case V8SF_FTYPE_V8DF_V8SF_QI_INT
:
    case V8DF_FTYPE_V8DF_V8DF_QI_INT:
    case V8SI_FTYPE_V8DF_V8SI_QI_INT:
    case V8DI_FTYPE_V8DF_V8DI_QI_INT:
    case V8SF_FTYPE_V8DI_V8SF_QI_INT:
    case V8DF_FTYPE_V8DI_V8DF_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_HI_INT:
    case V8DI_FTYPE_V8SF_V8DI_QI_INT:
    case V16SF_FTYPE_V16SI_V16SF_HI_INT:
    case V16SI_FTYPE_V16SF_V16SI_HI_INT:
    case V8DF_FTYPE_V8SF_V8DF_QI_INT:
    case V16SF_FTYPE_V16HI_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_INT:
      nargs_constant = 2;
    case INT_FTYPE_V4SF_V4SF_INT_INT:
    case INT_FTYPE_V2DF_V2DF_INT_INT:
      return ix86_expand_sse_comi_round (d, exp, target);
    case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
    case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
      nargs_constant = 4;
    case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
    case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
    case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
    case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
      nargs_constant = 3;
    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT:
      nargs_constant = 4;
    case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
      nargs_constant = 3;
      gcc_unreachable ();

  gcc_assert (nargs <= ARRAY_SIZE (args));

      || GET_MODE (target) != tmode
      || !insn_p->operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  for (i = 0; i < nargs; i++)
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (i == nargs - nargs_constant)
            case CODE_FOR_avx512f_getmantv8df_mask_round:
            case CODE_FOR_avx512f_getmantv16sf_mask_round:
            case CODE_FOR_avx512f_vgetmantv2df_round:
            case CODE_FOR_avx512f_vgetmantv2df_mask_round:
            case CODE_FOR_avx512f_vgetmantv4sf_round:
            case CODE_FOR_avx512f_vgetmantv4sf_mask_round:
              error ("the immediate argument must be a 4-bit immediate");
            case CODE_FOR_avx512f_cmpv8df3_mask_round:
            case CODE_FOR_avx512f_cmpv16sf3_mask_round:
            case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
            case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
              error ("the immediate argument must be a 5-bit immediate");
              error ("the immediate argument must be an 8-bit immediate");
      else if (i == nargs - 1)
          if (!insn_p->operand[nargs].predicate (op, SImode))
            error ("incorrect rounding operand");

          /* If there is no rounding use normal version of the pattern.  */
          if (INTVAL (op) == NO_ROUND)
            redundant_embed_rnd = 1;

          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          op = fixup_modeless_constant (op, mode);

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
              if (optimize || !match)
                op = copy_to_mode_reg (mode, op);
              op = copy_to_reg (op);
              op = lowpart_subreg (mode, op, GET_MODE (op));

      args[i].mode = mode;

      pat = GEN_FCN (icode) (target, args[0].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op, args[4].op,
      gcc_unreachable ();

  if (redundant_embed_rnd)
    pat = ix86_erase_embedded_rounding (pat);
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
  unsigned int i, nargs, arg_adjust, memory;
  bool aligned_mem = false;
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;
  switch ((enum ix86_builtin_func_type) d->flag)
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
    case INT_FTYPE_VOID:
    case USHORT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
    case V8DI_FTYPE_PV8DI:
        case CODE_FOR_sse4_1_movntdqa:
        case CODE_FOR_avx2_movntdqa:
        case CODE_FOR_avx512f_movntdqa:
          aligned_mem = true;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV8DI_V8DI:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V16SF:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V8DF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
        /* These builtins and instructions require the memory
           to be properly aligned.  */
        case CODE_FOR_avx_movntv4di:
        case CODE_FOR_sse2_movntv2di:
        case CODE_FOR_avx_movntv8sf:
        case CODE_FOR_sse_movntv4sf:
        case CODE_FOR_sse4a_vmmovntv4sf:
        case CODE_FOR_avx_movntv4df:
        case CODE_FOR_sse2_movntv2df:
        case CODE_FOR_sse4a_vmmovntv2df:
        case CODE_FOR_sse2_movntidi:
        case CODE_FOR_sse_movntq:
        case CODE_FOR_sse2_movntisi:
        case CODE_FOR_avx512f_movntv16sf:
        case CODE_FOR_avx512f_movntv8df:
        case CODE_FOR_avx512f_movntv8di:
          aligned_mem = true;
37254 case V4SF_FTYPE_V4SF_PCV2SF
:
37255 case V2DF_FTYPE_V2DF_PCDOUBLE
:
37260 case V8SF_FTYPE_PCV8SF_V8SI
:
37261 case V4DF_FTYPE_PCV4DF_V4DI
:
37262 case V4SF_FTYPE_PCV4SF_V4SI
:
37263 case V2DF_FTYPE_PCV2DF_V2DI
:
37264 case V8SI_FTYPE_PCV8SI_V8SI
:
37265 case V4DI_FTYPE_PCV4DI_V4DI
:
37266 case V4SI_FTYPE_PCV4SI_V4SI
:
37267 case V2DI_FTYPE_PCV2DI_V2DI
:
37268 case VOID_FTYPE_INT_INT64
:
37273 case VOID_FTYPE_PV8DF_V8DF_UQI
:
37274 case VOID_FTYPE_PV4DF_V4DF_UQI
:
37275 case VOID_FTYPE_PV2DF_V2DF_UQI
:
37276 case VOID_FTYPE_PV16SF_V16SF_UHI
:
37277 case VOID_FTYPE_PV8SF_V8SF_UQI
:
37278 case VOID_FTYPE_PV4SF_V4SF_UQI
:
37279 case VOID_FTYPE_PV8DI_V8DI_UQI
:
37280 case VOID_FTYPE_PV4DI_V4DI_UQI
:
37281 case VOID_FTYPE_PV2DI_V2DI_UQI
:
37282 case VOID_FTYPE_PV16SI_V16SI_UHI
:
37283 case VOID_FTYPE_PV8SI_V8SI_UQI
:
37284 case VOID_FTYPE_PV4SI_V4SI_UQI
:
37287 /* These builtins and instructions require the memory
37288 to be properly aligned. */
37289 case CODE_FOR_avx512f_storev16sf_mask
:
37290 case CODE_FOR_avx512f_storev16si_mask
:
37291 case CODE_FOR_avx512f_storev8df_mask
:
37292 case CODE_FOR_avx512f_storev8di_mask
:
37293 case CODE_FOR_avx512vl_storev8sf_mask
:
37294 case CODE_FOR_avx512vl_storev8si_mask
:
37295 case CODE_FOR_avx512vl_storev4df_mask
:
37296 case CODE_FOR_avx512vl_storev4di_mask
:
37297 case CODE_FOR_avx512vl_storev4sf_mask
:
37298 case CODE_FOR_avx512vl_storev4si_mask
:
37299 case CODE_FOR_avx512vl_storev2df_mask
:
37300 case CODE_FOR_avx512vl_storev2di_mask
:
37301 aligned_mem
= true;
37307 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
37308 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
37309 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
37310 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
37311 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
37312 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
37313 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
37314 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
37315 case VOID_FTYPE_PV8SI_V8DI_UQI
:
37316 case VOID_FTYPE_PV8HI_V8DI_UQI
:
37317 case VOID_FTYPE_PV16HI_V16SI_UHI
:
37318 case VOID_FTYPE_PV16QI_V8DI_UQI
:
37319 case VOID_FTYPE_PV16QI_V16SI_UHI
:
37320 case VOID_FTYPE_PV4SI_V4DI_UQI
:
37321 case VOID_FTYPE_PV4SI_V2DI_UQI
:
37322 case VOID_FTYPE_PV8HI_V4DI_UQI
:
37323 case VOID_FTYPE_PV8HI_V2DI_UQI
:
37324 case VOID_FTYPE_PV8HI_V8SI_UQI
:
37325 case VOID_FTYPE_PV8HI_V4SI_UQI
:
37326 case VOID_FTYPE_PV16QI_V4DI_UQI
:
37327 case VOID_FTYPE_PV16QI_V2DI_UQI
:
37328 case VOID_FTYPE_PV16QI_V8SI_UQI
:
37329 case VOID_FTYPE_PV16QI_V4SI_UQI
:
37330 case VOID_FTYPE_PCHAR_V64QI_UDI
:
37331 case VOID_FTYPE_PCHAR_V32QI_USI
:
37332 case VOID_FTYPE_PCHAR_V16QI_UHI
:
37333 case VOID_FTYPE_PSHORT_V32HI_USI
:
37334 case VOID_FTYPE_PSHORT_V16HI_UHI
:
37335 case VOID_FTYPE_PSHORT_V8HI_UQI
:
37336 case VOID_FTYPE_PINT_V16SI_UHI
:
37337 case VOID_FTYPE_PINT_V8SI_UQI
:
37338 case VOID_FTYPE_PINT_V4SI_UQI
:
37339 case VOID_FTYPE_PINT64_V8DI_UQI
:
37340 case VOID_FTYPE_PINT64_V4DI_UQI
:
37341 case VOID_FTYPE_PINT64_V2DI_UQI
:
37342 case VOID_FTYPE_PDOUBLE_V8DF_UQI
:
37343 case VOID_FTYPE_PDOUBLE_V4DF_UQI
:
37344 case VOID_FTYPE_PDOUBLE_V2DF_UQI
:
37345 case VOID_FTYPE_PFLOAT_V16SF_UHI
:
37346 case VOID_FTYPE_PFLOAT_V8SF_UQI
:
37347 case VOID_FTYPE_PFLOAT_V4SF_UQI
:
37348 case VOID_FTYPE_PV32QI_V32HI_USI
:
37349 case VOID_FTYPE_PV16QI_V16HI_UHI
:
37350 case VOID_FTYPE_PV8QI_V8HI_UQI
:
37353 /* Reserve memory operand for target. */
37354 memory
= ARRAY_SIZE (args
);
37356 case V4SF_FTYPE_PCV4SF_V4SF_UQI
:
37357 case V8SF_FTYPE_PCV8SF_V8SF_UQI
:
37358 case V16SF_FTYPE_PCV16SF_V16SF_UHI
:
37359 case V4SI_FTYPE_PCV4SI_V4SI_UQI
:
37360 case V8SI_FTYPE_PCV8SI_V8SI_UQI
:
37361 case V16SI_FTYPE_PCV16SI_V16SI_UHI
:
37362 case V2DF_FTYPE_PCV2DF_V2DF_UQI
:
37363 case V4DF_FTYPE_PCV4DF_V4DF_UQI
:
37364 case V8DF_FTYPE_PCV8DF_V8DF_UQI
:
37365 case V2DI_FTYPE_PCV2DI_V2DI_UQI
:
37366 case V4DI_FTYPE_PCV4DI_V4DI_UQI
:
37367 case V8DI_FTYPE_PCV8DI_V8DI_UQI
:
37370 /* These builtins and instructions require the memory
37371 to be properly aligned. */
37372 case CODE_FOR_avx512f_loadv16sf_mask
:
37373 case CODE_FOR_avx512f_loadv16si_mask
:
37374 case CODE_FOR_avx512f_loadv8df_mask
:
37375 case CODE_FOR_avx512f_loadv8di_mask
:
37376 case CODE_FOR_avx512vl_loadv8sf_mask
:
37377 case CODE_FOR_avx512vl_loadv8si_mask
:
37378 case CODE_FOR_avx512vl_loadv4df_mask
:
37379 case CODE_FOR_avx512vl_loadv4di_mask
:
37380 case CODE_FOR_avx512vl_loadv4sf_mask
:
37381 case CODE_FOR_avx512vl_loadv4si_mask
:
37382 case CODE_FOR_avx512vl_loadv2df_mask
:
37383 case CODE_FOR_avx512vl_loadv2di_mask
:
37384 case CODE_FOR_avx512bw_loadv64qi_mask
:
37385 case CODE_FOR_avx512vl_loadv32qi_mask
:
37386 case CODE_FOR_avx512vl_loadv16qi_mask
:
37387 case CODE_FOR_avx512bw_loadv32hi_mask
:
37388 case CODE_FOR_avx512vl_loadv16hi_mask
:
37389 case CODE_FOR_avx512vl_loadv8hi_mask
:
37390 aligned_mem
= true;
37395 case V64QI_FTYPE_PCCHAR_V64QI_UDI
:
37396 case V32QI_FTYPE_PCCHAR_V32QI_USI
:
37397 case V16QI_FTYPE_PCCHAR_V16QI_UHI
:
37398 case V32HI_FTYPE_PCSHORT_V32HI_USI
:
37399 case V16HI_FTYPE_PCSHORT_V16HI_UHI
:
37400 case V8HI_FTYPE_PCSHORT_V8HI_UQI
:
37401 case V16SI_FTYPE_PCINT_V16SI_UHI
:
37402 case V8SI_FTYPE_PCINT_V8SI_UQI
:
37403 case V4SI_FTYPE_PCINT_V4SI_UQI
:
37404 case V8DI_FTYPE_PCINT64_V8DI_UQI
:
37405 case V4DI_FTYPE_PCINT64_V4DI_UQI
:
37406 case V2DI_FTYPE_PCINT64_V2DI_UQI
:
37407 case V8DF_FTYPE_PCDOUBLE_V8DF_UQI
:
37408 case V4DF_FTYPE_PCDOUBLE_V4DF_UQI
:
37409 case V2DF_FTYPE_PCDOUBLE_V2DF_UQI
:
37410 case V16SF_FTYPE_PCFLOAT_V16SF_UHI
:
37411 case V8SF_FTYPE_PCFLOAT_V8SF_UQI
:
37412 case V4SF_FTYPE_PCFLOAT_V4SF_UQI
:
37417 case VOID_FTYPE_UINT_UINT_UINT
:
37418 case VOID_FTYPE_UINT64_UINT_UINT
:
37419 case UCHAR_FTYPE_UINT_UINT_UINT
:
37420 case UCHAR_FTYPE_UINT64_UINT_UINT
:
37423 memory
= ARRAY_SIZE (args
);
37424 last_arg_constant
= true;
37427 gcc_unreachable ();
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
          op = ix86_zero_extend_to_Pmode (op);
          target = gen_rtx_MEM (tmode, op);
          /* target at this point has just BITS_PER_UNIT MEM_ALIGN
             on it.  Try to improve it using get_pointer_alignment,
             and if the special builtin is one that requires strict
             mode alignment, also from its GET_MODE_ALIGNMENT.
             Failure to do so could lead to ix86_legitimate_combined_insn
             rejecting all changes to such insns.  */
          unsigned int align = get_pointer_alignment (arg);
          if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
            align = GET_MODE_ALIGNMENT (tmode);
          if (MEM_ALIGN (target) < align)
            set_mem_align (target, align);
        target = force_reg (tmode, op);

          || !register_operand (target, tmode)
          || GET_MODE (target) != tmode)
        target = gen_reg_rtx (tmode);

  for (i = 0; i < nargs; i++)
      machine_mode mode = insn_p->operand[i + 1].mode;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
                error ("the last argument must be an 8-bit immediate");

              /* This must be the memory operand.  */
              op = ix86_zero_extend_to_Pmode (op);
              op = gen_rtx_MEM (mode, op);
              /* op at this point has just BITS_PER_UNIT MEM_ALIGN
                 on it.  Try to improve it using get_pointer_alignment,
                 and if the special builtin is one that requires strict
                 mode alignment, also from its GET_MODE_ALIGNMENT.
                 Failure to do so could lead to ix86_legitimate_combined_insn
                 rejecting all changes to such insns.  */
              unsigned int align = get_pointer_alignment (arg);
              if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
                align = GET_MODE_ALIGNMENT (mode);
              if (MEM_ALIGN (op) < align)
                set_mem_align (op, align);

              /* This must be a register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              op = fixup_modeless_constant (op, mode);

              if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
                op = copy_to_mode_reg (mode, op);
                  op = copy_to_reg (op);
                  op = lowpart_subreg (mode, op, GET_MODE (op));

      args[i].mode = mode;

      pat = GEN_FCN (icode) (target);
      pat = GEN_FCN (icode) (target, args[0].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      gcc_unreachable ();

  return klass == store ? 0 : target;
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

get_element_number (tree vec_type, tree arg)
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!tree_fits_uhwi_p (arg)
      || (elt = tree_to_uhwi (arg), elt > max))
      error ("selector must be an integer constant in the range 0..%wi", max);

/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
  machine_mode tmode = TYPE_MODE (type);
  machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
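/* Illustrative note (not part of the original source): these vec_init
   builtins are normally reached through mmintrin.h wrappers rather than
   called directly.  A rough user-level sketch, assuming GCC's
   <mmintrin.h> and -mmmx:

     #include <mmintrin.h>

     __m64
     make_v2si (int lo, int hi)
     {
       return _mm_set_pi32 (hi, lo);
     }

   In GCC's mmintrin.h, _mm_set_pi32 expands to
   __builtin_ia32_vec_init_v2si, which is what licenses the MMX register
   use that plain (type){ init-list } syntax deliberately avoids.  */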
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

ix86_expand_vec_ext_builtin (tree exp, rtx target)
  machine_mode tmode, mode0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);
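/* Illustrative note (not part of the original source): the selector
   checked by get_element_number must be a compile-time constant, which
   is why the element index of the corresponding intrinsics has to be an
   immediate.  A sketch, assuming GCC's <emmintrin.h> and SSE2:

     #include <emmintrin.h>

     int
     third_halfword (__m128i v)
     {
       return _mm_extract_epi16 (v, 2);
     }

   _mm_extract_epi16 wraps __builtin_ia32_vec_ext_v8hi; a non-constant
   index would be rejected with the "selector must be an integer
   constant" error above.  */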
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

ix86_expand_vec_set_builtin (tree exp)
  machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);
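/* Illustrative note (not part of the original source): because the
   expander above copies OP0 into a fresh register, the source vector of
   a vec_set builtin is never modified in place; the intrinsic returns an
   updated copy instead.  A sketch, assuming GCC's <emmintrin.h> and SSE2:

     #include <emmintrin.h>

     __m128i
     replace_lane0 (__m128i v, short x)
     {
       return _mm_insert_epi16 (v, x, 0);
     }

   _mm_insert_epi16 wraps __builtin_ia32_vec_set_v8hi; the argument V is
   left untouched and the modified copy is returned.  */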
/* Emit conditional move of SRC to DST with condition
   OP1 CODE OP2.  */

ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
      t = ix86_expand_compare (code, op1, op2);
      emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,

      rtx_code_label *nomove = gen_label_rtx ();
      emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
                               const0_rtx, GET_MODE (op1), 1, nomove);
      emit_move_insn (dst, src);
      emit_label (nomove);

/* Choose max of DST and SRC and put it to DST.  */

ix86_emit_move_max (rtx dst, rtx src)
  ix86_emit_cmove (dst, src, LTU, dst, src);
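/* Illustrative note (not part of the original source): with the LTU
   condition, ix86_emit_cmove (dst, src, LTU, dst, src) implements an
   unsigned maximum of DST and SRC.  A C sketch of the intent:

     static inline unsigned long
     umax (unsigned long dst, unsigned long src)
     {
       return dst < src ? src : dst;
     }

   The MPX callers below rely on this for both bounds halves: the upper
   bound is kept in one's complement form, so taking the maximum works
   for it as well.  */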
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
                     machine_mode mode, int ignore)
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat, insn;
  machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* For CPU builtins that can be folded, fold first and expand the fold.  */
    case IX86_BUILTIN_CPU_INIT:
        /* Make it call __cpu_indicator_init in libgcc.  */
        tree call_expr, fndecl, type;
        type = build_function_type_list (integer_type_node, NULL_TREE);
        fndecl = build_fn_decl ("__cpu_indicator_init", type);
        call_expr = build_call_expr (fndecl, 0);
        return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
        tree arg0 = CALL_EXPR_ARG (exp, 0);
        tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
        gcc_assert (fold_expr != NULL_TREE);
        return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
  /* Determine whether the builtin function is available under the current
     ISA.  Originally the builtin was not created if it wasn't applicable to
     the current ISA based on the command line switches.  With function
     specific options, we need to check in the context of the function making
     the call whether it is supported.  Treat AVX512VL specially.  For other
     flags, if isa includes more than one ISA bit, treat those as requiring
     any of them.  For AVX512VL, require both AVX512VL and the non-AVX512VL
     ISAs.  Similarly for 64BIT, but we shouldn't be building such builtins
     at all, -m64 is a whole TU option.  */
  if (((ix86_builtins_isa[fcode].isa
        & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT))
       && !(ix86_builtins_isa[fcode].isa
            & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT)
      || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
          && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
      || (ix86_builtins_isa[fcode].isa2
          && !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa,
                                       ix86_builtins_isa[fcode].isa2, 0, 0,
                                       NULL, NULL, (enum fpmath_unit) 0,
        error ("%qE needs unknown isa option", fndecl);
          gcc_assert (opts != NULL);
          error ("%qE needs isa option %s", fndecl, opts);
      return expand_call (exp, target, ignore);
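/* Illustrative note (not part of the original source): since the ISA
   check above is made in the context of the calling function, a builtin
   can be used without the corresponding -m option as long as the caller
   itself enables the ISA.  A sketch, assuming GCC's <immintrin.h>:

     #include <immintrin.h>

     __attribute__ ((target ("avx512f")))
     __m512d
     add_pd512 (__m512d a, __m512d b)
     {
       return _mm512_add_pd (a, b);
     }

   Calling such a builtin from a function that does not enable the ISA
   (on the command line or via the target attribute) is what triggers
   the "needs isa option" errors above.  */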
    case IX86_BUILTIN_BNDMK:
          || GET_MODE (target) != BNDmode
          || !register_operand (target, BNDmode))
        target = gen_reg_rtx (BNDmode);

      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);

      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);

      if (!register_operand (op0, Pmode))
        op0 = ix86_zero_extend_to_Pmode (op0);
      if (!register_operand (op1, Pmode))
        op1 = ix86_zero_extend_to_Pmode (op1);

      /* Builtin arg1 is size of block but instruction op1 should
         be (size - 1).  */
      op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
                                 NULL_RTX, 1, OPTAB_DIRECT);

      emit_insn (BNDmode == BND64mode
                 ? gen_bnd64_mk (target, op0, op1)
                 : gen_bnd32_mk (target, op0, op1));
37810 case IX86_BUILTIN_BNDSTX
:
37811 arg0
= CALL_EXPR_ARG (exp
, 0);
37812 arg1
= CALL_EXPR_ARG (exp
, 1);
37813 arg2
= CALL_EXPR_ARG (exp
, 2);
37815 op0
= expand_normal (arg0
);
37816 op1
= expand_normal (arg1
);
37817 op2
= expand_normal (arg2
);
37819 if (!register_operand (op0
, Pmode
))
37820 op0
= ix86_zero_extend_to_Pmode (op0
);
37821 if (!register_operand (op1
, BNDmode
))
37822 op1
= copy_to_mode_reg (BNDmode
, op1
);
37823 if (!register_operand (op2
, Pmode
))
37824 op2
= ix86_zero_extend_to_Pmode (op2
);
37826 emit_insn (BNDmode
== BND64mode
37827 ? gen_bnd64_stx (op2
, op0
, op1
)
37828 : gen_bnd32_stx (op2
, op0
, op1
));
37831 case IX86_BUILTIN_BNDLDX
:
37833 || GET_MODE (target
) != BNDmode
37834 || !register_operand (target
, BNDmode
))
37835 target
= gen_reg_rtx (BNDmode
);
37837 arg0
= CALL_EXPR_ARG (exp
, 0);
37838 arg1
= CALL_EXPR_ARG (exp
, 1);
37840 op0
= expand_normal (arg0
);
37841 op1
= expand_normal (arg1
);
37843 if (!register_operand (op0
, Pmode
))
37844 op0
= ix86_zero_extend_to_Pmode (op0
);
37845 if (!register_operand (op1
, Pmode
))
37846 op1
= ix86_zero_extend_to_Pmode (op1
);
37848 emit_insn (BNDmode
== BND64mode
37849 ? gen_bnd64_ldx (target
, op0
, op1
)
37850 : gen_bnd32_ldx (target
, op0
, op1
));
37853 case IX86_BUILTIN_BNDCL
:
37854 arg0
= CALL_EXPR_ARG (exp
, 0);
37855 arg1
= CALL_EXPR_ARG (exp
, 1);
37857 op0
= expand_normal (arg0
);
37858 op1
= expand_normal (arg1
);
37860 if (!register_operand (op0
, Pmode
))
37861 op0
= ix86_zero_extend_to_Pmode (op0
);
37862 if (!register_operand (op1
, BNDmode
))
37863 op1
= copy_to_mode_reg (BNDmode
, op1
);
37865 emit_insn (BNDmode
== BND64mode
37866 ? gen_bnd64_cl (op1
, op0
)
37867 : gen_bnd32_cl (op1
, op0
));
37870 case IX86_BUILTIN_BNDCU
:
37871 arg0
= CALL_EXPR_ARG (exp
, 0);
37872 arg1
= CALL_EXPR_ARG (exp
, 1);
37874 op0
= expand_normal (arg0
);
37875 op1
= expand_normal (arg1
);
37877 if (!register_operand (op0
, Pmode
))
37878 op0
= ix86_zero_extend_to_Pmode (op0
);
37879 if (!register_operand (op1
, BNDmode
))
37880 op1
= copy_to_mode_reg (BNDmode
, op1
);
37882 emit_insn (BNDmode
== BND64mode
37883 ? gen_bnd64_cu (op1
, op0
)
37884 : gen_bnd32_cu (op1
, op0
));
37887 case IX86_BUILTIN_BNDRET
:
37888 arg0
= CALL_EXPR_ARG (exp
, 0);
37889 target
= chkp_get_rtl_bounds (arg0
);
37891 /* If no bounds were specified for returned value,
37892 then use INIT bounds. It usually happens when
37893 some built-in function is expanded. */
37896 rtx t1
= gen_reg_rtx (Pmode
);
37897 rtx t2
= gen_reg_rtx (Pmode
);
37898 target
= gen_reg_rtx (BNDmode
);
37899 emit_move_insn (t1
, const0_rtx
);
37900 emit_move_insn (t2
, constm1_rtx
);
37901 emit_insn (BNDmode
== BND64mode
37902 ? gen_bnd64_mk (target
, t1
, t2
)
37903 : gen_bnd32_mk (target
, t1
, t2
));
37906 gcc_assert (target
&& REG_P (target
));
37909 case IX86_BUILTIN_BNDNARROW
:
37911 rtx m1
, m1h1
, m1h2
, lb
, ub
, t1
;
37913 /* Return value and lb. */
37914 arg0
= CALL_EXPR_ARG (exp
, 0);
37916 arg1
= CALL_EXPR_ARG (exp
, 1);
37918 arg2
= CALL_EXPR_ARG (exp
, 2);
37920 lb
= expand_normal (arg0
);
37921 op1
= expand_normal (arg1
);
37922 op2
= expand_normal (arg2
);
37924 /* Size was passed but we need to use (size - 1) as for bndmk. */
37925 op2
= expand_simple_binop (Pmode
, PLUS
, op2
, constm1_rtx
,
37926 NULL_RTX
, 1, OPTAB_DIRECT
);
37928 /* Add LB to size and inverse to get UB. */
37929 op2
= expand_simple_binop (Pmode
, PLUS
, op2
, lb
,
37930 op2
, 1, OPTAB_DIRECT
);
37931 ub
= expand_simple_unop (Pmode
, NOT
, op2
, op2
, 1);
37933 if (!register_operand (lb
, Pmode
))
37934 lb
= ix86_zero_extend_to_Pmode (lb
);
37935 if (!register_operand (ub
, Pmode
))
37936 ub
= ix86_zero_extend_to_Pmode (ub
);
37938 /* We need to move bounds to memory before any computations. */
37943 m1
= assign_386_stack_local (BNDmode
, SLOT_TEMP
);
37944 emit_move_insn (m1
, op1
);
37947 /* Generate mem expression to be used for access to LB and UB. */
37948 m1h1
= adjust_address (m1
, Pmode
, 0);
37949 m1h2
= adjust_address (m1
, Pmode
, GET_MODE_SIZE (Pmode
));
37951 t1
= gen_reg_rtx (Pmode
);
37954 emit_move_insn (t1
, m1h1
);
37955 ix86_emit_move_max (t1
, lb
);
37956 emit_move_insn (m1h1
, t1
);
37958 /* Compute UB. UB is stored in 1's complement form. Therefore
37959 we also use max here. */
37960 emit_move_insn (t1
, m1h2
);
37961 ix86_emit_move_max (t1
, ub
);
37962 emit_move_insn (m1h2
, t1
);
37964 op2
= gen_reg_rtx (BNDmode
);
37965 emit_move_insn (op2
, m1
);
37967 return chkp_join_splitted_slot (lb
, op2
);
37970 case IX86_BUILTIN_BNDINT
:
37972 rtx res
, rh1
, rh2
, lb1
, lb2
, ub1
, ub2
;
37975 || GET_MODE (target
) != BNDmode
37976 || !register_operand (target
, BNDmode
))
37977 target
= gen_reg_rtx (BNDmode
);
37979 arg0
= CALL_EXPR_ARG (exp
, 0);
37980 arg1
= CALL_EXPR_ARG (exp
, 1);
37982 op0
= expand_normal (arg0
);
37983 op1
= expand_normal (arg1
);
37985 res
= assign_386_stack_local (BNDmode
, SLOT_TEMP
);
37986 rh1
= adjust_address (res
, Pmode
, 0);
37987 rh2
= adjust_address (res
, Pmode
, GET_MODE_SIZE (Pmode
));
37989 /* Put first bounds to temporaries. */
37990 lb1
= gen_reg_rtx (Pmode
);
37991 ub1
= gen_reg_rtx (Pmode
);
37994 emit_move_insn (lb1
, adjust_address (op0
, Pmode
, 0));
37995 emit_move_insn (ub1
, adjust_address (op0
, Pmode
,
37996 GET_MODE_SIZE (Pmode
)));
38000 emit_move_insn (res
, op0
);
38001 emit_move_insn (lb1
, rh1
);
38002 emit_move_insn (ub1
, rh2
);
38005 /* Put second bounds to temporaries. */
38006 lb2
= gen_reg_rtx (Pmode
);
38007 ub2
= gen_reg_rtx (Pmode
);
38010 emit_move_insn (lb2
, adjust_address (op1
, Pmode
, 0));
38011 emit_move_insn (ub2
, adjust_address (op1
, Pmode
,
38012 GET_MODE_SIZE (Pmode
)));
38016 emit_move_insn (res
, op1
);
38017 emit_move_insn (lb2
, rh1
);
38018 emit_move_insn (ub2
, rh2
);
38022 ix86_emit_move_max (lb1
, lb2
);
38023 emit_move_insn (rh1
, lb1
);
38025 /* Compute UB. UB is stored in 1's complement form. Therefore
38026 we also use max here. */
38027 ix86_emit_move_max (ub1
, ub2
);
38028 emit_move_insn (rh2
, ub1
);
38030 emit_move_insn (target
, res
);
38035 case IX86_BUILTIN_SIZEOF
:
38041 || GET_MODE (target
) != Pmode
38042 || !register_operand (target
, Pmode
))
38043 target
= gen_reg_rtx (Pmode
);
38045 arg0
= CALL_EXPR_ARG (exp
, 0);
38046 gcc_assert (VAR_P (arg0
));
38048 name
= DECL_ASSEMBLER_NAME (arg0
);
38049 symbol
= gen_rtx_SYMBOL_REF (Pmode
, IDENTIFIER_POINTER (name
));
38051 emit_insn (Pmode
== SImode
38052 ? gen_move_size_reloc_si (target
, symbol
)
38053 : gen_move_size_reloc_di (target
, symbol
));
38058 case IX86_BUILTIN_BNDLOWER
:
38063 || GET_MODE (target
) != Pmode
38064 || !register_operand (target
, Pmode
))
38065 target
= gen_reg_rtx (Pmode
);
38067 arg0
= CALL_EXPR_ARG (exp
, 0);
38068 op0
= expand_normal (arg0
);
38070 /* We need to move bounds to memory first. */
38075 mem
= assign_386_stack_local (BNDmode
, SLOT_TEMP
);
38076 emit_move_insn (mem
, op0
);
38079 /* Generate mem expression to access LB and load it. */
38080 hmem
= adjust_address (mem
, Pmode
, 0);
38081 emit_move_insn (target
, hmem
);
38086 case IX86_BUILTIN_BNDUPPER
:
38088 rtx mem
, hmem
, res
;
38091 || GET_MODE (target
) != Pmode
38092 || !register_operand (target
, Pmode
))
38093 target
= gen_reg_rtx (Pmode
);
38095 arg0
= CALL_EXPR_ARG (exp
, 0);
38096 op0
= expand_normal (arg0
);
38098 /* We need to move bounds to memory first. */
38103 mem
= assign_386_stack_local (BNDmode
, SLOT_TEMP
);
38104 emit_move_insn (mem
, op0
);
38107 /* Generate mem expression to access UB. */
38108 hmem
= adjust_address (mem
, Pmode
, GET_MODE_SIZE (Pmode
));
38110 /* We need to inverse all bits of UB. */
38111 res
= expand_simple_unop (Pmode
, NOT
, hmem
, target
, 1);
38114 emit_move_insn (target
, res
);
38119 case IX86_BUILTIN_MASKMOVQ
:
38120 case IX86_BUILTIN_MASKMOVDQU
:
38121 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
38122 ? CODE_FOR_mmx_maskmovq
38123 : CODE_FOR_sse2_maskmovdqu
);
38124 /* Note the arg order is different from the operand order. */
38125 arg1
= CALL_EXPR_ARG (exp
, 0);
38126 arg2
= CALL_EXPR_ARG (exp
, 1);
38127 arg0
= CALL_EXPR_ARG (exp
, 2);
38128 op0
= expand_normal (arg0
);
38129 op1
= expand_normal (arg1
);
38130 op2
= expand_normal (arg2
);
38131 mode0
= insn_data
[icode
].operand
[0].mode
;
38132 mode1
= insn_data
[icode
].operand
[1].mode
;
38133 mode2
= insn_data
[icode
].operand
[2].mode
;
38135 op0
= ix86_zero_extend_to_Pmode (op0
);
38136 op0
= gen_rtx_MEM (mode1
, op0
);
38138 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
38139 op0
= copy_to_mode_reg (mode0
, op0
);
38140 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
38141 op1
= copy_to_mode_reg (mode1
, op1
);
38142 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
38143 op2
= copy_to_mode_reg (mode2
, op2
);
38144 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_TEMP);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
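/* Illustrative note (not part of the original source): the LDMXCSR and
   STMXCSR builtins above go through a stack temporary because the
   instructions only take a memory operand.  At the user level they are
   reached through _mm_setcsr and _mm_getcsr; a sketch, assuming GCC's
   <xmmintrin.h>:

     #include <xmmintrin.h>

     unsigned int
     set_ftz (void)
     {
       unsigned int old = _mm_getcsr ();
       _mm_setcsr (old | 0x8000);
       return old;
     }

   0x8000 is the MXCSR flush-to-zero bit.  */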
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_sse2_clflush (op0));

    case IX86_BUILTIN_CLWB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_clwb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_clwb (op0));

    case IX86_BUILTIN_CLFLUSHOPT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_clflushopt;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
        op0 = ix86_zero_extend_to_Pmode (op0);

      emit_insn (gen_clflushopt (op0));
38192 case IX86_BUILTIN_MONITOR
:
38193 case IX86_BUILTIN_MONITORX
:
38194 arg0
= CALL_EXPR_ARG (exp
, 0);
38195 arg1
= CALL_EXPR_ARG (exp
, 1);
38196 arg2
= CALL_EXPR_ARG (exp
, 2);
38197 op0
= expand_normal (arg0
);
38198 op1
= expand_normal (arg1
);
38199 op2
= expand_normal (arg2
);
38201 op0
= ix86_zero_extend_to_Pmode (op0
);
38203 op1
= copy_to_mode_reg (SImode
, op1
);
38205 op2
= copy_to_mode_reg (SImode
, op2
);
38207 emit_insn (fcode
== IX86_BUILTIN_MONITOR
38208 ? ix86_gen_monitor (op0
, op1
, op2
)
38209 : ix86_gen_monitorx (op0
, op1
, op2
));
38212 case IX86_BUILTIN_MWAIT
:
38213 arg0
= CALL_EXPR_ARG (exp
, 0);
38214 arg1
= CALL_EXPR_ARG (exp
, 1);
38215 op0
= expand_normal (arg0
);
38216 op1
= expand_normal (arg1
);
38218 op0
= copy_to_mode_reg (SImode
, op0
);
38220 op1
= copy_to_mode_reg (SImode
, op1
);
38221 emit_insn (gen_sse3_mwait (op0
, op1
));
38224 case IX86_BUILTIN_MWAITX
:
38225 arg0
= CALL_EXPR_ARG (exp
, 0);
38226 arg1
= CALL_EXPR_ARG (exp
, 1);
38227 arg2
= CALL_EXPR_ARG (exp
, 2);
38228 op0
= expand_normal (arg0
);
38229 op1
= expand_normal (arg1
);
38230 op2
= expand_normal (arg2
);
38232 op0
= copy_to_mode_reg (SImode
, op0
);
38234 op1
= copy_to_mode_reg (SImode
, op1
);
38236 op2
= copy_to_mode_reg (SImode
, op2
);
38237 emit_insn (gen_mwaitx (op0
, op1
, op2
));
38240 case IX86_BUILTIN_CLZERO
:
38241 arg0
= CALL_EXPR_ARG (exp
, 0);
38242 op0
= expand_normal (arg0
);
38244 op0
= ix86_zero_extend_to_Pmode (op0
);
38245 emit_insn (ix86_gen_clzero (op0
));
38248 case IX86_BUILTIN_VEC_INIT_V2SI
:
38249 case IX86_BUILTIN_VEC_INIT_V4HI
:
38250 case IX86_BUILTIN_VEC_INIT_V8QI
:
38251 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
38253 case IX86_BUILTIN_VEC_EXT_V2DF
:
38254 case IX86_BUILTIN_VEC_EXT_V2DI
:
38255 case IX86_BUILTIN_VEC_EXT_V4SF
:
38256 case IX86_BUILTIN_VEC_EXT_V4SI
:
38257 case IX86_BUILTIN_VEC_EXT_V8HI
:
38258 case IX86_BUILTIN_VEC_EXT_V2SI
:
38259 case IX86_BUILTIN_VEC_EXT_V4HI
:
38260 case IX86_BUILTIN_VEC_EXT_V16QI
:
38261 return ix86_expand_vec_ext_builtin (exp
, target
);
38263 case IX86_BUILTIN_VEC_SET_V2DI
:
38264 case IX86_BUILTIN_VEC_SET_V4SF
:
38265 case IX86_BUILTIN_VEC_SET_V4SI
:
38266 case IX86_BUILTIN_VEC_SET_V8HI
:
38267 case IX86_BUILTIN_VEC_SET_V4HI
:
38268 case IX86_BUILTIN_VEC_SET_V16QI
:
38269 return ix86_expand_vec_set_builtin (exp
);
38271 case IX86_BUILTIN_NANQ
:
38272 case IX86_BUILTIN_NANSQ
:
38273 return expand_call (exp
, target
, ignore
);
38275 case IX86_BUILTIN_RDPMC
:
38276 case IX86_BUILTIN_RDTSC
:
38277 case IX86_BUILTIN_RDTSCP
:
38278 case IX86_BUILTIN_XGETBV
:
38280 op0
= gen_reg_rtx (DImode
);
38281 op1
= gen_reg_rtx (DImode
);
38283 if (fcode
== IX86_BUILTIN_RDPMC
)
38285 arg0
= CALL_EXPR_ARG (exp
, 0);
38286 op2
= expand_normal (arg0
);
38287 if (!register_operand (op2
, SImode
))
38288 op2
= copy_to_mode_reg (SImode
, op2
);
38290 insn
= (TARGET_64BIT
38291 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
38292 : gen_rdpmc (op0
, op2
));
38295 else if (fcode
== IX86_BUILTIN_XGETBV
)
38297 arg0
= CALL_EXPR_ARG (exp
, 0);
38298 op2
= expand_normal (arg0
);
38299 if (!register_operand (op2
, SImode
))
38300 op2
= copy_to_mode_reg (SImode
, op2
);
38302 insn
= (TARGET_64BIT
38303 ? gen_xgetbv_rex64 (op0
, op1
, op2
)
38304 : gen_xgetbv (op0
, op2
));
38307 else if (fcode
== IX86_BUILTIN_RDTSC
)
38309 insn
= (TARGET_64BIT
38310 ? gen_rdtsc_rex64 (op0
, op1
)
38311 : gen_rdtsc (op0
));
38316 op2
= gen_reg_rtx (SImode
);
38318 insn
= (TARGET_64BIT
38319 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
38320 : gen_rdtscp (op0
, op2
));
38323 arg0
= CALL_EXPR_ARG (exp
, 0);
38324 op4
= expand_normal (arg0
);
38325 if (!address_operand (op4
, VOIDmode
))
38327 op4
= convert_memory_address (Pmode
, op4
);
38328 op4
= copy_addr_to_reg (op4
);
38330 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
38335 /* mode is VOIDmode if __builtin_rd* has been called
38337 if (mode
== VOIDmode
)
38339 target
= gen_reg_rtx (mode
);
38344 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
38345 op1
, 1, OPTAB_DIRECT
);
38346 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
38347 op0
, 1, OPTAB_DIRECT
);
38350 emit_move_insn (target
, op0
);
38353 case IX86_BUILTIN_FXSAVE
:
38354 case IX86_BUILTIN_FXRSTOR
:
38355 case IX86_BUILTIN_FXSAVE64
:
38356 case IX86_BUILTIN_FXRSTOR64
:
38357 case IX86_BUILTIN_FNSTENV
:
38358 case IX86_BUILTIN_FLDENV
:
38362 case IX86_BUILTIN_FXSAVE
:
38363 icode
= CODE_FOR_fxsave
;
38365 case IX86_BUILTIN_FXRSTOR
:
38366 icode
= CODE_FOR_fxrstor
;
38368 case IX86_BUILTIN_FXSAVE64
:
38369 icode
= CODE_FOR_fxsave64
;
38371 case IX86_BUILTIN_FXRSTOR64
:
38372 icode
= CODE_FOR_fxrstor64
;
38374 case IX86_BUILTIN_FNSTENV
:
38375 icode
= CODE_FOR_fnstenv
;
38377 case IX86_BUILTIN_FLDENV
:
38378 icode
= CODE_FOR_fldenv
;
38381 gcc_unreachable ();
38384 arg0
= CALL_EXPR_ARG (exp
, 0);
38385 op0
= expand_normal (arg0
);
38387 if (!address_operand (op0
, VOIDmode
))
38389 op0
= convert_memory_address (Pmode
, op0
);
38390 op0
= copy_addr_to_reg (op0
);
38392 op0
= gen_rtx_MEM (mode0
, op0
);
38394 pat
= GEN_FCN (icode
) (op0
);
38399 case IX86_BUILTIN_XSETBV
:
38400 arg0
= CALL_EXPR_ARG (exp
, 0);
38401 arg1
= CALL_EXPR_ARG (exp
, 1);
38402 op0
= expand_normal (arg0
);
38403 op1
= expand_normal (arg1
);
38406 op0
= copy_to_mode_reg (SImode
, op0
);
38410 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
38411 NULL
, 1, OPTAB_DIRECT
);
38413 op2
= gen_lowpart (SImode
, op2
);
38414 op1
= gen_lowpart (SImode
, op1
);
38416 op1
= copy_to_mode_reg (SImode
, op1
);
38418 op2
= copy_to_mode_reg (SImode
, op2
);
38419 icode
= CODE_FOR_xsetbv_rex64
;
38420 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
38425 op1
= copy_to_mode_reg (DImode
, op1
);
38426 icode
= CODE_FOR_xsetbv
;
38427 pat
= GEN_FCN (icode
) (op0
, op1
);
38433 case IX86_BUILTIN_XSAVE
:
38434 case IX86_BUILTIN_XRSTOR
:
38435 case IX86_BUILTIN_XSAVE64
:
38436 case IX86_BUILTIN_XRSTOR64
:
38437 case IX86_BUILTIN_XSAVEOPT
:
38438 case IX86_BUILTIN_XSAVEOPT64
:
38439 case IX86_BUILTIN_XSAVES
:
38440 case IX86_BUILTIN_XRSTORS
:
38441 case IX86_BUILTIN_XSAVES64
:
38442 case IX86_BUILTIN_XRSTORS64
:
38443 case IX86_BUILTIN_XSAVEC
:
38444 case IX86_BUILTIN_XSAVEC64
:
38445 arg0
= CALL_EXPR_ARG (exp
, 0);
38446 arg1
= CALL_EXPR_ARG (exp
, 1);
38447 op0
= expand_normal (arg0
);
38448 op1
= expand_normal (arg1
);
38450 if (!address_operand (op0
, VOIDmode
))
38452 op0
= convert_memory_address (Pmode
, op0
);
38453 op0
= copy_addr_to_reg (op0
);
38455 op0
= gen_rtx_MEM (BLKmode
, op0
);
38457 op1
= force_reg (DImode
, op1
);
38461 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
38462 NULL
, 1, OPTAB_DIRECT
);
38465 case IX86_BUILTIN_XSAVE
:
38466 icode
= CODE_FOR_xsave_rex64
;
38468 case IX86_BUILTIN_XRSTOR
:
38469 icode
= CODE_FOR_xrstor_rex64
;
38471 case IX86_BUILTIN_XSAVE64
:
38472 icode
= CODE_FOR_xsave64
;
38474 case IX86_BUILTIN_XRSTOR64
:
38475 icode
= CODE_FOR_xrstor64
;
38477 case IX86_BUILTIN_XSAVEOPT
:
38478 icode
= CODE_FOR_xsaveopt_rex64
;
38480 case IX86_BUILTIN_XSAVEOPT64
:
38481 icode
= CODE_FOR_xsaveopt64
;
38483 case IX86_BUILTIN_XSAVES
:
38484 icode
= CODE_FOR_xsaves_rex64
;
38486 case IX86_BUILTIN_XRSTORS
:
38487 icode
= CODE_FOR_xrstors_rex64
;
38489 case IX86_BUILTIN_XSAVES64
:
38490 icode
= CODE_FOR_xsaves64
;
38492 case IX86_BUILTIN_XRSTORS64
:
38493 icode
= CODE_FOR_xrstors64
;
38495 case IX86_BUILTIN_XSAVEC
:
38496 icode
= CODE_FOR_xsavec_rex64
;
38498 case IX86_BUILTIN_XSAVEC64
:
38499 icode
= CODE_FOR_xsavec64
;
38502 gcc_unreachable ();
38505 op2
= gen_lowpart (SImode
, op2
);
38506 op1
= gen_lowpart (SImode
, op1
);
38507 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
38513 case IX86_BUILTIN_XSAVE
:
38514 icode
= CODE_FOR_xsave
;
38516 case IX86_BUILTIN_XRSTOR
:
38517 icode
= CODE_FOR_xrstor
;
38519 case IX86_BUILTIN_XSAVEOPT
:
38520 icode
= CODE_FOR_xsaveopt
;
38522 case IX86_BUILTIN_XSAVES
:
38523 icode
= CODE_FOR_xsaves
;
38525 case IX86_BUILTIN_XRSTORS
:
38526 icode
= CODE_FOR_xrstors
;
38528 case IX86_BUILTIN_XSAVEC
:
38529 icode
= CODE_FOR_xsavec
;
38532 gcc_unreachable ();
38534 pat
= GEN_FCN (icode
) (op0
, op1
);
38541 case IX86_BUILTIN_LLWPCB
:
38542 arg0
= CALL_EXPR_ARG (exp
, 0);
38543 op0
= expand_normal (arg0
);
38544 icode
= CODE_FOR_lwp_llwpcb
;
38545 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
38546 op0
= ix86_zero_extend_to_Pmode (op0
);
38547 emit_insn (gen_lwp_llwpcb (op0
));
38550 case IX86_BUILTIN_SLWPCB
:
38551 icode
= CODE_FOR_lwp_slwpcb
;
38553 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
38554 target
= gen_reg_rtx (Pmode
);
38555 emit_insn (gen_lwp_slwpcb (target
));
38558 case IX86_BUILTIN_BEXTRI32
:
38559 case IX86_BUILTIN_BEXTRI64
:
38560 arg0
= CALL_EXPR_ARG (exp
, 0);
38561 arg1
= CALL_EXPR_ARG (exp
, 1);
38562 op0
= expand_normal (arg0
);
38563 op1
= expand_normal (arg1
);
38564 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
38565 ? CODE_FOR_tbm_bextri_si
38566 : CODE_FOR_tbm_bextri_di
);
38567 if (!CONST_INT_P (op1
))
38569 error ("last argument must be an immediate");
38574 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
38575 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
38576 op1
= GEN_INT (length
);
38577 op2
= GEN_INT (lsb_index
);
38578 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
38584 case IX86_BUILTIN_RDRAND16_STEP
:
38585 icode
= CODE_FOR_rdrandhi_1
;
38589 case IX86_BUILTIN_RDRAND32_STEP
:
38590 icode
= CODE_FOR_rdrandsi_1
;
38594 case IX86_BUILTIN_RDRAND64_STEP
:
38595 icode
= CODE_FOR_rdranddi_1
;
38599 arg0
= CALL_EXPR_ARG (exp
, 0);
38600 op1
= expand_normal (arg0
);
38601 if (!address_operand (op1
, VOIDmode
))
38603 op1
= convert_memory_address (Pmode
, op1
);
38604 op1
= copy_addr_to_reg (op1
);
38607 op0
= gen_reg_rtx (mode0
);
38608 emit_insn (GEN_FCN (icode
) (op0
));
38610 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
38612 op1
= gen_reg_rtx (SImode
);
38613 emit_move_insn (op1
, CONST1_RTX (SImode
));
38615 /* Emit SImode conditional move. */
38616 if (mode0
== HImode
)
38618 if (TARGET_ZERO_EXTEND_WITH_AND
38619 && optimize_function_for_speed_p (cfun
))
38621 op2
= force_reg (SImode
, const0_rtx
);
38623 emit_insn (gen_movstricthi
38624 (gen_lowpart (HImode
, op2
), op0
));
38628 op2
= gen_reg_rtx (SImode
);
38630 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
38633 else if (mode0
== SImode
)
38636 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
38639 || !register_operand (target
, SImode
))
38640 target
= gen_reg_rtx (SImode
);
38642 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
38644 emit_insn (gen_rtx_SET (target
,
38645 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
    case IX86_BUILTIN_RDSEED16_STEP:
      icode = CODE_FOR_rdseedhi_1;
    case IX86_BUILTIN_RDSEED32_STEP:
      icode = CODE_FOR_rdseedsi_1;
    case IX86_BUILTIN_RDSEED64_STEP:
      icode = CODE_FOR_rdseeddi_1;

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
          op1 = convert_memory_address (Pmode, op1);
          op1 = copy_addr_to_reg (op1);

      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op2 = gen_reg_rtx (QImode);

      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
      emit_insn (gen_rtx_SET (op2, pat));

          || !register_operand (target, SImode))
        target = gen_reg_rtx (SImode);

      emit_insn (gen_zero_extendqisi2 (target, op2));
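/* Illustrative note (not part of the original source): the *_STEP
   builtins above return the CF flag so callers can retry on failure.
   A user-level sketch, assuming GCC's <immintrin.h> and -mrdseed:

     #include <immintrin.h>

     unsigned int
     get_seed (void)
     {
       unsigned int seed;
       while (!_rdseed32_step (&seed))
         ;
       return seed;
     }

   _rdseed32_step stores the random value through its pointer argument
   and returns 1 only when the hardware had entropy available.  */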
    case IX86_BUILTIN_SBB32:
      icode = CODE_FOR_subborrowsi;
    case IX86_BUILTIN_SBB64:
      icode = CODE_FOR_subborrowdi;
    case IX86_BUILTIN_ADDCARRYX32:
      icode = CODE_FOR_addcarrysi;
    case IX86_BUILTIN_ADDCARRYX64:
      icode = CODE_FOR_addcarrydi;

      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */

      op1 = expand_normal (arg0);
      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));

      op2 = expand_normal (arg1);
      if (!register_operand (op2, mode0))
        op2 = copy_to_mode_reg (mode0, op2);

      op3 = expand_normal (arg2);
      if (!register_operand (op3, mode0))
        op3 = copy_to_mode_reg (mode0, op3);

      op4 = expand_normal (arg3);
      if (!address_operand (op4, VOIDmode))
          op4 = convert_memory_address (Pmode, op4);
          op4 = copy_addr_to_reg (op4);

      /* Generate CF from input operand.  */
      emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));

      /* Generate instruction that consumes CF.  */
      op0 = gen_reg_rtx (mode0);

      op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
      pat = gen_rtx_LTU (mode0, op1, const0_rtx);
      emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));

      /* Return current CF value.  */
        target = gen_reg_rtx (QImode);

      PUT_MODE (pat, QImode);
      emit_insn (gen_rtx_SET (target, pat));

      /* Store the result.  */
      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
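/* Illustrative note (not part of the original source): the carry-in is
   materialized into CF and the carry-out is read back from CF, which is
   what lets these builtins chain through multi-word additions.  A
   user-level sketch, assuming GCC's <x86intrin.h>:

     #include <x86intrin.h>

     unsigned char
     add64 (unsigned int a[2], const unsigned int b[2])
     {
       unsigned char c = _addcarry_u32 (0, a[0], b[0], &a[0]);
       return _addcarry_u32 (c, a[1], b[1], &a[1]);
     }

   The fourth argument matches the *sum_out operand above; the returned
   carry is the CF value stored via the QImode SET just generated.  */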
    case IX86_BUILTIN_READ_FLAGS:
      emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));

          || target == NULL_RTX
          || !nonimmediate_operand (target, word_mode)
          || GET_MODE (target) != word_mode)
        target = gen_reg_rtx (word_mode);

      emit_insn (gen_pop (target));

    case IX86_BUILTIN_WRITE_FLAGS:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      if (!general_no_elim_operand (op0, word_mode))
        op0 = copy_to_mode_reg (word_mode, op0);

      emit_insn (gen_push (op0));
      emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
38777 case IX86_BUILTIN_KTESTC8
:
38778 icode
= CODE_FOR_ktestqi
;
38782 case IX86_BUILTIN_KTESTZ8
:
38783 icode
= CODE_FOR_ktestqi
;
38787 case IX86_BUILTIN_KTESTC16
:
38788 icode
= CODE_FOR_ktesthi
;
38792 case IX86_BUILTIN_KTESTZ16
:
38793 icode
= CODE_FOR_ktesthi
;
38797 case IX86_BUILTIN_KTESTC32
:
38798 icode
= CODE_FOR_ktestsi
;
38802 case IX86_BUILTIN_KTESTZ32
:
38803 icode
= CODE_FOR_ktestsi
;
38807 case IX86_BUILTIN_KTESTC64
:
38808 icode
= CODE_FOR_ktestdi
;
38812 case IX86_BUILTIN_KTESTZ64
:
38813 icode
= CODE_FOR_ktestdi
;
38817 case IX86_BUILTIN_KORTESTC8
:
38818 icode
= CODE_FOR_kortestqi
;
38822 case IX86_BUILTIN_KORTESTZ8
:
38823 icode
= CODE_FOR_kortestqi
;
38827 case IX86_BUILTIN_KORTESTC16
:
38828 icode
= CODE_FOR_kortesthi
;
38832 case IX86_BUILTIN_KORTESTZ16
:
38833 icode
= CODE_FOR_kortesthi
;
38837 case IX86_BUILTIN_KORTESTC32
:
38838 icode
= CODE_FOR_kortestsi
;
38842 case IX86_BUILTIN_KORTESTZ32
:
38843 icode
= CODE_FOR_kortestsi
;
38847 case IX86_BUILTIN_KORTESTC64
:
38848 icode
= CODE_FOR_kortestdi
;
38852 case IX86_BUILTIN_KORTESTZ64
:
38853 icode
= CODE_FOR_kortestdi
;
38857 arg0
= CALL_EXPR_ARG (exp
, 0); /* Mask reg src1. */
38858 arg1
= CALL_EXPR_ARG (exp
, 1); /* Mask reg src2. */
38859 op0
= expand_normal (arg0
);
38860 op1
= expand_normal (arg1
);
38862 mode0
= insn_data
[icode
].operand
[0].mode
;
38863 mode1
= insn_data
[icode
].operand
[1].mode
;
38865 if (GET_MODE (op0
) != VOIDmode
)
38866 op0
= force_reg (GET_MODE (op0
), op0
);
38868 op0
= gen_lowpart (mode0
, op0
);
38870 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
38871 op0
= copy_to_mode_reg (mode0
, op0
);
38873 if (GET_MODE (op1
) != VOIDmode
)
38874 op1
= force_reg (GET_MODE (op1
), op1
);
38876 op1
= gen_lowpart (mode1
, op1
);
38878 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
38879 op1
= copy_to_mode_reg (mode1
, op1
);
38881 target
= gen_reg_rtx (QImode
);
38883 /* Emit kortest. */
38884 emit_insn (GEN_FCN (icode
) (op0
, op1
));
38885 /* And use setcc to return result from flags. */
38886 ix86_expand_setcc (target
, EQ
,
38887 gen_rtx_REG (mode3
, FLAGS_REG
), const0_rtx
);
38890 case IX86_BUILTIN_GATHERSIV2DF
:
38891 icode
= CODE_FOR_avx2_gathersiv2df
;
38893 case IX86_BUILTIN_GATHERSIV4DF
:
38894 icode
= CODE_FOR_avx2_gathersiv4df
;
38896 case IX86_BUILTIN_GATHERDIV2DF
:
38897 icode
= CODE_FOR_avx2_gatherdiv2df
;
38899 case IX86_BUILTIN_GATHERDIV4DF
:
38900 icode
= CODE_FOR_avx2_gatherdiv4df
;
38902 case IX86_BUILTIN_GATHERSIV4SF
:
38903 icode
= CODE_FOR_avx2_gathersiv4sf
;
38905 case IX86_BUILTIN_GATHERSIV8SF
:
38906 icode
= CODE_FOR_avx2_gathersiv8sf
;
38908 case IX86_BUILTIN_GATHERDIV4SF
:
38909 icode
= CODE_FOR_avx2_gatherdiv4sf
;
38911 case IX86_BUILTIN_GATHERDIV8SF
:
38912 icode
= CODE_FOR_avx2_gatherdiv8sf
;
38914 case IX86_BUILTIN_GATHERSIV2DI
:
38915 icode
= CODE_FOR_avx2_gathersiv2di
;
38917 case IX86_BUILTIN_GATHERSIV4DI
:
38918 icode
= CODE_FOR_avx2_gathersiv4di
;
38920 case IX86_BUILTIN_GATHERDIV2DI
:
38921 icode
= CODE_FOR_avx2_gatherdiv2di
;
38923 case IX86_BUILTIN_GATHERDIV4DI
:
38924 icode
= CODE_FOR_avx2_gatherdiv4di
;
38926 case IX86_BUILTIN_GATHERSIV4SI
:
38927 icode
= CODE_FOR_avx2_gathersiv4si
;
38929 case IX86_BUILTIN_GATHERSIV8SI
:
38930 icode
= CODE_FOR_avx2_gathersiv8si
;
38932 case IX86_BUILTIN_GATHERDIV4SI
:
38933 icode
= CODE_FOR_avx2_gatherdiv4si
;
38935 case IX86_BUILTIN_GATHERDIV8SI
:
38936 icode
= CODE_FOR_avx2_gatherdiv8si
;
38938 case IX86_BUILTIN_GATHERALTSIV4DF
:
38939 icode
= CODE_FOR_avx2_gathersiv4df
;
38941 case IX86_BUILTIN_GATHERALTDIV8SF
:
38942 icode
= CODE_FOR_avx2_gatherdiv8sf
;
38944 case IX86_BUILTIN_GATHERALTSIV4DI
:
38945 icode
= CODE_FOR_avx2_gathersiv4di
;
38947 case IX86_BUILTIN_GATHERALTDIV8SI
:
38948 icode
= CODE_FOR_avx2_gatherdiv8si
;
38950 case IX86_BUILTIN_GATHER3SIV16SF
:
38951 icode
= CODE_FOR_avx512f_gathersiv16sf
;
38953 case IX86_BUILTIN_GATHER3SIV8DF
:
38954 icode
= CODE_FOR_avx512f_gathersiv8df
;
38956 case IX86_BUILTIN_GATHER3DIV16SF
:
38957 icode
= CODE_FOR_avx512f_gatherdiv16sf
;
38959 case IX86_BUILTIN_GATHER3DIV8DF
:
38960 icode
= CODE_FOR_avx512f_gatherdiv8df
;
38962 case IX86_BUILTIN_GATHER3SIV16SI
:
38963 icode
= CODE_FOR_avx512f_gathersiv16si
;
38965 case IX86_BUILTIN_GATHER3SIV8DI
:
38966 icode
= CODE_FOR_avx512f_gathersiv8di
;
38968 case IX86_BUILTIN_GATHER3DIV16SI
:
38969 icode
= CODE_FOR_avx512f_gatherdiv16si
;
38971 case IX86_BUILTIN_GATHER3DIV8DI
:
38972 icode
= CODE_FOR_avx512f_gatherdiv8di
;
38974 case IX86_BUILTIN_GATHER3ALTSIV8DF
:
38975 icode
= CODE_FOR_avx512f_gathersiv8df
;
38977 case IX86_BUILTIN_GATHER3ALTDIV16SF
:
38978 icode
= CODE_FOR_avx512f_gatherdiv16sf
;
38980 case IX86_BUILTIN_GATHER3ALTSIV8DI
:
38981 icode
= CODE_FOR_avx512f_gathersiv8di
;
38983 case IX86_BUILTIN_GATHER3ALTDIV16SI
:
38984 icode
= CODE_FOR_avx512f_gatherdiv16si
;
38986 case IX86_BUILTIN_GATHER3SIV2DF
:
38987 icode
= CODE_FOR_avx512vl_gathersiv2df
;
38989 case IX86_BUILTIN_GATHER3SIV4DF
:
38990 icode
= CODE_FOR_avx512vl_gathersiv4df
;
38992 case IX86_BUILTIN_GATHER3DIV2DF
:
38993 icode
= CODE_FOR_avx512vl_gatherdiv2df
;
38995 case IX86_BUILTIN_GATHER3DIV4DF
:
38996 icode
= CODE_FOR_avx512vl_gatherdiv4df
;
38998 case IX86_BUILTIN_GATHER3SIV4SF
:
38999 icode
= CODE_FOR_avx512vl_gathersiv4sf
;
39001 case IX86_BUILTIN_GATHER3SIV8SF
:
39002 icode
= CODE_FOR_avx512vl_gathersiv8sf
;
39004 case IX86_BUILTIN_GATHER3DIV4SF
:
39005 icode
= CODE_FOR_avx512vl_gatherdiv4sf
;
39007 case IX86_BUILTIN_GATHER3DIV8SF
:
39008 icode
= CODE_FOR_avx512vl_gatherdiv8sf
;
39010 case IX86_BUILTIN_GATHER3SIV2DI
:
39011 icode
= CODE_FOR_avx512vl_gathersiv2di
;
39013 case IX86_BUILTIN_GATHER3SIV4DI
:
39014 icode
= CODE_FOR_avx512vl_gathersiv4di
;
39016 case IX86_BUILTIN_GATHER3DIV2DI
:
39017 icode
= CODE_FOR_avx512vl_gatherdiv2di
;
39019 case IX86_BUILTIN_GATHER3DIV4DI
:
39020 icode
= CODE_FOR_avx512vl_gatherdiv4di
;
39022 case IX86_BUILTIN_GATHER3SIV4SI
:
39023 icode
= CODE_FOR_avx512vl_gathersiv4si
;
39025 case IX86_BUILTIN_GATHER3SIV8SI
:
39026 icode
= CODE_FOR_avx512vl_gathersiv8si
;
39028 case IX86_BUILTIN_GATHER3DIV4SI
:
39029 icode
= CODE_FOR_avx512vl_gatherdiv4si
;
39031 case IX86_BUILTIN_GATHER3DIV8SI
:
39032 icode
= CODE_FOR_avx512vl_gatherdiv8si
;
39034 case IX86_BUILTIN_GATHER3ALTSIV4DF
:
39035 icode
= CODE_FOR_avx512vl_gathersiv4df
;
39037 case IX86_BUILTIN_GATHER3ALTDIV8SF
:
39038 icode
= CODE_FOR_avx512vl_gatherdiv8sf
;
39040 case IX86_BUILTIN_GATHER3ALTSIV4DI
:
39041 icode
= CODE_FOR_avx512vl_gathersiv4di
;
39043 case IX86_BUILTIN_GATHER3ALTDIV8SI
:
39044 icode
= CODE_FOR_avx512vl_gatherdiv8si
;
39046 case IX86_BUILTIN_SCATTERSIV16SF
:
39047 icode
= CODE_FOR_avx512f_scattersiv16sf
;
39049 case IX86_BUILTIN_SCATTERSIV8DF
:
39050 icode
= CODE_FOR_avx512f_scattersiv8df
;
39052 case IX86_BUILTIN_SCATTERDIV16SF
:
39053 icode
= CODE_FOR_avx512f_scatterdiv16sf
;
39055 case IX86_BUILTIN_SCATTERDIV8DF
:
39056 icode
= CODE_FOR_avx512f_scatterdiv8df
;
39058 case IX86_BUILTIN_SCATTERSIV16SI
:
39059 icode
= CODE_FOR_avx512f_scattersiv16si
;
39061 case IX86_BUILTIN_SCATTERSIV8DI
:
39062 icode
= CODE_FOR_avx512f_scattersiv8di
;
39064 case IX86_BUILTIN_SCATTERDIV16SI
:
39065 icode
= CODE_FOR_avx512f_scatterdiv16si
;
39067 case IX86_BUILTIN_SCATTERDIV8DI
:
39068 icode
= CODE_FOR_avx512f_scatterdiv8di
;
39070 case IX86_BUILTIN_SCATTERSIV8SF
:
39071 icode
= CODE_FOR_avx512vl_scattersiv8sf
;
39073 case IX86_BUILTIN_SCATTERSIV4SF
:
39074 icode
= CODE_FOR_avx512vl_scattersiv4sf
;
39076 case IX86_BUILTIN_SCATTERSIV4DF
:
39077 icode
= CODE_FOR_avx512vl_scattersiv4df
;
39079 case IX86_BUILTIN_SCATTERSIV2DF
:
39080 icode
= CODE_FOR_avx512vl_scattersiv2df
;
39082 case IX86_BUILTIN_SCATTERDIV8SF
:
39083 icode
= CODE_FOR_avx512vl_scatterdiv8sf
;
39085 case IX86_BUILTIN_SCATTERDIV4SF
:
39086 icode
= CODE_FOR_avx512vl_scatterdiv4sf
;
39088 case IX86_BUILTIN_SCATTERDIV4DF
:
39089 icode
= CODE_FOR_avx512vl_scatterdiv4df
;
39091 case IX86_BUILTIN_SCATTERDIV2DF
:
39092 icode
= CODE_FOR_avx512vl_scatterdiv2df
;
39094 case IX86_BUILTIN_SCATTERSIV8SI
:
39095 icode
= CODE_FOR_avx512vl_scattersiv8si
;
39097 case IX86_BUILTIN_SCATTERSIV4SI
:
39098 icode
= CODE_FOR_avx512vl_scattersiv4si
;
39100 case IX86_BUILTIN_SCATTERSIV4DI
:
39101 icode
= CODE_FOR_avx512vl_scattersiv4di
;
39103 case IX86_BUILTIN_SCATTERSIV2DI
:
39104 icode
= CODE_FOR_avx512vl_scattersiv2di
;
39106 case IX86_BUILTIN_SCATTERDIV8SI
:
39107 icode
= CODE_FOR_avx512vl_scatterdiv8si
;
39109 case IX86_BUILTIN_SCATTERDIV4SI
:
39110 icode
= CODE_FOR_avx512vl_scatterdiv4si
;
39112 case IX86_BUILTIN_SCATTERDIV4DI
:
39113 icode
= CODE_FOR_avx512vl_scatterdiv4di
;
39115 case IX86_BUILTIN_SCATTERDIV2DI
:
39116 icode
= CODE_FOR_avx512vl_scatterdiv2di
;
39118 case IX86_BUILTIN_GATHERPFDPD
:
39119 icode
= CODE_FOR_avx512pf_gatherpfv8sidf
;
39120 goto vec_prefetch_gen
;
39121 case IX86_BUILTIN_SCATTERALTSIV8DF
:
39122 icode
= CODE_FOR_avx512f_scattersiv8df
;
39124 case IX86_BUILTIN_SCATTERALTDIV16SF
:
39125 icode
= CODE_FOR_avx512f_scatterdiv16sf
;
39127 case IX86_BUILTIN_SCATTERALTSIV8DI
:
39128 icode
= CODE_FOR_avx512f_scattersiv8di
;
39130 case IX86_BUILTIN_SCATTERALTDIV16SI
:
39131 icode
= CODE_FOR_avx512f_scatterdiv16si
;
39133 case IX86_BUILTIN_GATHERPFDPS
:
39134 icode
= CODE_FOR_avx512pf_gatherpfv16sisf
;
39135 goto vec_prefetch_gen
;
39136 case IX86_BUILTIN_GATHERPFQPD
:
39137 icode
= CODE_FOR_avx512pf_gatherpfv8didf
;
39138 goto vec_prefetch_gen
;
39139 case IX86_BUILTIN_GATHERPFQPS
:
39140 icode
= CODE_FOR_avx512pf_gatherpfv8disf
;
39141 goto vec_prefetch_gen
;
39142 case IX86_BUILTIN_SCATTERPFDPD
:
39143 icode
= CODE_FOR_avx512pf_scatterpfv8sidf
;
39144 goto vec_prefetch_gen
;
39145 case IX86_BUILTIN_SCATTERPFDPS
:
39146 icode
= CODE_FOR_avx512pf_scatterpfv16sisf
;
39147 goto vec_prefetch_gen
;
39148 case IX86_BUILTIN_SCATTERPFQPD
:
39149 icode
= CODE_FOR_avx512pf_scatterpfv8didf
;
39150 goto vec_prefetch_gen
;
39151 case IX86_BUILTIN_SCATTERPFQPS
:
39152 icode
= CODE_FOR_avx512pf_scatterpfv8disf
;
39153 goto vec_prefetch_gen
;
39157 rtx (*gen
) (rtx
, rtx
);
39159 arg0
= CALL_EXPR_ARG (exp
, 0);
39160 arg1
= CALL_EXPR_ARG (exp
, 1);
39161 arg2
= CALL_EXPR_ARG (exp
, 2);
39162 arg3
= CALL_EXPR_ARG (exp
, 3);
39163 arg4
= CALL_EXPR_ARG (exp
, 4);
39164 op0
= expand_normal (arg0
);
39165 op1
= expand_normal (arg1
);
39166 op2
= expand_normal (arg2
);
39167 op3
= expand_normal (arg3
);
39168 op4
= expand_normal (arg4
);
39169 /* Note the arg order is different from the operand order. */
39170 mode0
= insn_data
[icode
].operand
[1].mode
;
39171 mode2
= insn_data
[icode
].operand
[3].mode
;
39172 mode3
= insn_data
[icode
].operand
[4].mode
;
39173 mode4
= insn_data
[icode
].operand
[5].mode
;
39175 if (target
== NULL_RTX
39176 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
39177 || !insn_data
[icode
].operand
[0].predicate (target
,
39178 GET_MODE (target
)))
39179 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
39181 subtarget
= target
;
39185 case IX86_BUILTIN_GATHER3ALTSIV8DF
:
39186 case IX86_BUILTIN_GATHER3ALTSIV8DI
:
39187 half
= gen_reg_rtx (V8SImode
);
39188 if (!nonimmediate_operand (op2
, V16SImode
))
39189 op2
= copy_to_mode_reg (V16SImode
, op2
);
39190 emit_insn (gen_vec_extract_lo_v16si (half
, op2
));
39193 case IX86_BUILTIN_GATHER3ALTSIV4DF
:
39194 case IX86_BUILTIN_GATHER3ALTSIV4DI
:
39195 case IX86_BUILTIN_GATHERALTSIV4DF
:
39196 case IX86_BUILTIN_GATHERALTSIV4DI
:
39197 half
= gen_reg_rtx (V4SImode
);
39198 if (!nonimmediate_operand (op2
, V8SImode
))
39199 op2
= copy_to_mode_reg (V8SImode
, op2
);
39200 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
39203 case IX86_BUILTIN_GATHER3ALTDIV16SF
:
39204 case IX86_BUILTIN_GATHER3ALTDIV16SI
:
39205 half
= gen_reg_rtx (mode0
);
39206 if (mode0
== V8SFmode
)
39207 gen
= gen_vec_extract_lo_v16sf
;
39209 gen
= gen_vec_extract_lo_v16si
;
39210 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
39211 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
39212 emit_insn (gen (half
, op0
));
39214 if (GET_MODE (op3
) != VOIDmode
)
39216 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
39217 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
39218 emit_insn (gen (half
, op3
));
39222 case IX86_BUILTIN_GATHER3ALTDIV8SF
:
39223 case IX86_BUILTIN_GATHER3ALTDIV8SI
:
39224 case IX86_BUILTIN_GATHERALTDIV8SF
:
39225 case IX86_BUILTIN_GATHERALTDIV8SI
:
39226 half
= gen_reg_rtx (mode0
);
39227 if (mode0
== V4SFmode
)
39228 gen
= gen_vec_extract_lo_v8sf
;
39230 gen
= gen_vec_extract_lo_v8si
;
39231 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
39232 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
39233 emit_insn (gen (half
, op0
));
39235 if (GET_MODE (op3
) != VOIDmode
)
39237 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
39238 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
39239 emit_insn (gen (half
, op3
));
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
39250 op1
= ix86_zero_extend_to_Pmode (op1
);
39252 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
39253 op0
= copy_to_mode_reg (mode0
, op0
);
39254 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
39255 op1
= copy_to_mode_reg (Pmode
, op1
);
39256 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
39257 op2
= copy_to_mode_reg (mode2
, op2
);
39259 op3
= fixup_modeless_constant (op3
, mode3
);
39261 if (GET_MODE (op3
) == mode3
|| GET_MODE (op3
) == VOIDmode
)
39263 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
39264 op3
= copy_to_mode_reg (mode3
, op3
);
39268 op3
= copy_to_reg (op3
);
39269 op3
= lowpart_subreg (mode3
, op3
, GET_MODE (op3
));
39271 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
39273 error ("the last argument must be scale 1, 2, 4, 8");
39277 /* Optimize. If mask is known to have all high bits set,
39278 replace op0 with pc_rtx to signal that the instruction
39279 overwrites the whole destination and doesn't use its
39280 previous contents. */
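      /* Illustrative sketch (editorial, not from the surrounding code;
	 variable names are hypothetical): a masked gather written with a
	 literal all-ones mask, e.g.

	   __m512d d = _mm512_mask_i32gather_pd (src, (__mmask8) -1,
						 idx, base, 8);

	 overwrites every destination element, so the previous contents of
	 the destination need not be preserved.  */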
39283 if (TREE_CODE (arg3
) == INTEGER_CST
)
39285 if (integer_all_onesp (arg3
))
39288 else if (TREE_CODE (arg3
) == VECTOR_CST
)
39290 unsigned int negative
= 0;
39291 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
39293 tree cst
= VECTOR_CST_ELT (arg3
, i
);
39294 if (TREE_CODE (cst
) == INTEGER_CST
39295 && tree_int_cst_sign_bit (cst
))
39297 else if (TREE_CODE (cst
) == REAL_CST
39298 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
39301 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
39304 else if (TREE_CODE (arg3
) == SSA_NAME
39305 && TREE_CODE (TREE_TYPE (arg3
)) == VECTOR_TYPE
)
	  /* Recognize also when mask is like:
	     __v2df src = _mm_setzero_pd ();
	     __v2df mask = _mm_cmpeq_pd (src, src);
	     or
	     __v8sf src = _mm256_setzero_ps ();
	     __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
	     as that is a cheaper way to load all ones into
	     a register than having to load a constant from
	     memory.  */
39316 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg3
);
39317 if (is_gimple_call (def_stmt
))
39319 tree fndecl
= gimple_call_fndecl (def_stmt
);
39321 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
39322 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
39324 case IX86_BUILTIN_CMPPD
:
39325 case IX86_BUILTIN_CMPPS
:
39326 case IX86_BUILTIN_CMPPD256
:
39327 case IX86_BUILTIN_CMPPS256
:
39328 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
39331 case IX86_BUILTIN_CMPEQPD
:
39332 case IX86_BUILTIN_CMPEQPS
:
39333 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
39334 && initializer_zerop (gimple_call_arg (def_stmt
,
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
39352 case IX86_BUILTIN_GATHER3DIV16SF
:
39353 if (target
== NULL_RTX
)
39354 target
= gen_reg_rtx (V8SFmode
);
39355 emit_insn (gen_vec_extract_lo_v16sf (target
, subtarget
));
39357 case IX86_BUILTIN_GATHER3DIV16SI
:
39358 if (target
== NULL_RTX
)
39359 target
= gen_reg_rtx (V8SImode
);
39360 emit_insn (gen_vec_extract_lo_v16si (target
, subtarget
));
39362 case IX86_BUILTIN_GATHER3DIV8SF
:
39363 case IX86_BUILTIN_GATHERDIV8SF
:
39364 if (target
== NULL_RTX
)
39365 target
= gen_reg_rtx (V4SFmode
);
39366 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
39368 case IX86_BUILTIN_GATHER3DIV8SI
:
39369 case IX86_BUILTIN_GATHERDIV8SI
:
39370 if (target
== NULL_RTX
)
39371 target
= gen_reg_rtx (V4SImode
);
39372 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
39375 target
= subtarget
;
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;
      /* Scatter instruction stores operand op3 to memory with
	 indices from op2 and scale from op4 under writemask op1.
	 If index operand op2 has more elements than source operand
	 op3, one needs to use only its low half.  And vice versa.  */
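      /* A worked example (editorial, values illustrative): scattering a
	 V8DF source with a V16SI index operand only consumes indices
	 0..7, so op2 is narrowed to its low V8SI half below; conversely a
	 V16SF source with a V8DI index has its source narrowed instead.  */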
39402 case IX86_BUILTIN_SCATTERALTSIV8DF
:
39403 case IX86_BUILTIN_SCATTERALTSIV8DI
:
39404 half
= gen_reg_rtx (V8SImode
);
39405 if (!nonimmediate_operand (op2
, V16SImode
))
39406 op2
= copy_to_mode_reg (V16SImode
, op2
);
39407 emit_insn (gen_vec_extract_lo_v16si (half
, op2
));
39410 case IX86_BUILTIN_SCATTERALTDIV16SF
:
39411 case IX86_BUILTIN_SCATTERALTDIV16SI
:
39412 half
= gen_reg_rtx (mode3
);
39413 if (mode3
== V8SFmode
)
39414 gen
= gen_vec_extract_lo_v16sf
;
39416 gen
= gen_vec_extract_lo_v16si
;
39417 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
39418 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
39419 emit_insn (gen (half
, op3
));
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
39429 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
39431 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
39432 op0
= copy_to_mode_reg (Pmode
, op0
);
39434 op1
= fixup_modeless_constant (op1
, mode1
);
39436 if (GET_MODE (op1
) == mode1
|| GET_MODE (op1
) == VOIDmode
)
39438 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
39439 op1
= copy_to_mode_reg (mode1
, op1
);
39443 op1
= copy_to_reg (op1
);
39444 op1
= lowpart_subreg (mode1
, op1
, GET_MODE (op1
));
39447 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
39448 op2
= copy_to_mode_reg (mode2
, op2
);
39450 if (!insn_data
[icode
].operand
[3].predicate (op3
, mode3
))
39451 op3
= copy_to_mode_reg (mode3
, op3
);
39453 if (!insn_data
[icode
].operand
[4].predicate (op4
, mode4
))
39455 error ("the last argument must be scale 1, 2, 4, 8");
      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode3 = insn_data[icode].operand[3].mode;
      mode4 = insn_data[icode].operand[4].mode;
39482 op0
= fixup_modeless_constant (op0
, mode0
);
39484 if (GET_MODE (op0
) == mode0
|| GET_MODE (op0
) == VOIDmode
)
39486 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
39487 op0
= copy_to_mode_reg (mode0
, op0
);
39491 op0
= copy_to_reg (op0
);
39492 op0
= lowpart_subreg (mode0
, op0
, GET_MODE (op0
));
39495 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
39496 op1
= copy_to_mode_reg (mode1
, op1
);
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
39501 op2
= force_reg (Pmode
, convert_to_mode (Pmode
, op2
, 1));
39503 if (!insn_data
[icode
].operand
[2].predicate (op2
, Pmode
))
39504 op2
= copy_to_mode_reg (Pmode
, op2
);
39506 if (!insn_data
[icode
].operand
[3].predicate (op3
, mode3
))
	  error ("the fourth argument must be scale 1, 2, 4, 8");
39512 if (!insn_data
[icode
].operand
[4].predicate (op4
, mode4
))
39514 error ("incorrect hint operand");
      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;
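      /* Illustrative use (editorial; user-level code, not part of this
	 expander):

	   _xabort (0x42);

	 the status argument must fold to a constant in 0..255, otherwise
	 the error above is reported.  */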
39543 if (fcode
>= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
39544 && fcode
<= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST
)
39546 i
= fcode
- IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST
;
39547 return ix86_expand_special_args_builtin (bdesc_special_args
+ i
, exp
,
39551 if (fcode
>= IX86_BUILTIN__BDESC_ARGS_FIRST
39552 && fcode
<= IX86_BUILTIN__BDESC_ARGS_LAST
)
39554 i
= fcode
- IX86_BUILTIN__BDESC_ARGS_FIRST
;
39557 case IX86_BUILTIN_FABSQ
:
39558 case IX86_BUILTIN_COPYSIGNQ
:
39560 /* Emit a normal call if SSE isn't available. */
39561 return expand_call (exp
, target
, ignore
);
39564 return ix86_expand_args_builtin (bdesc_args
+ i
, exp
, target
);
39568 if (fcode
>= IX86_BUILTIN__BDESC_ARGS2_FIRST
39569 && fcode
<= IX86_BUILTIN__BDESC_ARGS2_LAST
)
39571 i
= fcode
- IX86_BUILTIN__BDESC_ARGS2_FIRST
;
      rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL;
      rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx);
      rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx);

      machine_mode mode, wide_mode, nar_mode;
39578 nar_mode
= V4SFmode
;
39580 wide_mode
= V64SFmode
;
39581 fcn_mask
= gen_avx5124fmaddps_4fmaddps_mask
;
39582 fcn_maskz
= gen_avx5124fmaddps_4fmaddps_maskz
;
39586 case IX86_BUILTIN_4FMAPS
:
39587 fcn
= gen_avx5124fmaddps_4fmaddps
;
39591 case IX86_BUILTIN_4DPWSSD
:
39592 nar_mode
= V4SImode
;
39594 wide_mode
= V64SImode
;
39595 fcn
= gen_avx5124vnniw_vp4dpwssd
;
39599 case IX86_BUILTIN_4DPWSSDS
:
39600 nar_mode
= V4SImode
;
39602 wide_mode
= V64SImode
;
39603 fcn
= gen_avx5124vnniw_vp4dpwssds
;
39607 case IX86_BUILTIN_4FNMAPS
:
39608 fcn
= gen_avx5124fmaddps_4fnmaddps
;
39612 case IX86_BUILTIN_4FNMAPS_MASK
:
39613 fcn_mask
= gen_avx5124fmaddps_4fnmaddps_mask
;
39614 fcn_maskz
= gen_avx5124fmaddps_4fnmaddps_maskz
;
39617 case IX86_BUILTIN_4DPWSSD_MASK
:
39618 nar_mode
= V4SImode
;
39620 wide_mode
= V64SImode
;
39621 fcn_mask
= gen_avx5124vnniw_vp4dpwssd_mask
;
39622 fcn_maskz
= gen_avx5124vnniw_vp4dpwssd_maskz
;
39625 case IX86_BUILTIN_4DPWSSDS_MASK
:
39626 nar_mode
= V4SImode
;
39628 wide_mode
= V64SImode
;
39629 fcn_mask
= gen_avx5124vnniw_vp4dpwssds_mask
;
39630 fcn_maskz
= gen_avx5124vnniw_vp4dpwssds_maskz
;
39633 case IX86_BUILTIN_4FMAPS_MASK
:
39643 wide_reg
= gen_reg_rtx (wide_mode
);
39644 for (i
= 0; i
< 4; i
++)
39646 args
[i
] = CALL_EXPR_ARG (exp
, i
);
39647 ops
[i
] = expand_normal (args
[i
]);
39649 emit_move_insn (gen_rtx_SUBREG (mode
, wide_reg
, i
* 64),
39653 accum
= expand_normal (CALL_EXPR_ARG (exp
, 4));
39654 accum
= force_reg (mode
, accum
);
39656 addr
= expand_normal (CALL_EXPR_ARG (exp
, 5));
39657 addr
= force_reg (Pmode
, addr
);
39659 mem
= gen_rtx_MEM (nar_mode
, addr
);
39661 target
= gen_reg_rtx (mode
);
39663 emit_move_insn (target
, accum
);
39666 emit_insn (fcn (target
, accum
, wide_reg
, mem
));
39670 merge
= expand_normal (CALL_EXPR_ARG (exp
, 6));
39672 mask
= expand_normal (CALL_EXPR_ARG (exp
, 7));
39674 if (CONST_INT_P (mask
))
39675 mask
= fixup_modeless_constant (mask
, HImode
);
39677 mask
= force_reg (HImode
, mask
);
39679 if (GET_MODE (mask
) != HImode
)
39680 mask
= gen_rtx_SUBREG (HImode
, mask
, 0);
39682 /* If merge is 0 then we're about to emit z-masked variant. */
39683 if (const0_operand (merge
, mode
))
39684 emit_insn (fcn_maskz (target
, accum
, wide_reg
, mem
, merge
, mask
));
39685 /* If merge is the same as accum then emit merge-masked variant. */
39686 else if (CALL_EXPR_ARG (exp
, 6) == CALL_EXPR_ARG (exp
, 4))
39688 merge
= force_reg (mode
, merge
);
39689 emit_insn (fcn_mask (target
, wide_reg
, mem
, merge
, mask
));
39691 /* Merge with something unknown might happen if we z-mask w/ -O0. */
39694 target
= gen_reg_rtx (mode
);
39695 emit_move_insn (target
, merge
);
39696 emit_insn (fcn_mask (target
, wide_reg
, mem
, target
, mask
));
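      /* Editorial sketch of how the three paths above map to the user
	 view (intrinsic names assumed for illustration, not taken from
	 this file):
	   _mm512_maskz_4fmadd_ps  - merge operand is zero, z-masked form;
	   _mm512_mask_4fmadd_ps   - merge operand equals the accumulator,
				     merge-masked form;
	   anything else           - merge is copied into a fresh register
				     that serves as both destination and
				     merge operand.  */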
39702 case IX86_BUILTIN_4FNMASS
:
39703 fcn
= gen_avx5124fmaddps_4fnmaddss
;
39707 case IX86_BUILTIN_4FMASS
:
39708 fcn
= gen_avx5124fmaddps_4fmaddss
;
39712 case IX86_BUILTIN_4FNMASS_MASK
:
39713 fcn_mask
= gen_avx5124fmaddps_4fnmaddss_mask
;
39714 fcn_maskz
= gen_avx5124fmaddps_4fnmaddss_maskz
;
39717 case IX86_BUILTIN_4FMASS_MASK
:
39726 fcn_mask
= gen_avx5124fmaddps_4fmaddss_mask
;
39727 fcn_maskz
= gen_avx5124fmaddps_4fmaddss_maskz
;
39731 wide_reg
= gen_reg_rtx (V64SFmode
);
39732 for (i
= 0; i
< 4; i
++)
39735 args
[i
] = CALL_EXPR_ARG (exp
, i
);
39736 ops
[i
] = expand_normal (args
[i
]);
39738 tmp
= gen_reg_rtx (SFmode
);
39739 emit_move_insn (tmp
, gen_rtx_SUBREG (SFmode
, ops
[i
], 0));
39741 emit_move_insn (gen_rtx_SUBREG (V16SFmode
, wide_reg
, i
* 64),
39742 gen_rtx_SUBREG (V16SFmode
, tmp
, 0));
39745 accum
= expand_normal (CALL_EXPR_ARG (exp
, 4));
39746 accum
= force_reg (V4SFmode
, accum
);
39748 addr
= expand_normal (CALL_EXPR_ARG (exp
, 5));
39749 addr
= force_reg (Pmode
, addr
);
39751 mem
= gen_rtx_MEM (V4SFmode
, addr
);
39753 target
= gen_reg_rtx (V4SFmode
);
39755 emit_move_insn (target
, accum
);
39758 emit_insn (fcn (target
, accum
, wide_reg
, mem
));
39762 merge
= expand_normal (CALL_EXPR_ARG (exp
, 6));
39764 mask
= expand_normal (CALL_EXPR_ARG (exp
, 7));
39766 if (CONST_INT_P (mask
))
39767 mask
= fixup_modeless_constant (mask
, QImode
);
39769 mask
= force_reg (QImode
, mask
);
39771 if (GET_MODE (mask
) != QImode
)
39772 mask
= gen_rtx_SUBREG (QImode
, mask
, 0);
39774 /* If merge is 0 then we're about to emit z-masked variant. */
39775 if (const0_operand (merge
, mode
))
39776 emit_insn (fcn_maskz (target
, accum
, wide_reg
, mem
, merge
, mask
));
	  /* If merge is the same as accum then emit merge-masked variant.  */
39779 else if (CALL_EXPR_ARG (exp
, 6) == CALL_EXPR_ARG (exp
, 4))
39781 merge
= force_reg (mode
, merge
);
39782 emit_insn (fcn_mask (target
, wide_reg
, mem
, merge
, mask
));
	  /* Merge with something unknown might happen if we z-mask w/ -O0.  */
39788 target
= gen_reg_rtx (mode
);
39789 emit_move_insn (target
, merge
);
39790 emit_insn (fcn_mask (target
, wide_reg
, mem
, target
, mask
));
39795 case IX86_BUILTIN_RDPID
:
39796 return ix86_expand_special_args_builtin (bdesc_args2
+ i
, exp
,
39799 return ix86_expand_args_builtin (bdesc_args2
+ i
, exp
, target
);
39803 if (fcode
>= IX86_BUILTIN__BDESC_COMI_FIRST
39804 && fcode
<= IX86_BUILTIN__BDESC_COMI_LAST
)
39806 i
= fcode
- IX86_BUILTIN__BDESC_COMI_FIRST
;
39807 return ix86_expand_sse_comi (bdesc_comi
+ i
, exp
, target
);
39810 if (fcode
>= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
39811 && fcode
<= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST
)
39813 i
= fcode
- IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST
;
39814 return ix86_expand_round_builtin (bdesc_round_args
+ i
, exp
, target
);
39817 if (fcode
>= IX86_BUILTIN__BDESC_PCMPESTR_FIRST
39818 && fcode
<= IX86_BUILTIN__BDESC_PCMPESTR_LAST
)
39820 i
= fcode
- IX86_BUILTIN__BDESC_PCMPESTR_FIRST
;
39821 return ix86_expand_sse_pcmpestr (bdesc_pcmpestr
+ i
, exp
, target
);
39824 if (fcode
>= IX86_BUILTIN__BDESC_PCMPISTR_FIRST
39825 && fcode
<= IX86_BUILTIN__BDESC_PCMPISTR_LAST
)
39827 i
= fcode
- IX86_BUILTIN__BDESC_PCMPISTR_FIRST
;
39828 return ix86_expand_sse_pcmpistr (bdesc_pcmpistr
+ i
, exp
, target
);
39831 if (fcode
>= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
39832 && fcode
<= IX86_BUILTIN__BDESC_MULTI_ARG_LAST
)
39834 i
= fcode
- IX86_BUILTIN__BDESC_MULTI_ARG_FIRST
;
39835 const struct builtin_description
*d
= bdesc_multi_arg
+ i
;
39836 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
39837 (enum ix86_builtin_func_type
)
39838 d
->flag
, d
->comparison
);
39841 gcc_unreachable ();
39844 /* This returns the target-specific builtin with code CODE if
39845 current_function_decl has visibility on this builtin, which is checked
39846 using isa flags. Returns NULL_TREE otherwise. */
39848 static tree
ix86_get_builtin (enum ix86_builtins code
)
39850 struct cl_target_option
*opts
;
39851 tree target_tree
= NULL_TREE
;
39853 /* Determine the isa flags of current_function_decl. */
39855 if (current_function_decl
)
39856 target_tree
= DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl
);
39858 if (target_tree
== NULL
)
39859 target_tree
= target_option_default_node
;
39861 opts
= TREE_TARGET_OPTION (target_tree
);
39863 if ((ix86_builtins_isa
[(int) code
].isa
& opts
->x_ix86_isa_flags
)
39864 || (ix86_builtins_isa
[(int) code
].isa2
& opts
->x_ix86_isa_flags2
))
39865 return ix86_builtin_decl (code
, true);
/* Return function decl for target specific builtin
   for given MPX builtin passed in FCODE.  */
39873 ix86_builtin_mpx_function (unsigned fcode
)
39877 case BUILT_IN_CHKP_BNDMK
:
39878 return ix86_builtins
[IX86_BUILTIN_BNDMK
];
39880 case BUILT_IN_CHKP_BNDSTX
:
39881 return ix86_builtins
[IX86_BUILTIN_BNDSTX
];
39883 case BUILT_IN_CHKP_BNDLDX
:
39884 return ix86_builtins
[IX86_BUILTIN_BNDLDX
];
39886 case BUILT_IN_CHKP_BNDCL
:
39887 return ix86_builtins
[IX86_BUILTIN_BNDCL
];
39889 case BUILT_IN_CHKP_BNDCU
:
39890 return ix86_builtins
[IX86_BUILTIN_BNDCU
];
39892 case BUILT_IN_CHKP_BNDRET
:
39893 return ix86_builtins
[IX86_BUILTIN_BNDRET
];
39895 case BUILT_IN_CHKP_INTERSECT
:
39896 return ix86_builtins
[IX86_BUILTIN_BNDINT
];
39898 case BUILT_IN_CHKP_NARROW
:
39899 return ix86_builtins
[IX86_BUILTIN_BNDNARROW
];
39901 case BUILT_IN_CHKP_SIZEOF
:
39902 return ix86_builtins
[IX86_BUILTIN_SIZEOF
];
39904 case BUILT_IN_CHKP_EXTRACT_LOWER
:
39905 return ix86_builtins
[IX86_BUILTIN_BNDLOWER
];
39907 case BUILT_IN_CHKP_EXTRACT_UPPER
:
39908 return ix86_builtins
[IX86_BUILTIN_BNDUPPER
];
39914 gcc_unreachable ();
39917 /* Helper function for ix86_load_bounds and ix86_store_bounds.
   Return an address to be used to load/store bounds for pointer
   PTR.
39922 SLOT_NO is an integer constant holding number of a target
39923 dependent special slot to be used in case SLOT is not a memory.
39925 SPECIAL_BASE is a pointer to be used as a base of fake address
39926 to access special slots in Bounds Table. SPECIAL_BASE[-1],
39927 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
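/* A worked example (editorial, assuming 64-bit pointers): slot 0 maps to
   SPECIAL_BASE - 8 (i.e. SPECIAL_BASE[-1]), slot 1 to SPECIAL_BASE - 16,
   following addr = SPECIAL_BASE - (SLOT_NO + 1) * sizeof (void *).  */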
39930 ix86_get_arg_address_for_bt (rtx slot
, rtx slot_no
, rtx special_base
)
39934 /* NULL slot means we pass bounds for pointer not passed to the
39935 function at all. Register slot means we pass pointer in a
39936 register. In both these cases bounds are passed via Bounds
39937 Table. Since we do not have actual pointer stored in memory,
39938 we have to use fake addresses to access Bounds Table. We
39939 start with (special_base - sizeof (void*)) and decrease this
39940 address by pointer size to get addresses for other slots. */
39941 if (!slot
|| REG_P (slot
))
39943 gcc_assert (CONST_INT_P (slot_no
));
39944 addr
= plus_constant (Pmode
, special_base
,
39945 -(INTVAL (slot_no
) + 1) * GET_MODE_SIZE (Pmode
));
39947 /* If pointer is passed in a memory then its address is used to
39948 access Bounds Table. */
39949 else if (MEM_P (slot
))
39951 addr
= XEXP (slot
, 0);
39952 if (!register_operand (addr
, Pmode
))
39953 addr
= copy_addr_to_reg (addr
);
39956 gcc_unreachable ();
39961 /* Expand pass uses this hook to load bounds for function parameter
39962 PTR passed in SLOT in case its bounds are not passed in a register.
39964 If SLOT is a memory, then bounds are loaded as for regular pointer
39965 loaded from memory. PTR may be NULL in case SLOT is a memory.
39966 In such case value of PTR (if required) may be loaded from SLOT.
39968 If SLOT is NULL or a register then SLOT_NO is an integer constant
39969 holding number of the target dependent special slot which should be
39970 used to obtain bounds.
39972 Return loaded bounds. */
39975 ix86_load_bounds (rtx slot
, rtx ptr
, rtx slot_no
)
39977 rtx reg
= gen_reg_rtx (BNDmode
);
39980 /* Get address to be used to access Bounds Table. Special slots start
39981 at the location of return address of the current function. */
39982 addr
= ix86_get_arg_address_for_bt (slot
, slot_no
, arg_pointer_rtx
);
39984 /* Load pointer value from a memory if we don't have it. */
39987 gcc_assert (MEM_P (slot
));
39988 ptr
= copy_addr_to_reg (slot
);
39991 if (!register_operand (ptr
, Pmode
))
39992 ptr
= ix86_zero_extend_to_Pmode (ptr
);
39994 emit_insn (BNDmode
== BND64mode
39995 ? gen_bnd64_ldx (reg
, addr
, ptr
)
39996 : gen_bnd32_ldx (reg
, addr
, ptr
));
40001 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40002 passed in SLOT in case BOUNDS are not passed in a register.
40004 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40005 stored in memory. PTR may be NULL in case SLOT is a memory.
40006 In such case value of PTR (if required) may be loaded from SLOT.
40008 If SLOT is NULL or a register then SLOT_NO is an integer constant
40009 holding number of the target dependent special slot which should be
40010 used to store BOUNDS. */
40013 ix86_store_bounds (rtx ptr
, rtx slot
, rtx bounds
, rtx slot_no
)
40017 /* Get address to be used to access Bounds Table. Special slots start
40018 at the location of return address of a called function. */
40019 addr
= ix86_get_arg_address_for_bt (slot
, slot_no
, stack_pointer_rtx
);
40021 /* Load pointer value from a memory if we don't have it. */
40024 gcc_assert (MEM_P (slot
));
40025 ptr
= copy_addr_to_reg (slot
);
40028 if (!register_operand (ptr
, Pmode
))
40029 ptr
= ix86_zero_extend_to_Pmode (ptr
);
40031 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds
)));
40032 if (!register_operand (bounds
, BNDmode
))
40033 bounds
= copy_to_mode_reg (BNDmode
, bounds
);
40035 emit_insn (BNDmode
== BND64mode
40036 ? gen_bnd64_stx (addr
, ptr
, bounds
)
40037 : gen_bnd32_stx (addr
, ptr
, bounds
));
/* Load and return bounds returned by function in SLOT.  */

static rtx
ix86_load_returned_bounds (rtx slot)
{
  rtx res;

  gcc_assert (REG_P (slot));
  res = gen_reg_rtx (BNDmode);
  emit_move_insn (res, slot);

  return res;
}

/* Store BOUNDS returned by function into SLOT.  */

static void
ix86_store_returned_bounds (rtx slot, rtx bounds)
{
  gcc_assert (REG_P (slot));
  emit_move_insn (slot, bounds);
}
40063 /* Returns a function decl for a vectorized version of the combined function
40064 with combined_fn code FN and the result vector type TYPE, or NULL_TREE
40065 if it is not available. */
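/* An illustrative mapping (editorial): a call such as floor () vectorized
   with a V4DF result and V4DF argument resolves to the FLOORPD256 builtin
   below, provided the round instruction is usable (SSE4.1 enabled and no
   trapping math, as checked below); unsupported combinations return
   NULL_TREE.  */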
40068 ix86_builtin_vectorized_function (unsigned int fn
, tree type_out
,
40071 machine_mode in_mode
, out_mode
;
40074 if (TREE_CODE (type_out
) != VECTOR_TYPE
40075 || TREE_CODE (type_in
) != VECTOR_TYPE
)
40078 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
40079 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
40080 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
40081 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
40086 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40088 if (out_n
== 16 && in_n
== 16)
40089 return ix86_get_builtin (IX86_BUILTIN_EXP2PS
);
40096 /* The round insn does not trap on denormals. */
40097 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40100 if (out_mode
== SImode
&& in_mode
== DFmode
)
40102 if (out_n
== 4 && in_n
== 2)
40103 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
);
40104 else if (out_n
== 8 && in_n
== 4)
40105 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
);
40106 else if (out_n
== 16 && in_n
== 8)
40107 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512
);
40109 if (out_mode
== SImode
&& in_mode
== SFmode
)
40111 if (out_n
== 4 && in_n
== 4)
40112 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX
);
40113 else if (out_n
== 8 && in_n
== 8)
40114 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256
);
40115 else if (out_n
== 16 && in_n
== 16)
40116 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512
);
40123 /* The round insn does not trap on denormals. */
40124 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40127 if (out_mode
== SImode
&& in_mode
== DFmode
)
40129 if (out_n
== 4 && in_n
== 2)
40130 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
);
40131 else if (out_n
== 8 && in_n
== 4)
40132 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
);
40133 else if (out_n
== 16 && in_n
== 8)
40134 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512
);
40136 if (out_mode
== SImode
&& in_mode
== SFmode
)
40138 if (out_n
== 4 && in_n
== 4)
40139 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX
);
40140 else if (out_n
== 8 && in_n
== 8)
40141 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256
);
40142 else if (out_n
== 16 && in_n
== 16)
40143 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512
);
40150 if (out_mode
== SImode
&& in_mode
== DFmode
)
40152 if (out_n
== 4 && in_n
== 2)
40153 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX
);
40154 else if (out_n
== 8 && in_n
== 4)
40155 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256
);
40156 else if (out_n
== 16 && in_n
== 8)
40157 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512
);
40159 if (out_mode
== SImode
&& in_mode
== SFmode
)
40161 if (out_n
== 4 && in_n
== 4)
40162 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ
);
40163 else if (out_n
== 8 && in_n
== 8)
40164 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256
);
40165 else if (out_n
== 16 && in_n
== 16)
40166 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512
);
40173 /* The round insn does not trap on denormals. */
40174 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40177 if (out_mode
== SImode
&& in_mode
== DFmode
)
40179 if (out_n
== 4 && in_n
== 2)
40180 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
);
40181 else if (out_n
== 8 && in_n
== 4)
40182 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
);
40183 else if (out_n
== 16 && in_n
== 8)
40184 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512
);
40186 if (out_mode
== SImode
&& in_mode
== SFmode
)
40188 if (out_n
== 4 && in_n
== 4)
40189 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX
);
40190 else if (out_n
== 8 && in_n
== 8)
40191 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256
);
40192 else if (out_n
== 16 && in_n
== 16)
40193 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512
);
40198 /* The round insn does not trap on denormals. */
40199 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40202 if (out_mode
== DFmode
&& in_mode
== DFmode
)
40204 if (out_n
== 2 && in_n
== 2)
40205 return ix86_get_builtin (IX86_BUILTIN_FLOORPD
);
40206 else if (out_n
== 4 && in_n
== 4)
40207 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256
);
40208 else if (out_n
== 8 && in_n
== 8)
40209 return ix86_get_builtin (IX86_BUILTIN_FLOORPD512
);
40211 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40213 if (out_n
== 4 && in_n
== 4)
40214 return ix86_get_builtin (IX86_BUILTIN_FLOORPS
);
40215 else if (out_n
== 8 && in_n
== 8)
40216 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256
);
40217 else if (out_n
== 16 && in_n
== 16)
40218 return ix86_get_builtin (IX86_BUILTIN_FLOORPS512
);
40223 /* The round insn does not trap on denormals. */
40224 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40227 if (out_mode
== DFmode
&& in_mode
== DFmode
)
40229 if (out_n
== 2 && in_n
== 2)
40230 return ix86_get_builtin (IX86_BUILTIN_CEILPD
);
40231 else if (out_n
== 4 && in_n
== 4)
40232 return ix86_get_builtin (IX86_BUILTIN_CEILPD256
);
40233 else if (out_n
== 8 && in_n
== 8)
40234 return ix86_get_builtin (IX86_BUILTIN_CEILPD512
);
40236 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40238 if (out_n
== 4 && in_n
== 4)
40239 return ix86_get_builtin (IX86_BUILTIN_CEILPS
);
40240 else if (out_n
== 8 && in_n
== 8)
40241 return ix86_get_builtin (IX86_BUILTIN_CEILPS256
);
40242 else if (out_n
== 16 && in_n
== 16)
40243 return ix86_get_builtin (IX86_BUILTIN_CEILPS512
);
40248 /* The round insn does not trap on denormals. */
40249 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40252 if (out_mode
== DFmode
&& in_mode
== DFmode
)
40254 if (out_n
== 2 && in_n
== 2)
40255 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD
);
40256 else if (out_n
== 4 && in_n
== 4)
40257 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256
);
40258 else if (out_n
== 8 && in_n
== 8)
40259 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512
);
40261 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40263 if (out_n
== 4 && in_n
== 4)
40264 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS
);
40265 else if (out_n
== 8 && in_n
== 8)
40266 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256
);
40267 else if (out_n
== 16 && in_n
== 16)
40268 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512
);
40273 /* The round insn does not trap on denormals. */
40274 if (flag_trapping_math
|| !TARGET_SSE4_1
)
40277 if (out_mode
== DFmode
&& in_mode
== DFmode
)
40279 if (out_n
== 2 && in_n
== 2)
40280 return ix86_get_builtin (IX86_BUILTIN_RINTPD
);
40281 else if (out_n
== 4 && in_n
== 4)
40282 return ix86_get_builtin (IX86_BUILTIN_RINTPD256
);
40284 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40286 if (out_n
== 4 && in_n
== 4)
40287 return ix86_get_builtin (IX86_BUILTIN_RINTPS
);
40288 else if (out_n
== 8 && in_n
== 8)
40289 return ix86_get_builtin (IX86_BUILTIN_RINTPS256
);
40294 if (out_mode
== DFmode
&& in_mode
== DFmode
)
40296 if (out_n
== 2 && in_n
== 2)
40297 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD
);
40298 if (out_n
== 4 && in_n
== 4)
40299 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256
);
40301 if (out_mode
== SFmode
&& in_mode
== SFmode
)
40303 if (out_n
== 4 && in_n
== 4)
40304 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS
);
40305 if (out_n
== 8 && in_n
== 8)
40306 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256
);
40314 /* Dispatch to a handler for a vectorization library. */
40315 if (ix86_veclib_handler
)
40316 return ix86_veclib_handler (combined_fn (fn
), type_out
, type_in
);
40321 /* Handler for an SVML-style interface to
40322 a library with vectorized intrinsics. */
40325 ix86_veclibabi_svml (combined_fn fn
, tree type_out
, tree type_in
)
40328 tree fntype
, new_fndecl
, args
;
40331 machine_mode el_mode
, in_mode
;
40334 /* The SVML is suitable for unsafe math only. */
40335 if (!flag_unsafe_math_optimizations
)
40338 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
40339 n
= TYPE_VECTOR_SUBPARTS (type_out
);
40340 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
40341 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
40342 if (el_mode
!= in_mode
40366 if ((el_mode
!= DFmode
|| n
!= 2)
40367 && (el_mode
!= SFmode
|| n
!= 4))
40375 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
40376 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
40378 if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOGF
)
40379 strcpy (name
, "vmlsLn4");
40380 else if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOG
)
40381 strcpy (name
, "vmldLn2");
40384 sprintf (name
, "vmls%s", bname
+10);
40385 name
[strlen (name
)-1] = '4';
40388 sprintf (name
, "vmld%s2", bname
+10);
40390 /* Convert to uppercase. */
40394 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
40398 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
40400 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
40402 /* Build a function declaration for the vectorized function. */
40403 new_fndecl
= build_decl (BUILTINS_LOCATION
,
40404 FUNCTION_DECL
, get_identifier (name
), fntype
);
40405 TREE_PUBLIC (new_fndecl
) = 1;
40406 DECL_EXTERNAL (new_fndecl
) = 1;
40407 DECL_IS_NOVOPS (new_fndecl
) = 1;
40408 TREE_READONLY (new_fndecl
) = 1;
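/* A worked example of the SVML name mangling above (editorial): for a
   4-wide single-precision sinf the scalar name "__builtin_sinf" yields
   "vmlsSin4", and a 2-wide double-precision sin yields "vmldSin2"; logf
   and log are special-cased to "vmlsLn4" and "vmldLn2".  */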
40413 /* Handler for an ACML-style interface to
40414 a library with vectorized intrinsics. */
40417 ix86_veclibabi_acml (combined_fn fn
, tree type_out
, tree type_in
)
40419 char name
[20] = "__vr.._";
40420 tree fntype
, new_fndecl
, args
;
40423 machine_mode el_mode
, in_mode
;
  /* The ACML is 64 bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
40430 || !flag_unsafe_math_optimizations
)
40433 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
40434 n
= TYPE_VECTOR_SUBPARTS (type_out
);
40435 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
40436 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
40437 if (el_mode
!= in_mode
40449 if (el_mode
== DFmode
&& n
== 2)
40454 else if (el_mode
== SFmode
&& n
== 4)
40467 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
40468 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
40469 sprintf (name
+ 7, "%s", bname
+10);
40472 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
40476 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
40478 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
40480 /* Build a function declaration for the vectorized function. */
40481 new_fndecl
= build_decl (BUILTINS_LOCATION
,
40482 FUNCTION_DECL
, get_identifier (name
), fntype
);
40483 TREE_PUBLIC (new_fndecl
) = 1;
40484 DECL_EXTERNAL (new_fndecl
) = 1;
40485 DECL_IS_NOVOPS (new_fndecl
) = 1;
40486 TREE_READONLY (new_fndecl
) = 1;
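/* A worked example of the ACML name mangling above (editorial): the
   "__vr.._" template becomes "__vrd2_sin" for 2-wide double sin and
   "__vrs4_sinf" for 4-wide float sinf.  */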
40491 /* Returns a decl of a function that implements gather load with
40492 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
40493 Return NULL_TREE if it is not available. */
40496 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
40497 const_tree index_type
, int scale
)
40500 enum ix86_builtins code
;
40505 if ((TREE_CODE (index_type
) != INTEGER_TYPE
40506 && !POINTER_TYPE_P (index_type
))
40507 || (TYPE_MODE (index_type
) != SImode
40508 && TYPE_MODE (index_type
) != DImode
))
40511 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
40514 /* v*gather* insn sign extends index to pointer mode. */
40515 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
40516 && TYPE_UNSIGNED (index_type
))
40521 || (scale
& (scale
- 1)) != 0)
40524 si
= TYPE_MODE (index_type
) == SImode
;
40525 switch (TYPE_MODE (mem_vectype
))
40528 if (TARGET_AVX512VL
)
40529 code
= si
? IX86_BUILTIN_GATHER3SIV2DF
: IX86_BUILTIN_GATHER3DIV2DF
;
40531 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
40534 if (TARGET_AVX512VL
)
40535 code
= si
? IX86_BUILTIN_GATHER3ALTSIV4DF
: IX86_BUILTIN_GATHER3DIV4DF
;
40537 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
40540 if (TARGET_AVX512VL
)
40541 code
= si
? IX86_BUILTIN_GATHER3SIV2DI
: IX86_BUILTIN_GATHER3DIV2DI
;
40543 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
40546 if (TARGET_AVX512VL
)
40547 code
= si
? IX86_BUILTIN_GATHER3ALTSIV4DI
: IX86_BUILTIN_GATHER3DIV4DI
;
40549 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
40552 if (TARGET_AVX512VL
)
40553 code
= si
? IX86_BUILTIN_GATHER3SIV4SF
: IX86_BUILTIN_GATHER3DIV4SF
;
40555 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
40558 if (TARGET_AVX512VL
)
40559 code
= si
? IX86_BUILTIN_GATHER3SIV8SF
: IX86_BUILTIN_GATHER3ALTDIV8SF
;
40561 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
40564 if (TARGET_AVX512VL
)
40565 code
= si
? IX86_BUILTIN_GATHER3SIV4SI
: IX86_BUILTIN_GATHER3DIV4SI
;
40567 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
40570 if (TARGET_AVX512VL
)
40571 code
= si
? IX86_BUILTIN_GATHER3SIV8SI
: IX86_BUILTIN_GATHER3ALTDIV8SI
;
40573 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
40576 if (TARGET_AVX512F
)
40577 code
= si
? IX86_BUILTIN_GATHER3ALTSIV8DF
: IX86_BUILTIN_GATHER3DIV8DF
;
40582 if (TARGET_AVX512F
)
40583 code
= si
? IX86_BUILTIN_GATHER3ALTSIV8DI
: IX86_BUILTIN_GATHER3DIV8DI
;
40588 if (TARGET_AVX512F
)
40589 code
= si
? IX86_BUILTIN_GATHER3SIV16SF
: IX86_BUILTIN_GATHER3ALTDIV16SF
;
40594 if (TARGET_AVX512F
)
40595 code
= si
? IX86_BUILTIN_GATHER3SIV16SI
: IX86_BUILTIN_GATHER3ALTDIV16SI
;
40603 return ix86_get_builtin (code
);
40606 /* Returns a decl of a function that implements scatter store with
40607 register type VECTYPE and index type INDEX_TYPE and SCALE.
40608 Return NULL_TREE if it is not available. */
40611 ix86_vectorize_builtin_scatter (const_tree vectype
,
40612 const_tree index_type
, int scale
)
40615 enum ix86_builtins code
;
40617 if (!TARGET_AVX512F
)
40620 if ((TREE_CODE (index_type
) != INTEGER_TYPE
40621 && !POINTER_TYPE_P (index_type
))
40622 || (TYPE_MODE (index_type
) != SImode
40623 && TYPE_MODE (index_type
) != DImode
))
40626 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
40629 /* v*scatter* insn sign extends index to pointer mode. */
40630 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
40631 && TYPE_UNSIGNED (index_type
))
40634 /* Scale can be 1, 2, 4 or 8. */
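  /* Editorial note: the test below relies on (scale & (scale - 1)) == 0
     holding exactly for powers of two; e.g. 4 & 3 == 0 is accepted while
     3 & 2 == 2 rejects scale 3.  */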
40637 || (scale
& (scale
- 1)) != 0)
40640 si
= TYPE_MODE (index_type
) == SImode
;
40641 switch (TYPE_MODE (vectype
))
40644 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DF
: IX86_BUILTIN_SCATTERDIV8DF
;
40647 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DI
: IX86_BUILTIN_SCATTERDIV8DI
;
40650 code
= si
? IX86_BUILTIN_SCATTERSIV16SF
: IX86_BUILTIN_SCATTERALTDIV16SF
;
40653 code
= si
? IX86_BUILTIN_SCATTERSIV16SI
: IX86_BUILTIN_SCATTERALTDIV16SI
;
40659 return ix86_builtins
[code
];
/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.  */
40668 return (TARGET_SSE_MATH
40669 && flag_finite_math_only
40670 && !flag_trapping_math
40671 && flag_unsafe_math_optimizations
);
40674 /* Returns a code for a target-specific builtin that implements
40675 reciprocal of the function, or NULL_TREE if not available. */
40678 ix86_builtin_reciprocal (tree fndecl
)
40680 switch (DECL_FUNCTION_CODE (fndecl
))
40682 /* Vectorized version of sqrt to rsqrt conversion. */
40683 case IX86_BUILTIN_SQRTPS_NR
:
40684 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR
);
40686 case IX86_BUILTIN_SQRTPS_NR256
:
40687 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256
);
40694 /* Helper for avx_vpermilps256_operand et al. This is also used by
40695 the expansion functions to turn the parallel back into a mask.
40696 The return value is 0 for no match and the imm8+1 for a match. */
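/* A worked example (editorial): for a V4SF parallel (3 2 1 0) each element
   gets a two-bit selector, giving imm8 = 3 | 2<<2 | 1<<4 | 0<<6 = 0x1b,
   and the function returns 0x1b + 1.  */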
40699 avx_vpermilp_parallel (rtx par
, machine_mode mode
)
40701 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
40703 unsigned char ipar
[16] = {}; /* Silence -Wuninitialized warning. */
40705 if (XVECLEN (par
, 0) != (int) nelt
)
40708 /* Validate that all of the elements are constants, and not totally
40709 out of range. Copy the data into an integral array to make the
40710 subsequent checks easier. */
40711 for (i
= 0; i
< nelt
; ++i
)
40713 rtx er
= XVECEXP (par
, 0, i
);
40714 unsigned HOST_WIDE_INT ei
;
40716 if (!CONST_INT_P (er
))
      /* In the 512-bit DFmode case, we can only move elements within
	 a 128-bit lane.  First fill the second part of the mask,
	 then fallthru.  */
40730 for (i
= 4; i
< 6; ++i
)
40732 if (ipar
[i
] < 4 || ipar
[i
] >= 6)
40734 mask
|= (ipar
[i
] - 4) << i
;
40736 for (i
= 6; i
< 8; ++i
)
40740 mask
|= (ipar
[i
] - 6) << i
;
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
40747 for (i
= 0; i
< 2; ++i
)
40751 mask
|= ipar
[i
] << i
;
40753 for (i
= 2; i
< 4; ++i
)
40757 mask
|= (ipar
[i
] - 2) << i
;
40762 /* In 512 bit SFmode case, permutation in the upper 256 bits
40763 must mirror the permutation in the lower 256-bits. */
40764 for (i
= 0; i
< 8; ++i
)
40765 if (ipar
[i
] + 8 != ipar
[i
+ 8])
40770 /* In 256 bit SFmode case, we have full freedom of
40771 movement within the low 128-bit lane, but the high 128-bit
40772 lane must mirror the exact same pattern. */
40773 for (i
= 0; i
< 4; ++i
)
40774 if (ipar
[i
] + 4 != ipar
[i
+ 4])
      /* In the 128-bit case, we have full freedom in the placement of
	 the elements from the source operand.  */
40783 for (i
= 0; i
< nelt
; ++i
)
40784 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
40788 gcc_unreachable ();
40791 /* Make sure success has a non-zero value by adding one. */
40795 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
40796 the expansion functions to turn the parallel back into a mask.
40797 The return value is 0 for no match and the imm8+1 for a match. */
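/* A worked example (editorial): for a V8SF parallel (4 5 6 7 0 1 2 3),
   i.e. the two 128-bit lanes swapped, each half collapses to its lane
   number, giving imm8 = 0x01 and a return value of 0x02.  */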
40800 avx_vperm2f128_parallel (rtx par
, machine_mode mode
)
40802 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
40804 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
40806 if (XVECLEN (par
, 0) != (int) nelt
)
40809 /* Validate that all of the elements are constants, and not totally
40810 out of range. Copy the data into an integral array to make the
40811 subsequent checks easier. */
40812 for (i
= 0; i
< nelt
; ++i
)
40814 rtx er
= XVECEXP (par
, 0, i
);
40815 unsigned HOST_WIDE_INT ei
;
40817 if (!CONST_INT_P (er
))
40820 if (ei
>= 2 * nelt
)
40825 /* Validate that the halves of the permute are halves. */
40826 for (i
= 0; i
< nelt2
- 1; ++i
)
40827 if (ipar
[i
] + 1 != ipar
[i
+ 1])
40829 for (i
= nelt2
; i
< nelt
- 1; ++i
)
40830 if (ipar
[i
] + 1 != ipar
[i
+ 1])
40833 /* Reconstruct the mask. */
40834 for (i
= 0; i
< 2; ++i
)
40836 unsigned e
= ipar
[i
* nelt2
];
40840 mask
|= e
<< (i
* 4);
40843 /* Make sure success has a non-zero value by adding one. */
40847 /* Return a register priority for hard reg REGNO. */
40849 ix86_register_priority (int hard_regno
)
  /* ebp and r13 as the base always want a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
40854 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
40856 if (hard_regno
== BP_REG
)
40858 /* New x86-64 int registers result in bigger code size. Discourage
40860 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
40862 /* New x86-64 SSE registers result in bigger code size. Discourage
40864 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
40866 /* Usage of AX register results in smaller code. Prefer it. */
40867 if (hard_regno
== AX_REG
)
40872 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
40874 Put float CONST_DOUBLE in the constant pool instead of fp regs.
40875 QImode must go into class Q_REGS.
40876 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
40877 movdf to do mem-to-mem moves through integer regs. */
40880 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
40882 machine_mode mode
= GET_MODE (x
);
40884 /* We're only allowed to return a subclass of CLASS. Many of the
40885 following checks fail for NO_REGS, so eliminate that early. */
40886 if (regclass
== NO_REGS
)
40889 /* All classes can load zeros. */
40890 if (x
== CONST0_RTX (mode
))
40893 /* Force constants into memory if we are loading a (nonzero) constant into
40894 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
40895 instructions to load from a constant. */
40897 && (MAYBE_MMX_CLASS_P (regclass
)
40898 || MAYBE_SSE_CLASS_P (regclass
)
40899 || MAYBE_MASK_CLASS_P (regclass
)))
40902 /* Floating-point constants need more complex checks. */
40903 if (CONST_DOUBLE_P (x
))
40905 /* General regs can load everything. */
40906 if (INTEGER_CLASS_P (regclass
))
40909 /* Floats can load 0 and 1 plus some others. Note that we eliminated
40910 zero above. We only want to wind up preferring 80387 registers if
40911 we plan on doing computation with them. */
40912 if (IS_STACK_MODE (mode
)
40913 && standard_80387_constant_p (x
) > 0)
40915 /* Limit class to FP regs. */
40916 if (FLOAT_CLASS_P (regclass
))
40918 else if (regclass
== FP_TOP_SSE_REGS
)
40920 else if (regclass
== FP_SECOND_SSE_REGS
)
40921 return FP_SECOND_REG
;
40927 /* Prefer SSE regs only, if we can use them for math. */
40928 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
40929 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
40931 /* Generally when we see PLUS here, it's the function invariant
40932 (plus soft-fp const_int). Which can only be computed into general
40934 if (GET_CODE (x
) == PLUS
)
40935 return INTEGER_CLASS_P (regclass
) ? regclass
: NO_REGS
;
40937 /* QImode constants are easy to load, but non-constant QImode data
40938 must go into Q_REGS. */
40939 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
40941 if (Q_CLASS_P (regclass
))
40943 else if (reg_class_subset_p (Q_REGS
, regclass
))
40952 /* Discourage putting floating-point values in SSE registers unless
40953 SSE math is being used, and likewise for the 387 registers. */
40955 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
40957 machine_mode mode
= GET_MODE (x
);
40959 /* Restrict the output reload class to the register bank that we are doing
40960 math on. If we would like not to return a subset of CLASS, reject this
40961 alternative: if reload cannot do this, it will still use its choice. */
40962 mode
= GET_MODE (x
);
40963 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
40964 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
40966 if (IS_STACK_MODE (mode
))
40968 if (regclass
== FP_TOP_SSE_REGS
)
40970 else if (regclass
== FP_SECOND_SSE_REGS
)
40971 return FP_SECOND_REG
;
40973 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
40980 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
40981 machine_mode mode
, secondary_reload_info
*sri
)
40983 /* Double-word spills from general registers to non-offsettable memory
40984 references (zero-extended addresses) require special handling. */
40987 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
40988 && INTEGER_CLASS_P (rclass
)
40989 && !offsettable_memref_p (x
))
40992 ? CODE_FOR_reload_noff_load
40993 : CODE_FOR_reload_noff_store
);
40994 /* Add the cost of moving address to a temporary. */
40995 sri
->extra_cost
= 1;
41000 /* QImode spills from non-QI registers require
41001 intermediate register on 32bit targets. */
41003 && ((!TARGET_64BIT
&& !in_p
41004 && INTEGER_CLASS_P (rclass
)
41005 && MAYBE_NON_Q_CLASS_P (rclass
))
41006 || (!TARGET_AVX512DQ
41007 && MAYBE_MASK_CLASS_P (rclass
))))
41009 int regno
= true_regnum (x
);
41011 /* Return Q_REGS if the operand is in memory. */
41018 /* This condition handles corner case where an expression involving
41019 pointers gets vectorized. We're trying to use the address of a
41020 stack slot as a vector initializer.
41022 (set (reg:V2DI 74 [ vect_cst_.2 ])
41023 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41025 Eventually frame gets turned into sp+offset like this:
41027 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41028 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41029 (const_int 392 [0x188]))))
41031 That later gets turned into:
41033 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41034 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41035 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41037 We'll have the following reload recorded:
41039 Reload 0: reload_in (DI) =
41040 (plus:DI (reg/f:DI 7 sp)
41041 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41042 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41043 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41044 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41045 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41046 reload_reg_rtx: (reg:V2DI 22 xmm1)
41048 Which isn't going to work since SSE instructions can't handle scalar
41049 additions. Returning GENERAL_REGS forces the addition into integer
41050 register and reload can handle subsequent reloads without problems. */
41052 if (in_p
&& GET_CODE (x
) == PLUS
41053 && SSE_CLASS_P (rclass
)
41054 && SCALAR_INT_MODE_P (mode
))
41055 return GENERAL_REGS
;
41060 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41063 ix86_class_likely_spilled_p (reg_class_t rclass
)
41074 case SSE_FIRST_REG
:
41076 case FP_SECOND_REG
:
41087 /* If we are copying between registers from different register sets
41088 (e.g. FP and integer), we may need a memory location.
41090 The function can't work reliably when one of the CLASSES is a class
41091 containing registers from multiple sets. We avoid this by never combining
41092 different sets in a single alternative in the machine description.
41093 Ensure that this constraint holds to avoid unexpected surprises.
41095 When STRICT is false, we are being called from REGISTER_MOVE_COST,
41096 so do not enforce these sanity checks.
41098 To optimize register_move_cost performance, define inline variant. */
41101 inline_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
41102 reg_class_t class2
, int strict
)
41104 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
41107 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
41108 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
41109 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
41110 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
41111 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
41112 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
)
41113 || MAYBE_MASK_CLASS_P (class1
) != MASK_CLASS_P (class1
)
41114 || MAYBE_MASK_CLASS_P (class2
) != MASK_CLASS_P (class2
))
41116 gcc_assert (!strict
|| lra_in_progress
);
41120 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
41123 /* Between mask and general, we have moves no larger than word size. */
41124 if ((MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
41125 && (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
))
41128 /* ??? This is a lie. We do have moves between mmx/general, and for
41129 mmx/sse2. But by saying we need secondary memory we discourage the
41130 register allocator from using the mmx registers unless needed. */
41131 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
41134 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
41136 /* SSE1 doesn't have any direct moves from other classes. */
41140 /* If the target says that inter-unit moves are more expensive
41141 than moving through memory, then don't generate them. */
41142 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
41143 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
41146 /* Between SSE and general, we have moves no larger than word size. */
41147 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */

static bool
ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			      reg_class_t class2)
{
  return inline_secondary_memory_needed (mode, class1, class2, true);
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens integral modes to BITS_PER_WORD.
   There is no need to emit full 64 bit move on 64 bit targets
   for integral modes that can be moved using 32 bit move.  */

static machine_mode
ix86_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();

  return mode;
}
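/* A worked example (editorial): a QImode or HImode integral spill is
   widened to SImode here, so the secondary-memory reload uses a full
   32-bit load/store rather than a partial-word access.  */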
41177 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41179 On the 80386, this is the size of MODE in words,
41180 except in the FP regs, where a single reg is always enough. */
41182 static unsigned char
41183 ix86_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
41185 if (MAYBE_INTEGER_CLASS_P (rclass
))
41187 if (mode
== XFmode
)
41188 return (TARGET_64BIT
? 2 : 3);
41189 else if (mode
== XCmode
)
41190 return (TARGET_64BIT
? 4 : 6);
41192 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
41196 if (COMPLEX_MODE_P (mode
))
41203 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
41206 ix86_can_change_mode_class (machine_mode from
, machine_mode to
,
41207 reg_class_t regclass
)
41212 /* x87 registers can't do subreg at all, as all values are reformatted
41213 to extended precision. */
41214 if (MAYBE_FLOAT_CLASS_P (regclass
))
41217 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
41219 /* Vector registers do not support QI or HImode loads. If we don't
41220 disallow a change to these modes, reload will assume it's ok to
41221 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41222 the vec_dupv4hi pattern. */
41223 if (GET_MODE_SIZE (from
) < 4)
41230 /* Return the cost of moving data of mode M between a
41231 register and memory. A value of 2 is the default; this cost is
41232 relative to those in `REGISTER_MOVE_COST'.
41234 This function is used extensively by register_move_cost that is used to
41235 build tables at startup. Make it inline in this case.
41236 When IN is 2, return maximum of in and out move cost.
41238 If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */
41246 inline_memory_move_cost (machine_mode mode
, enum reg_class regclass
,
41250 if (FLOAT_CLASS_P (regclass
))
41268 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
41269 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
41271 if (SSE_CLASS_P (regclass
))
41274 switch (GET_MODE_SIZE (mode
))
41289 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
41290 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
41292 if (MMX_CLASS_P (regclass
))
41295 switch (GET_MODE_SIZE (mode
))
41307 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
41308 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
41310 switch (GET_MODE_SIZE (mode
))
41313 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
41316 return ix86_cost
->int_store
[0];
41317 if (TARGET_PARTIAL_REG_DEPENDENCY
41318 && optimize_function_for_speed_p (cfun
))
41319 cost
= ix86_cost
->movzbl_load
;
41321 cost
= ix86_cost
->int_load
[0];
41323 return MAX (cost
, ix86_cost
->int_store
[0]);
41329 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
41331 return ix86_cost
->movzbl_load
;
41333 return ix86_cost
->int_store
[0] + 4;
41338 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
41339 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
41341 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41342 if (mode
== TFmode
)
41345 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
41347 cost
= ix86_cost
->int_load
[2];
41349 cost
= ix86_cost
->int_store
[2];
41350 return cost
* CEIL ((int) GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
41355 ix86_memory_move_cost (machine_mode mode
, reg_class_t regclass
,
41358 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
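
/* Illustration (hypothetical operands, not taken from a specific tuning
   table): a DImode copy between SSE_REGS and GENERAL_REGS, assuming no
   secondary memory is required, differs in SSE_CLASS_P between the two
   classes and therefore costs MAX (8, ix86_cost->mmxsse_to_integer); the
   floor of 8 keeps the allocator from bouncing values between units.  */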
/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   words of a value of mode MODE but can be less for certain modes in
   special long registers.

   Actually there are no two word move instructions for consecutive
   registers.  And only registers 0-3 may have mov byte instructions
   applied to them.  */

static unsigned int
ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (GENERAL_REGNO_P (regno))
    {
      if (mode == XFmode)
	return TARGET_64BIT ? 2 : 3;
      if (mode == XCmode)
	return TARGET_64BIT ? 4 : 6;
      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  if (COMPLEX_MODE_P (mode))
    return 2;
  if (mode == V64SFmode || mode == V64SImode)
    return 4;
  return 1;
}
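
/* Illustration: a TImode value (16 bytes) held in general registers
   occupies CEIL (16, 8) = 2 registers with -m64 and CEIL (16, 4) = 4
   registers with -m32.  */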
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    return (VALID_MASK_REG_MODE (mode)
	    || (TARGET_AVX512BW
		&& VALID_MASK_AVX512BW_MODE (mode)));
  if (BND_REGNO_P (regno))
    return VALID_BND_REG_MODE (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && (VALID_AVX512F_REG_MODE (mode)
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* For AVX-5124FMAPS allow V64SFmode for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && MOD4_SSE_REGNO_P (regno)
	  && mode == V64SFmode)
	return true;

      /* For AVX-5124VNNIW allow V64SImode for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
	  && MOD4_SSE_REGNO_P (regno)
	  && mode == V64SImode)
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   saves SSE registers across calls is Win64 (thus no need to check the
   current ABI here), and with AVX enabled Win64 only guarantees that
   the low 16 bytes are saved.  */

static bool
ix86_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
{
  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
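
/* Illustration: with AVX enabled, a V8SFmode value (32 bytes) living in
   xmm6 across a Win64 call is only guaranteed in its low 16 bytes, so it
   is reported as partially clobbered; a 16-byte V4SFmode value is not.  */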
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (machine_mode mode)
{
  switch (mode)
    {
    case E_HImode:
    case E_SImode:
      return true;

    case E_QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case E_DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
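
/* Illustration: V2DFmode and V4SFmode (both 16 bytes and valid in the
   first SSE register) are tieable with each other, and SFmode ties with
   DFmode; XFmode, however, only ties with SFmode and DFmode, never with
   TFmode.  */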
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
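
/* Worked example (assuming a 64-bit target): with AVX enabled, a V8SFmode
   register copy (32 bytes) moves in a single piece, units = 32, giving
   COSTS_N_INSNS (1); without AVX the same mode falls back to
   units = UNITS_PER_WORD and costs COSTS_N_INSNS (4).  */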
41689 /* Compute a (partial) cost for rtx X. Return true if the complete
41690 cost has been computed, and false if subexpressions should be
41691 scanned. In either case, *TOTAL contains the cost result. */
41694 ix86_rtx_costs (rtx x
, machine_mode mode
, int outer_code_i
, int opno
,
41695 int *total
, bool speed
)
41698 enum rtx_code code
= GET_CODE (x
);
41699 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
41700 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
41706 if (register_operand (SET_DEST (x
), VOIDmode
)
41707 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
41709 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
41713 if (register_operand (SET_SRC (x
), VOIDmode
))
41714 /* Avoid potentially incorrect high cost from rtx_costs
41715 for non-tieable SUBREGs. */
41719 src_cost
= rtx_cost (SET_SRC (x
), mode
, SET
, 1, speed
);
41721 if (CONSTANT_P (SET_SRC (x
)))
41722 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
41723 a small value, possibly zero for cheap constants. */
41724 src_cost
+= COSTS_N_INSNS (1);
41727 *total
= src_cost
+ rtx_cost (SET_DEST (x
), mode
, SET
, 0, speed
);
41734 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
41736 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
41738 else if (flag_pic
&& SYMBOLIC_CONST (x
)
41740 && (GET_CODE (x
) == LABEL_REF
41741 || (GET_CODE (x
) == SYMBOL_REF
41742 && SYMBOL_REF_LOCAL_P (x
))))
41743 /* Use 0 cost for CONST to improve its propagation. */
41744 && (TARGET_64BIT
|| GET_CODE (x
) != CONST
))
41751 if (IS_STACK_MODE (mode
))
41752 switch (standard_80387_constant_p (x
))
41760 default: /* Other constants */
41767 switch (standard_sse_constant_p (x
, mode
))
41771 case 1: /* 0: xor eliminates false dependency */
41774 default: /* -1: cmp contains false dependency */
41780 case CONST_WIDE_INT
:
41781 /* Fall back to (MEM (SYMBOL_REF)), since that's where
41782 it'll probably end up. Add a penalty for size. */
41783 *total
= (COSTS_N_INSNS (1)
41784 + (!TARGET_64BIT
&& flag_pic
)
41785 + (GET_MODE_SIZE (mode
) <= 4
41786 ? 0 : GET_MODE_SIZE (mode
) <= 8 ? 1 : 2));
41790 /* The zero extensions is often completely free on x86_64, so make
41791 it as cheap as possible. */
41792 if (TARGET_64BIT
&& mode
== DImode
41793 && GET_MODE (XEXP (x
, 0)) == SImode
)
41795 else if (TARGET_ZERO_EXTEND_WITH_AND
)
41796 *total
= cost
->add
;
41798 *total
= cost
->movzx
;
41802 *total
= cost
->movsx
;
41806 if (SCALAR_INT_MODE_P (mode
)
41807 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
41808 && CONST_INT_P (XEXP (x
, 1)))
41810 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
41813 *total
= cost
->add
;
41816 if ((value
== 2 || value
== 3)
41817 && cost
->lea
<= cost
->shift_const
)
41819 *total
= cost
->lea
;
41829 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
41831 /* ??? Should be SSE vector operation cost. */
41832 /* At least for published AMD latencies, this really is the same
41833 as the latency for a simple fpu operation like fabs. */
41834 /* V*QImode is emulated with 1-11 insns. */
41835 if (mode
== V16QImode
|| mode
== V32QImode
)
41838 if (TARGET_XOP
&& mode
== V16QImode
)
41840 /* For XOP we use vpshab, which requires a broadcast of the
41841 value to the variable shift insn. For constants this
41842 means a V16Q const in mem; even when we can perform the
41843 shift with one insn set the cost to prefer paddb. */
41844 if (CONSTANT_P (XEXP (x
, 1)))
41846 *total
= (cost
->fabs
41847 + rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
)
41848 + (speed
? 2 : COSTS_N_BYTES (16)));
41853 else if (TARGET_SSSE3
)
41855 *total
= cost
->fabs
* count
;
41858 *total
= cost
->fabs
;
41860 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
41862 if (CONST_INT_P (XEXP (x
, 1)))
41864 if (INTVAL (XEXP (x
, 1)) > 32)
41865 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
41867 *total
= cost
->shift_const
* 2;
41871 if (GET_CODE (XEXP (x
, 1)) == AND
)
41872 *total
= cost
->shift_var
* 2;
41874 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
41879 if (CONST_INT_P (XEXP (x
, 1)))
41880 *total
= cost
->shift_const
;
41881 else if (SUBREG_P (XEXP (x
, 1))
41882 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
)
41884 /* Return the cost after shift-and truncation. */
41885 *total
= cost
->shift_var
;
41889 *total
= cost
->shift_var
;
41897 gcc_assert (FLOAT_MODE_P (mode
));
41898 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
41900 /* ??? SSE scalar/vector cost should be used here. */
41901 /* ??? Bald assumption that fma has the same cost as fmul. */
41902 *total
= cost
->fmul
;
41903 *total
+= rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
);
41905 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
41907 if (GET_CODE (sub
) == NEG
)
41908 sub
= XEXP (sub
, 0);
41909 *total
+= rtx_cost (sub
, mode
, FMA
, 0, speed
);
41912 if (GET_CODE (sub
) == NEG
)
41913 sub
= XEXP (sub
, 0);
41914 *total
+= rtx_cost (sub
, mode
, FMA
, 2, speed
);
41919 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
41921 /* ??? SSE scalar cost should be used here. */
41922 *total
= cost
->fmul
;
41925 else if (X87_FLOAT_MODE_P (mode
))
41927 *total
= cost
->fmul
;
41930 else if (FLOAT_MODE_P (mode
))
41932 /* ??? SSE vector cost should be used here. */
41933 *total
= cost
->fmul
;
41936 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
41938 /* V*QImode is emulated with 7-13 insns. */
41939 if (mode
== V16QImode
|| mode
== V32QImode
)
41942 if (TARGET_XOP
&& mode
== V16QImode
)
41944 else if (TARGET_SSSE3
)
41946 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
41948 /* V*DImode is emulated with 5-8 insns. */
41949 else if (mode
== V2DImode
|| mode
== V4DImode
)
41951 if (TARGET_XOP
&& mode
== V2DImode
)
41952 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
41954 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
41956 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
41957 insns, including two PMULUDQ. */
41958 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
41959 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
41961 *total
= cost
->fmul
;
41966 rtx op0
= XEXP (x
, 0);
41967 rtx op1
= XEXP (x
, 1);
41969 if (CONST_INT_P (XEXP (x
, 1)))
41971 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
41972 for (nbits
= 0; value
!= 0; value
&= value
- 1)
41976 /* This is arbitrary. */
41979 /* Compute costs correctly for widening multiplication. */
41980 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
41981 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
41982 == GET_MODE_SIZE (mode
))
41984 int is_mulwiden
= 0;
41985 machine_mode inner_mode
= GET_MODE (op0
);
41987 if (GET_CODE (op0
) == GET_CODE (op1
))
41988 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
41989 else if (CONST_INT_P (op1
))
41991 if (GET_CODE (op0
) == SIGN_EXTEND
)
41992 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
41995 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
41999 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
42002 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
42003 + nbits
* cost
->mult_bit
42004 + rtx_cost (op0
, mode
, outer_code
, opno
, speed
)
42005 + rtx_cost (op1
, mode
, outer_code
, opno
, speed
));
42014 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
42015 /* ??? SSE cost should be used here. */
42016 *total
= cost
->fdiv
;
42017 else if (X87_FLOAT_MODE_P (mode
))
42018 *total
= cost
->fdiv
;
42019 else if (FLOAT_MODE_P (mode
))
42020 /* ??? SSE vector cost should be used here. */
42021 *total
= cost
->fdiv
;
42023 *total
= cost
->divide
[MODE_INDEX (mode
)];
42027 if (GET_MODE_CLASS (mode
) == MODE_INT
42028 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
42030 if (GET_CODE (XEXP (x
, 0)) == PLUS
42031 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
42032 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
42033 && CONSTANT_P (XEXP (x
, 1)))
42035 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
42036 if (val
== 2 || val
== 4 || val
== 8)
42038 *total
= cost
->lea
;
42039 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
42040 outer_code
, opno
, speed
);
42041 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
42042 outer_code
, opno
, speed
);
42043 *total
+= rtx_cost (XEXP (x
, 1), mode
,
42044 outer_code
, opno
, speed
);
42048 else if (GET_CODE (XEXP (x
, 0)) == MULT
42049 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
42051 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
42052 if (val
== 2 || val
== 4 || val
== 8)
42054 *total
= cost
->lea
;
42055 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
42056 outer_code
, opno
, speed
);
42057 *total
+= rtx_cost (XEXP (x
, 1), mode
,
42058 outer_code
, opno
, speed
);
42062 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
42064 /* Add with carry, ignore the cost of adding a carry flag. */
42065 if (ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 0), mode
))
42066 *total
= cost
->add
;
42069 *total
= cost
->lea
;
42070 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
42071 outer_code
, opno
, speed
);
42074 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
42075 outer_code
, opno
, speed
);
42076 *total
+= rtx_cost (XEXP (x
, 1), mode
,
42077 outer_code
, opno
, speed
);
42084 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
42085 if (GET_MODE_CLASS (mode
) == MODE_INT
42086 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
42087 && GET_CODE (XEXP (x
, 0)) == MINUS
42088 && ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 1), mode
))
42090 *total
= cost
->add
;
42091 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
42092 outer_code
, opno
, speed
);
42093 *total
+= rtx_cost (XEXP (x
, 1), mode
,
42094 outer_code
, opno
, speed
);
42098 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
42100 /* ??? SSE cost should be used here. */
42101 *total
= cost
->fadd
;
42104 else if (X87_FLOAT_MODE_P (mode
))
42106 *total
= cost
->fadd
;
42109 else if (FLOAT_MODE_P (mode
))
42111 /* ??? SSE vector cost should be used here. */
42112 *total
= cost
->fadd
;
42120 if (GET_MODE_CLASS (mode
) == MODE_INT
42121 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
42123 *total
= (cost
->add
* 2
42124 + (rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
)
42125 << (GET_MODE (XEXP (x
, 0)) != DImode
))
42126 + (rtx_cost (XEXP (x
, 1), mode
, outer_code
, opno
, speed
)
42127 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
42133 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
42135 /* ??? SSE cost should be used here. */
42136 *total
= cost
->fchs
;
42139 else if (X87_FLOAT_MODE_P (mode
))
42141 *total
= cost
->fchs
;
42144 else if (FLOAT_MODE_P (mode
))
42146 /* ??? SSE vector cost should be used here. */
42147 *total
= cost
->fchs
;
42153 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
42155 /* ??? Should be SSE vector operation cost. */
42156 /* At least for published AMD latencies, this really is the same
42157 as the latency for a simple fpu operation like fabs. */
42158 *total
= cost
->fabs
;
42160 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
42161 *total
= cost
->add
* 2;
42163 *total
= cost
->add
;
42167 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
42168 && XEXP (XEXP (x
, 0), 1) == const1_rtx
42169 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
42170 && XEXP (x
, 1) == const0_rtx
)
42172 /* This kind of construct is implemented using test[bwl].
42173 Treat it as if we had an AND. */
42174 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
42175 *total
= (cost
->add
42176 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, outer_code
,
42178 + rtx_cost (const1_rtx
, mode
, outer_code
, opno
, speed
));
42182 /* The embedded comparison operand is completely free. */
42183 if (!general_operand (XEXP (x
, 0), GET_MODE (XEXP (x
, 0)))
42184 && XEXP (x
, 1) == const0_rtx
)
42190 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
42195 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
42196 /* ??? SSE cost should be used here. */
42197 *total
= cost
->fabs
;
42198 else if (X87_FLOAT_MODE_P (mode
))
42199 *total
= cost
->fabs
;
42200 else if (FLOAT_MODE_P (mode
))
42201 /* ??? SSE vector cost should be used here. */
42202 *total
= cost
->fabs
;
42206 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
42207 /* ??? SSE cost should be used here. */
42208 *total
= cost
->fsqrt
;
42209 else if (X87_FLOAT_MODE_P (mode
))
42210 *total
= cost
->fsqrt
;
42211 else if (FLOAT_MODE_P (mode
))
42212 /* ??? SSE vector cost should be used here. */
42213 *total
= cost
->fsqrt
;
42217 if (XINT (x
, 1) == UNSPEC_TP
)
42223 case VEC_DUPLICATE
:
42224 /* ??? Assume all of these vector manipulation patterns are
42225 recognizable. In which case they all pretty much have the
42227 *total
= cost
->fabs
;
42230 mask
= XEXP (x
, 2);
42231 /* This is masked instruction, assume the same cost,
42232 as nonmasked variant. */
42233 if (TARGET_AVX512F
&& register_operand (mask
, GET_MODE (mask
)))
42234 *total
= rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
);
42236 *total
= cost
->fabs
;
42246 static int current_machopic_label_num
;
42248 /* Given a symbol name and its associated stub, write out the
42249 definition of the stub. */
42252 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
42254 unsigned int length
;
42255 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
42256 int label
= ++current_machopic_label_num
;
42258 /* For 64-bit we shouldn't get here. */
42259 gcc_assert (!TARGET_64BIT
);
42261 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42262 symb
= targetm
.strip_name_encoding (symb
);
42264 length
= strlen (stub
);
42265 binder_name
= XALLOCAVEC (char, length
+ 32);
42266 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
42268 length
= strlen (symb
);
42269 symbol_name
= XALLOCAVEC (char, length
+ 32);
42270 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
42272 sprintf (lazy_ptr_name
, "L%d$lz", label
);
42274 if (MACHOPIC_ATT_STUB
)
42275 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
42276 else if (MACHOPIC_PURE
)
42277 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
42279 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
42281 fprintf (file
, "%s:\n", stub
);
42282 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
42284 if (MACHOPIC_ATT_STUB
)
42286 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42288 else if (MACHOPIC_PURE
)
42291 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42292 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
42293 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42294 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42295 label
, lazy_ptr_name
, label
);
42296 fprintf (file
, "\tjmp\t*%%ecx\n");
42299 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
42301 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42302 it needs no stub-binding-helper. */
42303 if (MACHOPIC_ATT_STUB
)
42306 fprintf (file
, "%s:\n", binder_name
);
42310 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
42311 fprintf (file
, "\tpushl\t%%ecx\n");
42314 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
42316 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
42318 /* N.B. Keep the correspondence of these
42319 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42320 old-pic/new-pic/non-pic stubs; altering this will break
42321 compatibility with existing dylibs. */
42324 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42325 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
42328 /* 16-byte -mdynamic-no-pic stub. */
42329 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
42331 fprintf (file
, "%s:\n", lazy_ptr_name
);
42332 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
42333 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
42335 #endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Extended REX SSE registers.  */
  for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Mask register.  */
  for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
    reg_alloc_order [pos++] = i;

  /* MPX bound registers.  */
  for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of array as we do not allocate some registers
     at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}
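
/* The resulting priority is therefore, roughly: call-clobbered general
   registers first, then call-saved general registers, then (when FP math
   is done on the 387) the x87 stack registers, followed by xmm0-xmm15,
   xmm16-xmm31, the mask registers, the MPX bound registers, the x87
   stack registers otherwise, and finally the MMX registers.  */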
42393 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42394 in struct attribute_spec handler. */
42396 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
42399 bool *no_add_attrs
)
42401 if (TREE_CODE (*node
) != FUNCTION_TYPE
42402 && TREE_CODE (*node
) != METHOD_TYPE
42403 && TREE_CODE (*node
) != FIELD_DECL
42404 && TREE_CODE (*node
) != TYPE_DECL
)
42406 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
42408 *no_add_attrs
= true;
42413 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
42415 *no_add_attrs
= true;
42418 if (is_attribute_p ("callee_pop_aggregate_return", name
))
42422 cst
= TREE_VALUE (args
);
42423 if (TREE_CODE (cst
) != INTEGER_CST
)
42425 warning (OPT_Wattributes
,
42426 "%qE attribute requires an integer constant argument",
42428 *no_add_attrs
= true;
42430 else if (compare_tree_int (cst
, 0) != 0
42431 && compare_tree_int (cst
, 1) != 0)
42433 warning (OPT_Wattributes
,
42434 "argument to %qE attribute is neither zero, nor one",
42436 *no_add_attrs
= true;
42445 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42446 struct attribute_spec.handler. */
42448 ix86_handle_abi_attribute (tree
*node
, tree name
, tree
, int,
42449 bool *no_add_attrs
)
42451 if (TREE_CODE (*node
) != FUNCTION_TYPE
42452 && TREE_CODE (*node
) != METHOD_TYPE
42453 && TREE_CODE (*node
) != FIELD_DECL
42454 && TREE_CODE (*node
) != TYPE_DECL
)
42456 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
42458 *no_add_attrs
= true;
42462 /* Can combine regparm with all attributes but fastcall. */
42463 if (is_attribute_p ("ms_abi", name
))
42465 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
42467 error ("ms_abi and sysv_abi attributes are not compatible");
42472 else if (is_attribute_p ("sysv_abi", name
))
42474 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
42476 error ("ms_abi and sysv_abi attributes are not compatible");
42485 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42486 struct attribute_spec.handler. */
42488 ix86_handle_struct_attribute (tree
*node
, tree name
, tree
, int,
42489 bool *no_add_attrs
)
42492 if (DECL_P (*node
))
42494 if (TREE_CODE (*node
) == TYPE_DECL
)
42495 type
= &TREE_TYPE (*node
);
42500 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
42502 warning (OPT_Wattributes
, "%qE attribute ignored",
42504 *no_add_attrs
= true;
42507 else if ((is_attribute_p ("ms_struct", name
)
42508 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
42509 || ((is_attribute_p ("gcc_struct", name
)
42510 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
42512 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
42514 *no_add_attrs
= true;
42521 ix86_handle_fndecl_attribute (tree
*node
, tree name
, tree
, int,
42522 bool *no_add_attrs
)
42524 if (TREE_CODE (*node
) != FUNCTION_DECL
)
42526 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
42528 *no_add_attrs
= true;
42534 ix86_handle_no_caller_saved_registers_attribute (tree
*, tree
, tree
,
42541 ix86_handle_interrupt_attribute (tree
*node
, tree
, tree
, int, bool *)
42543 /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet,
42544 but the function type contains args and return type data. */
42545 tree func_type
= *node
;
42546 tree return_type
= TREE_TYPE (func_type
);
42549 tree current_arg_type
= TYPE_ARG_TYPES (func_type
);
42550 while (current_arg_type
42551 && ! VOID_TYPE_P (TREE_VALUE (current_arg_type
)))
42555 if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type
)))
42556 error ("interrupt service routine should have a pointer "
42557 "as the first argument");
42559 else if (nargs
== 1)
42561 if (TREE_CODE (TREE_VALUE (current_arg_type
)) != INTEGER_TYPE
42562 || TYPE_MODE (TREE_VALUE (current_arg_type
)) != word_mode
)
42563 error ("interrupt service routine should have unsigned %s"
42564 "int as the second argument",
42566 ? (TARGET_X32
? "long long " : "long ")
42570 current_arg_type
= TREE_CHAIN (current_arg_type
);
42572 if (!nargs
|| nargs
> 2)
42573 error ("interrupt service routine can only have a pointer argument "
42574 "and an optional integer argument");
42575 if (! VOID_TYPE_P (return_type
))
42576 error ("interrupt service routine can't have non-void return value");
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
42589 /* Returns an expression indicating where the this parameter is
42590 located on entry to the FUNCTION. */
42593 x86_this_parameter (tree function
)
42595 tree type
= TREE_TYPE (function
);
42596 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
42601 const int *parm_regs
;
42603 if (ix86_function_type_abi (type
) == MS_ABI
)
42604 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
42606 parm_regs
= x86_64_int_parameter_registers
;
42607 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
42610 nregs
= ix86_function_regparm (type
, function
);
42612 if (nregs
> 0 && !stdarg_p (type
))
42615 unsigned int ccvt
= ix86_get_callcvt (type
);
42617 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
42618 regno
= aggr
? DX_REG
: CX_REG
;
42619 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
42623 return gen_rtx_MEM (SImode
,
42624 plus_constant (Pmode
, stack_pointer_rtx
, 4));
42633 return gen_rtx_MEM (SImode
,
42634 plus_constant (Pmode
,
42635 stack_pointer_rtx
, 4));
42638 return gen_rtx_REG (SImode
, regno
);
42641 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
42645 /* Determine whether x86_output_mi_thunk can succeed. */
42648 x86_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
42649 const_tree function
)
42651 /* 64-bit can handle anything. */
42655 /* For 32-bit, everything's fine if we have one free register. */
42656 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
42659 /* Need a free register for vcall_offset. */
42663 /* Need a free register for GOT references. */
42664 if (flag_pic
&& !targetm
.binds_local_p (function
))
42667 /* Otherwise ok. */
42671 /* Output the assembler code for a thunk function. THUNK_DECL is the
42672 declaration for the thunk function itself, FUNCTION is the decl for
42673 the target function. DELTA is an immediate constant offset to be
42674 added to THIS. If VCALL_OFFSET is nonzero, the word at
42675 *(*this + vcall_offset) should be added to THIS. */
42678 x86_output_mi_thunk (FILE *file
, tree
, HOST_WIDE_INT delta
,
42679 HOST_WIDE_INT vcall_offset
, tree function
)
42681 rtx this_param
= x86_this_parameter (function
);
42682 rtx this_reg
, tmp
, fnaddr
;
42683 unsigned int tmp_regno
;
42687 tmp_regno
= R10_REG
;
42690 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
42691 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
42692 tmp_regno
= AX_REG
;
42693 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
42694 tmp_regno
= DX_REG
;
42696 tmp_regno
= CX_REG
;
42699 emit_note (NOTE_INSN_PROLOGUE_END
);
42701 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
42702 pull it in now and let DELTA benefit. */
42703 if (REG_P (this_param
))
42704 this_reg
= this_param
;
42705 else if (vcall_offset
)
42707 /* Put the this parameter into %eax. */
42708 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
42709 emit_move_insn (this_reg
, this_param
);
42712 this_reg
= NULL_RTX
;
42714 /* Adjust the this parameter by a fixed constant. */
42717 rtx delta_rtx
= GEN_INT (delta
);
42718 rtx delta_dst
= this_reg
? this_reg
: this_param
;
42722 if (!x86_64_general_operand (delta_rtx
, Pmode
))
42724 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
42725 emit_move_insn (tmp
, delta_rtx
);
42730 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
42733 /* Adjust the this parameter by a value stored in the vtable. */
42736 rtx vcall_addr
, vcall_mem
, this_mem
;
42738 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
42740 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
42741 if (Pmode
!= ptr_mode
)
42742 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
42743 emit_move_insn (tmp
, this_mem
);
42745 /* Adjust the this parameter. */
42746 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
42748 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
42750 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
42751 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
42752 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
42755 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
42756 if (Pmode
!= ptr_mode
)
42757 emit_insn (gen_addsi_1_zext (this_reg
,
42758 gen_rtx_REG (ptr_mode
,
42762 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
42765 /* If necessary, drop THIS back to its stack slot. */
42766 if (this_reg
&& this_reg
!= this_param
)
42767 emit_move_insn (this_param
, this_reg
);
42769 fnaddr
= XEXP (DECL_RTL (function
), 0);
42772 if (!flag_pic
|| targetm
.binds_local_p (function
)
42777 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
42778 tmp
= gen_rtx_CONST (Pmode
, tmp
);
42779 fnaddr
= gen_const_mem (Pmode
, tmp
);
42784 if (!flag_pic
|| targetm
.binds_local_p (function
))
42787 else if (TARGET_MACHO
)
42789 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
42790 fnaddr
= XEXP (fnaddr
, 0);
42792 #endif /* TARGET_MACHO */
42795 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
42796 output_set_got (tmp
, NULL_RTX
);
42798 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
42799 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
42800 fnaddr
= gen_rtx_PLUS (Pmode
, tmp
, fnaddr
);
42801 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
42805 /* Our sibling call patterns do not allow memories, because we have no
42806 predicate that can distinguish between frame and non-frame memory.
42807 For our purposes here, we can get away with (ab)using a jump pattern,
42808 because we're going to do no optimization. */
42809 if (MEM_P (fnaddr
))
42811 if (sibcall_insn_operand (fnaddr
, word_mode
))
42813 fnaddr
= XEXP (DECL_RTL (function
), 0);
42814 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
42815 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
42816 tmp
= emit_call_insn (tmp
);
42817 SIBLING_CALL_P (tmp
) = 1;
42820 emit_jump_insn (gen_indirect_jump (fnaddr
));
42824 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
42826 // CM_LARGE_PIC always uses pseudo PIC register which is
42827 // uninitialized. Since FUNCTION is local and calling it
42828 // doesn't go through PLT, we use scratch register %r11 as
42829 // PIC register and initialize it here.
42830 pic_offset_table_rtx
= gen_rtx_REG (Pmode
, R11_REG
);
42831 ix86_init_large_pic_reg (tmp_regno
);
42832 fnaddr
= legitimize_pic_address (fnaddr
,
42833 gen_rtx_REG (Pmode
, tmp_regno
));
42836 if (!sibcall_insn_operand (fnaddr
, word_mode
))
42838 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
42839 if (GET_MODE (fnaddr
) != word_mode
)
42840 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
42841 emit_move_insn (tmp
, fnaddr
);
42845 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
42846 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
42847 tmp
= emit_call_insn (tmp
);
42848 SIBLING_CALL_P (tmp
) = 1;
42852 /* Emit just enough of rest_of_compilation to get the insns emitted.
42853 Note that use_thunk calls assemble_start_function et al. */
42854 insn
= get_insns ();
42855 shorten_branches (insn
);
42856 final_start_function (insn
, file
, 1);
42857 final (insn
, file
, 1);
42858 final_end_function ();
static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
static int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount)
    fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop.  */
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}
42906 /* Output assembler code to FILE to increment profiler label # LABELNO
42907 for profiling a function entry. */
42909 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
42911 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
42915 #ifndef NO_PROFILE_COUNTERS
42916 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
42919 if (!TARGET_PECOFF
&& flag_pic
)
42920 fprintf (file
, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
42922 x86_print_call_or_nop (file
, mcount_name
);
42926 #ifndef NO_PROFILE_COUNTERS
42927 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
42930 fprintf (file
, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
42934 #ifndef NO_PROFILE_COUNTERS
42935 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
42938 x86_print_call_or_nop (file
, mcount_name
);
42941 if (flag_record_mcount
)
42943 fprintf (file
, "\t.section __mcount_loc, \"a\",@progbits\n");
42944 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
42945 fprintf (file
, "\t.previous\n");
42949 /* We don't have exact information about the insn sizes, but we may assume
42950 quite safely that we are informed about all 1 byte insns and memory
42951 address sizes. This is enough to eliminate unnecessary padding in
42955 min_insn_size (rtx_insn
*insn
)
42959 if (!INSN_P (insn
) || !active_insn_p (insn
))
42962 /* Discard alignments we've emit and jump instructions. */
42963 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
42964 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
42967 /* Important case - calls are always 5 bytes.
42968 It is common to have many calls in the row. */
42970 && symbolic_reference_mentioned_p (PATTERN (insn
))
42971 && !SIBLING_CALL_P (insn
))
42973 len
= get_attr_length (insn
);
42977 /* For normal instructions we rely on get_attr_length being exact,
42978 with a few exceptions. */
42979 if (!JUMP_P (insn
))
42981 enum attr_type type
= get_attr_type (insn
);
42986 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
42987 || asm_noperands (PATTERN (insn
)) >= 0)
42994 /* Otherwise trust get_attr_length. */
42998 l
= get_attr_length_address (insn
);
42999 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
43008 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43010 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43014 ix86_avoid_jump_mispredicts (void)
43016 rtx_insn
*insn
, *start
= get_insns ();
43017 int nbytes
= 0, njumps
= 0;
43018 bool isjump
= false;
43020 /* Look for all minimal intervals of instructions containing 4 jumps.
43021 The intervals are bounded by START and INSN. NBYTES is the total
43022 size of instructions in the interval including INSN and not including
43023 START. When the NBYTES is smaller than 16 bytes, it is possible
43024 that the end of START and INSN ends up in the same 16byte page.
43026 The smallest offset in the page INSN can start is the case where START
43027 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43028 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43030 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43031 have to, control transfer to label(s) can be performed through other
43032 means, and also we estimate minimum length of all asm stmts as 0. */
43033 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
43037 if (LABEL_P (insn
))
43039 int align
= label_to_alignment (insn
);
43040 int max_skip
= label_to_max_skip (insn
);
43044 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43045 already in the current 16 byte page, because otherwise
43046 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43047 bytes to reach 16 byte boundary. */
43049 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
43052 fprintf (dump_file
, "Label %i with max_skip %i\n",
43053 INSN_UID (insn
), max_skip
);
43056 while (nbytes
+ max_skip
>= 16)
43058 start
= NEXT_INSN (start
);
43059 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
43061 njumps
--, isjump
= true;
43064 nbytes
-= min_insn_size (start
);
43070 min_size
= min_insn_size (insn
);
43071 nbytes
+= min_size
;
43073 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
43074 INSN_UID (insn
), min_size
);
43075 if ((JUMP_P (insn
) && asm_noperands (PATTERN (insn
)) < 0)
43083 start
= NEXT_INSN (start
);
43084 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
43086 njumps
--, isjump
= true;
43089 nbytes
-= min_insn_size (start
);
43091 gcc_assert (njumps
>= 0);
43093 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
43094 INSN_UID (start
), INSN_UID (insn
), nbytes
);
43096 if (njumps
== 3 && isjump
&& nbytes
< 16)
43098 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
43101 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
43102 INSN_UID (insn
), padsize
);
43103 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
43109 /* AMD Athlon works faster
43110 when RET is not destination of conditional jump or directly preceded
43111 by other jump instruction. We avoid the penalty by inserting NOP just
43112 before the RET instructions in such cases. */
43114 ix86_pad_returns (void)
43119 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
43121 basic_block bb
= e
->src
;
43122 rtx_insn
*ret
= BB_END (bb
);
43124 bool replace
= false;
43126 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
43127 || optimize_bb_for_size_p (bb
))
43129 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
43130 if (active_insn_p (prev
) || LABEL_P (prev
))
43132 if (prev
&& LABEL_P (prev
))
43137 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
43138 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
43139 && !(e
->flags
& EDGE_FALLTHRU
))
43147 prev
= prev_active_insn (ret
);
43149 && ((JUMP_P (prev
) && any_condjump_p (prev
))
43152 /* Empty functions get branch mispredict even when
43153 the jump destination is not visible to us. */
43154 if (!prev
&& !optimize_function_for_size_p (cfun
))
43159 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
43165 /* Count the minimum number of instructions in BB. Return 4 if the
43166 number of instructions >= 4. */
43169 ix86_count_insn_bb (basic_block bb
)
43172 int insn_count
= 0;
43174 /* Count number of instructions in this block. Return 4 if the number
43175 of instructions >= 4. */
43176 FOR_BB_INSNS (bb
, insn
)
43178 /* Only happen in exit blocks. */
43180 && ANY_RETURN_P (PATTERN (insn
)))
43183 if (NONDEBUG_INSN_P (insn
)
43184 && GET_CODE (PATTERN (insn
)) != USE
43185 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
43188 if (insn_count
>= 4)
43197 /* Count the minimum number of instructions in code path in BB.
43198 Return 4 if the number of instructions >= 4. */
43201 ix86_count_insn (basic_block bb
)
43205 int min_prev_count
;
43207 /* Only bother counting instructions along paths with no
43208 more than 2 basic blocks between entry and exit. Given
43209 that BB has an edge to exit, determine if a predecessor
43210 of BB has an edge from entry. If so, compute the number
43211 of instructions in the predecessor block. If there
43212 happen to be multiple such blocks, compute the minimum. */
43213 min_prev_count
= 4;
43214 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
43217 edge_iterator prev_ei
;
43219 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
43221 min_prev_count
= 0;
43224 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
43226 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
43228 int count
= ix86_count_insn_bb (e
->src
);
43229 if (count
< min_prev_count
)
43230 min_prev_count
= count
;
43236 if (min_prev_count
< 4)
43237 min_prev_count
+= ix86_count_insn_bb (bb
);
43239 return min_prev_count
;
43242 /* Pad short function to 4 instructions. */
43245 ix86_pad_short_function (void)
43250 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
43252 rtx_insn
*ret
= BB_END (e
->src
);
43253 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
43255 int insn_count
= ix86_count_insn (e
->src
);
43257 /* Pad short function. */
43258 if (insn_count
< 4)
43260 rtx_insn
*insn
= ret
;
43262 /* Find epilogue. */
43265 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
43266 insn
= PREV_INSN (insn
);
43271 /* Two NOPs count as one instruction. */
43272 insn_count
= 2 * (4 - insn_count
);
43273 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
43279 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43280 the epilogue, the Windows system unwinder will apply epilogue logic and
43281 produce incorrect offsets. This can be avoided by adding a nop between
43282 the last insn that can throw and the first insn of the epilogue. */
43285 ix86_seh_fixup_eh_fallthru (void)
43290 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
43292 rtx_insn
*insn
, *next
;
43294 /* Find the beginning of the epilogue. */
43295 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
43296 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
43301 /* We only care about preceding insns that can throw. */
43302 insn
= prev_active_insn (insn
);
43303 if (insn
== NULL
|| !can_throw_internal (insn
))
43306 /* Do not separate calls from their debug information. */
43307 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
43309 && (NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
43310 || NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
))
43315 emit_insn_after (gen_nops (const1_rtx
), insn
);
43319 /* Given a register number BASE, the lowest of a group of registers, update
43320 regsets IN and OUT with the registers that should be avoided in input
43321 and output operands respectively when trying to avoid generating a modr/m
43322 byte for -fmitigate-rop. */
43325 set_rop_modrm_reg_bits (int base
, HARD_REG_SET
&in
, HARD_REG_SET
&out
)
43327 SET_HARD_REG_BIT (out
, base
);
43328 SET_HARD_REG_BIT (out
, base
+ 1);
43329 SET_HARD_REG_BIT (in
, base
+ 2);
43330 SET_HARD_REG_BIT (in
, base
+ 3);
43333 /* Called if -fmitigate_rop is in effect. Try to rewrite instructions so
43334 that certain encodings of modr/m bytes do not occur. */
43336 ix86_mitigate_rop (void)
43338 HARD_REG_SET input_risky
;
43339 HARD_REG_SET output_risky
;
43340 HARD_REG_SET inout_risky
;
43342 CLEAR_HARD_REG_SET (output_risky
);
43343 CLEAR_HARD_REG_SET (input_risky
);
43344 SET_HARD_REG_BIT (output_risky
, AX_REG
);
43345 SET_HARD_REG_BIT (output_risky
, CX_REG
);
43346 SET_HARD_REG_BIT (input_risky
, BX_REG
);
43347 SET_HARD_REG_BIT (input_risky
, DX_REG
);
43348 set_rop_modrm_reg_bits (FIRST_SSE_REG
, input_risky
, output_risky
);
43349 set_rop_modrm_reg_bits (FIRST_REX_INT_REG
, input_risky
, output_risky
);
43350 set_rop_modrm_reg_bits (FIRST_REX_SSE_REG
, input_risky
, output_risky
);
43351 set_rop_modrm_reg_bits (FIRST_EXT_REX_SSE_REG
, input_risky
, output_risky
);
43352 set_rop_modrm_reg_bits (FIRST_MASK_REG
, input_risky
, output_risky
);
43353 set_rop_modrm_reg_bits (FIRST_BND_REG
, input_risky
, output_risky
);
43354 COPY_HARD_REG_SET (inout_risky
, input_risky
);
43355 IOR_HARD_REG_SET (inout_risky
, output_risky
);
43357 df_note_add_problem ();
43358 /* Fix up what stack-regs did. */
43359 df_insn_rescan_all ();
43362 regrename_init (true);
43363 regrename_analyze (NULL
);
43365 auto_vec
<du_head_p
> cands
;
43367 for (rtx_insn
*insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
43369 if (!NONDEBUG_INSN_P (insn
))
43372 if (GET_CODE (PATTERN (insn
)) == USE
43373 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
43376 extract_insn (insn
);
43379 int modrm
= ix86_get_modrm_for_rop (insn
, recog_data
.operand
,
43380 recog_data
.n_operands
, &opno0
,
43383 if (!ix86_rop_should_change_byte_p (modrm
))
43386 insn_rr_info
*info
= &insn_rr
[INSN_UID (insn
)];
43388 /* This happens when regrename has to fail a block. */
43389 if (!info
->op_info
)
43392 if (info
->op_info
[opno0
].n_chains
!= 0)
43394 gcc_assert (info
->op_info
[opno0
].n_chains
== 1);
43396 op0c
= regrename_chain_from_id (info
->op_info
[opno0
].heads
[0]->id
);
43397 if (op0c
->target_data_1
+ op0c
->target_data_2
== 0
43398 && !op0c
->cannot_rename
)
43399 cands
.safe_push (op0c
);
43401 op0c
->target_data_1
++;
43403 if (info
->op_info
[opno1
].n_chains
!= 0)
43405 gcc_assert (info
->op_info
[opno1
].n_chains
== 1);
43407 op1c
= regrename_chain_from_id (info
->op_info
[opno1
].heads
[0]->id
);
43408 if (op1c
->target_data_1
+ op1c
->target_data_2
== 0
43409 && !op1c
->cannot_rename
)
43410 cands
.safe_push (op1c
);
43412 op1c
->target_data_2
++;
43418 FOR_EACH_VEC_ELT (cands
, i
, head
)
43420 int old_reg
, best_reg
;
43421 HARD_REG_SET unavailable
;
43423 CLEAR_HARD_REG_SET (unavailable
);
43424 if (head
->target_data_1
)
43425 IOR_HARD_REG_SET (unavailable
, output_risky
);
43426 if (head
->target_data_2
)
43427 IOR_HARD_REG_SET (unavailable
, input_risky
);
43430 reg_class superclass
= regrename_find_superclass (head
, &n_uses
,
43432 old_reg
= head
->regno
;
43433 best_reg
= find_rename_reg (head
, superclass
, &unavailable
,
43435 bool ok
= regrename_do_replace (head
, best_reg
);
43438 fprintf (dump_file
, "Chain %d renamed as %s in %s\n", head
->id
,
43439 reg_names
[best_reg
], reg_class_names
[superclass
]);
43443 regrename_finish ();
43450 INIT_REG_SET (&live
);
43452 FOR_EACH_BB_FN (bb
, cfun
)
43456 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
43457 df_simulate_initialize_backwards (bb
, &live
);
43459 FOR_BB_INSNS_REVERSE (bb
, insn
)
43461 if (!NONDEBUG_INSN_P (insn
))
43464 df_simulate_one_insn_backwards (bb
, insn
, &live
);
43466 if (GET_CODE (PATTERN (insn
)) == USE
43467 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
43470 extract_insn (insn
);
43471 constrain_operands_cached (insn
, reload_completed
);
43473 int modrm
= ix86_get_modrm_for_rop (insn
, recog_data
.operand
,
43474 recog_data
.n_operands
, &opno0
,
43477 || !ix86_rop_should_change_byte_p (modrm
)
43481 rtx oldreg
= recog_data
.operand
[opno1
];
43482 preprocess_constraints (insn
);
43483 const operand_alternative
*alt
= which_op_alt ();
43486 for (i
= 0; i
< recog_data
.n_operands
; i
++)
43488 && alt
[i
].earlyclobber
43489 && reg_overlap_mentioned_p (recog_data
.operand
[i
],
43493 if (i
< recog_data
.n_operands
)
43497 fprintf (dump_file
,
43498 "attempting to fix modrm byte in insn %d:"
43499 " reg %d class %s", INSN_UID (insn
), REGNO (oldreg
),
43500 reg_class_names
[alt
[opno1
].cl
]);
43502 HARD_REG_SET unavailable
;
43503 REG_SET_TO_HARD_REG_SET (unavailable
, &live
);
43504 SET_HARD_REG_BIT (unavailable
, REGNO (oldreg
));
43505 IOR_COMPL_HARD_REG_SET (unavailable
, call_used_reg_set
);
43506 IOR_HARD_REG_SET (unavailable
, fixed_reg_set
);
43507 IOR_HARD_REG_SET (unavailable
, output_risky
);
43508 IOR_COMPL_HARD_REG_SET (unavailable
,
43509 reg_class_contents
[alt
[opno1
].cl
]);
43511 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
43512 if (!TEST_HARD_REG_BIT (unavailable
, i
))
43514 if (i
== FIRST_PSEUDO_REGISTER
)
43517 fprintf (dump_file
, ", none available\n");
43521 fprintf (dump_file
, " -> %d\n", i
);
43522 rtx newreg
= gen_rtx_REG (recog_data
.operand_mode
[opno1
], i
);
43523 validate_change (insn
, recog_data
.operand_loc
[opno1
], newreg
, false);
43524 insn
= emit_insn_before (gen_move_insn (newreg
, oldreg
), insn
);
43529 /* Implement machine specific optimizations. We implement padding of returns
43530 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
43534 /* We are freeing block_for_insn in the toplev to keep compatibility
43535 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43536 compute_bb_for_insn ();
43538 if (flag_mitigate_rop
)
43539 ix86_mitigate_rop ();
43541 if (TARGET_SEH
&& current_function_has_exception_handlers ())
43542 ix86_seh_fixup_eh_fallthru ();
43544 if (optimize
&& optimize_function_for_speed_p (cfun
))
43546 if (TARGET_PAD_SHORT_FUNCTION
)
43547 ix86_pad_short_function ();
43548 else if (TARGET_PAD_RETURNS
)
43549 ix86_pad_returns ();
43550 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43551 if (TARGET_FOUR_JUMP_LIMIT
)
43552 ix86_avoid_jump_mispredicts ();
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
      return true;
  return false;
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x)
	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
	return true;
    }
  return false;
}
43587 /* If profitable, negate (without causing overflow) integer constant
43588 of mode MODE at location LOC. Return true in this case. */
43590 x86_maybe_negate_const_int (rtx
*loc
, machine_mode mode
)
43594 if (!CONST_INT_P (*loc
))
43600 /* DImode x86_64 constants must fit in 32 bits. */
43601 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
43612 gcc_unreachable ();
43615 /* Avoid overflows. */
43616 if (mode_signbit_p (mode
, *loc
))
43619 val
= INTVAL (*loc
);
43621 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43622 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43623 if ((val
< 0 && val
!= -128)
43626 *loc
= GEN_INT (-val
);
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline machine_mode
get_mode_wider_vector (machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  machine_mode n = GET_MODE_WIDER_MODE (o).require ();
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
   fill target with val via vec_duplicate.  */
ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
{
  /* First attempt to recognize VAL as-is.  */
  dup = gen_rtx_VEC_DUPLICATE (mode, val);
  insn = emit_insn (gen_rtx_SET (target, dup));
  if (recog_memoized (insn) < 0)
    {
      machine_mode innermode = GET_MODE_INNER (mode);

      /* If that fails, force VAL into a register.  */
      reg = force_reg (innermode, val);
      if (GET_MODE (reg) != innermode)
        reg = gen_lowpart (innermode, reg);
      XEXP (dup, 0) = reg;
      seq = get_insns ();
      emit_insn_before (seq, insn);
      ok = recog_memoized (insn) >= 0;
43730 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43731 with all elements equal to VAR. Return true if successful. */
43734 ix86_expand_vector_init_duplicate (bool mmx_ok
, machine_mode mode
,
43735 rtx target
, rtx val
)
43759 return ix86_vector_duplicate_value (mode
, target
, val
);
43764 if (TARGET_SSE
|| TARGET_3DNOW_A
)
43768 val
= gen_lowpart (SImode
, val
);
43769 x
= gen_rtx_TRUNCATE (HImode
, val
);
43770 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
43771 emit_insn (gen_rtx_SET (target
, x
));
43783 return ix86_vector_duplicate_value (mode
, target
, val
);
43787 struct expand_vec_perm_d dperm
;
43791 memset (&dperm
, 0, sizeof (dperm
));
43792 dperm
.target
= target
;
43793 dperm
.vmode
= mode
;
43794 dperm
.nelt
= GET_MODE_NUNITS (mode
);
43795 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
43796 dperm
.one_operand_p
= true;
43798 /* Extend to SImode using a paradoxical SUBREG. */
43799 tmp1
= gen_reg_rtx (SImode
);
43800 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
43802 /* Insert the SImode value as low element of a V4SImode vector. */
43803 tmp2
= gen_reg_rtx (V4SImode
);
43804 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
43805 emit_move_insn (dperm
.op0
, gen_lowpart (mode
, tmp2
));
43807 ok
= (expand_vec_perm_1 (&dperm
)
43808 || expand_vec_perm_broadcast_1 (&dperm
));
43816 return ix86_vector_duplicate_value (mode
, target
, val
);
      /* Replicate the value once into the next wider mode and recurse.  */
      machine_mode smode, wsmode, wvmode;

      smode = GET_MODE_INNER (mode);
      wvmode = get_mode_wider_vector (mode);
      wsmode = GET_MODE_INNER (wvmode);

      val = convert_modes (wsmode, smode, val, true);
      x = expand_simple_binop (wsmode, ASHIFT, val,
                               GEN_INT (GET_MODE_BITSIZE (smode)),
                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wvmode);
      ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);

      emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
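
      /* Editorial sketch (not part of GCC, compiled out): scalar model of the
         "replicate into the next wider mode and recurse" step above.  A byte
         is turned into a halfword holding two copies by shift-and-OR, and
         repeating the step yields a word, a doubleword, and so on.  */
#if 0
#include <stdint.h>

static uint32_t
broadcast_byte_model (uint8_t b)
{
  uint16_t two  = (uint16_t) b | ((uint16_t) b << 8);     /* val |= val << bits */
  uint32_t four = (uint32_t) two | ((uint32_t) two << 16); /* same step, wider */
  return four;                                             /* four copies of B */
}
#endif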
43848 return ix86_vector_duplicate_value (mode
, target
, val
);
43851 machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
43852 rtx x
= gen_reg_rtx (hvmode
);
43854 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
43857 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
43858 emit_insn (gen_rtx_SET (target
, x
));
43864 if (TARGET_AVX512BW
)
43865 return ix86_vector_duplicate_value (mode
, target
, val
);
43868 machine_mode hvmode
= (mode
== V32HImode
? V16HImode
: V32QImode
);
43869 rtx x
= gen_reg_rtx (hvmode
);
43871 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
43874 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
43875 emit_insn (gen_rtx_SET (target
, x
));
43884 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43885 whose ONE_VAR element is VAR, and other elements are zero. Return true
43889 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, machine_mode mode
,
43890 rtx target
, rtx var
, int one_var
)
43892 machine_mode vsimode
;
43895 bool use_vector_set
= false;
43900 /* For SSE4.1, we normally use vector set. But if the second
43901 element is zero and inter-unit moves are OK, we use movq
43903 use_vector_set
= (TARGET_64BIT
&& TARGET_SSE4_1
43904 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
43910 use_vector_set
= TARGET_SSE4_1
;
43913 use_vector_set
= TARGET_SSE2
;
43916 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
43923 use_vector_set
= TARGET_AVX
;
43926 /* Use ix86_expand_vector_set in 64bit mode only. */
43927 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
43933 if (use_vector_set
)
43935 emit_insn (gen_rtx_SET (target
, CONST0_RTX (mode
)));
43936 var
= force_reg (GET_MODE_INNER (mode
), var
);
43937 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
43953 var
= force_reg (GET_MODE_INNER (mode
), var
);
43954 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
43955 emit_insn (gen_rtx_SET (target
, x
));
43960 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
43961 new_target
= gen_reg_rtx (mode
);
43963 new_target
= target
;
43964 var
= force_reg (GET_MODE_INNER (mode
), var
);
43965 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
43966 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
43967 emit_insn (gen_rtx_SET (new_target
, x
));
43970 /* We need to shuffle the value to the correct position, so
43971 create a new pseudo to store the intermediate result. */
43973 /* With SSE2, we can use the integer shuffle insns. */
43974 if (mode
!= V4SFmode
&& TARGET_SSE2
)
43976 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
43978 GEN_INT (one_var
== 1 ? 0 : 1),
43979 GEN_INT (one_var
== 2 ? 0 : 1),
43980 GEN_INT (one_var
== 3 ? 0 : 1)));
43981 if (target
!= new_target
)
43982 emit_move_insn (target
, new_target
);
43986 /* Otherwise convert the intermediate result to V4SFmode and
43987 use the SSE1 shuffle instructions. */
43988 if (mode
!= V4SFmode
)
43990 tmp
= gen_reg_rtx (V4SFmode
);
43991 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
43996 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
43998 GEN_INT (one_var
== 1 ? 0 : 1),
43999 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
44000 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
44002 if (mode
!= V4SFmode
)
44003 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
44004 else if (tmp
!= target
)
44005 emit_move_insn (target
, tmp
);
44007 else if (target
!= new_target
)
44008 emit_move_insn (target
, new_target
);
44013 vsimode
= V4SImode
;
44019 vsimode
= V2SImode
;
44025 /* Zero extend the variable element to SImode and recurse. */
44026 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
44028 x
= gen_reg_rtx (vsimode
);
44029 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
44031 gcc_unreachable ();
44033 emit_move_insn (target
, gen_lowpart (mode
, x
));
44041 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44042 consisting of the values in VALS. It is known that all elements
44043 except ONE_VAR are constants. Return true if successful. */
44046 ix86_expand_vector_init_one_var (bool mmx_ok
, machine_mode mode
,
44047 rtx target
, rtx vals
, int one_var
)
44049 rtx var
= XVECEXP (vals
, 0, one_var
);
44050 machine_mode wmode
;
44053 const_vec
= copy_rtx (vals
);
44054 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
44055 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
44063 /* For the two element vectors, it's just as easy to use
44064 the general case. */
44068 /* Use ix86_expand_vector_set in 64bit mode only. */
44092 /* There's no way to set one QImode entry easily. Combine
44093 the variable value with its adjacent constant value, and
44094 promote to an HImode set. */
44095 x
= XVECEXP (vals
, 0, one_var
^ 1);
44098 var
= convert_modes (HImode
, QImode
, var
, true);
44099 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
44100 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
44101 x
= GEN_INT (INTVAL (x
) & 0xff);
44105 var
= convert_modes (HImode
, QImode
, var
, true);
44106 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
44108 if (x
!= const0_rtx
)
44109 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
44110 1, OPTAB_LIB_WIDEN
);
44112 x
= gen_reg_rtx (wmode
);
44113 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
44114 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
44116 emit_move_insn (target
, gen_lowpart (mode
, x
));
44123 emit_move_insn (target
, const_vec
);
44124 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
44128 /* A subroutine of ix86_expand_vector_init_general. Use vector
44129 concatenate to handle the most general case: all values variable,
44130 and none identical. */
44133 ix86_expand_vector_init_concat (machine_mode mode
,
44134 rtx target
, rtx
*ops
, int n
)
44136 machine_mode cmode
, hmode
= VOIDmode
, gmode
= VOIDmode
;
44137 rtx first
[16], second
[8], third
[4];
44189 gcc_unreachable ();
44192 if (!register_operand (ops
[1], cmode
))
44193 ops
[1] = force_reg (cmode
, ops
[1]);
44194 if (!register_operand (ops
[0], cmode
))
44195 ops
[0] = force_reg (cmode
, ops
[0]);
44196 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_CONCAT (mode
, ops
[0],
44216 gcc_unreachable ();
44240 gcc_unreachable ();
44258 gcc_unreachable ();
44263 /* FIXME: We process inputs backward to help RA. PR 36222. */
44266 for (; i
> 0; i
-= 2, j
--)
44268 first
[j
] = gen_reg_rtx (cmode
);
44269 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
44270 ix86_expand_vector_init (false, first
[j
],
44271 gen_rtx_PARALLEL (cmode
, v
));
44277 gcc_assert (hmode
!= VOIDmode
);
44278 gcc_assert (gmode
!= VOIDmode
);
44279 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
44281 second
[j
] = gen_reg_rtx (hmode
);
44282 ix86_expand_vector_init_concat (hmode
, second
[j
],
44286 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
44288 third
[j
] = gen_reg_rtx (gmode
);
44289 ix86_expand_vector_init_concat (gmode
, third
[j
],
44293 ix86_expand_vector_init_concat (mode
, target
, third
, n
);
44297 gcc_assert (hmode
!= VOIDmode
);
44298 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
44300 second
[j
] = gen_reg_rtx (hmode
);
44301 ix86_expand_vector_init_concat (hmode
, second
[j
],
44305 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
44308 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
44312 gcc_unreachable ();
44316 /* A subroutine of ix86_expand_vector_init_general. Use vector
44317 interleave to handle the most general case: all values variable,
44318 and none identical. */
44321 ix86_expand_vector_init_interleave (machine_mode mode
,
44322 rtx target
, rtx
*ops
, int n
)
44324 machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
44327 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
44328 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
44329 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
44334 gen_load_even
= gen_vec_setv8hi
;
44335 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
44336 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
44337 inner_mode
= HImode
;
44338 first_imode
= V4SImode
;
44339 second_imode
= V2DImode
;
44340 third_imode
= VOIDmode
;
44343 gen_load_even
= gen_vec_setv16qi
;
44344 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
44345 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
44346 inner_mode
= QImode
;
44347 first_imode
= V8HImode
;
44348 second_imode
= V4SImode
;
44349 third_imode
= V2DImode
;
44352 gcc_unreachable ();
44355 for (i
= 0; i
< n
; i
++)
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
44358 op0
= gen_reg_rtx (SImode
);
44359 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
44361 /* Insert the SImode value as low element of V4SImode vector. */
44362 op1
= gen_reg_rtx (V4SImode
);
44363 op0
= gen_rtx_VEC_MERGE (V4SImode
,
44364 gen_rtx_VEC_DUPLICATE (V4SImode
,
44366 CONST0_RTX (V4SImode
),
44368 emit_insn (gen_rtx_SET (op1
, op0
));
      /* Cast the V4SImode vector back to a vector in original mode.  */
44371 op0
= gen_reg_rtx (mode
);
44372 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
44374 /* Load even elements into the second position. */
44375 emit_insn (gen_load_even (op0
,
44376 force_reg (inner_mode
,
44380 /* Cast vector to FIRST_IMODE vector. */
44381 ops
[i
] = gen_reg_rtx (first_imode
);
44382 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
44385 /* Interleave low FIRST_IMODE vectors. */
44386 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
44388 op0
= gen_reg_rtx (first_imode
);
44389 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
44391 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44392 ops
[j
] = gen_reg_rtx (second_imode
);
44393 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
44396 /* Interleave low SECOND_IMODE vectors. */
44397 switch (second_imode
)
44400 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
44402 op0
= gen_reg_rtx (second_imode
);
44403 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
44406 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44408 ops
[j
] = gen_reg_rtx (third_imode
);
44409 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
44411 second_imode
= V2DImode
;
44412 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
44416 op0
= gen_reg_rtx (second_imode
);
44417 emit_insn (gen_interleave_second_low (op0
, ops
[0],
44420 /* Cast the SECOND_IMODE vector back to a vector on original
44422 emit_insn (gen_rtx_SET (target
, gen_lowpart (mode
, op0
)));
44426 gcc_unreachable ();
44430 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44431 all values variable, and none identical. */
44434 ix86_expand_vector_init_general (bool mmx_ok
, machine_mode mode
,
44435 rtx target
, rtx vals
)
44437 rtx ops
[64], op0
, op1
, op2
, op3
, op4
, op5
;
44438 machine_mode half_mode
= VOIDmode
;
44439 machine_mode quarter_mode
= VOIDmode
;
44446 if (!mmx_ok
&& !TARGET_SSE
)
44462 n
= GET_MODE_NUNITS (mode
);
44463 for (i
= 0; i
< n
; i
++)
44464 ops
[i
] = XVECEXP (vals
, 0, i
);
44465 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
44469 for (i
= 0; i
< 2; i
++)
44470 ops
[i
] = gen_lowpart (V2DImode
, XVECEXP (vals
, 0, i
));
44471 op0
= gen_reg_rtx (V4DImode
);
44472 ix86_expand_vector_init_concat (V4DImode
, op0
, ops
, 2);
44473 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), op0
));
44477 for (i
= 0; i
< 4; i
++)
44478 ops
[i
] = gen_lowpart (V2DImode
, XVECEXP (vals
, 0, i
));
44479 ops
[4] = gen_reg_rtx (V4DImode
);
44480 ix86_expand_vector_init_concat (V4DImode
, ops
[4], ops
, 2);
44481 ops
[5] = gen_reg_rtx (V4DImode
);
44482 ix86_expand_vector_init_concat (V4DImode
, ops
[5], ops
+ 2, 2);
44483 op0
= gen_reg_rtx (V8DImode
);
44484 ix86_expand_vector_init_concat (V8DImode
, op0
, ops
+ 4, 2);
44485 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), op0
));
44489 half_mode
= V16QImode
;
44493 half_mode
= V8HImode
;
44497 n
= GET_MODE_NUNITS (mode
);
44498 for (i
= 0; i
< n
; i
++)
44499 ops
[i
] = XVECEXP (vals
, 0, i
);
44500 op0
= gen_reg_rtx (half_mode
);
44501 op1
= gen_reg_rtx (half_mode
);
44502 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
44504 ix86_expand_vector_init_interleave (half_mode
, op1
,
44505 &ops
[n
>> 1], n
>> 2);
44506 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
44510 quarter_mode
= V16QImode
;
44511 half_mode
= V32QImode
;
44515 quarter_mode
= V8HImode
;
44516 half_mode
= V16HImode
;
44520 n
= GET_MODE_NUNITS (mode
);
44521 for (i
= 0; i
< n
; i
++)
44522 ops
[i
] = XVECEXP (vals
, 0, i
);
44523 op0
= gen_reg_rtx (quarter_mode
);
44524 op1
= gen_reg_rtx (quarter_mode
);
44525 op2
= gen_reg_rtx (quarter_mode
);
44526 op3
= gen_reg_rtx (quarter_mode
);
44527 op4
= gen_reg_rtx (half_mode
);
44528 op5
= gen_reg_rtx (half_mode
);
44529 ix86_expand_vector_init_interleave (quarter_mode
, op0
, ops
,
44531 ix86_expand_vector_init_interleave (quarter_mode
, op1
,
44532 &ops
[n
>> 2], n
>> 3);
44533 ix86_expand_vector_init_interleave (quarter_mode
, op2
,
44534 &ops
[n
>> 1], n
>> 3);
44535 ix86_expand_vector_init_interleave (quarter_mode
, op3
,
44536 &ops
[(n
>> 1) | (n
>> 2)], n
>> 3);
44537 emit_insn (gen_rtx_SET (op4
, gen_rtx_VEC_CONCAT (half_mode
, op0
, op1
)));
44538 emit_insn (gen_rtx_SET (op5
, gen_rtx_VEC_CONCAT (half_mode
, op2
, op3
)));
44539 emit_insn (gen_rtx_SET (target
, gen_rtx_VEC_CONCAT (mode
, op4
, op5
)));
44543 if (!TARGET_SSE4_1
)
44551 /* Don't use ix86_expand_vector_init_interleave if we can't
44552 move from GPR to SSE register directly. */
44553 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
44556 n
= GET_MODE_NUNITS (mode
);
44557 for (i
= 0; i
< n
; i
++)
44558 ops
[i
] = XVECEXP (vals
, 0, i
);
44559 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
44567 gcc_unreachable ();
      int i, j, n_elts, n_words, n_elt_per_word;
      machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                          word, 1, OPTAB_LIB_WIDEN);
              word = expand_simple_binop (word_mode, IOR, word, elt,
                                          word, 1, OPTAB_LIB_WIDEN);
            }
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_clobber (tmp);
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          gcc_assert (word_mode == SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
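
      /* Editorial sketch (not part of GCC, compiled out): scalar model of the
         generic path above, which packs the elements of one machine word by
         repeated shift/OR, taking them from the highest-indexed element of
         the word down so that element 0 ends up in the low bits.  The widths
         chosen here are illustrative only.  */
#if 0
#include <stdint.h>

static uint32_t
pack_word_model (const uint8_t *elts, int n_elt_per_word, int elt_bits)
{
  uint32_t word = 0;
  for (int j = 0; j < n_elt_per_word; ++j)
    word = (word << elt_bits) | elts[n_elt_per_word - j - 1];
  return word;
}
#endif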
44627 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44628 instructions unless MMX_OK is true. */
44631 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
44633 machine_mode mode
= GET_MODE (target
);
44634 machine_mode inner_mode
= GET_MODE_INNER (mode
);
44635 int n_elts
= GET_MODE_NUNITS (mode
);
44636 int n_var
= 0, one_var
= -1;
44637 bool all_same
= true, all_const_zero
= true;
44641 /* Handle first initialization from vector elts. */
44642 if (n_elts
!= XVECLEN (vals
, 0))
44644 rtx subtarget
= target
;
44645 x
= XVECEXP (vals
, 0, 0);
44646 gcc_assert (GET_MODE_INNER (GET_MODE (x
)) == inner_mode
);
44647 if (GET_MODE_NUNITS (GET_MODE (x
)) * 2 == n_elts
)
44649 rtx ops
[2] = { XVECEXP (vals
, 0, 0), XVECEXP (vals
, 0, 1) };
44650 if (inner_mode
== QImode
|| inner_mode
== HImode
)
44652 unsigned int n_bits
= n_elts
* GET_MODE_SIZE (inner_mode
);
44653 mode
= mode_for_vector (SImode
, n_bits
/ 4).require ();
44654 inner_mode
= mode_for_vector (SImode
, n_bits
/ 8).require ();
44655 ops
[0] = gen_lowpart (inner_mode
, ops
[0]);
44656 ops
[1] = gen_lowpart (inner_mode
, ops
[1]);
44657 subtarget
= gen_reg_rtx (mode
);
44659 ix86_expand_vector_init_concat (mode
, subtarget
, ops
, 2);
44660 if (subtarget
!= target
)
44661 emit_move_insn (target
, gen_lowpart (GET_MODE (target
), subtarget
));
44664 gcc_unreachable ();
44667 for (i
= 0; i
< n_elts
; ++i
)
44669 x
= XVECEXP (vals
, 0, i
);
44670 if (!(CONST_SCALAR_INT_P (x
)
44671 || CONST_DOUBLE_P (x
)
44672 || CONST_FIXED_P (x
)))
44673 n_var
++, one_var
= i
;
44674 else if (x
!= CONST0_RTX (inner_mode
))
44675 all_const_zero
= false;
44676 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
44680 /* Constants are best loaded from the constant pool. */
44683 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
44687 /* If all values are identical, broadcast the value. */
44689 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
44690 XVECEXP (vals
, 0, 0)))
44693 /* Values where only one field is non-constant are best loaded from
44694 the pool and overwritten via move later. */
44698 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
44699 XVECEXP (vals
, 0, one_var
),
44703 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
44707 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
44711 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
44713 machine_mode mode
= GET_MODE (target
);
44714 machine_mode inner_mode
= GET_MODE_INNER (mode
);
44715 machine_mode half_mode
;
44716 bool use_vec_merge
= false;
44718 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
44720 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
44721 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
44722 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
44723 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
44724 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
44725 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
44727 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
44729 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
44730 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
44731 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
44732 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
44733 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
44734 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
44737 machine_mode mmode
= VOIDmode
;
44738 rtx (*gen_blendm
) (rtx
, rtx
, rtx
, rtx
);
44746 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
44747 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
44749 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
44751 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
44752 emit_insn (gen_rtx_SET (target
, tmp
));
44758 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
44762 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
44763 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
44765 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
44767 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
44768 emit_insn (gen_rtx_SET (target
, tmp
));
44775 /* For the two element vectors, we implement a VEC_CONCAT with
44776 the extraction of the other element. */
44778 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
44779 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
44782 op0
= val
, op1
= tmp
;
44784 op0
= tmp
, op1
= val
;
44786 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
44787 emit_insn (gen_rtx_SET (target
, tmp
));
44792 use_vec_merge
= TARGET_SSE4_1
;
44799 use_vec_merge
= true;
44803 /* tmp = target = A B C D */
44804 tmp
= copy_to_reg (target
);
44805 /* target = A A B B */
44806 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
44807 /* target = X A B B */
44808 ix86_expand_vector_set (false, target
, val
, 0);
44809 /* target = A X C D */
44810 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
44811 const1_rtx
, const0_rtx
,
44812 GEN_INT (2+4), GEN_INT (3+4)));
44816 /* tmp = target = A B C D */
44817 tmp
= copy_to_reg (target
);
44818 /* tmp = X B C D */
44819 ix86_expand_vector_set (false, tmp
, val
, 0);
44820 /* target = A B X D */
44821 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
44822 const0_rtx
, const1_rtx
,
44823 GEN_INT (0+4), GEN_INT (3+4)));
44827 /* tmp = target = A B C D */
44828 tmp
= copy_to_reg (target
);
44829 /* tmp = X B C D */
44830 ix86_expand_vector_set (false, tmp
, val
, 0);
44831 /* target = A B X D */
44832 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
44833 const0_rtx
, const1_rtx
,
44834 GEN_INT (2+4), GEN_INT (0+4)));
44838 gcc_unreachable ();
44843 use_vec_merge
= TARGET_SSE4_1
;
44847 /* Element 0 handled by vec_merge below. */
44850 use_vec_merge
= true;
44856 /* With SSE2, use integer shuffles to swap element 0 and ELT,
44857 store into element 0, then shuffle them back. */
44861 order
[0] = GEN_INT (elt
);
44862 order
[1] = const1_rtx
;
44863 order
[2] = const2_rtx
;
44864 order
[3] = GEN_INT (3);
44865 order
[elt
] = const0_rtx
;
44867 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
44868 order
[1], order
[2], order
[3]));
44870 ix86_expand_vector_set (false, target
, val
, 0);
44872 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
44873 order
[1], order
[2], order
[3]));
44877 /* For SSE1, we have to reuse the V4SF code. */
44878 rtx t
= gen_reg_rtx (V4SFmode
);
44879 emit_move_insn (t
, gen_lowpart (V4SFmode
, target
));
44880 ix86_expand_vector_set (false, t
, gen_lowpart (SFmode
, val
), elt
);
44881 emit_move_insn (target
, gen_lowpart (mode
, t
));
44886 use_vec_merge
= TARGET_SSE2
;
44889 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
44893 use_vec_merge
= TARGET_SSE4_1
;
44900 half_mode
= V16QImode
;
44906 half_mode
= V8HImode
;
44912 half_mode
= V4SImode
;
44918 half_mode
= V2DImode
;
44924 half_mode
= V4SFmode
;
44930 half_mode
= V2DFmode
;
44936 /* Compute offset. */
44940 gcc_assert (i
<= 1);
44942 /* Extract the half. */
44943 tmp
= gen_reg_rtx (half_mode
);
44944 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
44946 /* Put val in tmp at elt. */
44947 ix86_expand_vector_set (false, tmp
, val
, elt
);
44950 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
44954 if (TARGET_AVX512F
)
44957 gen_blendm
= gen_avx512f_blendmv8df
;
44962 if (TARGET_AVX512F
)
44965 gen_blendm
= gen_avx512f_blendmv8di
;
44970 if (TARGET_AVX512F
)
44973 gen_blendm
= gen_avx512f_blendmv16sf
;
44978 if (TARGET_AVX512F
)
44981 gen_blendm
= gen_avx512f_blendmv16si
;
44986 if (TARGET_AVX512F
&& TARGET_AVX512BW
)
44989 gen_blendm
= gen_avx512bw_blendmv32hi
;
44994 if (TARGET_AVX512F
&& TARGET_AVX512BW
)
44997 gen_blendm
= gen_avx512bw_blendmv64qi
;
45005 if (mmode
!= VOIDmode
)
45007 tmp
= gen_reg_rtx (mode
);
45008 emit_insn (gen_rtx_SET (tmp
, gen_rtx_VEC_DUPLICATE (mode
, val
)));
45009 /* The avx512*_blendm<mode> expanders have different operand order
45010 from VEC_MERGE. In VEC_MERGE, the first input operand is used for
45011 elements where the mask is set and second input operand otherwise,
45012 in {sse,avx}*_*blend* the first input operand is used for elements
45013 where the mask is clear and second input operand otherwise. */
45014 emit_insn (gen_blendm (target
, target
, tmp
,
45016 gen_int_mode (1 << elt
, mmode
))));
45018 else if (use_vec_merge
)
45020 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
45021 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
45022 emit_insn (gen_rtx_SET (target
, tmp
));
45026 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
45028 emit_move_insn (mem
, target
);
45030 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
45031 emit_move_insn (tmp
, val
);
45033 emit_move_insn (target
, mem
);
45038 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
45040 machine_mode mode
= GET_MODE (vec
);
45041 machine_mode inner_mode
= GET_MODE_INNER (mode
);
45042 bool use_vec_extr
= false;
45057 use_vec_extr
= true;
45061 use_vec_extr
= TARGET_SSE4_1
;
45073 tmp
= gen_reg_rtx (mode
);
45074 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
45075 GEN_INT (elt
), GEN_INT (elt
),
45076 GEN_INT (elt
+4), GEN_INT (elt
+4)));
45080 tmp
= gen_reg_rtx (mode
);
45081 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
45085 gcc_unreachable ();
45088 use_vec_extr
= true;
45093 use_vec_extr
= TARGET_SSE4_1
;
45107 tmp
= gen_reg_rtx (mode
);
45108 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
45109 GEN_INT (elt
), GEN_INT (elt
),
45110 GEN_INT (elt
), GEN_INT (elt
)));
45114 tmp
= gen_reg_rtx (mode
);
45115 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
45119 gcc_unreachable ();
45122 use_vec_extr
= true;
45127 /* For SSE1, we have to reuse the V4SF code. */
45128 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
45129 gen_lowpart (V4SFmode
, vec
), elt
);
45135 use_vec_extr
= TARGET_SSE2
;
45138 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
45142 use_vec_extr
= TARGET_SSE4_1
;
45148 tmp
= gen_reg_rtx (V4SFmode
);
45150 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
45152 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
45153 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
45161 tmp
= gen_reg_rtx (V2DFmode
);
45163 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
45165 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
45166 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
45174 tmp
= gen_reg_rtx (V16QImode
);
45176 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
45178 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
45179 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
45187 tmp
= gen_reg_rtx (V8HImode
);
45189 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
45191 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
45192 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
45200 tmp
= gen_reg_rtx (V4SImode
);
45202 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
45204 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
45205 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
45213 tmp
= gen_reg_rtx (V2DImode
);
45215 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
45217 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
45218 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
45224 if (TARGET_AVX512BW
)
45226 tmp
= gen_reg_rtx (V16HImode
);
45228 emit_insn (gen_vec_extract_lo_v32hi (tmp
, vec
));
45230 emit_insn (gen_vec_extract_hi_v32hi (tmp
, vec
));
45231 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
45237 if (TARGET_AVX512BW
)
45239 tmp
= gen_reg_rtx (V32QImode
);
45241 emit_insn (gen_vec_extract_lo_v64qi (tmp
, vec
));
45243 emit_insn (gen_vec_extract_hi_v64qi (tmp
, vec
));
45244 ix86_expand_vector_extract (false, target
, tmp
, elt
& 31);
45250 tmp
= gen_reg_rtx (V8SFmode
);
45252 emit_insn (gen_vec_extract_lo_v16sf (tmp
, vec
));
45254 emit_insn (gen_vec_extract_hi_v16sf (tmp
, vec
));
45255 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
45259 tmp
= gen_reg_rtx (V4DFmode
);
45261 emit_insn (gen_vec_extract_lo_v8df (tmp
, vec
));
45263 emit_insn (gen_vec_extract_hi_v8df (tmp
, vec
));
45264 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
45268 tmp
= gen_reg_rtx (V8SImode
);
45270 emit_insn (gen_vec_extract_lo_v16si (tmp
, vec
));
45272 emit_insn (gen_vec_extract_hi_v16si (tmp
, vec
));
45273 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
45277 tmp
= gen_reg_rtx (V4DImode
);
45279 emit_insn (gen_vec_extract_lo_v8di (tmp
, vec
));
45281 emit_insn (gen_vec_extract_hi_v8di (tmp
, vec
));
45282 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
      /* ??? Could extract the appropriate HImode element and shift.  */

      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (target, tmp));

      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
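
      /* Editorial sketch (not part of GCC, compiled out): the fallback above
         simply spills the whole vector to a stack slot and loads one element
         back.  A C model of the same idea; the vector type is illustrative.  */
#if 0
#include <string.h>

typedef float v4sf __attribute__ ((vector_size (16)));

static float
extract_via_memory_model (v4sf vec, int elt)
{
  float mem[4];
  memcpy (mem, &vec, sizeof mem);       /* emit_move_insn (mem, vec) */
  return mem[elt];                      /* load the ELT'th inner-mode slot */
}
#endif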
45316 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45317 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45318 The upper bits of DEST are undefined, though they shouldn't cause
45319 exceptions (some bits from src or all zeros are ok). */
45322 emit_reduc_half (rtx dest
, rtx src
, int i
)
45325 switch (GET_MODE (src
))
45329 tem
= gen_sse_movhlps (dest
, src
, src
);
45331 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
45332 GEN_INT (1 + 4), GEN_INT (1 + 4));
45335 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
45341 d
= gen_reg_rtx (V1TImode
);
45342 tem
= gen_sse2_lshrv1ti3 (d
, gen_lowpart (V1TImode
, src
),
45347 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
45349 tem
= gen_avx_shufps256 (dest
, src
, src
,
45350 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
45354 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
45356 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
45364 if (GET_MODE (dest
) != V4DImode
)
45365 d
= gen_reg_rtx (V4DImode
);
45366 tem
= gen_avx2_permv2ti (d
, gen_lowpart (V4DImode
, src
),
45367 gen_lowpart (V4DImode
, src
),
45372 d
= gen_reg_rtx (V2TImode
);
45373 tem
= gen_avx2_lshrv2ti3 (d
, gen_lowpart (V2TImode
, src
),
45384 tem
= gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode
, dest
),
45385 gen_lowpart (V16SImode
, src
),
45386 gen_lowpart (V16SImode
, src
),
45387 GEN_INT (0x4 + (i
== 512 ? 4 : 0)),
45388 GEN_INT (0x5 + (i
== 512 ? 4 : 0)),
45389 GEN_INT (0x6 + (i
== 512 ? 4 : 0)),
45390 GEN_INT (0x7 + (i
== 512 ? 4 : 0)),
45391 GEN_INT (0xC), GEN_INT (0xD),
45392 GEN_INT (0xE), GEN_INT (0xF),
45393 GEN_INT (0x10), GEN_INT (0x11),
45394 GEN_INT (0x12), GEN_INT (0x13),
45395 GEN_INT (0x14), GEN_INT (0x15),
45396 GEN_INT (0x16), GEN_INT (0x17));
45398 tem
= gen_avx512f_pshufd_1 (gen_lowpart (V16SImode
, dest
),
45399 gen_lowpart (V16SImode
, src
),
45400 GEN_INT (i
== 128 ? 0x2 : 0x1),
45404 GEN_INT (i
== 128 ? 0x6 : 0x5),
45408 GEN_INT (i
== 128 ? 0xA : 0x9),
45412 GEN_INT (i
== 128 ? 0xE : 0xD),
45418 gcc_unreachable ();
45422 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), d
));
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  machine_mode mode = GET_MODE (in);

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_UNIT_BITSIZE (mode);
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
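
      /* Editorial sketch (not part of GCC, compiled out): scalar model of the
         reduction loop above.  Each emit_reduc_half step brings the upper
         half of the vector down and FN combines it with the lower half, so
         the active width halves until the result sits in element 0.  */
#if 0
static int
reduce_add_model (int *v, int n)        /* n is a power of two */
{
  for (int width = n; width > 1; width /= 2)
    for (int i = 0; i < width / 2; ++i)
      v[i] = v[i] + v[i + width / 2];   /* FN applied lane-wise */
  return v[0];
}
#endif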
45459 /* Target hook for scalar_mode_supported_p. */
45461 ix86_scalar_mode_supported_p (scalar_mode mode
)
45463 if (DECIMAL_FLOAT_MODE_P (mode
))
45464 return default_decimal_float_supported_p ();
45465 else if (mode
== TFmode
)
45468 return default_scalar_mode_supported_p (mode
);
45471 /* Implements target hook vector_mode_supported_p. */
45473 ix86_vector_mode_supported_p (machine_mode mode
)
45475 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
45477 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
45479 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
45481 if (TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
45483 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
45485 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
45490 /* Target hook for c_mode_for_suffix. */
45491 static machine_mode
45492 ix86_c_mode_for_suffix (char suffix
)
45502 /* Worker function for TARGET_MD_ASM_ADJUST.
45504 We implement asm flag outputs, and maintain source compatibility
45505 with the old cc0-based compiler. */
45508 ix86_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> &/*inputs*/,
45509 vec
<const char *> &constraints
,
45510 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
45512 clobbers
.safe_push (gen_rtx_REG (CCFPmode
, FPSR_REG
));
45513 SET_HARD_REG_BIT (clobbered_regs
, FPSR_REG
);
45515 bool saw_asm_flag
= false;
45518 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
45520 const char *con
= constraints
[i
];
45521 if (strncmp (con
, "=@cc", 4) != 0)
45524 if (strchr (con
, ',') != NULL
)
45526 error ("alternatives not allowed in asm flag output");
45530 bool invert
= false;
45532 invert
= true, con
++;
45534 machine_mode mode
= CCmode
;
45535 rtx_code code
= UNKNOWN
;
45541 mode
= CCAmode
, code
= EQ
;
45542 else if (con
[1] == 'e' && con
[2] == 0)
45543 mode
= CCCmode
, code
= NE
;
45547 mode
= CCCmode
, code
= EQ
;
45548 else if (con
[1] == 'e' && con
[2] == 0)
45549 mode
= CCAmode
, code
= NE
;
45553 mode
= CCCmode
, code
= EQ
;
45557 mode
= CCZmode
, code
= EQ
;
45561 mode
= CCGCmode
, code
= GT
;
45562 else if (con
[1] == 'e' && con
[2] == 0)
45563 mode
= CCGCmode
, code
= GE
;
45567 mode
= CCGCmode
, code
= LT
;
45568 else if (con
[1] == 'e' && con
[2] == 0)
45569 mode
= CCGCmode
, code
= LE
;
45573 mode
= CCOmode
, code
= EQ
;
45577 mode
= CCPmode
, code
= EQ
;
45581 mode
= CCSmode
, code
= EQ
;
45585 mode
= CCZmode
, code
= EQ
;
45588 if (code
== UNKNOWN
)
45590 error ("unknown asm flag output %qs", constraints
[i
]);
45594 code
= reverse_condition (code
);
45596 rtx dest
= outputs
[i
];
45599 /* This is the first asm flag output. Here we put the flags
45600 register in as the real output and adjust the condition to
45602 constraints
[i
] = "=Bf";
45603 outputs
[i
] = gen_rtx_REG (CCmode
, FLAGS_REG
);
45604 saw_asm_flag
= true;
45608 /* We don't need the flags register as output twice. */
45609 constraints
[i
] = "=X";
45610 outputs
[i
] = gen_rtx_SCRATCH (SImode
);
45613 rtx x
= gen_rtx_REG (mode
, FLAGS_REG
);
45614 x
= gen_rtx_fmt_ee (code
, QImode
, x
, const0_rtx
);
45616 machine_mode dest_mode
= GET_MODE (dest
);
45617 if (!SCALAR_INT_MODE_P (dest_mode
))
45619 error ("invalid type for asm flag output");
45623 if (dest_mode
== DImode
&& !TARGET_64BIT
)
45624 dest_mode
= SImode
;
45626 if (dest_mode
!= QImode
)
45628 rtx destqi
= gen_reg_rtx (QImode
);
45629 emit_insn (gen_rtx_SET (destqi
, x
));
45631 if (TARGET_ZERO_EXTEND_WITH_AND
45632 && optimize_function_for_speed_p (cfun
))
45634 x
= force_reg (dest_mode
, const0_rtx
);
45636 emit_insn (gen_movstrictqi
45637 (gen_lowpart (QImode
, x
), destqi
));
45640 x
= gen_rtx_ZERO_EXTEND (dest_mode
, destqi
);
45643 if (dest_mode
!= GET_MODE (dest
))
45645 rtx tmp
= gen_reg_rtx (SImode
);
45647 emit_insn (gen_rtx_SET (tmp
, x
));
45648 emit_insn (gen_zero_extendsidi2 (dest
, tmp
));
45651 emit_insn (gen_rtx_SET (dest
, x
));
  rtx_insn *seq = get_insns ();

  /* If we had no asm flag outputs, clobber the flags.  */
  clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
  SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
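
/* Editorial sketch (not part of GCC, compiled out): user-level view of the
   "=@cc<cond>" asm flag outputs handled above.  The output constraint names
   a condition on the flags register instead of a general register; the hook
   rewrites it into a FLAGS_REG output plus a setcc/zero-extend sequence.  */
#if 0
static int
below_model (unsigned a, unsigned b)
{
  int is_below;
  /* The cmp sets CF when a < b (unsigned); "=@ccc" reads that flag.  */
  __asm__ ("cmp %2, %1" : "=@ccc" (is_below) : "r" (a), "r" (b));
  return is_below;
}
#endif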
45667 /* Implements target vector targetm.asm.encode_section_info. */
45669 static void ATTRIBUTE_UNUSED
45670 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
45672 default_encode_section_info (decl
, rtl
, first
);
45674 if (ix86_in_large_data_p (decl
))
45675 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
45678 /* Worker function for REVERSE_CONDITION. */
45681 ix86_reverse_condition (enum rtx_code code
, machine_mode mode
)
45683 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
45684 ? reverse_condition (code
)
45685 : reverse_condition_maybe_unordered (code
));
45688 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45692 output_387_reg_move (rtx_insn
*insn
, rtx
*operands
)
45694 if (REG_P (operands
[0]))
45696 if (REG_P (operands
[1])
45697 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
45699 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
45700 return output_387_ffreep (operands
, 0);
45701 return "fstp\t%y0";
45703 if (STACK_TOP_P (operands
[0]))
45704 return "fld%Z1\t%y1";
45707 else if (MEM_P (operands
[0]))
45709 gcc_assert (REG_P (operands
[1]));
45710 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
45711 return "fstp%Z0\t%y0";
45714 /* There is no non-popping store to memory for XFmode.
45715 So if we need one, follow the store with a load. */
45716 if (GET_MODE (operands
[0]) == XFmode
)
45717 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45719 return "fst%Z0\t%y0";
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx_code_label *label1 = gen_label_rtx ();
  rtx_code_label *label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    const_double_from_real_value (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
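
/* Editorial sketch (not part of GCC, compiled out): scalar model of the
   split above.  For |x| below 1 - sqrt(1/2) (~0.2928932...), fyl2xp1 works
   from x itself and stays accurate near zero; otherwise 1+x is formed
   explicitly and fyl2x is used.  Both branches compute the same value; the
   split only matters for the accuracy of the x87 instructions.  */
#if 0
#include <math.h>

static double
log1p_model (double x)
{
  const double threshold = 0.29289321881345247561810596348408353;
  if (fabs (x) < threshold)
    return log1p (x);           /* fyl2xp1 path: accurate for small x */
  double tmp = 1.0 + x;         /* tmp = 1 + op1 */
  return log (tmp);             /* fyl2x path */
}
#endif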
45792 /* Emit code for round calculation. */
45793 void ix86_emit_i387_round (rtx op0
, rtx op1
)
45795 machine_mode inmode
= GET_MODE (op1
);
45796 machine_mode outmode
= GET_MODE (op0
);
45797 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
45798 rtx scratch
= gen_reg_rtx (HImode
);
45799 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
45800 rtx_code_label
*jump_label
= gen_label_rtx ();
45802 rtx (*gen_abs
) (rtx
, rtx
);
45803 rtx (*gen_neg
) (rtx
, rtx
);
45808 gen_abs
= gen_abssf2
;
45811 gen_abs
= gen_absdf2
;
45814 gen_abs
= gen_absxf2
;
45817 gcc_unreachable ();
45823 gen_neg
= gen_negsf2
;
45826 gen_neg
= gen_negdf2
;
45829 gen_neg
= gen_negxf2
;
45832 gen_neg
= gen_neghi2
;
45835 gen_neg
= gen_negsi2
;
45838 gen_neg
= gen_negdi2
;
45841 gcc_unreachable ();
45844 e1
= gen_reg_rtx (inmode
);
45845 e2
= gen_reg_rtx (inmode
);
45846 res
= gen_reg_rtx (outmode
);
45848 half
= const_double_from_real_value (dconsthalf
, inmode
);
45850 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
45852 /* scratch = fxam(op1) */
45853 emit_insn (gen_rtx_SET (scratch
,
45854 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
45856 /* e1 = fabs(op1) */
45857 emit_insn (gen_abs (e1
, op1
));
45859 /* e2 = e1 + 0.5 */
45860 half
= force_reg (inmode
, half
);
45861 emit_insn (gen_rtx_SET (e2
, gen_rtx_PLUS (inmode
, e1
, half
)));
45863 /* res = floor(e2) */
45864 if (inmode
!= XFmode
)
45866 tmp1
= gen_reg_rtx (XFmode
);
45868 emit_insn (gen_rtx_SET (tmp1
, gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
45878 rtx tmp0
= gen_reg_rtx (XFmode
);
45880 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
45882 emit_insn (gen_rtx_SET (res
,
45883 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
45884 UNSPEC_TRUNC_NOOP
)));
45888 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
45891 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
45894 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
45897 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
45900 gcc_unreachable ();
45903 /* flags = signbit(a) */
45904 emit_insn (gen_testqi_ext_1_ccno (scratch
, GEN_INT (0x02)));
45906 /* if (flags) then res = -res */
45907 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
45908 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
45909 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
45911 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp
));
45912 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
45913 JUMP_LABEL (insn
) = jump_label
;
45915 emit_insn (gen_neg (res
, res
));
45917 emit_label (jump_label
);
45918 LABEL_NUSES (jump_label
) = 1;
45920 emit_move_insn (op0
, res
);
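
/* Editorial sketch (not part of GCC, compiled out): scalar model of the
   sequence above, round(a) = sgn(a) * floor(fabs(a) + 0.5); the sign is
   read from the fxam result (here: signbit) and reapplied at the end.  */
#if 0
#include <math.h>

static double
i387_round_model (double a)
{
  double r = floor (fabs (a) + 0.5);    /* res = floor (e2) */
  return signbit (a) ? -r : r;          /* if the sign bit was set, res = -res */
}
#endif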
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  if (mode == V16SFmode || mode == V8DFmode)
    {
      if (TARGET_AVX512ER)
        {
          emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                                      UNSPEC_RCP28)));
          emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0)));
          return;
        }
      emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                                  UNSPEC_RCP14)));
    }
  else
    emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                                UNSPEC_RCP)));

  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));

  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));

  emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));

  emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));

  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
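
/* Editorial sketch (not part of GCC, compiled out): scalar model of the
   Newton-Raphson refinement above.  rcp_estimate stands in for the hardware
   rcpps/rcp14/rcp28 approximation and is purely illustrative here.  */
#if 0
static float
rcp_estimate (float b)                  /* stand-in for the ~12-bit rcp estimate */
{
  return 1.0f / b;
}

static float
swdiv_model (float a, float b)
{
  float x0 = rcp_estimate (b);          /* x0 = rcp(b) */
  float e0 = x0 * b;                    /* e0 = x0 * b */
  e0 = x0 * e0;                         /* e0 = b * x0 * x0 */
  float e1 = x0 + x0;                   /* e1 = 2 * x0 */
  float x1 = e1 - e0;                   /* x1 = x0 * (2 - b * x0): one N-R step */
  return a * x1;                        /* res = a * x1 */
}
#endif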
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
45979 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
45983 x0
= gen_reg_rtx (mode
);
45984 e0
= gen_reg_rtx (mode
);
45985 e1
= gen_reg_rtx (mode
);
45986 e2
= gen_reg_rtx (mode
);
45987 e3
= gen_reg_rtx (mode
);
45989 if (TARGET_AVX512ER
&& mode
== V16SFmode
)
45992 /* res = rsqrt28(a) estimate */
45993 emit_insn (gen_rtx_SET (res
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
45997 /* x0 = rsqrt28(a) estimate */
45998 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
46000 /* res = rcp28(x0) estimate */
46001 emit_insn (gen_rtx_SET (res
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, x0
),
46007 real_from_integer (&r
, VOIDmode
, -3, SIGNED
);
46008 mthree
= const_double_from_real_value (r
, SFmode
);
46010 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
46011 mhalf
= const_double_from_real_value (r
, SFmode
);
46012 unspec
= UNSPEC_RSQRT
;
46014 if (VECTOR_MODE_P (mode
))
46016 mthree
= ix86_build_const_vector (mode
, true, mthree
);
46017 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
46018 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46019 if (GET_MODE_SIZE (mode
) == 64)
46020 unspec
= UNSPEC_RSQRT14
;
46023 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46024 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
46026 a
= force_reg (mode
, a
);
46028 /* x0 = rsqrt(a) estimate */
46029 emit_insn (gen_rtx_SET (x0
, gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
46032 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
46035 rtx zero
= force_reg (mode
, CONST0_RTX(mode
));
46038 /* Handle masked compare. */
46039 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
46041 mask
= gen_reg_rtx (HImode
);
46042 /* Imm value 0x4 corresponds to not-equal comparison. */
46043 emit_insn (gen_avx512f_cmpv16sf3 (mask
, zero
, a
, GEN_INT (0x4)));
46044 emit_insn (gen_avx512f_blendmv16sf (x0
, zero
, x0
, mask
));
46048 mask
= gen_reg_rtx (mode
);
46049 emit_insn (gen_rtx_SET (mask
, gen_rtx_NE (mode
, zero
, a
)));
46050 emit_insn (gen_rtx_SET (x0
, gen_rtx_AND (mode
, x0
, mask
)));
  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));

  emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));

  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
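
/* Editorial sketch (not part of GCC, compiled out): scalar model of the
   refinement above, rsqrt(a) ~= -0.5 * x0 * (a*x0*x0 - 3) and
   sqrt(a) ~= -0.5 * (a*x0) * (a*x0*x0 - 3), where x0 is the rsqrtss
   estimate.  rsqrt_estimate stands in for that instruction.  */
#if 0
#include <math.h>

static float
rsqrt_estimate (float a)                /* stand-in for the rsqrtss estimate */
{
  return 1.0f / sqrtf (a);
}

static float
swsqrt_model (float a, int recip)
{
  float x0 = rsqrt_estimate (a);
  float e0 = x0 * a;                    /* e0 = x0 * a */
  float e1 = e0 * x0;                   /* e1 = a * x0 * x0 */
  float e2 = e1 + -3.0f;                /* e2 = e1 - 3 */
  float e3 = (recip ? x0 : e0) * -0.5f; /* e3 = -.5 * x0  (or -.5 * e0) */
  return e2 * e3;                       /* res = e2 * e3 */
}
#endif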
46074 #ifdef TARGET_SOLARIS
46075 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46078 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
46081 /* With Binutils 2.15, the "@unwind" marker must be specified on
46082 every occurrence of the ".eh_frame" section, not just the first
46085 && strcmp (name
, ".eh_frame") == 0)
46087 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
46088 flags
& SECTION_WRITE
? "aw" : "a");
46093 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
46095 solaris_elf_asm_comdat_section (name
, flags
, decl
);
46100 default_elf_asm_named_section (name
, flags
, decl
);
46102 #endif /* TARGET_SOLARIS */
46104 /* Return the mangling of TYPE if it is an extended fundamental type. */
46106 static const char *
46107 ix86_mangle_type (const_tree type
)
46109 type
= TYPE_MAIN_VARIANT (type
);
46111 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
46112 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
46115 switch (TYPE_MODE (type
))
46118 /* __float128 is "g". */
46121 /* "long double" or __float80 is "e". */
46128 static GTY(()) tree ix86_tls_stack_chk_guard_decl
;
46131 ix86_stack_protect_guard (void)
46133 if (TARGET_SSP_TLS_GUARD
)
46135 tree type_node
= lang_hooks
.types
.type_for_mode (ptr_mode
, 1);
46136 int qual
= ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg
);
46137 tree type
= build_qualified_type (type_node
, qual
);
46140 if (global_options_set
.x_ix86_stack_protector_guard_symbol_str
)
46142 t
= ix86_tls_stack_chk_guard_decl
;
46149 (UNKNOWN_LOCATION
, VAR_DECL
,
46150 get_identifier (ix86_stack_protector_guard_symbol_str
),
46152 TREE_STATIC (t
) = 1;
46153 TREE_PUBLIC (t
) = 1;
46154 DECL_EXTERNAL (t
) = 1;
46156 TREE_THIS_VOLATILE (t
) = 1;
46157 DECL_ARTIFICIAL (t
) = 1;
46158 DECL_IGNORED_P (t
) = 1;
46160 /* Do not share RTL as the declaration is visible outside of
46161 current function. */
46163 RTX_FLAG (x
, used
) = 1;
46165 ix86_tls_stack_chk_guard_decl
= t
;
46170 tree asptrtype
= build_pointer_type (type
);
46172 t
= build_int_cst (asptrtype
, ix86_stack_protector_guard_offset
);
46173 t
= build2 (MEM_REF
, asptrtype
, t
,
46174 build_int_cst (asptrtype
, 0));
46180 return default_stack_protect_guard ();
46183 /* For 32-bit code we can save PIC register setup by using
46184 __stack_chk_fail_local hidden function instead of calling
46185 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
46186 register, so it is better to call __stack_chk_fail directly. */
46188 static tree ATTRIBUTE_UNUSED
46189 ix86_stack_protect_fail (void)
46191 return TARGET_64BIT
46192 ? default_external_stack_protect_fail ()
46193 : default_hidden_stack_protect_fail ();
46196 /* Select a format to encode pointers in exception handling data. CODE
46197 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46198 true if the symbol may be affected by dynamic relocations.
46200 ??? All x86 object file formats are capable of representing this.
46201 After all, the relocation needed is the same as for the call insn.
46202 Whether or not a particular assembler allows us to enter such, I
46203 guess we'll have to see. */
46205 asm_preferred_eh_data_format (int code
, int global
)
46209 int type
= DW_EH_PE_sdata8
;
46211 || ix86_cmodel
== CM_SMALL_PIC
46212 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
46213 type
= DW_EH_PE_sdata4
;
46214 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
46216 if (ix86_cmodel
== CM_SMALL
46217 || (ix86_cmodel
== CM_MEDIUM
&& code
))
46218 return DW_EH_PE_udata4
;
46219 return DW_EH_PE_absptr
;
46222 /* Expand copysign from SIGN to the positive value ABS_VALUE
46223 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46226 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
46228 machine_mode mode
= GET_MODE (sign
);
46229 rtx sgn
= gen_reg_rtx (mode
);
46230 if (mask
== NULL_RTX
)
46232 machine_mode vmode
;
46234 if (mode
== SFmode
)
46236 else if (mode
== DFmode
)
46241 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
46242 if (!VECTOR_MODE_P (mode
))
46244 /* We need to generate a scalar mode mask in this case. */
46245 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
46246 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
46247 mask
= gen_reg_rtx (mode
);
46248 emit_insn (gen_rtx_SET (mask
, tmp
));
46252 mask
= gen_rtx_NOT (mode
, mask
);
46253 emit_insn (gen_rtx_SET (sgn
, gen_rtx_AND (mode
, mask
, sign
)));
46254 emit_insn (gen_rtx_SET (result
, gen_rtx_IOR (mode
, abs_value
, sgn
)));
46257 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46258 mask for masking out the sign-bit is stored in *SMASK, if that is
46261 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
46263 machine_mode vmode
, mode
= GET_MODE (op0
);
46266 xa
= gen_reg_rtx (mode
);
46267 if (mode
== SFmode
)
46269 else if (mode
== DFmode
)
46273 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
46274 if (!VECTOR_MODE_P (mode
))
46276 /* We need to generate a scalar mode mask in this case. */
46277 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
46278 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
46279 mask
= gen_reg_rtx (mode
);
46280 emit_insn (gen_rtx_SET (mask
, tmp
));
46282 emit_insn (gen_rtx_SET (xa
, gen_rtx_AND (mode
, op0
, mask
)));
46290 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46291 swapping the operands if SWAP_OPERANDS is true. The expanded
46292 code is a forward jump to a newly created label in case the
46293 comparison is true. The generated label rtx is returned. */
46294 static rtx_code_label
*
46295 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
46296 bool swap_operands
)
46298 machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
46299 rtx_code_label
*label
;
46303 std::swap (op0
, op1
);
46305 label
= gen_label_rtx ();
46306 tmp
= gen_rtx_REG (fpcmp_mode
, FLAGS_REG
);
46307 emit_insn (gen_rtx_SET (tmp
, gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
)));
46308 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
46309 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
46310 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
46311 tmp
= emit_jump_insn (gen_rtx_SET (pc_rtx
, tmp
));
46312 JUMP_LABEL (tmp
) = label
;
46317 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46318 using comparison code CODE. Operands are swapped for the comparison if
46319 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46321 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
46322 bool swap_operands
)
46324 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
46325 machine_mode mode
= GET_MODE (op0
);
46326 rtx mask
= gen_reg_rtx (mode
);
46329 std::swap (op0
, op1
);
46331 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
46333 emit_insn (insn (mask
, op0
, op1
,
46334 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
46338 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46339 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46341 ix86_gen_TWO52 (machine_mode mode
)
46343 REAL_VALUE_TYPE TWO52r
;
46346 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
46347 TWO52
= const_double_from_real_value (TWO52r
, mode
);
46348 TWO52
= force_reg (mode
, TWO52
);

/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
  machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
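
/* A scalar sketch of the same computation, for illustration only
   (assumes a 64-bit long and a truncating integer conversion):

     long lround_sketch (double x)
     {
       // nextafter (0.5, 0.0) is used instead of 0.5 so that inputs just
       // below a half-integer boundary are not nudged over it by the add.
       double adj = __builtin_copysign (__builtin_nextafter (0.5, 0.0), x);
       return (long) (x + adj);
     }
 */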

/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	op0 = xi;
   */
  machine_mode fmode = GET_MODE (op1);
  machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, tmp;
  rtx_code_label *label;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
					    freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
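
/* Scalar illustration of the compensation step above (floor case),
   assuming the (long) cast truncates toward zero:

     long lfloor_sketch (double x)
     {
       long xi = (long) x;       // truncated, so too large for negative x
       if ((double) xi > x)      // overshot x: step back by one
	 xi -= 1;
       return xi;
     }
 */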

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
	if (!isless (xa, 2**52))
	  return operand1;
	xa = xa + 2**52 - 2**52;
	return copysign (xa, operand1);
   */
  machine_mode mode = GET_MODE (operand0);
  rtx res, xa, TWO52, mask;
  rtx_code_label *label;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa = xa + TWO52 - TWO52;
	x2 = copysign (xa, x);
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 -= -1;
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
		   const_double_from_real_value (do_floor
						 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
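
/* Scalar illustration of the compensation above, for the DFmode round
   sequence that must avoid 64-bit integer conversions (illustration only;
   assumes round-to-nearest-even arithmetic):

     double round_sketch (double x)
     {
       const double two52 = 4503599627370496.0;   // 2**52
       double xa = __builtin_fabs (x);
       if (!(xa < two52))
	 return x;
       double xa2 = xa + two52 - two52;   // nearest integer, ties to even
       double dxa = xa2 - xa;
       if (dxa > 0.5)                     // kept for symmetry with the rtl
	 xa2 -= 1.0;
       if (dxa <= -0.5)                   // a .5 tie rounded down: bump it up
	 xa2 += 1.0;
       return __builtin_copysign (xa2, x);   // halfway cases end up away from 0
     }
 */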

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, res, mask;
  rtx_code_label *label;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, one, res, smask, tmp;
  rtx_code_label *label;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, xi, half, mask;
  rtx_code_label *label;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case E_SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case E_DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
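
/* The identity used above, written out as a scalar illustration:

     round (a) == trunc (a + copysign (nextafter (0.5, 0.0), a))

   The nextafter matters for inputs like 0.49999999999999994 (the largest
   double below 0.5): adding plain 0.5 would round the sum up to 1.0 and
   truncate to 1, while adding the slightly smaller constant keeps the sum
   below 1.0 so the result is 0, as round requires.  */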

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_force_align_arg_pointer_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_abi va_list", 0, 0, false, false, false, NULL, false },
  { "sysv_abi va_list", 0, 0, false, false, false, NULL, false },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  { "interrupt", 0, 0, false, true, true,
    ix86_handle_interrupt_attribute, false },
  { "no_caller_saved_registers", 0, 0, false, true, true,
    ix86_handle_no_caller_saved_registers_attribute, false },
  { "naked", 0, 0, true, false, false,
    ix86_handle_fndecl_attribute, false },

  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype, int)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
	return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1);

      default:
        gcc_unreachable ();
    }
}
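
/* As a worked example of the vec_construct case above: building a V4SF
   vector from four scalars is charged vec_stmt_cost * (4 - 1), i.e. one
   vector statement per element after the first (illustration of how the
   vectorizer consumes these numbers, not additional target code).  */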

/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx_insn *vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
							const0_rtx), x);
  x = gen_rtx_SET (const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
		unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt,
			bool testing_p)
{
  machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
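
/* For a concrete picture of what the two helpers above try to recognize:
   a two-operand V4SI interleave-low, perm = {0, 4, 1, 5}, is tested as the
   following shape (illustrative RTL, not emitted verbatim):

     (set (reg:V4SI target)
	  (vec_select:V4SI
	    (vec_concat:V8SI (reg:V4SI op0) (reg:V4SI op1))
	    (parallel [(const_int 0) (const_int 4)
		       (const_int 1) (const_int 5)])))

   which recog can match against the V4SI interleave-low (punpckldq style)
   pattern in sse.md, assuming SSE2 is enabled.  */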

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  machine_mode mmode, vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, maskop, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
      && (TARGET_AVX512BW
	  || GET_MODE_UNIT_SIZE (vmode) >= 4))
    ;
  else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V4DFmode:
    case E_V8SFmode:
    case E_V2DFmode:
    case E_V4SFmode:
    case E_V8HImode:
    case E_V8SImode:
    case E_V32HImode:
    case E_V64QImode:
    case E_V16SImode:
    case E_V8DImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case E_V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case E_V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case E_V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    if (target != d->target)
	      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_reg_rtx (vmode);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case E_V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_reg_rtx (vmode);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case E_V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  switch (vmode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      mmode = QImode;
      break;
    case E_V16SFmode:
    case E_V16SImode:
      mmode = HImode;
      break;
    case E_V32HImode:
      mmode = SImode;
      break;
    case E_V64QImode:
      mmode = DImode;
      break;
    default:
      mmode = VOIDmode;
    }

  if (mmode != VOIDmode)
    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
  else
    maskop = GEN_INT (mask);

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
  x = gen_rtx_SET (target, x);
  emit_insn (x);
  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}
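
/* Worked example for the immediate path above (illustration only): for
   V8HImode with perm = {0, 9, 2, 11, 4, 13, 6, 15}, every element either
   stays in place (index i) or is taken from op1 (index i + 8), so the loop
   builds mask = 0b10101010 = 0xaa and a single pblendw $0xaa style
   instruction implements the blend.  */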

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}

/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
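
/* Example of the chunk test above (illustration only): the V16QImode
   permutation {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} moves bytes in
   aligned groups of four, so with chunk == 4 it is also valid as the
   V4SImode permutation {1, 0, 3, 2}.  */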

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[64];
  machine_mode vmode = V16QImode;
  rtx rperm[64], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = d->target;
	      if (d->vmode != V4DImode)
		target = gen_reg_rtx (V4DImode);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT ((d->perm[0] / (nelt / 2))
			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      if (target != d->target)
		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  target = gen_reg_rtx (V4DImode);
		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
				      perm, 4, false))
		    {
		      emit_move_insn (d->target,
				      gen_lowpart (d->vmode, target));
		      return true;
		    }
		  return false;
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)
	    vmode = V8SImode;

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else if (GET_MODE_SIZE (d->vmode) == 64)
	{
	  if (!TARGET_AVX512BW)
	    return false;

	  /* If vpermq didn't work, vpshufb won't work either.  */
	  if (d->vmode == V8DFmode || d->vmode == V8DImode)
	    return false;

	  vmode = V64QImode;
	  if (d->vmode == V16SImode
	      || d->vmode == V32HImode
	      || d->vmode == V64QImode)
	    {
	      /* First see if vpermq can be used for
		 V16SImode/V32HImode/V64QImode.  */
	      if (valid_perm_using_mode_p (V8DImode, d))
		{
		  for (i = 0; i < 8; i++)
		    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
		  if (d->testing_p)
		    return true;
		  target = gen_reg_rtx (V8DImode);
		  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
				      perm, 8, false))
		    {
		      emit_move_insn (d->target,
				      gen_lowpart (d->vmode, target));
		      return true;
		    }
		  return false;
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V16SImode, d))
		vmode = V16SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V16SFmode)
	    vmode = V16SImode;
	  if (vmode == V64QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 4))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else if (vmode == V16SImode)
    for (i = 0; i < 16; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
  else
    {
      eltsz = GET_MODE_UNIT_SIZE (d->vmode);
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else if (vmode == V64QImode)
	mask = nelt / 4 - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = d->target;
  if (d->vmode != vmode)
    target = gen_reg_rtx (vmode);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V64QImode)
	emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else if (vmode == V8SImode)
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
      else if (vmode == V16SFmode)
	emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
      else if (vmode == V16SImode)
	emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
      else
	gcc_unreachable ();
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  if (target != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, target));

  return true;
}

/* For V*[QHS]Imode permutations, check if the same permutation
   can't be performed in a 2x, 4x or 8x wider inner mode.  */

static bool
canonicalize_vector_int_perm (const struct expand_vec_perm_d *d,
			      struct expand_vec_perm_d *nd)
{
  int i;
  machine_mode mode = VOIDmode;

  switch (d->vmode)
    {
    case E_V16QImode: mode = V8HImode; break;
    case E_V32QImode: mode = V16HImode; break;
    case E_V64QImode: mode = V32HImode; break;
    case E_V8HImode: mode = V4SImode; break;
    case E_V16HImode: mode = V8SImode; break;
    case E_V32HImode: mode = V16SImode; break;
    case E_V4SImode: mode = V2DImode; break;
    case E_V8SImode: mode = V4DImode; break;
    case E_V16SImode: mode = V8DImode; break;
    default: return false;
    }
  for (i = 0; i < d->nelt; i += 2)
    if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1)
      return false;
  nd->vmode = mode;
  nd->nelt = d->nelt / 2;
  for (i = 0; i < nd->nelt; i++)
    nd->perm[i] = d->perm[2 * i] / 2;
  if (GET_MODE_INNER (mode) != DImode)
    canonicalize_vector_int_perm (nd, nd);
  if (nd != d)
    {
      nd->one_operand_p = d->one_operand_p;
      nd->testing_p = d->testing_p;
      if (d->op0 == d->op1)
	nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0);
      else
	{
	  nd->op0 = gen_lowpart (nd->vmode, d->op0);
	  nd->op1 = gen_lowpart (nd->vmode, d->op1);
	}
      if (d->testing_p)
	nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1);
      else
	nd->target = gen_reg_rtx (nd->vmode);
    }
  return true;
}
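
/* Example of the canonicalization above (illustration only): the V16QImode
   permutation that swaps adjacent byte pairs, {2,3, 0,1, 6,7, 4,5, ...,
   14,15, 12,13}, moves bytes in aligned pairs, so it is narrowed to the
   V8HImode permutation {1, 0, 3, 2, ..., 7, 6}; if words still moved in
   pairs it would be narrowed again toward the DImode element size.  */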

/* Try to expand one-operand permutation with constant mask.  */

static bool
ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d)
{
  machine_mode mode = GET_MODE (d->op0);
  machine_mode maskmode = mode;
  rtx (*gen) (rtx, rtx, rtx) = NULL;
  rtx target, op0, mask;
  rtx vec[64];

  if (!rtx_equal_p (d->op0, d->op1))
    return false;

  if (!TARGET_AVX512F)
    return false;

  switch (mode)
    {
    case E_V16SImode:
      gen = gen_avx512f_permvarv16si;
      break;
    case E_V16SFmode:
      gen = gen_avx512f_permvarv16sf;
      maskmode = V16SImode;
      break;
    case E_V8DImode:
      gen = gen_avx512f_permvarv8di;
      break;
    case E_V8DFmode:
      gen = gen_avx512f_permvarv8df;
      maskmode = V8DImode;
      break;
    default:
      return false;
    }

  target = d->target;
  op0 = d->op0;
  for (int i = 0; i < d->nelt; ++i)
    vec[i] = GEN_INT (d->perm[i]);
  mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
  emit_insn (gen (target, op0, force_reg (maskmode, mask)));
  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  struct expand_vec_perm_d nd;

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  nd.perm[i] = d->perm[i] & mask;
	  if (nd.perm[i] != i)
	    identity_perm = false;
	  if (nd.perm[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case E_V64QImode:
	      if (TARGET_AVX512BW)
		gen = gen_avx512bw_vec_dupv64qi_1;
	      break;
	    case E_V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case E_V32HImode:
	      if (TARGET_AVX512BW)
		gen = gen_avx512bw_vec_dupv32hi_1;
	      break;
	    case E_V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case E_V16SImode:
	      if (TARGET_AVX512F)
		gen = gen_avx512f_vec_dupv16si_1;
	      break;
	    case E_V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case E_V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case E_V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case E_V16SFmode:
	      if (TARGET_AVX512F)
		gen = gen_avx512f_vec_dupv16sf_1;
	      break;
	    case E_V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    case E_V8DFmode:
	      if (TARGET_AVX512F)
		gen = gen_avx512f_vec_dupv8df_1;
	      break;
	    case E_V8DImode:
	      if (TARGET_AVX512F)
		gen = gen_avx512f_vec_dupv8di_1;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default: break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  nd.perm[i] = d->perm[i] & mask;
	  nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      nd.perm[i + 0] = d->perm[i + 0] & mask;
	      nd.perm[i + 1] = d->perm[i + 1] & mask;
	      nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  nd.perm[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  /* Try the AVX2 vpalignr instruction.  */
  if (expand_vec_perm_palignr (d, true))
    return true;

  /* Try the AVX512F vperm{s,d} instructions.  */
  if (ix86_expand_vec_one_operand_perm_avx512 (d))
    return true;

  /* Try the AVX512F vpermi2 instructions.  */
  if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
    return true;

  /* See if we can get the same permutation in different vector integer
     mode.  */
  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))
    {
      if (!d->testing_p)
	emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
      return true;
    }
  return false;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
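
/* Example (illustration only): the V8HImode permutation
   {3, 2, 1, 0, 5, 4, 7, 6} keeps each half within its own 64-bit lane,
   so it is split into pshuflw with {3,2,1,0, 4,5,6,7} followed by
   pshufhw with {0,1,2,3, 5,4,7,6} applied to the intermediate result.  */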

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed if only
   the vpalignr instruction itself can perform the requested permutation.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max, minswap, maxswap;
  bool in_order, ok, swap = false;
  rtx shift, target;
  struct expand_vec_perm_d dcopy;

  /* Even with AVX, palignr only operates on 128-bit vectors,
     in AVX2 palignr operates on both 128-bit lanes.  */
  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
    return false;

  min = 2 * nelt;
  max = 0;
  minswap = 2 * nelt;
  maxswap = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      unsigned eswap = d->perm[i] ^ nelt;
      if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
	  eswap = e ^ (nelt / 2);
	}
      if (e < min)
	min = e;
      if (e > max)
	max = e;
      if (eswap < minswap)
	minswap = eswap;
      if (eswap > maxswap)
	maxswap = eswap;
    }
  if (min == 0
      || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
    {
      if (d->one_operand_p
	  || minswap == 0
	  || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
				   ? nelt / 2 : nelt))
	return false;
      swap = true;
      min = minswap;
      max = maxswap;
    }

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb for
     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
     first.  */
  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
    return true;

  dcopy = *d;
  if (swap)
    {
      dcopy.op0 = d->op1;
      dcopy.op1 = d->op0;
      for (i = 0; i < nelt; ++i)
	dcopy.perm[i] ^= nelt;
    }

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = dcopy.perm[i];
      if (GET_MODE_SIZE (d->vmode) == 32
	  && e >= nelt
	  && (e & (nelt / 2 - 1)) < min)
	e = e - min - (nelt / 2);
      else
	e = e - min;
      if (e != i)
	in_order = false;
      dcopy.perm[i] = e;
    }
  dcopy.one_operand_p = true;

  if (single_insn_only_p && !in_order)
    return false;

  /* For AVX2, test whether we can permute the result in one instruction.  */
  if (d->testing_p)
    {
      if (in_order)
	return true;
      dcopy.op1 = dcopy.op0;
      return expand_vec_perm_1 (&dcopy);
    }

  shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      target = gen_reg_rtx (TImode);
      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
				      gen_lowpart (TImode, dcopy.op0), shift));
    }
  else
    {
      target = gen_reg_rtx (V2TImode);
      emit_insn (gen_avx2_palignrv2ti (target,
				       gen_lowpart (V2TImode, dcopy.op1),
				       gen_lowpart (V2TImode, dcopy.op0),
				       shift));
    }

  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    {
      emit_move_insn (d->target, dcopy.op0);
      return true;
    }

  ok = expand_vec_perm_1 (&dcopy);
  gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);

  return ok;
}

/* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
   the permutation using the SSE4_1 pblendv instruction.  Potentially
   reduces permutation from 2 pshufb and or to 1 pshufb and pblendv.  */

static bool
expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
{
  unsigned i, which, nelt = d->nelt;
  struct expand_vec_perm_d dcopy, dcopy1;
  machine_mode vmode = d->vmode;
  bool ok;

  /* Use the same checks as in expand_vec_perm_blend.  */
  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* Figure out where permutation elements stay not in their
     respective lanes.  */
  for (i = 0, which = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e != i)
	which |= (e < nelt ? 1 : 2);
    }
  /* We can pblend the part where elements stay not in their
     respective lanes only when these elements are all in one
     half of a permutation.
     {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective
     lanes, but both 8 and 9 >= 8
     {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
     respective lanes and 8 >= 8, but 2 not.  */
  if (which != 1 && which != 2)
    return false;
  if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
    return true;

  /* First we apply one operand permutation to the part where
     elements stay not in their respective lanes.  */
  dcopy = *d;
  if (which == 2)
    dcopy.op0 = dcopy.op1 = d->op1;
  else
    dcopy.op0 = dcopy.op1 = d->op0;
  if (!d->testing_p)
    dcopy.target = gen_reg_rtx (vmode);
  dcopy.one_operand_p = true;

  for (i = 0; i < nelt; ++i)
    dcopy.perm[i] = d->perm[i] & (nelt - 1);

  ok = expand_vec_perm_1 (&dcopy);
  if (GET_MODE_SIZE (vmode) != 16 && !ok)
    return false;
  else
    gcc_assert (ok);
  if (d->testing_p)
    return true;

  /* Next we put permuted elements into their positions.  */
  dcopy1 = *d;
  if (which == 2)
    dcopy1.op1 = dcopy.target;
  else
    dcopy1.op0 = dcopy.target;

  for (i = 0; i < nelt; ++i)
    dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);

  ok = expand_vec_perm_blend (&dcopy1);
  gcc_assert (ok);

  return true;
}

static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx_insn *seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->one_operand_p.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= HOST_WIDE_INT_1U << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->one_operand_p);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->one_operand_p)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      std::swap (nonzero_halves[0], nonzero_halves[1]);
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->one_operand_p)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  if (!d->testing_p)
    {
      dremap.target = gen_reg_rtx (dremap.vmode);
      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
    }
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  canonicalize_perm (&dfirst);
	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  if (perm >= 16)
	    dsecond.op0 = dsecond.op1;
	  dsecond.op1 = dfirst.target;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x01
	 aka lanes swap.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case E_V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case E_V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case E_V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case E_V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case E_V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case E_V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)

  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;

  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)

  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)

      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
      dfirst.perm[j] = d->perm[i];

  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  dfirst.target = gen_reg_rtx (dfirst.vmode);

  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();

  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)

  struct expand_vec_perm_d dfirst, dsecond, dthird;

  if (!TARGET_AVX || (d->vmode != V4DFmode))

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)

  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)

  gcc_assert (!d->one_operand_p);

  eltsz = GET_MODE_UNIT_SIZE (d->vmode);

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
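  /* For instance, for V8HImode (nelt == 8, eltsz == 2) with
     d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 }: element 1 comes from the
     second vector (9 >= nelt), so bytes 2 and 3 of the first mask are
     set to -128 while bytes 2 and 3 of the second mask select bytes 2
     and 3 of d->op1 (element 9 - nelt == 1); the final ior then merges
     the two pshufb results.  */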
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)

      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);

      for (j = 0; j < eltsz; ++j)

          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  if (d->vmode != V16QImode)
    op = gen_reg_rtx (V16QImode);
  emit_insn (gen_iorv16qi3 (op, l, h));
  if (op != d->target)
    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48851 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
48852 with two vpshufb insns, vpermq and vpor. We should have already failed
48853 all two or three instruction sequences. */
48856 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
48858 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
48859 unsigned int i
, nelt
, eltsz
;
48862 || !d
->one_operand_p
48863 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
48870 eltsz
= GET_MODE_UNIT_SIZE (d
->vmode
);
  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
48880 for (i
= 0; i
< nelt
; ++i
)
48882 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
48883 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
48885 for (j
= 0; j
< eltsz
; ++j
)
48887 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
48888 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
48892 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
48893 vperm
= force_reg (V32QImode
, vperm
);
48895 h
= gen_reg_rtx (V32QImode
);
48896 op
= gen_lowpart (V32QImode
, d
->op0
);
48897 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
  /* Swap the 128-bit lanes of h into hp.  */
48900 hp
= gen_reg_rtx (V4DImode
);
48901 op
= gen_lowpart (V4DImode
, h
);
48902 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
48905 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
48906 vperm
= force_reg (V32QImode
, vperm
);
48908 l
= gen_reg_rtx (V32QImode
);
48909 op
= gen_lowpart (V32QImode
, d
->op0
);
48910 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
48913 if (d
->vmode
!= V32QImode
)
48914 op
= gen_reg_rtx (V32QImode
);
48915 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
48916 if (op
!= d
->target
)
48917 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
48922 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48923 and extract-odd permutations of two V32QImode and V16QImode operand
48924 with two vpshufb insns, vpor and vpermq. We should have already
48925 failed all two or three instruction sequences. */
48928 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
48930 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
48931 unsigned int i
, nelt
, eltsz
;
48934 || d
->one_operand_p
48935 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
48938 for (i
= 0; i
< d
->nelt
; ++i
)
48939 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
48946 eltsz
= GET_MODE_UNIT_SIZE (d
->vmode
);
  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
48961 m128
= GEN_INT (-128);
48962 for (i
= 0; i
< nelt
; ++i
)
48964 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
48965 unsigned which
= d
->perm
[i
] >= nelt
;
48966 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
48968 for (j
= 0; j
< eltsz
; ++j
)
48970 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
48971 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
48975 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
48976 vperm
= force_reg (V32QImode
, vperm
);
48978 l
= gen_reg_rtx (V32QImode
);
48979 op
= gen_lowpart (V32QImode
, d
->op0
);
48980 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
48982 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
48983 vperm
= force_reg (V32QImode
, vperm
);
48985 h
= gen_reg_rtx (V32QImode
);
48986 op
= gen_lowpart (V32QImode
, d
->op1
);
48987 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
48989 ior
= gen_reg_rtx (V32QImode
);
48990 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
48992 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48993 op
= gen_reg_rtx (V4DImode
);
48994 ior
= gen_lowpart (V4DImode
, ior
);
48995 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
48996 const1_rtx
, GEN_INT (3)));
48997 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
   with two "and" and "pack" or two "shift" and "pack" insns.  We should
   have already failed all two instruction sequences.  */
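/* Roughly: the "and" variant keeps the even (low-half) elements of each
   wider word before the pack narrows them, while the "shift" variant
   first moves the odd (high-half) elements down by the element width so
   that the same pack extracts them.  */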
49008 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d
*d
)
49010 rtx op
, dop0
, dop1
, t
, rperm
[16];
49011 unsigned i
, odd
, c
, s
, nelt
= d
->nelt
;
49012 bool end_perm
= false;
49013 machine_mode half_mode
;
49014 rtx (*gen_and
) (rtx
, rtx
, rtx
);
49015 rtx (*gen_pack
) (rtx
, rtx
, rtx
);
49016 rtx (*gen_shift
) (rtx
, rtx
, rtx
);
49018 if (d
->one_operand_p
)
49024 /* Required for "pack". */
49025 if (!TARGET_SSE4_1
)
49029 half_mode
= V4SImode
;
49030 gen_and
= gen_andv4si3
;
49031 gen_pack
= gen_sse4_1_packusdw
;
49032 gen_shift
= gen_lshrv4si3
;
49035 /* No check as all instructions are SSE2. */
49038 half_mode
= V8HImode
;
49039 gen_and
= gen_andv8hi3
;
49040 gen_pack
= gen_sse2_packuswb
;
49041 gen_shift
= gen_lshrv8hi3
;
49048 half_mode
= V8SImode
;
49049 gen_and
= gen_andv8si3
;
49050 gen_pack
= gen_avx2_packusdw
;
49051 gen_shift
= gen_lshrv8si3
;
49059 half_mode
= V16HImode
;
49060 gen_and
= gen_andv16hi3
;
49061 gen_pack
= gen_avx2_packuswb
;
49062 gen_shift
= gen_lshrv16hi3
;
49066 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
49067 general shuffles. */
49071 /* Check that permutation is even or odd. */
49076 for (i
= 1; i
< nelt
; ++i
)
49077 if (d
->perm
[i
] != 2 * i
+ odd
)
49083 dop0
= gen_reg_rtx (half_mode
);
49084 dop1
= gen_reg_rtx (half_mode
);
49087 for (i
= 0; i
< nelt
/ 2; i
++)
49088 rperm
[i
] = GEN_INT (c
);
49089 t
= gen_rtx_CONST_VECTOR (half_mode
, gen_rtvec_v (nelt
/ 2, rperm
));
49090 t
= force_reg (half_mode
, t
);
49091 emit_insn (gen_and (dop0
, t
, gen_lowpart (half_mode
, d
->op0
)));
49092 emit_insn (gen_and (dop1
, t
, gen_lowpart (half_mode
, d
->op1
)));
49096 emit_insn (gen_shift (dop0
,
49097 gen_lowpart (half_mode
, d
->op0
),
49099 emit_insn (gen_shift (dop1
,
49100 gen_lowpart (half_mode
, d
->op1
),
  /* In the AVX2 256-bit case we need to permute the pack result.  */
49104 if (TARGET_AVX2
&& end_perm
)
49106 op
= gen_reg_rtx (d
->vmode
);
49107 t
= gen_reg_rtx (V4DImode
);
49108 emit_insn (gen_pack (op
, dop0
, dop1
));
49109 emit_insn (gen_avx2_permv4di_1 (t
,
49110 gen_lowpart (V4DImode
, op
),
49115 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, t
));
49118 emit_insn (gen_pack (d
->target
, dop0
, dop1
));
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V64QI operands
   with two "shifts", two "truncs" and one "concat" insns for "odd"
   and two "truncs" and one concat insn for "even."
   We should have already failed all two instruction sequences.  */

expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)

  rtx t1, t2, t3, t4;
  unsigned i, odd, nelt = d->nelt;

  if (!TARGET_AVX512BW
      || d->one_operand_p
      || d->vmode != V64QImode)

  /* Check that permutation is even or odd.  */
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)

  t1 = gen_reg_rtx (V32HImode);
  t2 = gen_reg_rtx (V32HImode);
  emit_insn (gen_lshrv32hi3 (t1,
                             gen_lowpart (V32HImode, d->op0),
  emit_insn (gen_lshrv32hi3 (t2,
                             gen_lowpart (V32HImode, d->op1),

  t1 = gen_lowpart (V32HImode, d->op0);
  t2 = gen_lowpart (V32HImode, d->op1);

  t3 = gen_reg_rtx (V32QImode);
  t4 = gen_reg_rtx (V32QImode);
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
  emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
49179 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
49180 and extract-odd permutations. */
49183 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
49185 rtx t1
, t2
, t3
, t4
, t5
;
49192 t1
= gen_reg_rtx (V4DFmode
);
49193 t2
= gen_reg_rtx (V4DFmode
);
49195 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49196 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
49197 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
49199 /* Now an unpck[lh]pd will produce the result required. */
49201 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
49203 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
49209 int mask
= odd
? 0xdd : 0x88;
49213 t1
= gen_reg_rtx (V8SFmode
);
49214 t2
= gen_reg_rtx (V8SFmode
);
49215 t3
= gen_reg_rtx (V8SFmode
);
49217 /* Shuffle within the 128-bit lanes to produce:
49218 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49219 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
49222 /* Shuffle the lanes around to produce:
49223 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49224 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
49227 /* Shuffle within the 128-bit lanes to produce:
49228 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49229 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
49231 /* Shuffle within the 128-bit lanes to produce:
49232 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49233 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
49235 /* Shuffle the lanes around to produce:
49236 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49237 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
49246 /* These are always directly implementable by expand_vec_perm_1. */
49247 gcc_unreachable ();
49251 return expand_vec_perm_even_odd_pack (d
);
49252 else if (TARGET_SSSE3
&& !TARGET_SLOW_PSHUFB
)
49253 return expand_vec_perm_pshufb2 (d
);
49258 /* We need 2*log2(N)-1 operations to achieve odd/even
49259 with interleave. */
49260 t1
= gen_reg_rtx (V8HImode
);
49261 t2
= gen_reg_rtx (V8HImode
);
49262 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
49263 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
49264 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
49265 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
49267 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
49269 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
49275 return expand_vec_perm_even_odd_pack (d
);
49279 return expand_vec_perm_even_odd_pack (d
);
49282 return expand_vec_perm_even_odd_trunc (d
);
49287 struct expand_vec_perm_d d_copy
= *d
;
49288 d_copy
.vmode
= V4DFmode
;
49290 d_copy
.target
= gen_raw_REG (V4DFmode
, LAST_VIRTUAL_REGISTER
+ 1);
49292 d_copy
.target
= gen_reg_rtx (V4DFmode
);
49293 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
49294 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
49295 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
49298 emit_move_insn (d
->target
,
49299 gen_lowpart (V4DImode
, d_copy
.target
));
49308 t1
= gen_reg_rtx (V4DImode
);
49309 t2
= gen_reg_rtx (V4DImode
);
49311 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49312 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
49313 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
  /* Now a vpunpck[lh]qdq will produce the result required.  */
49317 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
49319 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
49326 struct expand_vec_perm_d d_copy
= *d
;
49327 d_copy
.vmode
= V8SFmode
;
49329 d_copy
.target
= gen_raw_REG (V8SFmode
, LAST_VIRTUAL_REGISTER
+ 1);
49331 d_copy
.target
= gen_reg_rtx (V8SFmode
);
49332 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
49333 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
49334 if (expand_vec_perm_even_odd_1 (&d_copy
, odd
))
49337 emit_move_insn (d
->target
,
49338 gen_lowpart (V8SImode
, d_copy
.target
));
49347 t1
= gen_reg_rtx (V8SImode
);
49348 t2
= gen_reg_rtx (V8SImode
);
49349 t3
= gen_reg_rtx (V4DImode
);
49350 t4
= gen_reg_rtx (V4DImode
);
49351 t5
= gen_reg_rtx (V4DImode
);
49353 /* Shuffle the lanes around into
49354 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49355 emit_insn (gen_avx2_permv2ti (t3
, gen_lowpart (V4DImode
, d
->op0
),
49356 gen_lowpart (V4DImode
, d
->op1
),
49358 emit_insn (gen_avx2_permv2ti (t4
, gen_lowpart (V4DImode
, d
->op0
),
49359 gen_lowpart (V4DImode
, d
->op1
),
49362 /* Swap the 2nd and 3rd position in each lane into
49363 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49364 emit_insn (gen_avx2_pshufdv3 (t1
, gen_lowpart (V8SImode
, t3
),
49365 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49366 emit_insn (gen_avx2_pshufdv3 (t2
, gen_lowpart (V8SImode
, t4
),
49367 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      /* Now a vpunpck[lh]qdq will produce
         { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
49372 t3
= gen_avx2_interleave_highv4di (t5
, gen_lowpart (V4DImode
, t1
),
49373 gen_lowpart (V4DImode
, t2
));
49375 t3
= gen_avx2_interleave_lowv4di (t5
, gen_lowpart (V4DImode
, t1
),
49376 gen_lowpart (V4DImode
, t2
));
49378 emit_move_insn (d
->target
, gen_lowpart (V8SImode
, t5
));
49382 gcc_unreachable ();
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

expand_vec_perm_even_odd (struct expand_vec_perm_d *d)

  unsigned i, odd, nelt = d->nelt;

  if (odd != 0 && odd != 1)

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)

  return expand_vec_perm_even_odd_1 (d, odd);
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)

  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0, dest;

      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
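      /* As an illustration: to broadcast byte 3 of a V16QImode operand,
         the first interleave-low duplicates it into halfword 3, the
         second into word 3, and the final pshufd with a { 3, 3, 3, 3 }
         selector replicates that word across the whole vector.  */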
          rtx (*gen) (rtx, rtx, rtx)
            = vmode == V16QImode ? gen_vec_interleave_lowv16qi
                                 : gen_vec_interleave_lowv8hi;

            gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
                                     : gen_vec_interleave_highv8hi;

          dest = gen_reg_rtx (vmode);
          emit_insn (gen (dest, op0, op0));
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, dest);

      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      dest = gen_reg_rtx (V4SImode);
      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);

      emit_move_insn (d->target, gen_lowpart (d->vmode, dest));

      /* For AVX2 broadcasts of the first element vpbroadcast* or
         vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);

      gcc_unreachable ();
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

expand_vec_perm_broadcast (struct expand_vec_perm_d *d)

  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)

  return expand_vec_perm_broadcast_1 (d);
/* Implement arbitrary permutations of two V64QImode operands
   with 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
49508 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d
*d
)
49510 if (!TARGET_AVX512BW
|| !(d
->vmode
== V64QImode
))
49516 struct expand_vec_perm_d ds
[2];
49517 rtx rperm
[128], vperm
, target0
, target1
;
49518 unsigned int i
, nelt
;
49519 machine_mode vmode
;
49524 for (i
= 0; i
< 2; i
++)
49527 ds
[i
].vmode
= V32HImode
;
49529 ds
[i
].target
= gen_reg_rtx (V32HImode
);
49530 ds
[i
].op0
= gen_lowpart (V32HImode
, d
->op0
);
49531 ds
[i
].op1
= gen_lowpart (V32HImode
, d
->op1
);
  /* Prepare permutations such that the first one takes care of
     putting the even bytes into the right positions or one higher
     positions (ds[0]) and the second one takes care of
     putting the odd bytes into the right positions or one below
     (ds[1]).  */
49540 for (i
= 0; i
< nelt
; i
++)
49542 ds
[i
& 1].perm
[i
/ 2] = d
->perm
[i
] / 2;
49545 rperm
[i
] = constm1_rtx
;
49546 rperm
[i
+ 64] = GEN_INT ((i
& 14) + (d
->perm
[i
] & 1));
49550 rperm
[i
] = GEN_INT ((i
& 14) + (d
->perm
[i
] & 1));
49551 rperm
[i
+ 64] = constm1_rtx
;
49555 bool ok
= expand_vec_perm_1 (&ds
[0]);
49557 ds
[0].target
= gen_lowpart (V64QImode
, ds
[0].target
);
49559 ok
= expand_vec_perm_1 (&ds
[1]);
49561 ds
[1].target
= gen_lowpart (V64QImode
, ds
[1].target
);
49563 vperm
= gen_rtx_CONST_VECTOR (V64QImode
, gen_rtvec_v (64, rperm
));
49564 vperm
= force_reg (vmode
, vperm
);
49565 target0
= gen_reg_rtx (V64QImode
);
49566 emit_insn (gen_avx512bw_pshufbv64qi3 (target0
, ds
[0].target
, vperm
));
49568 vperm
= gen_rtx_CONST_VECTOR (V64QImode
, gen_rtvec_v (64, rperm
+ 64));
49569 vperm
= force_reg (vmode
, vperm
);
49570 target1
= gen_reg_rtx (V64QImode
);
49571 emit_insn (gen_avx512bw_pshufbv64qi3 (target1
, ds
[1].target
, vperm
));
49573 emit_insn (gen_iorv64qi3 (d
->target
, target0
, target1
));
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */
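/* Roughly: one vpshufb mask is built per combination of source operand
   and lane case (element taken from the same 128-bit lane, or from the
   other lane after the vpermq lane swap), and the vpor insns merge the
   partial results.  */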
49582 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
49584 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
49585 unsigned int i
, nelt
, eltsz
;
49589 || d
->one_operand_p
49590 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
49597 eltsz
= GET_MODE_UNIT_SIZE (d
->vmode
);
  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
49607 for (i
= 0; i
< 32; ++i
)
49609 rperm
[0][i
] = m128
;
49610 rperm
[1][i
] = m128
;
49611 rperm
[2][i
] = m128
;
49612 rperm
[3][i
] = m128
;
49618 for (i
= 0; i
< nelt
; ++i
)
49620 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
49621 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
49622 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
49624 for (j
= 0; j
< eltsz
; ++j
)
49625 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
49626 used
[which
] = true;
49629 for (i
= 0; i
< 2; ++i
)
49631 if (!used
[2 * i
+ 1])
49636 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
49637 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
49638 vperm
= force_reg (V32QImode
, vperm
);
49639 h
[i
] = gen_reg_rtx (V32QImode
);
49640 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
49641 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
  /* Swap the 128-bit lanes of h[X].  */
49645 for (i
= 0; i
< 2; ++i
)
49647 if (h
[i
] == NULL_RTX
)
49649 op
= gen_reg_rtx (V4DImode
);
49650 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
49651 const2_rtx
, GEN_INT (3), const0_rtx
,
49653 h
[i
] = gen_lowpart (V32QImode
, op
);
49656 for (i
= 0; i
< 2; ++i
)
49663 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
49664 vperm
= force_reg (V32QImode
, vperm
);
49665 l
[i
] = gen_reg_rtx (V32QImode
);
49666 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
49667 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
49670 for (i
= 0; i
< 2; ++i
)
49674 op
= gen_reg_rtx (V32QImode
);
49675 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
49682 gcc_assert (l
[0] && l
[1]);
49684 if (d
->vmode
!= V32QImode
)
49685 op
= gen_reg_rtx (V32QImode
);
49686 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
49687 if (op
!= d
->target
)
49688 emit_move_insn (d
->target
, gen_lowpart (d
->vmode
, op
));
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)

  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))

  if (expand_vec_perm_palignr (d, false))

  if (expand_vec_perm_interleave2 (d))

  if (expand_vec_perm_broadcast (d))

  if (expand_vec_perm_vpermq_perm_1 (d))

  if (expand_vec_perm_vperm2f128 (d))

  if (expand_vec_perm_pblendv (d))

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_even_odd_pack (d))

  if (expand_vec_perm_2vperm2f128_vshuf (d))

  if (expand_vec_perm_pshufb2 (d))

  if (expand_vec_perm_interleave3 (d))

  if (expand_vec_perm_vperm2f128_vblend (d))

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_even_odd_trunc (d))

  if (expand_vec_perm_vpshufb2_vpermq (d))

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))

  if (expand_vec_perm_vpermi2_vpshub2 (d))

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))

  /* See if we can get the same permutation in different vector integer
     mode.  */
  struct expand_vec_perm_d nd;
  if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd))

      emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target));
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

canonicalize_perm (struct expand_vec_perm_d *d)

  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;

      if (!rtx_equal_p (d->op0, d->op1))

          d->one_operand_p = false;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      for (i = 0; i < nelt; ++i)
        d->perm[i] &= nelt - 1;

  return (which == 3);
49830 ix86_expand_vec_perm_const (rtx operands
[4])
49832 struct expand_vec_perm_d d
;
49833 unsigned char perm
[MAX_VECT_LEN
];
49838 d
.target
= operands
[0];
49839 d
.op0
= operands
[1];
49840 d
.op1
= operands
[2];
49843 d
.vmode
= GET_MODE (d
.target
);
49844 gcc_assert (VECTOR_MODE_P (d
.vmode
));
49845 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
49846 d
.testing_p
= false;
49848 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
49849 gcc_assert (XVECLEN (sel
, 0) == nelt
);
49850 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
49852 for (i
= 0; i
< nelt
; ++i
)
49854 rtx e
= XVECEXP (sel
, 0, i
);
49855 int ei
= INTVAL (e
) & (2 * nelt
- 1);
49860 two_args
= canonicalize_perm (&d
);
49862 if (ix86_expand_vec_perm_const_1 (&d
))
49865 /* If the selector says both arguments are needed, but the operands are the
49866 same, the above tried to expand with one_operand_p and flattened selector.
49867 If that didn't work, retry without one_operand_p; we succeeded with that
49869 if (two_args
&& d
.one_operand_p
)
49871 d
.one_operand_p
= false;
49872 memcpy (d
.perm
, perm
, sizeof (perm
));
49873 return ix86_expand_vec_perm_const_1 (&d
);
49879 /* Implement targetm.vectorize.vec_perm_const_ok. */
49882 ix86_vectorize_vec_perm_const_ok (machine_mode vmode
,
49883 const unsigned char *sel
)
49885 struct expand_vec_perm_d d
;
49886 unsigned int i
, nelt
, which
;
49890 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
49891 d
.testing_p
= true;
49893 /* Given sufficient ISA support we can just return true here
49894 for selected vector modes. */
49901 if (TARGET_AVX512F
)
49902 /* All implementable with a single vpermi2 insn. */
49906 if (TARGET_AVX512BW
)
49907 /* All implementable with a single vpermi2 insn. */
49911 if (TARGET_AVX512BW
)
49912 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49919 if (TARGET_AVX512VL
)
49920 /* All implementable with a single vpermi2 insn. */
49925 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49930 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49937 /* All implementable with a single vpperm insn. */
49940 /* All implementable with 2 pshufb + 1 ior. */
49946 /* All implementable with shufpd or unpck[lh]pd. */
49952 /* Extract the values from the vector CST into the permutation
49954 memcpy (d
.perm
, sel
, nelt
);
49955 for (i
= which
= 0; i
< nelt
; ++i
)
49957 unsigned char e
= d
.perm
[i
];
49958 gcc_assert (e
< 2 * nelt
);
49959 which
|= (e
< nelt
? 1 : 2);
49962 /* For all elements from second vector, fold the elements to first. */
49964 for (i
= 0; i
< nelt
; ++i
)
49967 /* Check whether the mask can be applied to the vector type. */
49968 d
.one_operand_p
= (which
!= 3);
49970 /* Implementable with shufps or pshufd. */
49971 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
49974 /* Otherwise we have to go through the motions and see if we can
49975 figure out how to generate the requested permutation. */
49976 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
49977 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
49978 if (!d
.one_operand_p
)
49979 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
49982 ret
= ix86_expand_vec_perm_const_1 (&d
);
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)

  struct expand_vec_perm_d d;

  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)

  struct expand_vec_perm_d d;
  unsigned i, nelt, base;

  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)

      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
50041 /* Expand a vector operation CODE for a V*QImode in terms of the
50042 same operation on V*HImode. */
50045 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
50047 machine_mode qimode
= GET_MODE (dest
);
50048 machine_mode himode
;
50049 rtx (*gen_il
) (rtx
, rtx
, rtx
);
50050 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
50051 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
50052 struct expand_vec_perm_d d
;
50053 bool ok
, full_interleave
;
50054 bool uns_p
= false;
50061 gen_il
= gen_vec_interleave_lowv16qi
;
50062 gen_ih
= gen_vec_interleave_highv16qi
;
50065 himode
= V16HImode
;
50066 gen_il
= gen_avx2_interleave_lowv32qi
;
50067 gen_ih
= gen_avx2_interleave_highv32qi
;
50070 himode
= V32HImode
;
50071 gen_il
= gen_avx512bw_interleave_lowv64qi
;
50072 gen_ih
= gen_avx512bw_interleave_highv64qi
;
50075 gcc_unreachable ();
50078 op2_l
= op2_h
= op2
;
50082 /* Unpack data such that we've got a source byte in each low byte of
50083 each word. We don't care what goes into the high byte of each word.
50084 Rather than trying to get zero in there, most convenient is to let
50085 it be a copy of the low byte. */
50086 op2_l
= gen_reg_rtx (qimode
);
50087 op2_h
= gen_reg_rtx (qimode
);
50088 emit_insn (gen_il (op2_l
, op2
, op2
));
50089 emit_insn (gen_ih (op2_h
, op2
, op2
));
50092 op1_l
= gen_reg_rtx (qimode
);
50093 op1_h
= gen_reg_rtx (qimode
);
50094 emit_insn (gen_il (op1_l
, op1
, op1
));
50095 emit_insn (gen_ih (op1_h
, op1
, op1
));
50096 full_interleave
= qimode
== V16QImode
;
50104 op1_l
= gen_reg_rtx (himode
);
50105 op1_h
= gen_reg_rtx (himode
);
50106 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
50107 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
50108 full_interleave
= true;
50111 gcc_unreachable ();
50114 /* Perform the operation. */
50115 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
50117 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
50119 gcc_assert (res_l
&& res_h
);
50121 /* Merge the data back into the right place. */
50123 d
.op0
= gen_lowpart (qimode
, res_l
);
50124 d
.op1
= gen_lowpart (qimode
, res_h
);
50126 d
.nelt
= GET_MODE_NUNITS (qimode
);
50127 d
.one_operand_p
= false;
50128 d
.testing_p
= false;
50130 if (full_interleave
)
      /* For SSE2, we used a full interleave, so the desired
         results are in the even elements.  */
      for (i = 0; i < d.nelt; ++i)

      /* For AVX, the interleave used above was not cross-lane.  So the
         extraction is evens but with the second and third quarter swapped.
         Happily, that is even one insn shorter than even extraction.
         For AVX512BW we have 4 lanes.  We extract evens from within a lane,
         always first from the first and then from the second source operand,
         the index bits above the low 4 bits remain the same.
         Thus, for d.nelt == 32 we want permutation
         0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
         and for d.nelt == 64 we want permutation
         0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
         32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
      for (i = 0; i < d.nelt; ++i)
        d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
50154 ok
= ix86_expand_vec_perm_const_1 (&d
);
50157 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
50158 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
50161 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50162 if op is CONST_VECTOR with all odd elements equal to their
50163 preceding element. */
50166 const_vector_equal_evenodd_p (rtx op
)
50168 machine_mode mode
= GET_MODE (op
);
50169 int i
, nunits
= GET_MODE_NUNITS (mode
);
50170 if (GET_CODE (op
) != CONST_VECTOR
50171 || nunits
!= CONST_VECTOR_NUNITS (op
))
50173 for (i
= 0; i
< nunits
; i
+= 2)
50174 if (CONST_VECTOR_ELT (op
, i
) != CONST_VECTOR_ELT (op
, i
+ 1))
50180 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
50181 bool uns_p
, bool odd_p
)
50183 machine_mode mode
= GET_MODE (op1
);
50184 machine_mode wmode
= GET_MODE (dest
);
50186 rtx orig_op1
= op1
, orig_op2
= op2
;
50188 if (!nonimmediate_operand (op1
, mode
))
50189 op1
= force_reg (mode
, op1
);
50190 if (!nonimmediate_operand (op2
, mode
))
50191 op2
= force_reg (mode
, op2
);
50193 /* We only play even/odd games with vectors of SImode. */
50194 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
|| mode
== V16SImode
);
50196 /* If we're looking for the odd results, shift those members down to
50197 the even slots. For some cpus this is faster than a PSHUFD. */
50200 /* For XOP use vpmacsdqh, but only for smult, as it is only
50202 if (TARGET_XOP
&& mode
== V4SImode
&& !uns_p
)
50204 x
= force_reg (wmode
, CONST0_RTX (wmode
));
50205 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
50209 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
50210 if (!const_vector_equal_evenodd_p (orig_op1
))
50211 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
50212 x
, NULL
, 1, OPTAB_DIRECT
);
50213 if (!const_vector_equal_evenodd_p (orig_op2
))
50214 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
50215 x
, NULL
, 1, OPTAB_DIRECT
);
50216 op1
= gen_lowpart (mode
, op1
);
50217 op2
= gen_lowpart (mode
, op2
);
50220 if (mode
== V16SImode
)
50223 x
= gen_vec_widen_umult_even_v16si (dest
, op1
, op2
);
50225 x
= gen_vec_widen_smult_even_v16si (dest
, op1
, op2
);
50227 else if (mode
== V8SImode
)
50230 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
50232 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
50235 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
50236 else if (TARGET_SSE4_1
)
50237 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
50240 rtx s1
, s2
, t0
, t1
, t2
;
      /* The easiest way to implement this without PMULDQ is to go through
         the motions as if we are performing a full 64-bit multiply.  With
         the exception that we need to do less shuffling of the elements.  */
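      /* Illustrative identity: with s1 and s2 being the all-ones masks for
         op1 < 0 and op2 < 0, the signed product is congruent mod 2^64 to
           LO(op1) * LO(op2) + ((s1 * LO(op2) + s2 * LO(op1)) << 32),
         since each s * LO term reduces to -LO modulo 2^32 before the
         shift; t0, t1 and t2 below are combined exactly this way.  */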
50246 /* Compute the sign-extension, aka highparts, of the two operands. */
50247 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
50248 op1
, pc_rtx
, pc_rtx
);
50249 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
50250 op2
, pc_rtx
, pc_rtx
);
50252 /* Multiply LO(A) * HI(B), and vice-versa. */
50253 t1
= gen_reg_rtx (wmode
);
50254 t2
= gen_reg_rtx (wmode
);
50255 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
50256 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
50258 /* Multiply LO(A) * LO(B). */
50259 t0
= gen_reg_rtx (wmode
);
50260 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
50262 /* Combine and shift the highparts into place. */
50263 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
50264 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
50267 /* Combine high and low parts. */
50268 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
50275 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
50276 bool uns_p
, bool high_p
)
50278 machine_mode wmode
= GET_MODE (dest
);
50279 machine_mode mode
= GET_MODE (op1
);
50280 rtx t1
, t2
, t3
, t4
, mask
;
50285 t1
= gen_reg_rtx (mode
);
50286 t2
= gen_reg_rtx (mode
);
50287 if (TARGET_XOP
&& !uns_p
)
50289 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50290 shuffle the elements once so that all elements are in the right
50291 place for immediate use: { A C B D }. */
50292 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
50293 const1_rtx
, GEN_INT (3)));
50294 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
50295 const1_rtx
, GEN_INT (3)));
50299 /* Put the elements into place for the multiply. */
50300 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
50301 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
50304 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
50308 /* Shuffle the elements between the lanes. After this we
50309 have { A B E F | C D G H } for each operand. */
50310 t1
= gen_reg_rtx (V4DImode
);
50311 t2
= gen_reg_rtx (V4DImode
);
50312 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
50313 const0_rtx
, const2_rtx
,
50314 const1_rtx
, GEN_INT (3)));
50315 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
50316 const0_rtx
, const2_rtx
,
50317 const1_rtx
, GEN_INT (3)));
50319 /* Shuffle the elements within the lanes. After this we
50320 have { A A B B | C C D D } or { E E F F | G G H H }. */
50321 t3
= gen_reg_rtx (V8SImode
);
50322 t4
= gen_reg_rtx (V8SImode
);
50323 mask
= GEN_INT (high_p
50324 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50325 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50326 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
50327 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
50329 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
50334 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
50335 uns_p
, OPTAB_DIRECT
);
50336 t2
= expand_binop (mode
,
50337 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
50338 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
50339 gcc_assert (t1
&& t2
);
50341 t3
= gen_reg_rtx (mode
);
50342 ix86_expand_vec_interleave (t3
, t1
, t2
, high_p
);
50343 emit_move_insn (dest
, gen_lowpart (wmode
, t3
));
50351 t1
= gen_reg_rtx (wmode
);
50352 t2
= gen_reg_rtx (wmode
);
50353 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
50354 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
50356 emit_insn (gen_rtx_SET (dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
50360 gcc_unreachable ();
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)

  rtx res_1, res_2, res_3, res_4;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  res_3 = gen_reg_rtx (V2DImode);
  res_4 = gen_reg_rtx (V2DImode);
  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
                                const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
                                const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50400 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
50402 machine_mode mode
= GET_MODE (op0
);
50403 rtx t1
, t2
, t3
, t4
, t5
, t6
;
50405 if (TARGET_AVX512DQ
&& mode
== V8DImode
)
50406 emit_insn (gen_avx512dq_mulv8di3 (op0
, op1
, op2
));
50407 else if (TARGET_AVX512DQ
&& TARGET_AVX512VL
&& mode
== V4DImode
)
50408 emit_insn (gen_avx512dq_mulv4di3 (op0
, op1
, op2
));
50409 else if (TARGET_AVX512DQ
&& TARGET_AVX512VL
&& mode
== V2DImode
)
50410 emit_insn (gen_avx512dq_mulv2di3 (op0
, op1
, op2
));
50411 else if (TARGET_XOP
&& mode
== V2DImode
)
50413 /* op1: A,B,C,D, op2: E,F,G,H */
50414 op1
= gen_lowpart (V4SImode
, op1
);
50415 op2
= gen_lowpart (V4SImode
, op2
);
50417 t1
= gen_reg_rtx (V4SImode
);
50418 t2
= gen_reg_rtx (V4SImode
);
50419 t3
= gen_reg_rtx (V2DImode
);
50420 t4
= gen_reg_rtx (V2DImode
);
50423 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
50429 /* t2: (B*E),(A*F),(D*G),(C*H) */
50430 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
50432 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50433 emit_insn (gen_xop_phadddq (t3
, t2
));
50435 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50436 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
50438 /* Multiply lower parts and add all */
50439 t5
= gen_reg_rtx (V2DImode
);
50440 emit_insn (gen_vec_widen_umult_even_v4si (t5
,
50441 gen_lowpart (V4SImode
, op1
),
50442 gen_lowpart (V4SImode
, op2
)));
50443 op0
= expand_binop (mode
, add_optab
, t5
, t4
, op0
, 1, OPTAB_DIRECT
);
50448 machine_mode nmode
;
50449 rtx (*umul
) (rtx
, rtx
, rtx
);
50451 if (mode
== V2DImode
)
50453 umul
= gen_vec_widen_umult_even_v4si
;
50456 else if (mode
== V4DImode
)
50458 umul
= gen_vec_widen_umult_even_v8si
;
50461 else if (mode
== V8DImode
)
50463 umul
= gen_vec_widen_umult_even_v16si
;
50467 gcc_unreachable ();
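      /* Illustrative note: splitting each 64-bit lane as
         a = ahi * 2^32 + alo and b = bhi * 2^32 + blo, the low 64 bits of
         a * b are alo * blo + ((ahi * blo + bhi * alo) << 32); the three
         widening 32x32->64 multiplies below compute exactly these terms.  */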
50470 /* Multiply low parts. */
50471 t1
= gen_reg_rtx (mode
);
50472 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
50474 /* Shift input vectors right 32 bits so we can multiply high parts. */
50476 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
50477 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
50479 /* Multiply high parts by low parts. */
50480 t4
= gen_reg_rtx (mode
);
50481 t5
= gen_reg_rtx (mode
);
50482 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
50483 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
50485 /* Combine and shift the highparts back. */
50486 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
50487 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
50489 /* Combine high and low parts. */
50490 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
50493 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
50494 gen_rtx_MULT (mode
, op1
, op2
));
/* Return 1 if control transfer instruction INSN
   should be encoded with bnd prefix.
   If insn is NULL then return 1 when control
   transfer instructions should be prefixed with
   bnd by default for current function.  */

ix86_bnd_prefixed_insn_p (rtx insn)

  /* For call insns check special flag.  */
  if (insn && CALL_P (insn))

      rtx call = get_call_rtx_from (insn);

        return CALL_EXPR_WITH_BOUNDS_P (call);

  /* All other insns are prefixed only if function is instrumented.  */
  return chkp_function_instrumented_p (current_function_decl);
/* Calculate integer abs() using only SSE2 instructions.  */

ix86_expand_sse2_abs (rtx target, rtx input)

  machine_mode mode = GET_MODE (target);

      /* For 32-bit signed integer X, the best way to calculate the absolute
         value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
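      /* For instance, with X = -5 and W = 32: X >> 31 == -1,
         (-1 ^ -5) == 4, and 4 - (-1) == 5; for non-negative X the shift
         yields 0 and the expression reduces to X.  */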
      tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
                                  GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
                                  NULL, 0, OPTAB_DIRECT);
      tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
                                  NULL, 0, OPTAB_DIRECT);
      x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
                               target, 0, OPTAB_DIRECT);

      /* For 16-bit signed integer X, the best way to calculate the absolute
         value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
      x = expand_simple_binop (mode, SMAX, tmp0, input,
                               target, 0, OPTAB_DIRECT);

      /* For 8-bit signed integer X, the best way to calculate the absolute
         value of X is min ((unsigned char) X, (unsigned char) (-X)),
         as SSE2 provides the PMINUB insn.  */
      tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
      x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
                               target, 0, OPTAB_DIRECT);

      gcc_unreachable ();

  emit_move_insn (target, x);
50567 /* Expand an extract from a vector register through pextr insn.
50568 Return true if successful. */
50571 ix86_expand_pextr (rtx
*operands
)
50573 rtx dst
= operands
[0];
50574 rtx src
= operands
[1];
50576 unsigned int size
= INTVAL (operands
[2]);
50577 unsigned int pos
= INTVAL (operands
[3]);
50579 if (SUBREG_P (dst
))
50581 /* Reject non-lowpart subregs. */
50582 if (SUBREG_BYTE (dst
) > 0)
50584 dst
= SUBREG_REG (dst
);
50587 if (SUBREG_P (src
))
50589 pos
+= SUBREG_BYTE (src
) * BITS_PER_UNIT
;
50590 src
= SUBREG_REG (src
);
50593 switch (GET_MODE (src
))
50602 machine_mode srcmode
, dstmode
;
50605 if (!int_mode_for_size (size
, 0).exists (&dstmode
))
50611 if (!TARGET_SSE4_1
)
50613 srcmode
= V16QImode
;
50619 srcmode
= V8HImode
;
50623 if (!TARGET_SSE4_1
)
50625 srcmode
= V4SImode
;
50629 gcc_assert (TARGET_64BIT
);
50630 if (!TARGET_SSE4_1
)
50632 srcmode
= V2DImode
;
50639 /* Reject extractions from misaligned positions. */
50640 if (pos
& (size
-1))
50643 if (GET_MODE (dst
) == dstmode
)
50646 d
= gen_reg_rtx (dstmode
);
50648 /* Construct insn pattern. */
50649 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (pos
/ size
)));
50650 pat
= gen_rtx_VEC_SELECT (dstmode
, gen_lowpart (srcmode
, src
), pat
);
50652 /* Let the rtl optimizers know about the zero extension performed. */
50653 if (dstmode
== QImode
|| dstmode
== HImode
)
50655 pat
= gen_rtx_ZERO_EXTEND (SImode
, pat
);
50656 d
= gen_lowpart (SImode
, d
);
50659 emit_insn (gen_rtx_SET (d
, pat
));
50662 emit_move_insn (dst
, gen_lowpart (GET_MODE (dst
), d
));
50671 /* Expand an insert into a vector register through pinsr insn.
50672 Return true if successful. */
50675 ix86_expand_pinsr (rtx
*operands
)
50677 rtx dst
= operands
[0];
50678 rtx src
= operands
[3];
50680 unsigned int size
= INTVAL (operands
[1]);
50681 unsigned int pos
= INTVAL (operands
[2]);
50683 if (SUBREG_P (dst
))
50685 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
50686 dst
= SUBREG_REG (dst
);
50689 switch (GET_MODE (dst
))
50698 machine_mode srcmode
, dstmode
;
50699 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
50702 if (!int_mode_for_size (size
, 0).exists (&srcmode
))
50708 if (!TARGET_SSE4_1
)
50710 dstmode
= V16QImode
;
50711 pinsr
= gen_sse4_1_pinsrb
;
50717 dstmode
= V8HImode
;
50718 pinsr
= gen_sse2_pinsrw
;
50722 if (!TARGET_SSE4_1
)
50724 dstmode
= V4SImode
;
50725 pinsr
= gen_sse4_1_pinsrd
;
50729 gcc_assert (TARGET_64BIT
);
50730 if (!TARGET_SSE4_1
)
50732 dstmode
= V2DImode
;
50733 pinsr
= gen_sse4_1_pinsrq
;
50740 /* Reject insertions to misaligned positions. */
50741 if (pos
& (size
-1))
50744 if (SUBREG_P (src
))
50746 unsigned int srcpos
= SUBREG_BYTE (src
);
50752 extr_ops
[0] = gen_reg_rtx (srcmode
);
50753 extr_ops
[1] = gen_lowpart (srcmode
, SUBREG_REG (src
));
50754 extr_ops
[2] = GEN_INT (size
);
50755 extr_ops
[3] = GEN_INT (srcpos
* BITS_PER_UNIT
);
50757 if (!ix86_expand_pextr (extr_ops
))
50763 src
= gen_lowpart (srcmode
, SUBREG_REG (src
));
50766 if (GET_MODE (dst
) == dstmode
)
50769 d
= gen_reg_rtx (dstmode
);
50771 emit_insn (pinsr (d
, gen_lowpart (dstmode
, dst
),
50772 gen_lowpart (srcmode
, src
),
50773 GEN_INT (1 << (pos
/ size
))));
50775 emit_move_insn (dst
, gen_lowpart (GET_MODE (dst
), d
));
50784 /* This function returns the calling abi specific va_list type node.
50785 It returns the FNDECL specific va_list type. */
50788 ix86_fn_abi_va_list (tree fndecl
)
50791 return va_list_type_node
;
50792 gcc_assert (fndecl
!= NULL_TREE
);
50794 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
50795 return ms_va_list_type_node
;
50797 return sysv_va_list_type_node
;
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */
50804 ix86_canonical_va_list_type (tree type
)
50808 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type
)))
50809 return ms_va_list_type_node
;
50811 if ((TREE_CODE (type
) == ARRAY_TYPE
50812 && integer_zerop (array_type_nelts (type
)))
50813 || POINTER_TYPE_P (type
))
50815 tree elem_type
= TREE_TYPE (type
);
50816 if (TREE_CODE (elem_type
) == RECORD_TYPE
50817 && lookup_attribute ("sysv_abi va_list",
50818 TYPE_ATTRIBUTES (elem_type
)))
50819 return sysv_va_list_type_node
;
50825 return std_canonical_va_list_type (type
);
50828 /* Iterate through the target-specific builtin types for va_list.
50829 IDX denotes the iterator, *PTREE is set to the result type of
50830 the va_list builtin, and *PNAME to its internal type.
50831 Returns zero if there is no element for this index, otherwise
50832 IDX should be increased upon the next call.
50833 Note, do not iterate a base builtin's name like __builtin_va_list.
50834 Used from c_common_nodes_and_builtins. */
50837 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
50847 *ptree
= ms_va_list_type_node
;
50848 *pname
= "__builtin_ms_va_list";
50852 *ptree
= sysv_va_list_type_node
;
50853 *pname
= "__builtin_sysv_va_list";
50861 #undef TARGET_SCHED_DISPATCH
50862 #define TARGET_SCHED_DISPATCH has_dispatch
50863 #undef TARGET_SCHED_DISPATCH_DO
50864 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50865 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50866 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50867 #undef TARGET_SCHED_REORDER
50868 #define TARGET_SCHED_REORDER ix86_sched_reorder
50869 #undef TARGET_SCHED_ADJUST_PRIORITY
50870 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50871 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50872 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50873 ix86_dependencies_evaluation_hook
50875 /* The size of the dispatch window is the total number of bytes of
50876 object code allowed in a window. */
50877 #define DISPATCH_WINDOW_SIZE 16
50879 /* Number of dispatch windows considered for scheduling. */
50880 #define MAX_DISPATCH_WINDOWS 3
50882 /* Maximum number of instructions in a window. */
50885 /* Maximum number of immediate operands in a window. */
50888 /* Maximum number of immediate bits allowed in a window. */
50889 #define MAX_IMM_SIZE 128
50891 /* Maximum number of 32 bit immediates allowed in a window. */
50892 #define MAX_IMM_32 4
50894 /* Maximum number of 64 bit immediates allowed in a window. */
50895 #define MAX_IMM_64 2
50897 /* Maximum total of loads or prefetches allowed in a window. */
50900 /* Maximum total of stores allowed in a window. */
50901 #define MAX_STORE 1
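/* Illustrative sketch (not part of GCC): one simplified way the per-window
   immediate budget implied by the limits above could be checked.  It folds
   the separate 32-bit/64-bit rules used later in count_num_restricted into
   a single check, with a 64-bit immediate counting as two 32-bit slots and
   the byte total capped by MAX_IMM_SIZE.  */

static inline bool
example_window_imm_budget_ok (int cur_imm_bytes, int cur_imm32,
                              int cur_imm64, int new_imm32, int new_imm64)
{
  /* Total immediate bytes after adding the new instruction.  */
  if (cur_imm_bytes + new_imm32 * 4 + new_imm64 * 8 > MAX_IMM_SIZE)
    return false;
  /* 64-bit immediates consume two of the MAX_IMM_32 slots.  */
  if (cur_imm32 + new_imm32 + 2 * (cur_imm64 + new_imm64) > MAX_IMM_32)
    return false;
  return cur_imm64 + new_imm64 <= MAX_IMM_64;
}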
50907 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50908 enum dispatch_group
{
50923 /* Number of allowable groups in a dispatch window. It is an array
50924 indexed by dispatch_group enum. 100 is used as a big number,
50925 because the number of these kinds of operations does not have any
50926 effect in a dispatch window, but we need them for other reasons in
50928 static unsigned int num_allowable_groups
[disp_last
] = {
50929 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
50932 char group_name
[disp_last
+ 1][16] = {
50933 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50934 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50935 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50938 /* Instruction path. */
50941 path_single
, /* Single micro op. */
50942 path_double
, /* Double micro op. */
50943 path_multi
, /* Instructions with more than 2 micro ops. */
50947 /* sched_insn_info defines a window onto the instructions scheduled in
50948 the basic block. It contains a pointer to the insn_info table and
50949 the instruction scheduled.
50951 Windows are allocated for each basic block and are linked
50953 typedef struct sched_insn_info_s
{
50955 enum dispatch_group group
;
50956 enum insn_path path
;
50961 /* Linked list of dispatch windows. This is a two-way list of
50962 dispatch windows of a basic block. It contains information about
50963 the number of uops in the window and the total number of
50964 instructions and of bytes in the object code for this dispatch window. */
50966 typedef struct dispatch_windows_s
{
50967 int num_insn
; /* Number of insns in the window. */
50968 int num_uops
; /* Number of uops in the window. */
50969 int window_size
; /* Number of bytes in the window. */
50970 int window_num
; /* Window number, 0 or 1. */
50971 int num_imm
; /* Number of immediates in the window. */
50972 int num_imm_32
; /* Number of 32 bit immediates in the window. */
50973 int num_imm_64
; /* Number of 64 bit immediates in the window. */
50974 int imm_size
; /* Total immediates in the window. */
50975 int num_loads
; /* Total memory loads in the window. */
50976 int num_stores
; /* Total memory stores in the window. */
50977 int violation
; /* Violation exists in window. */
50978 sched_insn_info
*window
; /* Pointer to the window. */
50979 struct dispatch_windows_s
*next
;
50980 struct dispatch_windows_s
*prev
;
50981 } dispatch_windows
;
50983 /* Immediate values used in an insn. */
50984 typedef struct imm_info_s
50991 static dispatch_windows
*dispatch_window_list
;
50992 static dispatch_windows
*dispatch_window_list1
;
50994 /* Get dispatch group of insn. */
50996 static enum dispatch_group
50997 get_mem_group (rtx_insn
*insn
)
50999 enum attr_memory memory
;
51001 if (INSN_CODE (insn
) < 0)
51002 return disp_no_group
;
51003 memory
= get_attr_memory (insn
);
51004 if (memory
== MEMORY_STORE
)
51007 if (memory
== MEMORY_LOAD
)
51010 if (memory
== MEMORY_BOTH
)
51011 return disp_load_store
;
51013 return disp_no_group
;
51016 /* Return true if insn is a compare instruction. */
51019 is_cmp (rtx_insn
*insn
)
51021 enum attr_type type
;
51023 type
= get_attr_type (insn
);
51024 return (type
== TYPE_TEST
51025 || type
== TYPE_ICMP
51026 || type
== TYPE_FCMP
51027 || GET_CODE (PATTERN (insn
)) == COMPARE
);
51030 /* Return true if a dispatch violation was encountered. */
51033 dispatch_violation (void)
51035 if (dispatch_window_list
->next
)
51036 return dispatch_window_list
->next
->violation
;
51037 return dispatch_window_list
->violation
;
51040 /* Return true if insn is a branch instruction. */
51043 is_branch (rtx_insn
*insn
)
51045 return (CALL_P (insn
) || JUMP_P (insn
));
51048 /* Return true if insn is a prefetch instruction. */
51051 is_prefetch (rtx_insn
*insn
)
51053 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
51056 /* This function initializes a dispatch window and the list container holding a
51057 pointer to the window. */
51060 init_window (int window_num
)
51063 dispatch_windows
*new_list
;
51065 if (window_num
== 0)
51066 new_list
= dispatch_window_list
;
51068 new_list
= dispatch_window_list1
;
51070 new_list
->num_insn
= 0;
51071 new_list
->num_uops
= 0;
51072 new_list
->window_size
= 0;
51073 new_list
->next
= NULL
;
51074 new_list
->prev
= NULL
;
51075 new_list
->window_num
= window_num
;
51076 new_list
->num_imm
= 0;
51077 new_list
->num_imm_32
= 0;
51078 new_list
->num_imm_64
= 0;
51079 new_list
->imm_size
= 0;
51080 new_list
->num_loads
= 0;
51081 new_list
->num_stores
= 0;
51082 new_list
->violation
= false;
51084 for (i
= 0; i
< MAX_INSN
; i
++)
51086 new_list
->window
[i
].insn
= NULL
;
51087 new_list
->window
[i
].group
= disp_no_group
;
51088 new_list
->window
[i
].path
= no_path
;
51089 new_list
->window
[i
].byte_len
= 0;
51090 new_list
->window
[i
].imm_bytes
= 0;
51095 /* This function allocates and initializes a dispatch window and the
51096 list container holding a pointer to the window. */
51098 static dispatch_windows
*
51099 allocate_window (void)
51101 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
51102 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
51107 /* This routine initializes the dispatch scheduling information. It
51108 initiates building dispatch scheduler tables and constructs the
51109 first dispatch window. */
51112 init_dispatch_sched (void)
51114 /* Allocate a dispatch list and a window. */
51115 dispatch_window_list
= allocate_window ();
51116 dispatch_window_list1
= allocate_window ();
51121 /* This function returns true if a branch is detected. End of a basic block
51122 does not have to be a branch, but here we assume only branches end a basic block. */
51126 is_end_basic_block (enum dispatch_group group
)
51128 return group
== disp_branch
;
51131 /* This function is called when the end of a window processing is reached. */
51134 process_end_window (void)
51136 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
51137 if (dispatch_window_list
->next
)
51139 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
51140 gcc_assert (dispatch_window_list
->window_size
51141 + dispatch_window_list1
->window_size
<= 48);
51147 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51148 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
51149 for 48 bytes of instructions. Note that these windows are not dispatch
51150 windows whose sizes are DISPATCH_WINDOW_SIZE. */
51152 static dispatch_windows
*
51153 allocate_next_window (int window_num
)
51155 if (window_num
== 0)
51157 if (dispatch_window_list
->next
)
51160 return dispatch_window_list
;
51163 dispatch_window_list
->next
= dispatch_window_list1
;
51164 dispatch_window_list1
->prev
= dispatch_window_list
;
51166 return dispatch_window_list1
;
51169 /* Compute number of immediate operands of an instruction. */
51172 find_constant (rtx in_rtx
, imm_info
*imm_values
)
51174 if (INSN_P (in_rtx
))
51175 in_rtx
= PATTERN (in_rtx
);
51176 subrtx_iterator::array_type array
;
51177 FOR_EACH_SUBRTX (iter
, array
, in_rtx
, ALL
)
51178 if (const_rtx x
= *iter
)
51179 switch (GET_CODE (x
))
51184 (imm_values
->imm
)++;
51185 if (x86_64_immediate_operand (CONST_CAST_RTX (x
), SImode
))
51186 (imm_values
->imm32
)++;
51188 (imm_values
->imm64
)++;
51192 case CONST_WIDE_INT
:
51193 (imm_values
->imm
)++;
51194 (imm_values
->imm64
)++;
51198 if (LABEL_KIND (x
) == LABEL_NORMAL
)
51200 (imm_values
->imm
)++;
51201 (imm_values
->imm32
)++;
51210 /* Return total size of immediate operands of an instruction along with number
51211 of corresponding immediate-operands. It initializes its parameters to zero
51212 before calling FIND_CONSTANT.
51213 INSN is the input instruction. IMM is the total of immediates.
51214 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
51218 get_num_immediates (rtx_insn
*insn
, int *imm
, int *imm32
, int *imm64
)
51220 imm_info imm_values
= {0, 0, 0};
51222 find_constant (insn
, &imm_values
);
51223 *imm
= imm_values
.imm
;
51224 *imm32
= imm_values
.imm32
;
51225 *imm64
= imm_values
.imm64
;
51226 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
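/* Worked example (illustrative): for "mov $0x12345678, %eax" the walk in
   find_constant sees one CONST_INT that satisfies x86_64_immediate_operand
   in SImode, so *IMM = *IMM32 = 1, *IMM64 = 0 and the function returns
   1*4 + 0*8 = 4 bytes; a movabs of a genuine 64-bit constant gives
   *IMM64 = 1 and contributes 8 bytes instead.  */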
51229 /* This function indicates if an operand of an instruction is an immediate. */
51233 has_immediate (rtx_insn
*insn
)
51235 int num_imm_operand
;
51236 int num_imm32_operand
;
51237 int num_imm64_operand
;
51240 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
51241 &num_imm64_operand
);
51245 /* Return single or double path for instructions. */
51247 static enum insn_path
51248 get_insn_path (rtx_insn
*insn
)
51250 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
51252 if ((int)path
== 0)
51253 return path_single
;
51255 if ((int)path
== 1)
51256 return path_double
;
51261 /* Return insn dispatch group. */
51263 static enum dispatch_group
51264 get_insn_group (rtx_insn
*insn
)
51266 enum dispatch_group group
= get_mem_group (insn
);
51270 if (is_branch (insn
))
51271 return disp_branch
;
51276 if (has_immediate (insn
))
51279 if (is_prefetch (insn
))
51280 return disp_prefetch
;
51282 return disp_no_group
;
51285 /* Count number of GROUP restricted instructions in a dispatch
51286 window WINDOW_LIST. */
51289 count_num_restricted (rtx_insn
*insn
, dispatch_windows
*window_list
)
51291 enum dispatch_group group
= get_insn_group (insn
);
51293 int num_imm_operand
;
51294 int num_imm32_operand
;
51295 int num_imm64_operand
;
51297 if (group
== disp_no_group
)
51300 if (group
== disp_imm
)
51302 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
51303 &num_imm64_operand
);
51304 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
51305 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
51306 || (num_imm32_operand
> 0
51307 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
51308 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
51309 || (num_imm64_operand
> 0
51310 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
51311 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
51312 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
51313 && num_imm64_operand
> 0
51314 && ((window_list
->num_imm_64
> 0
51315 && window_list
->num_insn
>= 2)
51316 || window_list
->num_insn
>= 3)))
51322 if ((group
== disp_load_store
51323 && (window_list
->num_loads
>= MAX_LOAD
51324 || window_list
->num_stores
>= MAX_STORE
))
51325 || ((group
== disp_load
51326 || group
== disp_prefetch
)
51327 && window_list
->num_loads
>= MAX_LOAD
)
51328 || (group
== disp_store
51329 && window_list
->num_stores
>= MAX_STORE
))
51335 /* This function returns true if insn satisfies dispatch rules on the
51336 last window scheduled. */
51339 fits_dispatch_window (rtx_insn
*insn
)
51341 dispatch_windows
*window_list
= dispatch_window_list
;
51342 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
51343 unsigned int num_restrict
;
51344 enum dispatch_group group
= get_insn_group (insn
);
51345 enum insn_path path
= get_insn_path (insn
);
51348 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51349 instructions should be given the lowest priority in the
51350 scheduling process in Haifa scheduler to make sure they will be
51351 scheduled in the same dispatch window as the reference to them. */
51352 if (group
== disp_jcc
|| group
== disp_cmp
)
51355 /* Check nonrestricted. */
51356 if (group
== disp_no_group
|| group
== disp_branch
)
51359 /* Get last dispatch window. */
51360 if (window_list_next
)
51361 window_list
= window_list_next
;
51363 if (window_list
->window_num
== 1)
51365 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
51368 || (min_insn_size (insn
) + sum
) >= 48)
51369 /* Window 1 is full. Go for next window. */
51373 num_restrict
= count_num_restricted (insn
, window_list
);
51375 if (num_restrict
> num_allowable_groups
[group
])
51378 /* See if it fits in the first window. */
51379 if (window_list
->window_num
== 0)
51381 /* The first window should have only single and double path uops. */
51383 if (path
== path_double
51384 && (window_list
->num_uops
+ 2) > MAX_INSN
)
51386 else if (path
!= path_single
)
51392 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51393 dispatch window WINDOW_LIST. */
51396 add_insn_window (rtx_insn
*insn
, dispatch_windows
*window_list
, int num_uops
)
51398 int byte_len
= min_insn_size (insn
);
51399 int num_insn
= window_list
->num_insn
;
51401 sched_insn_info
*window
= window_list
->window
;
51402 enum dispatch_group group
= get_insn_group (insn
);
51403 enum insn_path path
= get_insn_path (insn
);
51404 int num_imm_operand
;
51405 int num_imm32_operand
;
51406 int num_imm64_operand
;
51408 if (!window_list
->violation
&& group
!= disp_cmp
51409 && !fits_dispatch_window (insn
))
51410 window_list
->violation
= true;
51412 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
51413 &num_imm64_operand
);
51415 /* Initialize window with new instruction. */
51416 window
[num_insn
].insn
= insn
;
51417 window
[num_insn
].byte_len
= byte_len
;
51418 window
[num_insn
].group
= group
;
51419 window
[num_insn
].path
= path
;
51420 window
[num_insn
].imm_bytes
= imm_size
;
51422 window_list
->window_size
+= byte_len
;
51423 window_list
->num_insn
= num_insn
+ 1;
51424 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
51425 window_list
->imm_size
+= imm_size
;
51426 window_list
->num_imm
+= num_imm_operand
;
51427 window_list
->num_imm_32
+= num_imm32_operand
;
51428 window_list
->num_imm_64
+= num_imm64_operand
;
51430 if (group
== disp_store
)
51431 window_list
->num_stores
+= 1;
51432 else if (group
== disp_load
51433 || group
== disp_prefetch
)
51434 window_list
->num_loads
+= 1;
51435 else if (group
== disp_load_store
)
51437 window_list
->num_stores
+= 1;
51438 window_list
->num_loads
+= 1;
51442 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51443 If the total bytes of instructions or the number of instructions in
51444 the window exceed the allowed maximum, it allocates a new window. */
51447 add_to_dispatch_window (rtx_insn
*insn
)
51450 dispatch_windows
*window_list
;
51451 dispatch_windows
*next_list
;
51452 dispatch_windows
*window0_list
;
51453 enum insn_path path
;
51454 enum dispatch_group insn_group
;
51462 if (INSN_CODE (insn
) < 0)
51465 byte_len
= min_insn_size (insn
);
51466 window_list
= dispatch_window_list
;
51467 next_list
= window_list
->next
;
51468 path
= get_insn_path (insn
);
51469 insn_group
= get_insn_group (insn
);
51471 /* Get the last dispatch window. */
51473 window_list
= dispatch_window_list
->next
;
51475 if (path
== path_single
)
51477 else if (path
== path_double
)
51480 insn_num_uops
= (int) path
;
51482 /* If current window is full, get a new window.
51483 Window number zero is full, if MAX_INSN uops are scheduled in it.
51484 Window number one is full, if window zero's bytes plus window
51485 one's bytes is 32, or if the bytes of the new instruction added
51486 to the total make it greater than 48, or if it already has MAX_INSN
51487 instructions in it. */
51488 num_insn
= window_list
->num_insn
;
51489 num_uops
= window_list
->num_uops
;
51490 window_num
= window_list
->window_num
;
51491 insn_fits
= fits_dispatch_window (insn
);
51493 if (num_insn
>= MAX_INSN
51494 || num_uops
+ insn_num_uops
> MAX_INSN
51497 window_num
= ~window_num
& 1;
51498 window_list
= allocate_next_window (window_num
);
51501 if (window_num
== 0)
51503 add_insn_window (insn
, window_list
, insn_num_uops
);
51504 if (window_list
->num_insn
>= MAX_INSN
51505 && insn_group
== disp_branch
)
51507 process_end_window ();
51511 else if (window_num
== 1)
51513 window0_list
= window_list
->prev
;
51514 sum
= window0_list
->window_size
+ window_list
->window_size
;
51516 || (byte_len
+ sum
) >= 48)
51518 process_end_window ();
51519 window_list
= dispatch_window_list
;
51522 add_insn_window (insn
, window_list
, insn_num_uops
);
51525 gcc_unreachable ();
51527 if (is_end_basic_block (insn_group
))
51529 /* End of basic block is reached; do end-basic-block processing. */
51530 process_end_window ();
51535 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51537 DEBUG_FUNCTION
static void
51538 debug_dispatch_window_file (FILE *file
, int window_num
)
51540 dispatch_windows
*list
;
51543 if (window_num
== 0)
51544 list
= dispatch_window_list
;
51546 list
= dispatch_window_list1
;
51548 fprintf (file
, "Window #%d:\n", list
->window_num
);
51549 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
51550 list
->num_insn
, list
->num_uops
, list
->window_size
);
51551 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51552 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
51554 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
51556 fprintf (file
, " insn info:\n");
51558 for (i
= 0; i
< MAX_INSN
; i
++)
51560 if (!list
->window
[i
].insn
)
51562 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51563 i
, group_name
[list
->window
[i
].group
],
51564 i
, (void *)list
->window
[i
].insn
,
51565 i
, list
->window
[i
].path
,
51566 i
, list
->window
[i
].byte_len
,
51567 i
, list
->window
[i
].imm_bytes
);
51571 /* Print to stdout a dispatch window. */
51573 DEBUG_FUNCTION
void
51574 debug_dispatch_window (int window_num
)
51576 debug_dispatch_window_file (stdout
, window_num
);
51579 /* Print INSN dispatch information to FILE. */
51581 DEBUG_FUNCTION
static void
51582 debug_insn_dispatch_info_file (FILE *file
, rtx_insn
*insn
)
51585 enum insn_path path
;
51586 enum dispatch_group group
;
51588 int num_imm_operand
;
51589 int num_imm32_operand
;
51590 int num_imm64_operand
;
51592 if (INSN_CODE (insn
) < 0)
51595 byte_len
= min_insn_size (insn
);
51596 path
= get_insn_path (insn
);
51597 group
= get_insn_group (insn
);
51598 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
51599 &num_imm64_operand
);
51601 fprintf (file
, " insn info:\n");
51602 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
51603 group_name
[group
], path
, byte_len
);
51604 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51605 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
51608 /* Print to STDOUT the status of the ready list with respect to
51609 dispatch windows. */
51611 DEBUG_FUNCTION
void
51612 debug_ready_dispatch (void)
51615 int no_ready
= number_in_ready ();
51617 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
51619 for (i
= 0; i
< no_ready
; i
++)
51620 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
51623 /* This routine is the driver of the dispatch scheduler. */
51626 do_dispatch (rtx_insn
*insn
, int mode
)
51628 if (mode
== DISPATCH_INIT
)
51629 init_dispatch_sched ();
51630 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
51631 add_to_dispatch_window (insn
);
51634 /* Return TRUE if Dispatch Scheduling is supported. */
51637 has_dispatch (rtx_insn
*insn
, int action
)
51639 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
51640 || TARGET_BDVER4
|| TARGET_ZNVER1
) && flag_dispatch_scheduler
)
51646 case IS_DISPATCH_ON
:
51650 return is_cmp (insn
);
51652 case DISPATCH_VIOLATION
:
51653 return dispatch_violation ();
51655 case FITS_DISPATCH_WINDOW
:
51656 return fits_dispatch_window (insn
);
51662 /* Implementation of reassociation_width target hook used by
51663 reassoc phase to identify parallelism level in reassociated
51664 tree. The statement's tree_code is passed in OPC. Arguments type
51667 Currently parallel reassociation is enabled for Atom
51668 processors only and we set reassociation width to be 2
51669 because Atom may issue up to 2 instructions per cycle.
51671 Return value should be fixed if parallel reassociation is
51672 enabled for other processors. */
51675 ix86_reassociation_width (unsigned int, machine_mode mode
)
51678 if (VECTOR_MODE_P (mode
))
51680 if (TARGET_VECTOR_PARALLEL_EXECUTION
)
51687 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
51689 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
51690 return ((TARGET_64BIT
&& ix86_tune
== PROCESSOR_HASWELL
)? 4 : 2);
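/* Illustrative sketch (not GCC code): what a reassociation width of 2 means
   for the reassoc pass.  Instead of one serial dependence chain, the sum may
   be split into two independent partial sums, roughly like the hand-written
   variant below (valid for FP only under -ffast-math-style reassociation).  */

static double
example_sum_with_width_2 (const double *a, int n)
{
  double s0 = 0.0, s1 = 0.0;
  int i;

  /* Two independent accumulators can issue in parallel.  */
  for (i = 0; i + 1 < n; i += 2)
    {
      s0 += a[i];
      s1 += a[i + 1];
    }
  if (i < n)
    s0 += a[i];
  return s0 + s1;
}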
51695 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51696 place emms and femms instructions. */
51698 static machine_mode
51699 ix86_preferred_simd_mode (scalar_mode mode
)
51707 return TARGET_AVX512BW
? V64QImode
:
51708 (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
51710 return TARGET_AVX512BW
? V32HImode
:
51711 (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
51713 return TARGET_AVX512F
? V16SImode
:
51714 (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
51716 return TARGET_AVX512F
? V8DImode
:
51717 (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
51720 if (TARGET_AVX512F
)
51722 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
51728 if (TARGET_AVX512F
)
51730 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
51732 else if (TARGET_SSE2
)
51741 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51742 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51743 256bit and 128bit vectors. */
51745 static unsigned int
51746 ix86_autovectorize_vector_sizes (void)
51748 return TARGET_AVX512F
? 64 | 32 | 16 :
51749 (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
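/* Illustrative sketch (not GCC code): the value returned above is a bit mask
   of candidate vector sizes in bytes (e.g. 64 | 32 | 16 with AVX-512), while
   a return of 0 means only the preferred SIMD mode is tried.  A consumer
   could pick the widest candidate like this.  */

static unsigned int
example_widest_vector_size (unsigned int size_mask)
{
  for (unsigned int size = 64; size >= 16; size /= 2)
    if (size_mask & size)
      return size;
  return 0;
}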
51752 /* Implementation of targetm.vectorize.get_mask_mode. */
51754 static opt_machine_mode
51755 ix86_get_mask_mode (unsigned nunits
, unsigned vector_size
)
51757 unsigned elem_size
= vector_size
/ nunits
;
51759 /* Scalar mask case. */
51760 if ((TARGET_AVX512F
&& vector_size
== 64)
51761 || (TARGET_AVX512VL
&& (vector_size
== 32 || vector_size
== 16)))
51763 if (elem_size
== 4 || elem_size
== 8 || TARGET_AVX512BW
)
51764 return smallest_int_mode_for_size (nunits
);
51767 scalar_int_mode elem_mode
51768 = smallest_int_mode_for_size (elem_size
* BITS_PER_UNIT
);
51770 gcc_assert (elem_size
* nunits
== vector_size
);
51772 return mode_for_vector (elem_mode
, nunits
);
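/* Worked examples (illustrative): with AVX-512, a 64-byte vector of 16 SImode
   elements (elem_size 4) takes the scalar-mask path and
   smallest_int_mode_for_size (16) yields HImode, i.e. a 16-bit k-register
   mask.  Without AVX-512, a 16-byte vector of 4 SImode elements falls through
   to the vector-mask path, where elem_mode is SImode and the result is
   V4SImode.  */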
51777 /* Return class of registers which could be used for pseudo of MODE
51778 and of class RCLASS for spilling instead of memory. Return NO_REGS
51779 if it is not possible or non-profitable. */
51781 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
51784 ix86_spill_class (reg_class_t rclass
, machine_mode mode
)
51786 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
51788 && TARGET_INTER_UNIT_MOVES_TO_VEC
51789 && TARGET_INTER_UNIT_MOVES_FROM_VEC
51790 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
51791 && INTEGER_CLASS_P (rclass
))
51792 return ALL_SSE_REGS
;
51796 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
51797 but returns a lower bound. */
51799 static unsigned int
51800 ix86_max_noce_ifcvt_seq_cost (edge e
)
51802 bool predictable_p
= predictable_edge_p (e
);
51804 enum compiler_param param
51806 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
51807 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST
);
51809 /* If we have a parameter set, use that, otherwise take a guess using
51811 if (global_options_set
.x_param_values
[param
])
51812 return PARAM_VALUE (param
);
51814 return BRANCH_COST (true, predictable_p
) * COSTS_N_INSNS (2);
51817 /* Return true if SEQ is a good candidate as a replacement for the
51818 if-convertible sequence described in IF_INFO. */
51821 ix86_noce_conversion_profitable_p (rtx_insn
*seq
, struct noce_if_info
*if_info
)
51823 if (TARGET_ONE_IF_CONV_INSN
&& if_info
->speed_p
)
51826 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
51827 Maybe we should allow even more conditional moves as long as they
51828 are used far enough not to stall the CPU, or also consider
51829 IF_INFO->TEST_BB succ edge probabilities. */
51830 for (rtx_insn
*insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
51832 rtx set
= single_set (insn
);
51835 if (GET_CODE (SET_SRC (set
)) != IF_THEN_ELSE
)
51837 rtx src
= SET_SRC (set
);
51838 machine_mode mode
= GET_MODE (src
);
51839 if (GET_MODE_CLASS (mode
) != MODE_INT
51840 && GET_MODE_CLASS (mode
) != MODE_FLOAT
)
51842 if ((!REG_P (XEXP (src
, 1)) && !MEM_P (XEXP (src
, 1)))
51843 || (!REG_P (XEXP (src
, 2)) && !MEM_P (XEXP (src
, 2))))
51845 /* insn is CMOV or FCMOV. */
51846 if (++cmov_cnt
> 1)
51850 return default_noce_conversion_profitable_p (seq
, if_info
);
51853 /* Implement targetm.vectorize.init_cost. */
51856 ix86_init_cost (struct loop
*)
51858 unsigned *cost
= XNEWVEC (unsigned, 3);
51859 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
51863 /* Implement targetm.vectorize.add_stmt_cost. */
51866 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
51867 struct _stmt_vec_info
*stmt_info
, int misalign
,
51868 enum vect_cost_model_location where
)
51870 unsigned *cost
= (unsigned *) data
;
51871 unsigned retval
= 0;
51873 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
51874 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
51876 /* Penalize DFmode vector operations for Bonnell. */
51877 if (TARGET_BONNELL
&& kind
== vector_stmt
51878 && vectype
&& GET_MODE_INNER (TYPE_MODE (vectype
)) == DFmode
)
51879 stmt_cost
*= 5; /* FIXME: The value here is arbitrary. */
51881 /* Statements in an inner loop relative to the loop being
51882 vectorized are weighted more heavily. The value here is
51883 arbitrary and could potentially be improved with analysis. */
51884 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
51885 count
*= 50; /* FIXME. */
51887 retval
= (unsigned) (count
* stmt_cost
);
51889 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51890 for Silvermont as it has an out-of-order integer pipeline and can execute
51891 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51892 if ((TARGET_SILVERMONT
|| TARGET_INTEL
)
51893 && stmt_info
&& stmt_info
->stmt
)
51895 tree lhs_op
= gimple_get_lhs (stmt_info
->stmt
);
51896 if (lhs_op
&& TREE_CODE (TREE_TYPE (lhs_op
)) == INTEGER_TYPE
)
51897 retval
= (retval
* 17) / 10;
51900 cost
[where
] += retval
;
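/* Worked example (illustrative): the Silvermont adjustment above is a 1.7x
   penalty done in integer arithmetic, e.g. count = 2 and stmt_cost = 3 give
   retval = 6, scaled to (6 * 17) / 10 = 10 before being added to the
   prologue/body/epilogue bucket selected by WHERE.  */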
51905 /* Implement targetm.vectorize.finish_cost. */
51908 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
51909 unsigned *body_cost
, unsigned *epilogue_cost
)
51911 unsigned *cost
= (unsigned *) data
;
51912 *prologue_cost
= cost
[vect_prologue
];
51913 *body_cost
= cost
[vect_body
];
51914 *epilogue_cost
= cost
[vect_epilogue
];
51917 /* Implement targetm.vectorize.destroy_cost_data. */
51920 ix86_destroy_cost_data (void *data
)
51925 /* Validate target specific memory model bits in VAL. */
51927 static unsigned HOST_WIDE_INT
51928 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
51930 enum memmodel model
= memmodel_from_int (val
);
51933 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
51935 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
51937 warning (OPT_Winvalid_memory_model
,
51938 "Unknown architecture specific memory model");
51939 return MEMMODEL_SEQ_CST
;
51941 strong
= (is_mm_acq_rel (model
) || is_mm_seq_cst (model
));
51942 if (val
& IX86_HLE_ACQUIRE
&& !(is_mm_acquire (model
) || strong
))
51944 warning (OPT_Winvalid_memory_model
,
51945 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51946 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
51948 if (val
& IX86_HLE_RELEASE
&& !(is_mm_release (model
) || strong
))
51950 warning (OPT_Winvalid_memory_model
,
51951 "HLE_RELEASE not used with RELEASE or stronger memory model");
51952 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
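/* Illustrative usage (not GCC code): the HLE bits validated above are the
   ones user code can OR into the memory model argument of the __atomic
   built-ins on HLE-capable targets, for example:

       while (__atomic_exchange_n (&lock, 1,
                                   __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
         ;
       __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_ACQUIRE with a model weaker than acquire (or
   IX86_HLE_RELEASE with one weaker than release) is what produces the
   warnings above.  */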
51957 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
51958 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51959 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51960 or number of vecsize_mangle variants that should be emitted. */
51963 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
51964 struct cgraph_simd_clone
*clonei
,
51965 tree base_type
, int num
)
51969 if (clonei
->simdlen
51970 && (clonei
->simdlen
< 2
51971 || clonei
->simdlen
> 1024
51972 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
51974 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
51975 "unsupported simdlen %d", clonei
->simdlen
);
51979 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
51980 if (TREE_CODE (ret_type
) != VOID_TYPE
)
51981 switch (TYPE_MODE (ret_type
))
51989 /* case E_SCmode: */
51990 /* case E_DCmode: */
51993 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
51994 "unsupported return type %qT for simd\n", ret_type
);
52001 for (t
= DECL_ARGUMENTS (node
->decl
), i
= 0; t
; t
= DECL_CHAIN (t
), i
++)
52002 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
52003 switch (TYPE_MODE (TREE_TYPE (t
)))
52011 /* case E_SCmode: */
52012 /* case E_DCmode: */
52015 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
52016 "unsupported argument type %qT for simd\n", TREE_TYPE (t
));
52020 if (clonei
->cilk_elemental
)
52022 /* Parse here processor clause. If not present, default to 'b'. */
52023 clonei
->vecsize_mangle
= 'b';
52025 else if (!TREE_PUBLIC (node
->decl
))
52027 /* If the function isn't exported, we can pick up just one ISA
52029 if (TARGET_AVX512F
)
52030 clonei
->vecsize_mangle
= 'e';
52031 else if (TARGET_AVX2
)
52032 clonei
->vecsize_mangle
= 'd';
52033 else if (TARGET_AVX
)
52034 clonei
->vecsize_mangle
= 'c';
52036 clonei
->vecsize_mangle
= 'b';
52041 clonei
->vecsize_mangle
= "bcde"[num
];
52044 clonei
->mask_mode
= VOIDmode
;
52045 switch (clonei
->vecsize_mangle
)
52048 clonei
->vecsize_int
= 128;
52049 clonei
->vecsize_float
= 128;
52052 clonei
->vecsize_int
= 128;
52053 clonei
->vecsize_float
= 256;
52056 clonei
->vecsize_int
= 256;
52057 clonei
->vecsize_float
= 256;
52060 clonei
->vecsize_int
= 512;
52061 clonei
->vecsize_float
= 512;
52062 if (TYPE_MODE (base_type
) == QImode
)
52063 clonei
->mask_mode
= DImode
;
52065 clonei
->mask_mode
= SImode
;
52068 if (clonei
->simdlen
== 0)
52070 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
52071 clonei
->simdlen
= clonei
->vecsize_int
;
52073 clonei
->simdlen
= clonei
->vecsize_float
;
52074 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
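/* Worked example (illustrative): for the 'e' (AVX-512) variant vecsize_int
   is 512, so a 32-bit integer characteristic type gives
   simdlen = 512 / 32 = 16, while a double (vecsize_float 512) gives
   512 / 64 = 8.  */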
52076 else if (clonei
->simdlen
> 16)
52078 /* For compatibility with ICC, use the same upper bounds
52079 for simdlen. In particular, for CTYPE below, use the return type,
52080 unless the function returns void, in which case use the characteristic
52081 type. If it is possible for given SIMDLEN to pass CTYPE value
52082 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
52083 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
52084 emit corresponding clone. */
52085 tree ctype
= ret_type
;
52086 if (TREE_CODE (ret_type
) == VOID_TYPE
)
52088 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
52089 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
52090 cnt
/= clonei
->vecsize_int
;
52092 cnt
/= clonei
->vecsize_float
;
52093 if (cnt
> (TARGET_64BIT
? 16 : 8))
52095 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
52096 "unsupported simdlen %d", clonei
->simdlen
);
52103 /* Add target attribute to SIMD clone NODE if needed. */
52106 ix86_simd_clone_adjust (struct cgraph_node
*node
)
52108 const char *str
= NULL
;
52109 gcc_assert (node
->decl
== cfun
->decl
);
52110 switch (node
->simdclone
->vecsize_mangle
)
52125 if (!TARGET_AVX512F
)
52129 gcc_unreachable ();
52134 tree args
= build_tree_list (NULL_TREE
, build_string (strlen (str
), str
));
52135 bool ok
= ix86_valid_target_attribute_p (node
->decl
, NULL
, args
, 0);
52138 ix86_reset_previous_fndecl ();
52139 ix86_set_current_function (node
->decl
);
52142 /* If SIMD clone NODE can't be used in a vectorized loop
52143 in current function, return -1, otherwise return a badness of using it
52144 (0 if it is most desirable from vecsize_mangle point of view, 1
52145 slightly less desirable, etc.). */
52148 ix86_simd_clone_usable (struct cgraph_node
*node
)
52150 switch (node
->simdclone
->vecsize_mangle
)
52157 return TARGET_AVX2
? 2 : 1;
52161 return TARGET_AVX2
? 1 : 0;
52167 if (!TARGET_AVX512F
)
52171 gcc_unreachable ();
52175 /* This function adjusts the unroll factor based on
52176 the hardware capabilities. For example, bdver3 has
52177 a loop buffer which makes unrolling of smaller
52178 loops less important. This function decides the
52179 unroll factor using number of memory references
52180 (value 32 is used) as a heuristic. */
52183 ix86_loop_unroll_adjust (unsigned nunroll
, struct loop
*loop
)
52188 unsigned mem_count
= 0;
52190 if (!TARGET_ADJUST_UNROLL
)
52193 /* Count the number of memory references within the loop body.
52194 This value determines the unrolling factor for bdver3 and bdver4 architectures. */
52196 subrtx_iterator::array_type array
;
52197 bbs
= get_loop_body (loop
);
52198 for (i
= 0; i
< loop
->num_nodes
; i
++)
52199 FOR_BB_INSNS (bbs
[i
], insn
)
52200 if (NONDEBUG_INSN_P (insn
))
52201 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
52202 if (const_rtx x
= *iter
)
52205 machine_mode mode
= GET_MODE (x
);
52206 unsigned int n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
52214 if (mem_count && mem_count <= 32)
52215 return 32 / mem_count;
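/* Worked example (illustrative): a loop body with 8 counted memory references
   yields an adjusted unroll factor of 32 / 8 = 4, and one with 16 references
   yields 2.  */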
52221 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
52224 ix86_float_exceptions_rounding_supported_p (void)
52226 /* For x87 floating point with standard excess precision handling,
52227 there is no adddf3 pattern (since x87 floating point only has
52228 XFmode operations) so the default hook implementation gets this wrong. */
52230 return TARGET_80387
|| TARGET_SSE_MATH
;
52233 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
52236 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
52238 if (!TARGET_80387
&& !TARGET_SSE_MATH
)
52240 tree exceptions_var
= create_tmp_var_raw (integer_type_node
);
52243 tree fenv_index_type
= build_index_type (size_int (6));
52244 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
52245 tree fenv_var
= create_tmp_var_raw (fenv_type
);
52246 TREE_ADDRESSABLE (fenv_var
) = 1;
52247 tree fenv_ptr
= build_pointer_type (fenv_type
);
52248 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
52249 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
52250 tree fnstenv
= ix86_builtins
[IX86_BUILTIN_FNSTENV
];
52251 tree fldenv
= ix86_builtins
[IX86_BUILTIN_FLDENV
];
52252 tree fnstsw
= ix86_builtins
[IX86_BUILTIN_FNSTSW
];
52253 tree fnclex
= ix86_builtins
[IX86_BUILTIN_FNCLEX
];
52254 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
52255 tree hold_fnclex
= build_call_expr (fnclex
, 0);
52256 fenv_var
= build4 (TARGET_EXPR
, fenv_type
, fenv_var
, hold_fnstenv
,
52257 NULL_TREE
, NULL_TREE
);
52258 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, fenv_var
,
52260 *clear
= build_call_expr (fnclex
, 0);
52261 tree sw_var
= create_tmp_var_raw (short_unsigned_type_node
);
52262 tree fnstsw_call
= build_call_expr (fnstsw
, 0);
52263 tree sw_mod
= build2 (MODIFY_EXPR
, short_unsigned_type_node
,
52264 sw_var
, fnstsw_call
);
52265 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
52266 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
52267 exceptions_var
, exceptions_x87
);
52268 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
52269 sw_mod
, update_mod
);
52270 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
52271 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
52273 if (TARGET_SSE_MATH
)
52275 tree mxcsr_orig_var
= create_tmp_var_raw (unsigned_type_node
);
52276 tree mxcsr_mod_var
= create_tmp_var_raw (unsigned_type_node
);
52277 tree stmxcsr
= ix86_builtins
[IX86_BUILTIN_STMXCSR
];
52278 tree ldmxcsr
= ix86_builtins
[IX86_BUILTIN_LDMXCSR
];
52279 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
52280 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
52281 mxcsr_orig_var
, stmxcsr_hold_call
);
52282 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
52284 build_int_cst (unsigned_type_node
, 0x1f80));
52285 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
52286 build_int_cst (unsigned_type_node
, 0xffffffc0));
52287 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
52288 mxcsr_mod_var
, hold_mod_val
);
52289 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
52290 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
52291 hold_assign_orig
, hold_assign_mod
);
52292 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
52293 ldmxcsr_hold_call
);
52295 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
52298 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
52300 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
52301 ldmxcsr_clear_call
);
52303 *clear
= ldmxcsr_clear_call
;
52304 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
52305 tree exceptions_sse
= fold_convert (integer_type_node
,
52306 stxmcsr_update_call
);
52309 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
52310 exceptions_var
, exceptions_sse
);
52311 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
52312 exceptions_var
, exceptions_mod
);
52313 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
52314 exceptions_assign
);
52317 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
52318 exceptions_var
, exceptions_sse
);
52319 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
52320 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
52321 ldmxcsr_update_call
);
52323 tree atomic_feraiseexcept
52324 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
52325 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
52326 1, exceptions_var
);
52327 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
52328 atomic_feraiseexcept_call
);
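/* Conceptual sketch (not GCC code; only an approximation of the trees built
   above): the three sequences this hook produces behave roughly like the
   <fenv.h>-level helpers below, except that the real UPDATE sequence
   re-raises the captured exceptions via __atomic_feraiseexcept rather than
   feraiseexcept.  Kept under "#if 0" because it is illustrative only.  */
#if 0
#include <fenv.h>

static fenv_t example_saved_env;

static void
example_hold (void)
{
  /* Save the environment, clear pending flags and mask exceptions.  */
  feholdexcept (&example_saved_env);
}

static void
example_clear (void)
{
  feclearexcept (FE_ALL_EXCEPT);
}

static void
example_update (void)
{
  /* Capture what the protected code raised, restore the saved environment,
     then re-raise the captured exceptions.  */
  int raised = fetestexcept (FE_ALL_EXCEPT);
  fesetenv (&example_saved_env);
  feraiseexcept (raised);
}
#endif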
52331 /* Return mode to be used for bounds or VOIDmode
52332 if bounds are not supported. */
52334 static machine_mode
52335 ix86_mpx_bound_mode ()
52337 /* Do not support pointer checker if MPX
52341 if (flag_check_pointer_bounds
)
52342 warning (0, "Pointer Checker requires MPX support on this target."
52343 " Use -mmpx options to enable MPX.");
52350 /* Return constant used to statically initialize constant bounds.
52352 This function is used to create special bound values. For now
52353 only INIT bounds and NONE bounds are expected. More special
52354 values may be added later. */
52357 ix86_make_bounds_constant (HOST_WIDE_INT lb
, HOST_WIDE_INT ub
)
52359 tree low
= lb
? build_minus_one_cst (pointer_sized_int_node
)
52360 : build_zero_cst (pointer_sized_int_node
);
52361 tree high
= ub
? build_zero_cst (pointer_sized_int_node
)
52362 : build_minus_one_cst (pointer_sized_int_node
);
52364 /* This function is supposed to be used to create INIT and
52365 NONE bounds only. */
52366 gcc_assert ((lb
== 0 && ub
== -1)
52367 || (lb
== -1 && ub
== 0));
52369 return build_complex (NULL
, low
, high
);
52372 /* Generate a list of statements STMTS to initialize pointer bounds
52373 variable VAR with bounds LB and UB. Return the number of generated statements. */
52377 ix86_initialize_bounds (tree var
, tree lb
, tree ub
, tree
*stmts
)
52379 tree bnd_ptr
= build_pointer_type (pointer_sized_int_node
);
52380 tree lhs
, modify
, var_p
;
52382 ub
= build1 (BIT_NOT_EXPR
, pointer_sized_int_node
, ub
);
52383 var_p
= fold_convert (bnd_ptr
, build_fold_addr_expr (var
));
52385 lhs
= build1 (INDIRECT_REF
, pointer_sized_int_node
, var_p
);
52386 modify
= build2 (MODIFY_EXPR
, TREE_TYPE (lhs
), lhs
, lb
);
52387 append_to_statement_list (modify
, stmts
);
52389 lhs
= build1 (INDIRECT_REF
, pointer_sized_int_node
,
52390 build2 (POINTER_PLUS_EXPR
, bnd_ptr
, var_p
,
52391 TYPE_SIZE_UNIT (pointer_sized_int_node
)));
52392 modify
= build2 (MODIFY_EXPR
, TREE_TYPE (lhs
), lhs
, ub
);
52393 append_to_statement_list (modify
, stmts
);
52398 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52399 /* For i386, common symbol is local only for non-PIE binaries. For
52400 x86-64, common symbol is local only for non-PIE binaries or if the linker
52401 supports copy relocations in PIE binaries. */
52404 ix86_binds_local_p (const_tree exp
)
52406 return default_binds_local_p_3 (exp
, flag_shlib
!= 0, true, true,
52409 && HAVE_LD_PIE_COPYRELOC
!= 0)));
52413 /* If MEM is in the form of [base+offset], extract the two parts
52414 of address and set to BASE and OFFSET, otherwise return false. */
52417 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
52421 gcc_assert (MEM_P (mem
));
52423 addr
= XEXP (mem
, 0);
52425 if (GET_CODE (addr
) == CONST
)
52426 addr
= XEXP (addr
, 0);
52428 if (REG_P (addr
) || GET_CODE (addr
) == SYMBOL_REF
)
52431 *offset
= const0_rtx
;
52435 if (GET_CODE (addr
) == PLUS
52436 && (REG_P (XEXP (addr
, 0))
52437 || GET_CODE (XEXP (addr
, 0)) == SYMBOL_REF
)
52438 && CONST_INT_P (XEXP (addr
, 1)))
52440 *base
= XEXP (addr
, 0);
52441 *offset
= XEXP (addr
, 1);
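/* Illustrative examples: for a MEM whose address is
   (plus (reg:DI bx) (const_int 8)) this sets *BASE to the register and
   *OFFSET to (const_int 8); for a bare register or symbol_ref address
   *OFFSET is const0_rtx.  */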
52448 /* Given OPERANDS of consecutive load/store, check if we can merge
52449 them into move multiple. LOAD is true if they are load instructions.
52450 MODE is the mode of memory operands. */
52453 ix86_operands_ok_for_move_multiple (rtx
*operands
, bool load
,
52456 HOST_WIDE_INT offval_1
, offval_2
, msize
;
52457 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
52461 mem_1
= operands
[1];
52462 mem_2
= operands
[3];
52463 reg_1
= operands
[0];
52464 reg_2
= operands
[2];
52468 mem_1
= operands
[0];
52469 mem_2
= operands
[2];
52470 reg_1
= operands
[1];
52471 reg_2
= operands
[3];
52474 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
52476 if (REGNO (reg_1
) != REGNO (reg_2
))
52479 /* Check if the addresses are in the form of [base+offset]. */
52480 if (!extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
))
52482 if (!extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
))
52485 /* Check if the bases are the same. */
52486 if (!rtx_equal_p (base_1
, base_2
))
52489 offval_1
= INTVAL (offset_1
);
52490 offval_2
= INTVAL (offset_2
);
52491 msize
= GET_MODE_SIZE (mode
);
52492 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
52493 if (offval_1
+ msize
!= offval_2
)
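/* Illustrative example: two SImode accesses to [base + 0] and [base + 4]
   have matching bases and offsets that differ by exactly msize = 4, so they
   pass the adjacency check above; [base + 0] and [base + 8] would not.  */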
52499 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
52502 ix86_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
52503 optimization_type opt_type
)
52517 return opt_type
== OPTIMIZE_FOR_SPEED
;
52520 if (SSE_FLOAT_MODE_P (mode1
)
52522 && !flag_trapping_math
52524 return opt_type
== OPTIMIZE_FOR_SPEED
;
52530 if (SSE_FLOAT_MODE_P (mode1
)
52532 && !flag_trapping_math
52535 return opt_type
== OPTIMIZE_FOR_SPEED
;
52538 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p ();
52545 /* Address space support.
52547 This is not "far pointers" in the 16-bit sense, but an easy way
52548 to use %fs and %gs segment prefixes. Therefore:
52550 (a) All address spaces have the same modes,
52551 (b) All address spaces have the same address forms,
52552 (c) While %fs and %gs are technically subsets of the generic
52553 address space, they are probably not subsets of each other.
52554 (d) Since we have no access to the segment base register values
52555 without resorting to a system call, we cannot convert a
52556 non-default address space to a default address space.
52557 Therefore we do not claim %fs or %gs are subsets of generic.
52559 Therefore we can (mostly) use the default hooks. */
52561 /* All use of segmentation is assumed to make address 0 valid. */
52564 ix86_addr_space_zero_address_valid (addr_space_t as
)
52566 return as
!= ADDR_SPACE_GENERIC
;
52570 ix86_init_libfuncs (void)
52574 set_optab_libfunc (sdivmod_optab
, TImode
, "__divmodti4");
52575 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
52579 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
52580 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
52584 darwin_rename_builtins ();
52588 /* Generate call to __divmoddi4. */
52591 ix86_expand_divmod_libfunc (rtx libfunc
, machine_mode mode
,
52593 rtx
*quot_p
, rtx
*rem_p
)
52595 rtx rem
= assign_386_stack_local (mode
, SLOT_TEMP
);
52597 rtx quot
= emit_library_call_value (libfunc
, NULL_RTX
, LCT_NORMAL
,
52599 op0
, GET_MODE (op0
),
52600 op1
, GET_MODE (op1
),
52601 XEXP (rem
, 0), Pmode
);
52606 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
52607 FPU, assume that the fpcw is set to extended precision; when using
52608 only SSE, rounding is correct; when using both SSE and the FPU,
52609 the rounding precision is indeterminate, since either may be chosen
52610 apparently at random. */
52612 static enum flt_eval_method
52613 ix86_excess_precision (enum excess_precision_type type
)
52617 case EXCESS_PRECISION_TYPE_FAST
:
52618 /* The fastest type to promote to will always be the native type,
52619 whether that occurs with implicit excess precision or otherwise. */
52621 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
52622 case EXCESS_PRECISION_TYPE_STANDARD
:
52623 case EXCESS_PRECISION_TYPE_IMPLICIT
:
52624 /* Otherwise, the excess precision we want when we are
52625 in a standards compliant mode, and the implicit precision we
52626 provide would be identical were it not for the unpredictable
52629 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
52630 else if (!TARGET_MIX_SSE_I387
)
52632 if (!TARGET_SSE_MATH
)
52633 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
;
52634 else if (TARGET_SSE2
)
52635 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
52638 /* If we are in standards compliant mode, but we know we will
52639 calculate in unpredictable precision, return
52640 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
52641 excess precision if the target can't guarantee it will honor it. */
52643 return (type
== EXCESS_PRECISION_TYPE_STANDARD
52644 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
52645 : FLT_EVAL_METHOD_UNPREDICTABLE
);
52647 gcc_unreachable ();
52650 return FLT_EVAL_METHOD_UNPREDICTABLE
;
52653 /* Target-specific selftests. */
52657 namespace selftest
{
52659 /* Verify that hard regs are dumped as expected (in compact mode). */
52662 ix86_test_dumping_hard_regs ()
52664 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode
, 0));
52665 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode
, 1));
52668 /* Test dumping an insn with repeated references to the same SCRATCH,
52669 to verify the rtx_reuse code. */
52672 ix86_test_dumping_memory_blockage ()
52674 set_new_first_and_last_insn (NULL
, NULL
);
52676 rtx pat
= gen_memory_blockage ();
52677 rtx_reuse_manager r
;
52678 r
.preprocess (pat
);
52680 /* Verify that the repeated references to the SCRATCH show use of
52681 reuse IDs. The first should be prefixed with a reuse ID,
52682 and the second should be dumped as a "reuse_rtx" of that ID.
52683 The expected string assumes Pmode == DImode. */
52684 if (Pmode
== DImode
)
52685 ASSERT_RTL_DUMP_EQ_WITH_REUSE
52686 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
52688 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
52689 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat
, &r
);
52692 /* Verify loading an RTL dump; specifically a dump of copying
52693 a param on x86_64 from a hard reg into the frame.
52694 This test is target-specific since the dump contains target-specific
52698 ix86_test_loading_dump_fragment_1 ()
52700 rtl_dump_test
t (SELFTEST_LOCATION
,
52701 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
52703 rtx_insn
*insn
= get_insn_by_uid (1);
52705 /* The block structure and indentation here is purely for
52706 readability; it mirrors the structure of the rtx. */
52709 rtx pat
= PATTERN (insn
);
52710 ASSERT_EQ (SET
, GET_CODE (pat
));
52712 rtx dest
= SET_DEST (pat
);
52713 ASSERT_EQ (MEM
, GET_CODE (dest
));
52714 /* Verify the "/c" was parsed. */
52715 ASSERT_TRUE (RTX_FLAG (dest
, call
));
52716 ASSERT_EQ (SImode
, GET_MODE (dest
));
52718 rtx addr
= XEXP (dest
, 0);
52719 ASSERT_EQ (PLUS
, GET_CODE (addr
));
52720 ASSERT_EQ (DImode
, GET_MODE (addr
));
52722 rtx lhs
= XEXP (addr
, 0);
52723 /* Verify that the "frame" REG was consolidated. */
52724 ASSERT_RTX_PTR_EQ (frame_pointer_rtx
, lhs
);
52727 rtx rhs
= XEXP (addr
, 1);
52728 ASSERT_EQ (CONST_INT
, GET_CODE (rhs
));
52729 ASSERT_EQ (-4, INTVAL (rhs
));
52732 /* Verify the "[1 i+0 S4 A32]" was parsed. */
52733 ASSERT_EQ (1, MEM_ALIAS_SET (dest
));
52734 /* "i" should have been handled by synthesizing a global int
52735 variable named "i". */
52736 mem_expr
= MEM_EXPR (dest
);
52737 ASSERT_NE (mem_expr
, NULL
);
52738 ASSERT_EQ (VAR_DECL
, TREE_CODE (mem_expr
));
52739 ASSERT_EQ (integer_type_node
, TREE_TYPE (mem_expr
));
52740 ASSERT_EQ (IDENTIFIER_NODE
, TREE_CODE (DECL_NAME (mem_expr
)));
52741 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr
)));
52743 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest
));
52744 ASSERT_EQ (0, MEM_OFFSET (dest
));
52746 ASSERT_EQ (4, MEM_SIZE (dest
));
52748 ASSERT_EQ (32, MEM_ALIGN (dest
));
52751 rtx src
= SET_SRC (pat
);
52752 ASSERT_EQ (REG
, GET_CODE (src
));
52753 ASSERT_EQ (SImode
, GET_MODE (src
));
52754 ASSERT_EQ (5, REGNO (src
));
52755 tree reg_expr
= REG_EXPR (src
);
52756 /* "i" here should point to the same var as for the MEM_EXPR. */
52757 ASSERT_EQ (reg_expr
, mem_expr
);
52762 /* Verify that the RTL loader copes with a call_insn dump.
52763 This test is target-specific since the dump contains a target-specific
52767 ix86_test_loading_call_insn ()
52769 /* The test dump includes register "xmm0", which requires TARGET_SSE
52774 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/call-insn.rtl"));
52776 rtx_insn
*insn
= get_insns ();
52777 ASSERT_EQ (CALL_INSN
, GET_CODE (insn
));
52780 ASSERT_TRUE (RTX_FLAG (insn
, jump
));
52782 rtx pat
= PATTERN (insn
);
52783 ASSERT_EQ (CALL
, GET_CODE (SET_SRC (pat
)));
52785 /* Verify REG_NOTES. */
52787 /* "(expr_list:REG_CALL_DECL". */
52788 ASSERT_EQ (EXPR_LIST
, GET_CODE (REG_NOTES (insn
)));
52789 rtx_expr_list
*note0
= as_a
<rtx_expr_list
*> (REG_NOTES (insn
));
52790 ASSERT_EQ (REG_CALL_DECL
, REG_NOTE_KIND (note0
));
52792 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
52793 rtx_expr_list
*note1
= note0
->next ();
52794 ASSERT_EQ (REG_EH_REGION
, REG_NOTE_KIND (note1
));
52796 ASSERT_EQ (NULL
, note1
->next ());
52799 /* Verify CALL_INSN_FUNCTION_USAGE. */
52801 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
52802 rtx_expr_list
*usage
52803 = as_a
<rtx_expr_list
*> (CALL_INSN_FUNCTION_USAGE (insn
));
52804 ASSERT_EQ (EXPR_LIST
, GET_CODE (usage
));
52805 ASSERT_EQ (DFmode
, GET_MODE (usage
));
52806 ASSERT_EQ (USE
, GET_CODE (usage
->element ()));
52807 ASSERT_EQ (NULL
, usage
->next ());
52811 /* Verify that the RTL loader copes with a dump from print_rtx_function.
52812 This test is target-specific since the dump contains target-specific
52816 ix86_test_loading_full_dump ()
52818 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/times-two.rtl"));
52820 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
52822 rtx_insn
*insn_1
= get_insn_by_uid (1);
52823 ASSERT_EQ (NOTE
, GET_CODE (insn_1
));
52825 rtx_insn
*insn_7
= get_insn_by_uid (7);
52826 ASSERT_EQ (INSN
, GET_CODE (insn_7
));
52827 ASSERT_EQ (PARALLEL
, GET_CODE (PATTERN (insn_7
)));
52829 rtx_insn
*insn_15
= get_insn_by_uid (15);
52830 ASSERT_EQ (INSN
, GET_CODE (insn_15
));
52831 ASSERT_EQ (USE
, GET_CODE (PATTERN (insn_15
)));
52833 /* Verify crtl->return_rtx. */
52834 ASSERT_EQ (REG
, GET_CODE (crtl
->return_rtx
));
52835 ASSERT_EQ (0, REGNO (crtl
->return_rtx
));
52836 ASSERT_EQ (SImode
, GET_MODE (crtl
->return_rtx
));
52839 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
52840 In particular, verify that it correctly loads the 2nd operand.
52841 This test is target-specific since these are machine-specific
52842 operands (and enums). */
52845 ix86_test_loading_unspec ()
52847 rtl_dump_test
t (SELFTEST_LOCATION
, locate_file ("x86_64/unspec.rtl"));
52849 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun
->decl
)));
52851 ASSERT_TRUE (cfun
);
52853 /* Test of an UNSPEC. */
52854 rtx_insn
*insn
= get_insns ();
52855 ASSERT_EQ (INSN
, GET_CODE (insn
));
52856 rtx set
= single_set (insn
);
52857 ASSERT_NE (NULL
, set
);
52858 rtx dst
= SET_DEST (set
);
52859 ASSERT_EQ (MEM
, GET_CODE (dst
));
52860 rtx src
= SET_SRC (set
);
52861 ASSERT_EQ (UNSPEC
, GET_CODE (src
));
52862 ASSERT_EQ (BLKmode
, GET_MODE (src
));
52863 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE
, XINT (src
, 1));
52865 rtx v0
= XVECEXP (src
, 0, 0);
52867 /* Verify that the two uses of the first SCRATCH have pointer equality. */
52869 rtx scratch_a
= XEXP (dst
, 0);
52870 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_a
));
52872 rtx scratch_b
= XEXP (v0
, 0);
52873 ASSERT_EQ (SCRATCH
, GET_CODE (scratch_b
));
52875 ASSERT_EQ (scratch_a
, scratch_b
);
52877 /* Verify that the two mems are thus treated as equal. */
52878 ASSERT_TRUE (rtx_equal_p (dst
, v0
));
52880 /* Verify that the insn is recognized. */
52881 ASSERT_NE(-1, recog_memoized (insn
));
52883 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
52884 insn
= NEXT_INSN (insn
);
52885 ASSERT_EQ (INSN
, GET_CODE (insn
));
52887 set
= single_set (insn
);
52888 ASSERT_NE (NULL
, set
);
52890 src
= SET_SRC (set
);
52891 ASSERT_EQ (UNSPEC_VOLATILE
, GET_CODE (src
));
52892 ASSERT_EQ (UNSPECV_RDTSCP
, XINT (src
, 1));
52895 /* Run all target-specific selftests. */
52898 ix86_run_selftests (void)
52900 ix86_test_dumping_hard_regs ();
52901 ix86_test_dumping_memory_blockage ();
52903 /* Various tests of loading RTL dumps, here because they contain
52904 ix86-isms (e.g. names of hard regs). */
52905 ix86_test_loading_dump_fragment_1 ();
52906 ix86_test_loading_call_insn ();
52907 ix86_test_loading_full_dump ();
52908 ix86_test_loading_unspec ();
52911 } // namespace selftest
52913 #endif /* CHECKING_P */
52915 /* Initialize the GCC target structure. */
52916 #undef TARGET_RETURN_IN_MEMORY
52917 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52919 #undef TARGET_LEGITIMIZE_ADDRESS
52920 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52922 #undef TARGET_ATTRIBUTE_TABLE
52923 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52924 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52925 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52926 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52927 # undef TARGET_MERGE_DECL_ATTRIBUTES
52928 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52931 #undef TARGET_COMP_TYPE_ATTRIBUTES
52932 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52934 #undef TARGET_INIT_BUILTINS
52935 #define TARGET_INIT_BUILTINS ix86_init_builtins
52936 #undef TARGET_BUILTIN_DECL
52937 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52938 #undef TARGET_EXPAND_BUILTIN
52939 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52941 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52942 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52943 ix86_builtin_vectorized_function
52945 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52946 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52948 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
52949 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
52951 #undef TARGET_BUILTIN_RECIPROCAL
52952 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52954 #undef TARGET_ASM_FUNCTION_EPILOGUE
52955 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52957 #undef TARGET_ENCODE_SECTION_INFO
52958 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52959 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52961 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_LOAD_BOUNDS_FOR_ARG
#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds

#undef TARGET_STORE_BOUNDS_FOR_ARG
#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds

#undef TARGET_LOAD_RETURNED_BOUNDS
#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds

#undef TARGET_STORE_RETURNED_BOUNDS
#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds

#undef TARGET_CHKP_BOUND_MODE
#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode

#undef TARGET_BUILTIN_CHKP_FUNCTION
#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function

#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds

#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant

#undef TARGET_CHKP_INITIALIZE_BOUNDS
#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds

#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDITIONAL_ALLOCNO_CLASS_P
#define TARGET_ADDITIONAL_ALLOCNO_CLASS_P ix86_additional_allocno_class_p

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
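/* Hooking selftest::ix86_run_selftests into targetm here lets the generic
   selftest framework invoke the i386-specific tests (alongside the
   target-independent ones) when the self-test machinery runs on a
   checking-enabled build.  */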
struct gcc_target targetm = TARGET_INITIALIZER;
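/* The rest of the compiler does not call the ix86_* routines above directly;
   it dispatches through this targetm vector, e.g. targetm.rtx_costs in the
   RTL cost queries or targetm.sched.issue_rate in the scheduler.  */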
#include "gt-i386.h"