1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
64 #include "tree-flow.h"
66 static rtx
legitimize_dllimport_symbol (rtx
, bool);
68 #ifndef CHECK_STACK_LIMIT
69 #define CHECK_STACK_LIMIT (-1)
72 /* Return index of given mode in mult and division cost tables. */
73 #define MODE_INDEX(mode) \
74 ((mode) == QImode ? 0 \
75 : (mode) == HImode ? 1 \
76 : (mode) == SImode ? 2 \
77 : (mode) == DImode ? 3 \
80 /* Processor costs (relative to an add) */
81 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
82 #define COSTS_N_BYTES(N) ((N) * 2)
84 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
87 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
88 COSTS_N_BYTES (2), /* cost of an add instruction */
89 COSTS_N_BYTES (3), /* cost of a lea instruction */
90 COSTS_N_BYTES (2), /* variable shift costs */
91 COSTS_N_BYTES (3), /* constant shift costs */
92 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
93 COSTS_N_BYTES (3), /* HI */
94 COSTS_N_BYTES (3), /* SI */
95 COSTS_N_BYTES (3), /* DI */
96 COSTS_N_BYTES (5)}, /* other */
97 0, /* cost of multiply per each bit set */
98 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
99 COSTS_N_BYTES (3), /* HI */
100 COSTS_N_BYTES (3), /* SI */
101 COSTS_N_BYTES (3), /* DI */
102 COSTS_N_BYTES (5)}, /* other */
103 COSTS_N_BYTES (3), /* cost of movsx */
104 COSTS_N_BYTES (3), /* cost of movzx */
105 0, /* "large" insn */
107 2, /* cost for loading QImode using movzbl */
108 {2, 2, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 2, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 2}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {2, 2, 2}, /* cost of storing fp registers
116 in SFmode, DFmode and XFmode */
117 3, /* cost of moving MMX register */
118 {3, 3}, /* cost of loading MMX registers
119 in SImode and DImode */
120 {3, 3}, /* cost of storing MMX registers
121 in SImode and DImode */
122 3, /* cost of moving SSE register */
123 {3, 3, 3}, /* cost of loading SSE registers
124 in SImode, DImode and TImode */
125 {3, 3, 3}, /* cost of storing SSE registers
126 in SImode, DImode and TImode */
127 3, /* MMX or SSE register to integer */
128 0, /* size of l1 cache */
129 0, /* size of l2 cache */
130 0, /* size of prefetch block */
131 0, /* number of parallel prefetches */
133 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
134 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
135 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
136 COSTS_N_BYTES (2), /* cost of FABS instruction. */
137 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
138 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
139 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
140 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
141 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
142 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
143 1, /* scalar_stmt_cost. */
144 1, /* scalar load_cost. */
145 1, /* scalar_store_cost. */
146 1, /* vec_stmt_cost. */
147 1, /* vec_to_scalar_cost. */
148 1, /* scalar_to_vec_cost. */
149 1, /* vec_align_load_cost. */
150 1, /* vec_unalign_load_cost. */
151 1, /* vec_store_cost. */
152 1, /* cond_taken_branch_cost. */
153 1, /* cond_not_taken_branch_cost. */
156 /* Processor costs (relative to an add) */
/* 80386 tuning table: all entries are latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
158 struct processor_costs i386_cost
= { /* 386 specific costs */
159 COSTS_N_INSNS (1), /* cost of an add instruction */
160 COSTS_N_INSNS (1), /* cost of a lea instruction */
161 COSTS_N_INSNS (3), /* variable shift costs */
162 COSTS_N_INSNS (2), /* constant shift costs */
163 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
164 COSTS_N_INSNS (6), /* HI */
165 COSTS_N_INSNS (6), /* SI */
166 COSTS_N_INSNS (6), /* DI */
167 COSTS_N_INSNS (6)}, /* other */
168 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
169 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
170 COSTS_N_INSNS (23), /* HI */
171 COSTS_N_INSNS (23), /* SI */
172 COSTS_N_INSNS (23), /* DI */
173 COSTS_N_INSNS (23)}, /* other */
174 COSTS_N_INSNS (3), /* cost of movsx */
175 COSTS_N_INSNS (2), /* cost of movzx */
176 15, /* "large" insn */
178 4, /* cost for loading QImode using movzbl */
179 {2, 4, 2}, /* cost of loading integer registers
180 in QImode, HImode and SImode.
181 Relative to reg-reg move (2). */
182 {2, 4, 2}, /* cost of storing integer registers */
183 2, /* cost of reg,reg fld/fst */
184 {8, 8, 8}, /* cost of loading fp registers
185 in SFmode, DFmode and XFmode */
186 {8, 8, 8}, /* cost of storing fp registers
187 in SFmode, DFmode and XFmode */
188 2, /* cost of moving MMX register */
189 {4, 8}, /* cost of loading MMX registers
190 in SImode and DImode */
191 {4, 8}, /* cost of storing MMX registers
192 in SImode and DImode */
193 2, /* cost of moving SSE register */
194 {4, 8, 16}, /* cost of loading SSE registers
195 in SImode, DImode and TImode */
196 {4, 8, 16}, /* cost of storing SSE registers
197 in SImode, DImode and TImode */
198 3, /* MMX or SSE register to integer */
199 0, /* size of l1 cache */
200 0, /* size of l2 cache */
201 0, /* size of prefetch block */
202 0, /* number of parallel prefetches */
204 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
205 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
206 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
207 COSTS_N_INSNS (22), /* cost of FABS instruction. */
208 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
209 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
210 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
211 DUMMY_STRINGOP_ALGS
},
212 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
213 DUMMY_STRINGOP_ALGS
},
214 1, /* scalar_stmt_cost. */
215 1, /* scalar load_cost. */
216 1, /* scalar_store_cost. */
217 1, /* vec_stmt_cost. */
218 1, /* vec_to_scalar_cost. */
219 1, /* scalar_to_vec_cost. */
220 1, /* vec_align_load_cost. */
221 2, /* vec_unalign_load_cost. */
222 1, /* vec_store_cost. */
223 3, /* cond_taken_branch_cost. */
224 1, /* cond_not_taken_branch_cost. */
/* 80486 tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
228 struct processor_costs i486_cost
= { /* 486 specific costs */
229 COSTS_N_INSNS (1), /* cost of an add instruction */
230 COSTS_N_INSNS (1), /* cost of a lea instruction */
231 COSTS_N_INSNS (3), /* variable shift costs */
232 COSTS_N_INSNS (2), /* constant shift costs */
233 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
234 COSTS_N_INSNS (12), /* HI */
235 COSTS_N_INSNS (12), /* SI */
236 COSTS_N_INSNS (12), /* DI */
237 COSTS_N_INSNS (12)}, /* other */
238 1, /* cost of multiply per each bit set */
239 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
240 COSTS_N_INSNS (40), /* HI */
241 COSTS_N_INSNS (40), /* SI */
242 COSTS_N_INSNS (40), /* DI */
243 COSTS_N_INSNS (40)}, /* other */
244 COSTS_N_INSNS (3), /* cost of movsx */
245 COSTS_N_INSNS (2), /* cost of movzx */
246 15, /* "large" insn */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {4, 8, 16}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {4, 8, 16}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 3, /* MMX or SSE register to integer */
269 4, /* size of l1 cache. 486 has 8kB cache
270 shared for code and data, so 4kB is
271 not really precise. */
272 4, /* size of l2 cache */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (3), /* cost of FABS instruction. */
280 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
282 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
283 DUMMY_STRINGOP_ALGS
},
284 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
285 DUMMY_STRINGOP_ALGS
},
286 1, /* scalar_stmt_cost. */
287 1, /* scalar load_cost. */
288 1, /* scalar_store_cost. */
289 1, /* vec_stmt_cost. */
290 1, /* vec_to_scalar_cost. */
291 1, /* scalar_to_vec_cost. */
292 1, /* vec_align_load_cost. */
293 2, /* vec_unalign_load_cost. */
294 1, /* vec_store_cost. */
295 3, /* cond_taken_branch_cost. */
296 1, /* cond_not_taken_branch_cost. */
/* Pentium (P5) tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
300 struct processor_costs pentium_cost
= {
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (4), /* variable shift costs */
304 COSTS_N_INSNS (1), /* constant shift costs */
305 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (11), /* HI */
307 COSTS_N_INSNS (11), /* SI */
308 COSTS_N_INSNS (11), /* DI */
309 COSTS_N_INSNS (11)}, /* other */
310 0, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (25), /* HI */
313 COSTS_N_INSNS (25), /* SI */
314 COSTS_N_INSNS (25), /* DI */
315 COSTS_N_INSNS (25)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 8, /* "large" insn */
320 6, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {2, 2, 6}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {4, 4, 6}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 8, /* cost of moving MMX register */
331 {8, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {8, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 8, /* size of l1 cache. */
342 8, /* size of l2 cache */
343 0, /* size of prefetch block */
344 0, /* number of parallel prefetches */
346 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
347 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
348 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
349 COSTS_N_INSNS (1), /* cost of FABS instruction. */
350 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
351 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
352 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
353 DUMMY_STRINGOP_ALGS
},
354 {{libcall
, {{-1, rep_prefix_4_byte
, false}}},
355 DUMMY_STRINGOP_ALGS
},
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
/* PentiumPro / P6 tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
370 struct processor_costs pentiumpro_cost
= {
371 COSTS_N_INSNS (1), /* cost of an add instruction */
372 COSTS_N_INSNS (1), /* cost of a lea instruction */
373 COSTS_N_INSNS (1), /* variable shift costs */
374 COSTS_N_INSNS (1), /* constant shift costs */
375 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
376 COSTS_N_INSNS (4), /* HI */
377 COSTS_N_INSNS (4), /* SI */
378 COSTS_N_INSNS (4), /* DI */
379 COSTS_N_INSNS (4)}, /* other */
380 0, /* cost of multiply per each bit set */
381 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
382 COSTS_N_INSNS (17), /* HI */
383 COSTS_N_INSNS (17), /* SI */
384 COSTS_N_INSNS (17), /* DI */
385 COSTS_N_INSNS (17)}, /* other */
386 COSTS_N_INSNS (1), /* cost of movsx */
387 COSTS_N_INSNS (1), /* cost of movzx */
388 8, /* "large" insn */
390 2, /* cost for loading QImode using movzbl */
391 {4, 4, 4}, /* cost of loading integer registers
392 in QImode, HImode and SImode.
393 Relative to reg-reg move (2). */
394 {2, 2, 2}, /* cost of storing integer registers */
395 2, /* cost of reg,reg fld/fst */
396 {2, 2, 6}, /* cost of loading fp registers
397 in SFmode, DFmode and XFmode */
398 {4, 4, 6}, /* cost of storing fp registers
399 in SFmode, DFmode and XFmode */
400 2, /* cost of moving MMX register */
401 {2, 2}, /* cost of loading MMX registers
402 in SImode and DImode */
403 {2, 2}, /* cost of storing MMX registers
404 in SImode and DImode */
405 2, /* cost of moving SSE register */
406 {2, 2, 8}, /* cost of loading SSE registers
407 in SImode, DImode and TImode */
408 {2, 2, 8}, /* cost of storing SSE registers
409 in SImode, DImode and TImode */
410 3, /* MMX or SSE register to integer */
411 8, /* size of l1 cache. */
412 256, /* size of l2 cache */
413 32, /* size of prefetch block */
414 6, /* number of parallel prefetches */
416 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
417 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
418 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
419 COSTS_N_INSNS (2), /* cost of FABS instruction. */
420 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
421 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
422 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
423 (we ensure the alignment). For small blocks inline loop is still a
424 noticeable win, for bigger blocks either rep movsl or rep movsb is
425 way to go. Rep movsb has apparently more expensive startup time in CPU,
426 but after 4K the difference is down in the noise. */
427 {{rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
428 {8192, rep_prefix_4_byte
, false},
429 {-1, rep_prefix_1_byte
, false}}},
430 DUMMY_STRINGOP_ALGS
},
431 {{rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
432 {8192, rep_prefix_4_byte
, false},
433 {-1, libcall
, false}}},
434 DUMMY_STRINGOP_ALGS
},
435 1, /* scalar_stmt_cost. */
436 1, /* scalar load_cost. */
437 1, /* scalar_store_cost. */
438 1, /* vec_stmt_cost. */
439 1, /* vec_to_scalar_cost. */
440 1, /* scalar_to_vec_cost. */
441 1, /* vec_align_load_cost. */
442 2, /* vec_unalign_load_cost. */
443 1, /* vec_store_cost. */
444 3, /* cond_taken_branch_cost. */
445 1, /* cond_not_taken_branch_cost. */
/* AMD Geode tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
449 struct processor_costs geode_cost
= {
450 COSTS_N_INSNS (1), /* cost of an add instruction */
451 COSTS_N_INSNS (1), /* cost of a lea instruction */
452 COSTS_N_INSNS (2), /* variable shift costs */
453 COSTS_N_INSNS (1), /* constant shift costs */
454 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
455 COSTS_N_INSNS (4), /* HI */
456 COSTS_N_INSNS (7), /* SI */
457 COSTS_N_INSNS (7), /* DI */
458 COSTS_N_INSNS (7)}, /* other */
459 0, /* cost of multiply per each bit set */
460 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
461 COSTS_N_INSNS (23), /* HI */
462 COSTS_N_INSNS (39), /* SI */
463 COSTS_N_INSNS (39), /* DI */
464 COSTS_N_INSNS (39)}, /* other */
465 COSTS_N_INSNS (1), /* cost of movsx */
466 COSTS_N_INSNS (1), /* cost of movzx */
467 8, /* "large" insn */
469 1, /* cost for loading QImode using movzbl */
470 {1, 1, 1}, /* cost of loading integer registers
471 in QImode, HImode and SImode.
472 Relative to reg-reg move (2). */
473 {1, 1, 1}, /* cost of storing integer registers */
474 1, /* cost of reg,reg fld/fst */
475 {1, 1, 1}, /* cost of loading fp registers
476 in SFmode, DFmode and XFmode */
477 {4, 6, 6}, /* cost of storing fp registers
478 in SFmode, DFmode and XFmode */
480 1, /* cost of moving MMX register */
481 {1, 1}, /* cost of loading MMX registers
482 in SImode and DImode */
483 {1, 1}, /* cost of storing MMX registers
484 in SImode and DImode */
485 1, /* cost of moving SSE register */
486 {1, 1, 1}, /* cost of loading SSE registers
487 in SImode, DImode and TImode */
488 {1, 1, 1}, /* cost of storing SSE registers
489 in SImode, DImode and TImode */
490 1, /* MMX or SSE register to integer */
491 64, /* size of l1 cache. */
492 128, /* size of l2 cache. */
493 32, /* size of prefetch block */
494 1, /* number of parallel prefetches */
496 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
497 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
498 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
501 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
502 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
503 DUMMY_STRINGOP_ALGS
},
504 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
505 DUMMY_STRINGOP_ALGS
},
506 1, /* scalar_stmt_cost. */
507 1, /* scalar load_cost. */
508 1, /* scalar_store_cost. */
509 1, /* vec_stmt_cost. */
510 1, /* vec_to_scalar_cost. */
511 1, /* scalar_to_vec_cost. */
512 1, /* vec_align_load_cost. */
513 2, /* vec_unalign_load_cost. */
514 1, /* vec_store_cost. */
515 3, /* cond_taken_branch_cost. */
516 1, /* cond_not_taken_branch_cost. */
/* AMD K6 tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
520 struct processor_costs k6_cost
= {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (2), /* cost of a lea instruction */
523 COSTS_N_INSNS (1), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (3), /* HI */
527 COSTS_N_INSNS (3), /* SI */
528 COSTS_N_INSNS (3), /* DI */
529 COSTS_N_INSNS (3)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (18), /* HI */
533 COSTS_N_INSNS (18), /* SI */
534 COSTS_N_INSNS (18), /* DI */
535 COSTS_N_INSNS (18)}, /* other */
536 COSTS_N_INSNS (2), /* cost of movsx */
537 COSTS_N_INSNS (2), /* cost of movzx */
538 8, /* "large" insn */
540 3, /* cost for loading QImode using movzbl */
541 {4, 5, 4}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {2, 3, 2}, /* cost of storing integer registers */
545 4, /* cost of reg,reg fld/fst */
546 {6, 6, 6}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 4, 4}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
550 2, /* cost of moving MMX register */
551 {2, 2}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {2, 2}, /* cost of storing MMX registers
554 in SImode and DImode */
555 2, /* cost of moving SSE register */
556 {2, 2, 8}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {2, 2, 8}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 6, /* MMX or SSE register to integer */
561 32, /* size of l1 cache. */
562 32, /* size of l2 cache. Some models
563 have integrated l2 cache, but
564 optimizing for k6 is not important
565 enough to worry about that. */
566 32, /* size of prefetch block */
567 1, /* number of parallel prefetches */
569 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
570 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
571 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
572 COSTS_N_INSNS (2), /* cost of FABS instruction. */
573 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
574 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
575 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
576 DUMMY_STRINGOP_ALGS
},
577 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
578 DUMMY_STRINGOP_ALGS
},
579 1, /* scalar_stmt_cost. */
580 1, /* scalar load_cost. */
581 1, /* scalar_store_cost. */
582 1, /* vec_stmt_cost. */
583 1, /* vec_to_scalar_cost. */
584 1, /* scalar_to_vec_cost. */
585 1, /* vec_align_load_cost. */
586 2, /* vec_unalign_load_cost. */
587 1, /* vec_store_cost. */
588 3, /* cond_taken_branch_cost. */
589 1, /* cond_not_taken_branch_cost. */
/* AMD Athlon tuning table: latencies in COSTS_N_INSNS units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
593 struct processor_costs athlon_cost
= {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (5), /* HI */
600 COSTS_N_INSNS (5), /* SI */
601 COSTS_N_INSNS (5), /* DI */
602 COSTS_N_INSNS (5)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (26), /* HI */
606 COSTS_N_INSNS (42), /* SI */
607 COSTS_N_INSNS (74), /* DI */
608 COSTS_N_INSNS (74)}, /* other */
609 COSTS_N_INSNS (1), /* cost of movsx */
610 COSTS_N_INSNS (1), /* cost of movzx */
611 8, /* "large" insn */
613 4, /* cost for loading QImode using movzbl */
614 {3, 4, 3}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {3, 4, 3}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {4, 4, 12}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {6, 6, 8}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {4, 4}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {4, 4}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {4, 4, 6}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {4, 4, 5}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 5, /* MMX or SSE register to integer */
634 64, /* size of l1 cache. */
635 256, /* size of l2 cache. */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
639 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
649 DUMMY_STRINGOP_ALGS
},
650 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
},
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
/* AMD K8 (Opteron/Athlon 64) tuning table: latencies in COSTS_N_INSNS
   units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
666 struct processor_costs k8_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (2), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (4), /* HI */
673 COSTS_N_INSNS (3), /* SI */
674 COSTS_N_INSNS (4), /* DI */
675 COSTS_N_INSNS (5)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (26), /* HI */
679 COSTS_N_INSNS (42), /* SI */
680 COSTS_N_INSNS (74), /* DI */
681 COSTS_N_INSNS (74)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 8, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {3, 4, 3}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {3, 4, 3}, /* cost of storing integer registers */
691 4, /* cost of reg,reg fld/fst */
692 {4, 4, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {6, 6, 8}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 2, /* cost of moving MMX register */
697 {3, 3}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {4, 4}, /* cost of storing MMX registers
700 in SImode and DImode */
701 2, /* cost of moving SSE register */
702 {4, 3, 6}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {4, 4, 5}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 5, /* MMX or SSE register to integer */
707 64, /* size of l1 cache. */
708 512, /* size of l2 cache. */
709 64, /* size of prefetch block */
710 /* New AMD processors never drop prefetches; if they cannot be performed
711 immediately, they are queued. We set number of simultaneous prefetches
712 to a large constant to reflect this (it probably is not a good idea not
713 to limit number of prefetches at all, as their execution also takes some
time).  */
715 100, /* number of parallel prefetches */
717 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
718 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
719 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
720 COSTS_N_INSNS (2), /* cost of FABS instruction. */
721 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
722 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
723 /* K8 has optimized REP instruction for medium sized blocks, but for very
724 small blocks it is better to use loop. For large blocks, libcall can
725 do nontemporary accesses and beat inline considerably. */
726 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
727 {-1, rep_prefix_4_byte
, false}}},
728 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
729 {-1, libcall
, false}}}},
730 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
731 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
732 {libcall
, {{48, unrolled_loop
, false},
733 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
734 4, /* scalar_stmt_cost. */
735 2, /* scalar load_cost. */
736 2, /* scalar_store_cost. */
737 5, /* vec_stmt_cost. */
738 0, /* vec_to_scalar_cost. */
739 2, /* scalar_to_vec_cost. */
740 2, /* vec_align_load_cost. */
741 3, /* vec_unalign_load_cost. */
742 3, /* vec_store_cost. */
743 3, /* cond_taken_branch_cost. */
744 2, /* cond_not_taken_branch_cost. */
/* AMD Family 10h (Barcelona) tuning table: latencies in COSTS_N_INSNS
   units.
   NOTE(review): extraction appears to have dropped the `static const`
   qualifier, the MOVE_RATIO and Branch cost entries, and the closing
   `};` — restore from upstream GCC i386.c before compiling.  */
747 struct processor_costs amdfam10_cost
= {
748 COSTS_N_INSNS (1), /* cost of an add instruction */
749 COSTS_N_INSNS (2), /* cost of a lea instruction */
750 COSTS_N_INSNS (1), /* variable shift costs */
751 COSTS_N_INSNS (1), /* constant shift costs */
752 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
753 COSTS_N_INSNS (4), /* HI */
754 COSTS_N_INSNS (3), /* SI */
755 COSTS_N_INSNS (4), /* DI */
756 COSTS_N_INSNS (5)}, /* other */
757 0, /* cost of multiply per each bit set */
758 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
759 COSTS_N_INSNS (35), /* HI */
760 COSTS_N_INSNS (51), /* SI */
761 COSTS_N_INSNS (83), /* DI */
762 COSTS_N_INSNS (83)}, /* other */
763 COSTS_N_INSNS (1), /* cost of movsx */
764 COSTS_N_INSNS (1), /* cost of movzx */
765 8, /* "large" insn */
767 4, /* cost for loading QImode using movzbl */
768 {3, 4, 3}, /* cost of loading integer registers
769 in QImode, HImode and SImode.
770 Relative to reg-reg move (2). */
771 {3, 4, 3}, /* cost of storing integer registers */
772 4, /* cost of reg,reg fld/fst */
773 {4, 4, 12}, /* cost of loading fp registers
774 in SFmode, DFmode and XFmode */
775 {6, 6, 8}, /* cost of storing fp registers
776 in SFmode, DFmode and XFmode */
777 2, /* cost of moving MMX register */
778 {3, 3}, /* cost of loading MMX registers
779 in SImode and DImode */
780 {4, 4}, /* cost of storing MMX registers
781 in SImode and DImode */
782 2, /* cost of moving SSE register */
783 {4, 4, 3}, /* cost of loading SSE registers
784 in SImode, DImode and TImode */
785 {4, 4, 5}, /* cost of storing SSE registers
786 in SImode, DImode and TImode */
787 3, /* MMX or SSE register to integer */
/* NOTE(review): the following throughput notes were comment text in
   upstream i386.c; the comment delimiters were lost in extraction and
   are re-wrapped here so the initializer is not corrupted:
789 MOVD reg64, xmmreg Double FSTORE 4
790 MOVD reg32, xmmreg Double FSTORE 4
792 MOVD reg64, xmmreg Double FADD 3
794 MOVD reg32, xmmreg Double FADD 3
*/
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set number of simultaneous prefetches
801 to a large constant to reflect this (it probably is not a good idea not
802 to limit number of prefetches at all, as their execution also takes some
time).  */
804 100, /* number of parallel prefetches */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
814 very small blocks it is better to use loop. For large blocks, libcall can
815 do nontemporary accesses and beat inline considerably. */
816 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
817 {-1, rep_prefix_4_byte
, false}}},
818 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
819 {-1, libcall
, false}}}},
820 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
821 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
822 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
823 {-1, libcall
, false}}}},
824 4, /* scalar_stmt_cost. */
825 2, /* scalar load_cost. */
826 2, /* scalar_store_cost. */
827 6, /* vec_stmt_cost. */
828 0, /* vec_to_scalar_cost. */
829 2, /* scalar_to_vec_cost. */
830 2, /* vec_align_load_cost. */
831 2, /* vec_unalign_load_cost. */
832 2, /* vec_store_cost. */
833 2, /* cond_taken_branch_cost. */
834 1, /* cond_not_taken_branch_cost. */
837 struct processor_costs bdver1_cost
= {
838 COSTS_N_INSNS (1), /* cost of an add instruction */
839 COSTS_N_INSNS (1), /* cost of a lea instruction */
840 COSTS_N_INSNS (1), /* variable shift costs */
841 COSTS_N_INSNS (1), /* constant shift costs */
842 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
843 COSTS_N_INSNS (4), /* HI */
844 COSTS_N_INSNS (4), /* SI */
845 COSTS_N_INSNS (6), /* DI */
846 COSTS_N_INSNS (6)}, /* other */
847 0, /* cost of multiply per each bit set */
848 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
849 COSTS_N_INSNS (35), /* HI */
850 COSTS_N_INSNS (51), /* SI */
851 COSTS_N_INSNS (83), /* DI */
852 COSTS_N_INSNS (83)}, /* other */
853 COSTS_N_INSNS (1), /* cost of movsx */
854 COSTS_N_INSNS (1), /* cost of movzx */
855 8, /* "large" insn */
857 4, /* cost for loading QImode using movzbl */
858 {5, 5, 4}, /* cost of loading integer registers
859 in QImode, HImode and SImode.
860 Relative to reg-reg move (2). */
861 {4, 4, 4}, /* cost of storing integer registers */
862 2, /* cost of reg,reg fld/fst */
863 {5, 5, 12}, /* cost of loading fp registers
864 in SFmode, DFmode and XFmode */
865 {4, 4, 8}, /* cost of storing fp registers
866 in SFmode, DFmode and XFmode */
867 2, /* cost of moving MMX register */
868 {4, 4}, /* cost of loading MMX registers
869 in SImode and DImode */
870 {4, 4}, /* cost of storing MMX registers
871 in SImode and DImode */
872 2, /* cost of moving SSE register */
873 {4, 4, 4}, /* cost of loading SSE registers
874 in SImode, DImode and TImode */
875 {4, 4, 4}, /* cost of storing SSE registers
876 in SImode, DImode and TImode */
877 2, /* MMX or SSE register to integer */
879 MOVD reg64, xmmreg Double FSTORE 4
880 MOVD reg32, xmmreg Double FSTORE 4
882 MOVD reg64, xmmreg Double FADD 3
884 MOVD reg32, xmmreg Double FADD 3
886 16, /* size of l1 cache. */
887 2048, /* size of l2 cache. */
888 64, /* size of prefetch block */
889 /* New AMD processors never drop prefetches; if they cannot be performed
890 immediately, they are queued. We set number of simultaneous prefetches
891 to a large constant to reflect this (it probably is not a good idea not
892 to limit number of prefetches at all, as their execution also takes some
894 100, /* number of parallel prefetches */
896 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
897 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
898 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
899 COSTS_N_INSNS (2), /* cost of FABS instruction. */
900 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
901 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporary accesses and beat inline considerably. */
906 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
907 {-1, rep_prefix_4_byte
, false}}},
908 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
909 {-1, libcall
, false}}}},
910 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
911 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
912 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
913 {-1, libcall
, false}}}},
914 6, /* scalar_stmt_cost. */
915 4, /* scalar load_cost. */
916 4, /* scalar_store_cost. */
917 6, /* vec_stmt_cost. */
918 0, /* vec_to_scalar_cost. */
919 2, /* scalar_to_vec_cost. */
920 4, /* vec_align_load_cost. */
921 4, /* vec_unalign_load_cost. */
922 4, /* vec_store_cost. */
923 2, /* cond_taken_branch_cost. */
924 1, /* cond_not_taken_branch_cost. */
927 struct processor_costs bdver2_cost
= {
928 COSTS_N_INSNS (1), /* cost of an add instruction */
929 COSTS_N_INSNS (1), /* cost of a lea instruction */
930 COSTS_N_INSNS (1), /* variable shift costs */
931 COSTS_N_INSNS (1), /* constant shift costs */
932 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
933 COSTS_N_INSNS (4), /* HI */
934 COSTS_N_INSNS (4), /* SI */
935 COSTS_N_INSNS (6), /* DI */
936 COSTS_N_INSNS (6)}, /* other */
937 0, /* cost of multiply per each bit set */
938 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
939 COSTS_N_INSNS (35), /* HI */
940 COSTS_N_INSNS (51), /* SI */
941 COSTS_N_INSNS (83), /* DI */
942 COSTS_N_INSNS (83)}, /* other */
943 COSTS_N_INSNS (1), /* cost of movsx */
944 COSTS_N_INSNS (1), /* cost of movzx */
945 8, /* "large" insn */
947 4, /* cost for loading QImode using movzbl */
948 {5, 5, 4}, /* cost of loading integer registers
949 in QImode, HImode and SImode.
950 Relative to reg-reg move (2). */
951 {4, 4, 4}, /* cost of storing integer registers */
952 2, /* cost of reg,reg fld/fst */
953 {5, 5, 12}, /* cost of loading fp registers
954 in SFmode, DFmode and XFmode */
955 {4, 4, 8}, /* cost of storing fp registers
956 in SFmode, DFmode and XFmode */
957 2, /* cost of moving MMX register */
958 {4, 4}, /* cost of loading MMX registers
959 in SImode and DImode */
960 {4, 4}, /* cost of storing MMX registers
961 in SImode and DImode */
962 2, /* cost of moving SSE register */
963 {4, 4, 4}, /* cost of loading SSE registers
964 in SImode, DImode and TImode */
965 {4, 4, 4}, /* cost of storing SSE registers
966 in SImode, DImode and TImode */
967 2, /* MMX or SSE register to integer */
969 MOVD reg64, xmmreg Double FSTORE 4
970 MOVD reg32, xmmreg Double FSTORE 4
972 MOVD reg64, xmmreg Double FADD 3
974 MOVD reg32, xmmreg Double FADD 3
976 16, /* size of l1 cache. */
977 2048, /* size of l2 cache. */
978 64, /* size of prefetch block */
979 /* New AMD processors never drop prefetches; if they cannot be performed
980 immediately, they are queued. We set number of simultaneous prefetches
981 to a large constant to reflect this (it probably is not a good idea not
982 to limit number of prefetches at all, as their execution also takes some
984 100, /* number of parallel prefetches */
986 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
987 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
988 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
989 COSTS_N_INSNS (2), /* cost of FABS instruction. */
990 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
991 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
993 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
994 very small blocks it is better to use loop. For large blocks, libcall
995 can do nontemporary accesses and beat inline considerably. */
996 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
997 {-1, rep_prefix_4_byte
, false}}},
998 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
999 {-1, libcall
, false}}}},
1000 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1001 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1002 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1003 {-1, libcall
, false}}}},
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 2, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
1017 struct processor_costs bdver3_cost
= {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 16, /* size of l1 cache. */
1059 2048, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 /* New AMD processors never drop prefetches; if they cannot be performed
1062 immediately, they are queued. We set number of simultaneous prefetches
1063 to a large constant to reflect this (it probably is not a good idea not
1064 to limit number of prefetches at all, as their execution also takes some
1066 100, /* number of parallel prefetches */
1067 2, /* Branch cost */
1068 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1069 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1070 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1071 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1072 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1073 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1075 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1076 very small blocks it is better to use loop. For large blocks, libcall
1077 can do nontemporary accesses and beat inline considerably. */
1078 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1079 {-1, rep_prefix_4_byte
, false}}},
1080 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1081 {-1, libcall
, false}}}},
1082 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1083 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1084 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}},
1086 6, /* scalar_stmt_cost. */
1087 4, /* scalar load_cost. */
1088 4, /* scalar_store_cost. */
1089 6, /* vec_stmt_cost. */
1090 0, /* vec_to_scalar_cost. */
1091 2, /* scalar_to_vec_cost. */
1092 4, /* vec_align_load_cost. */
1093 4, /* vec_unalign_load_cost. */
1094 4, /* vec_store_cost. */
1095 2, /* cond_taken_branch_cost. */
1096 1, /* cond_not_taken_branch_cost. */
1099 struct processor_costs btver1_cost
= {
1100 COSTS_N_INSNS (1), /* cost of an add instruction */
1101 COSTS_N_INSNS (2), /* cost of a lea instruction */
1102 COSTS_N_INSNS (1), /* variable shift costs */
1103 COSTS_N_INSNS (1), /* constant shift costs */
1104 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1105 COSTS_N_INSNS (4), /* HI */
1106 COSTS_N_INSNS (3), /* SI */
1107 COSTS_N_INSNS (4), /* DI */
1108 COSTS_N_INSNS (5)}, /* other */
1109 0, /* cost of multiply per each bit set */
1110 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1111 COSTS_N_INSNS (35), /* HI */
1112 COSTS_N_INSNS (51), /* SI */
1113 COSTS_N_INSNS (83), /* DI */
1114 COSTS_N_INSNS (83)}, /* other */
1115 COSTS_N_INSNS (1), /* cost of movsx */
1116 COSTS_N_INSNS (1), /* cost of movzx */
1117 8, /* "large" insn */
1119 4, /* cost for loading QImode using movzbl */
1120 {3, 4, 3}, /* cost of loading integer registers
1121 in QImode, HImode and SImode.
1122 Relative to reg-reg move (2). */
1123 {3, 4, 3}, /* cost of storing integer registers */
1124 4, /* cost of reg,reg fld/fst */
1125 {4, 4, 12}, /* cost of loading fp registers
1126 in SFmode, DFmode and XFmode */
1127 {6, 6, 8}, /* cost of storing fp registers
1128 in SFmode, DFmode and XFmode */
1129 2, /* cost of moving MMX register */
1130 {3, 3}, /* cost of loading MMX registers
1131 in SImode and DImode */
1132 {4, 4}, /* cost of storing MMX registers
1133 in SImode and DImode */
1134 2, /* cost of moving SSE register */
1135 {4, 4, 3}, /* cost of loading SSE registers
1136 in SImode, DImode and TImode */
1137 {4, 4, 5}, /* cost of storing SSE registers
1138 in SImode, DImode and TImode */
1139 3, /* MMX or SSE register to integer */
1141 MOVD reg64, xmmreg Double FSTORE 4
1142 MOVD reg32, xmmreg Double FSTORE 4
1144 MOVD reg64, xmmreg Double FADD 3
1146 MOVD reg32, xmmreg Double FADD 3
1148 32, /* size of l1 cache. */
1149 512, /* size of l2 cache. */
1150 64, /* size of prefetch block */
1151 100, /* number of parallel prefetches */
1152 2, /* Branch cost */
1153 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1154 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1155 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1156 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1157 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1158 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1160 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1161 very small blocks it is better to use loop. For large blocks, libcall can
1162 do nontemporary accesses and beat inline considerably. */
1163 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1164 {-1, rep_prefix_4_byte
, false}}},
1165 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1166 {-1, libcall
, false}}}},
1167 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1168 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1169 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1170 {-1, libcall
, false}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 struct processor_costs btver2_cost
= {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (2), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (3), /* SI */
1192 COSTS_N_INSNS (4), /* DI */
1193 COSTS_N_INSNS (5)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1204 4, /* cost for loading QImode using movzbl */
1205 {3, 4, 3}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {3, 4, 3}, /* cost of storing integer registers */
1209 4, /* cost of reg,reg fld/fst */
1210 {4, 4, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {6, 6, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {3, 3}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 3}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 5}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 3, /* MMX or SSE register to integer */
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1229 MOVD reg64, xmmreg Double FADD 3
1231 MOVD reg32, xmmreg Double FADD 3
1233 32, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 100, /* number of parallel prefetches */
1237 2, /* Branch cost */
1238 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1239 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1240 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1241 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1242 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1243 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1245 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1246 {-1, rep_prefix_4_byte
, false}}},
1247 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1248 {-1, libcall
, false}}}},
1249 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1250 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1251 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1252 {-1, libcall
, false}}}},
1253 4, /* scalar_stmt_cost. */
1254 2, /* scalar load_cost. */
1255 2, /* scalar_store_cost. */
1256 6, /* vec_stmt_cost. */
1257 0, /* vec_to_scalar_cost. */
1258 2, /* scalar_to_vec_cost. */
1259 2, /* vec_align_load_cost. */
1260 2, /* vec_unalign_load_cost. */
1261 2, /* vec_store_cost. */
1262 2, /* cond_taken_branch_cost. */
1263 1, /* cond_not_taken_branch_cost. */
1267 struct processor_costs pentium4_cost
= {
1268 COSTS_N_INSNS (1), /* cost of an add instruction */
1269 COSTS_N_INSNS (3), /* cost of a lea instruction */
1270 COSTS_N_INSNS (4), /* variable shift costs */
1271 COSTS_N_INSNS (4), /* constant shift costs */
1272 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1273 COSTS_N_INSNS (15), /* HI */
1274 COSTS_N_INSNS (15), /* SI */
1275 COSTS_N_INSNS (15), /* DI */
1276 COSTS_N_INSNS (15)}, /* other */
1277 0, /* cost of multiply per each bit set */
1278 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1279 COSTS_N_INSNS (56), /* HI */
1280 COSTS_N_INSNS (56), /* SI */
1281 COSTS_N_INSNS (56), /* DI */
1282 COSTS_N_INSNS (56)}, /* other */
1283 COSTS_N_INSNS (1), /* cost of movsx */
1284 COSTS_N_INSNS (1), /* cost of movzx */
1285 16, /* "large" insn */
1287 2, /* cost for loading QImode using movzbl */
1288 {4, 5, 4}, /* cost of loading integer registers
1289 in QImode, HImode and SImode.
1290 Relative to reg-reg move (2). */
1291 {2, 3, 2}, /* cost of storing integer registers */
1292 2, /* cost of reg,reg fld/fst */
1293 {2, 2, 6}, /* cost of loading fp registers
1294 in SFmode, DFmode and XFmode */
1295 {4, 4, 6}, /* cost of storing fp registers
1296 in SFmode, DFmode and XFmode */
1297 2, /* cost of moving MMX register */
1298 {2, 2}, /* cost of loading MMX registers
1299 in SImode and DImode */
1300 {2, 2}, /* cost of storing MMX registers
1301 in SImode and DImode */
1302 12, /* cost of moving SSE register */
1303 {12, 12, 12}, /* cost of loading SSE registers
1304 in SImode, DImode and TImode */
1305 {2, 2, 8}, /* cost of storing SSE registers
1306 in SImode, DImode and TImode */
1307 10, /* MMX or SSE register to integer */
1308 8, /* size of l1 cache. */
1309 256, /* size of l2 cache. */
1310 64, /* size of prefetch block */
1311 6, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1319 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1320 DUMMY_STRINGOP_ALGS
},
1321 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1322 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1323 DUMMY_STRINGOP_ALGS
},
1324 1, /* scalar_stmt_cost. */
1325 1, /* scalar load_cost. */
1326 1, /* scalar_store_cost. */
1327 1, /* vec_stmt_cost. */
1328 1, /* vec_to_scalar_cost. */
1329 1, /* scalar_to_vec_cost. */
1330 1, /* vec_align_load_cost. */
1331 2, /* vec_unalign_load_cost. */
1332 1, /* vec_store_cost. */
1333 3, /* cond_taken_branch_cost. */
1334 1, /* cond_not_taken_branch_cost. */
1338 struct processor_costs nocona_cost
= {
1339 COSTS_N_INSNS (1), /* cost of an add instruction */
1340 COSTS_N_INSNS (1), /* cost of a lea instruction */
1341 COSTS_N_INSNS (1), /* variable shift costs */
1342 COSTS_N_INSNS (1), /* constant shift costs */
1343 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1344 COSTS_N_INSNS (10), /* HI */
1345 COSTS_N_INSNS (10), /* SI */
1346 COSTS_N_INSNS (10), /* DI */
1347 COSTS_N_INSNS (10)}, /* other */
1348 0, /* cost of multiply per each bit set */
1349 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1350 COSTS_N_INSNS (66), /* HI */
1351 COSTS_N_INSNS (66), /* SI */
1352 COSTS_N_INSNS (66), /* DI */
1353 COSTS_N_INSNS (66)}, /* other */
1354 COSTS_N_INSNS (1), /* cost of movsx */
1355 COSTS_N_INSNS (1), /* cost of movzx */
1356 16, /* "large" insn */
1357 17, /* MOVE_RATIO */
1358 4, /* cost for loading QImode using movzbl */
1359 {4, 4, 4}, /* cost of loading integer registers
1360 in QImode, HImode and SImode.
1361 Relative to reg-reg move (2). */
1362 {4, 4, 4}, /* cost of storing integer registers */
1363 3, /* cost of reg,reg fld/fst */
1364 {12, 12, 12}, /* cost of loading fp registers
1365 in SFmode, DFmode and XFmode */
1366 {4, 4, 4}, /* cost of storing fp registers
1367 in SFmode, DFmode and XFmode */
1368 6, /* cost of moving MMX register */
1369 {12, 12}, /* cost of loading MMX registers
1370 in SImode and DImode */
1371 {12, 12}, /* cost of storing MMX registers
1372 in SImode and DImode */
1373 6, /* cost of moving SSE register */
1374 {12, 12, 12}, /* cost of loading SSE registers
1375 in SImode, DImode and TImode */
1376 {12, 12, 12}, /* cost of storing SSE registers
1377 in SImode, DImode and TImode */
1378 8, /* MMX or SSE register to integer */
1379 8, /* size of l1 cache. */
1380 1024, /* size of l2 cache. */
1381 128, /* size of prefetch block */
1382 8, /* number of parallel prefetches */
1383 1, /* Branch cost */
1384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1385 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1386 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1387 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1388 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1389 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1390 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1391 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1392 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}},
1393 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1394 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1395 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1396 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1397 1, /* scalar_stmt_cost. */
1398 1, /* scalar load_cost. */
1399 1, /* scalar_store_cost. */
1400 1, /* vec_stmt_cost. */
1401 1, /* vec_to_scalar_cost. */
1402 1, /* scalar_to_vec_cost. */
1403 1, /* vec_align_load_cost. */
1404 2, /* vec_unalign_load_cost. */
1405 1, /* vec_store_cost. */
1406 3, /* cond_taken_branch_cost. */
1407 1, /* cond_not_taken_branch_cost. */
1411 struct processor_costs atom_cost
= {
1412 COSTS_N_INSNS (1), /* cost of an add instruction */
1413 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1414 COSTS_N_INSNS (1), /* variable shift costs */
1415 COSTS_N_INSNS (1), /* constant shift costs */
1416 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1417 COSTS_N_INSNS (4), /* HI */
1418 COSTS_N_INSNS (3), /* SI */
1419 COSTS_N_INSNS (4), /* DI */
1420 COSTS_N_INSNS (2)}, /* other */
1421 0, /* cost of multiply per each bit set */
1422 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1423 COSTS_N_INSNS (26), /* HI */
1424 COSTS_N_INSNS (42), /* SI */
1425 COSTS_N_INSNS (74), /* DI */
1426 COSTS_N_INSNS (74)}, /* other */
1427 COSTS_N_INSNS (1), /* cost of movsx */
1428 COSTS_N_INSNS (1), /* cost of movzx */
1429 8, /* "large" insn */
1430 17, /* MOVE_RATIO */
1431 4, /* cost for loading QImode using movzbl */
1432 {4, 4, 4}, /* cost of loading integer registers
1433 in QImode, HImode and SImode.
1434 Relative to reg-reg move (2). */
1435 {4, 4, 4}, /* cost of storing integer registers */
1436 4, /* cost of reg,reg fld/fst */
1437 {12, 12, 12}, /* cost of loading fp registers
1438 in SFmode, DFmode and XFmode */
1439 {6, 6, 8}, /* cost of storing fp registers
1440 in SFmode, DFmode and XFmode */
1441 2, /* cost of moving MMX register */
1442 {8, 8}, /* cost of loading MMX registers
1443 in SImode and DImode */
1444 {8, 8}, /* cost of storing MMX registers
1445 in SImode and DImode */
1446 2, /* cost of moving SSE register */
1447 {8, 8, 8}, /* cost of loading SSE registers
1448 in SImode, DImode and TImode */
1449 {8, 8, 8}, /* cost of storing SSE registers
1450 in SImode, DImode and TImode */
1451 5, /* MMX or SSE register to integer */
1452 32, /* size of l1 cache. */
1453 256, /* size of l2 cache. */
1454 64, /* size of prefetch block */
1455 6, /* number of parallel prefetches */
1456 3, /* Branch cost */
1457 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1458 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1459 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1460 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1461 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1462 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1463 {{libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1464 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1465 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1466 {{libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1467 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1468 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1469 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1470 1, /* scalar_stmt_cost. */
1471 1, /* scalar load_cost. */
1472 1, /* scalar_store_cost. */
1473 1, /* vec_stmt_cost. */
1474 1, /* vec_to_scalar_cost. */
1475 1, /* scalar_to_vec_cost. */
1476 1, /* vec_align_load_cost. */
1477 2, /* vec_unalign_load_cost. */
1478 1, /* vec_store_cost. */
1479 3, /* cond_taken_branch_cost. */
1480 1, /* cond_not_taken_branch_cost. */
1483 /* Generic64 should produce code tuned for Nocona and K8. */
1485 struct processor_costs generic64_cost
= {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 /* On all chips taken into consideration lea is 2 cycles and more. With
1488 this cost however our current implementation of synth_mult results in
1489 use of unnecessary temporary registers causing regression on several
1490 SPECfp benchmarks. */
1491 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1492 COSTS_N_INSNS (1), /* variable shift costs */
1493 COSTS_N_INSNS (1), /* constant shift costs */
1494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1495 COSTS_N_INSNS (4), /* HI */
1496 COSTS_N_INSNS (3), /* SI */
1497 COSTS_N_INSNS (4), /* DI */
1498 COSTS_N_INSNS (2)}, /* other */
1499 0, /* cost of multiply per each bit set */
1500 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1501 COSTS_N_INSNS (26), /* HI */
1502 COSTS_N_INSNS (42), /* SI */
1503 COSTS_N_INSNS (74), /* DI */
1504 COSTS_N_INSNS (74)}, /* other */
1505 COSTS_N_INSNS (1), /* cost of movsx */
1506 COSTS_N_INSNS (1), /* cost of movzx */
1507 8, /* "large" insn */
1508 17, /* MOVE_RATIO */
1509 4, /* cost for loading QImode using movzbl */
1510 {4, 4, 4}, /* cost of loading integer registers
1511 in QImode, HImode and SImode.
1512 Relative to reg-reg move (2). */
1513 {4, 4, 4}, /* cost of storing integer registers */
1514 4, /* cost of reg,reg fld/fst */
1515 {12, 12, 12}, /* cost of loading fp registers
1516 in SFmode, DFmode and XFmode */
1517 {6, 6, 8}, /* cost of storing fp registers
1518 in SFmode, DFmode and XFmode */
1519 2, /* cost of moving MMX register */
1520 {8, 8}, /* cost of loading MMX registers
1521 in SImode and DImode */
1522 {8, 8}, /* cost of storing MMX registers
1523 in SImode and DImode */
1524 2, /* cost of moving SSE register */
1525 {8, 8, 8}, /* cost of loading SSE registers
1526 in SImode, DImode and TImode */
1527 {8, 8, 8}, /* cost of storing SSE registers
1528 in SImode, DImode and TImode */
1529 5, /* MMX or SSE register to integer */
1530 32, /* size of l1 cache. */
1531 512, /* size of l2 cache. */
1532 64, /* size of prefetch block */
1533 6, /* number of parallel prefetches */
1534 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1535 value is increased to perhaps more appropriate value of 5. */
1536 3, /* Branch cost */
1537 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1538 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1539 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1540 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1541 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1542 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1543 {DUMMY_STRINGOP_ALGS
,
1544 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1545 {-1, libcall
, false}}}},
1546 {DUMMY_STRINGOP_ALGS
,
1547 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1548 {-1, libcall
, false}}}},
1549 1, /* scalar_stmt_cost. */
1550 1, /* scalar load_cost. */
1551 1, /* scalar_store_cost. */
1552 1, /* vec_stmt_cost. */
1553 1, /* vec_to_scalar_cost. */
1554 1, /* scalar_to_vec_cost. */
1555 1, /* vec_align_load_cost. */
1556 2, /* vec_unalign_load_cost. */
1557 1, /* vec_store_cost. */
1558 3, /* cond_taken_branch_cost. */
1559 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for the Core family of CPUs.  */
1564 struct processor_costs core_cost
= {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 /* On all chips taken into consideration lea is 2 cycles and more. With
1567 this cost however our current implementation of synth_mult results in
1568 use of unnecessary temporary registers causing regression on several
1569 SPECfp benchmarks. */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 64, /* size of l1 cache. */
1610 512, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 /* FIXME perhaps more appropriate value is 5. */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1622 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1623 {-1, libcall
, false}}}},
1624 {{libcall
, {{6, loop_1_byte
, true},
1626 {8192, rep_prefix_4_byte
, true},
1627 {-1, libcall
, false}}},
1628 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1629 {-1, libcall
, false}}}},
1630 1, /* scalar_stmt_cost. */
1631 1, /* scalar load_cost. */
1632 1, /* scalar_store_cost. */
1633 1, /* vec_stmt_cost. */
1634 1, /* vec_to_scalar_cost. */
1635 1, /* scalar_to_vec_cost. */
1636 1, /* vec_align_load_cost. */
1637 2, /* vec_unalign_load_cost. */
1638 1, /* vec_store_cost. */
1639 3, /* cond_taken_branch_cost. */
1640 1, /* cond_not_taken_branch_cost. */
1643 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1646 struct processor_costs generic32_cost
= {
1647 COSTS_N_INSNS (1), /* cost of an add instruction */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 256, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1699 {-1, libcall
, false}}},
1700 DUMMY_STRINGOP_ALGS
},
1701 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1702 {-1, libcall
, false}}},
1703 DUMMY_STRINGOP_ALGS
},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Set by -mtune. */
1718 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1720 /* Set by -mtune or -Os. */
1721 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks.  Each m_* macro is the
   bit for one PROCESSOR_* enumerator; the compound masks group
   related CPU families for use in the tuning tables below.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_COREI7 (1<<PROCESSOR_COREI7)
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER3 (1<<PROCESSOR_BDVER3)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1760 /* Feature tests against the various tunings. */
1761 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1763 /* Feature tests against the various tunings used to create ix86_tune_features
1764 based on the processor mask. */
1765 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1766 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1767 negatively, so enabling for Generic64 seems like good code size
1768 tradeoff. We can't enable it for 32bit generic because it does not
1769 work well with PPro base chips. */
1770 m_386
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1772 /* X86_TUNE_PUSH_MEMORY */
1773 m_386
| m_P4_NOCONA
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1775 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1778 /* X86_TUNE_UNROLL_STRLEN */
1779 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE_ALL
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1781 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1782 on simulation result. But after P4 was made, no performance benefit
1783 was observed with branch hints. It also increases the code size.
1784 As a result, icc never generates branch hints. */
1787 /* X86_TUNE_DOUBLE_WITH_ADD */
1790 /* X86_TUNE_USE_SAHF */
1791 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1793 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1794 partial dependencies. */
1795 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1797 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1798 register stalls on Generic32 compilation setting as well. However
1799 in current implementation the partial register stalls are not eliminated
1800 very well - they can be introduced via subregs synthesized by combine
1801 and can happen in caller/callee saving sequences. Because this option
1802 pays back little on PPro based chips and is in conflict with partial reg
1803 dependencies used by Athlon/P4 based chips, it is better to leave it off
1804 for generic32 for now. */
1807 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1808 m_CORE_ALL
| m_GENERIC
,
1810 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1811 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1812 m_CORE_ALL
| m_GENERIC
,
1814 /* X86_TUNE_USE_HIMODE_FIOP */
1815 m_386
| m_486
| m_K6_GEODE
,
1817 /* X86_TUNE_USE_SIMODE_FIOP */
1818 ~(m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1820 /* X86_TUNE_USE_MOV0 */
1823 /* X86_TUNE_USE_CLTD */
1824 ~(m_PENT
| m_ATOM
| m_K6
),
1826 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1829 /* X86_TUNE_SPLIT_LONG_MOVES */
1832 /* X86_TUNE_READ_MODIFY_WRITE */
1835 /* X86_TUNE_READ_MODIFY */
1838 /* X86_TUNE_PROMOTE_QIMODE */
1839 m_386
| m_486
| m_PENT
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1841 /* X86_TUNE_FAST_PREFIX */
1842 ~(m_386
| m_486
| m_PENT
),
1844 /* X86_TUNE_SINGLE_STRINGOP */
1845 m_386
| m_P4_NOCONA
,
1847 /* X86_TUNE_QIMODE_MATH */
1850 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1851 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1852 might be considered for Generic32 if our scheme for avoiding partial
1853 stalls was more effective. */
1856 /* X86_TUNE_PROMOTE_QI_REGS */
1859 /* X86_TUNE_PROMOTE_HI_REGS */
1862 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1863 over esp addition. */
1864 m_386
| m_486
| m_PENT
| m_PPRO
,
1866 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1867 over esp addition. */
1870 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1871 over esp subtraction. */
1872 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1874 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1875 over esp subtraction. */
1876 m_PENT
| m_K6_GEODE
,
1878 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1879 for DFmode copies */
1880 ~(m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1882 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1883 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1885 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1886 conflict here in between PPro/Pentium4 based chips that thread 128bit
1887 SSE registers as single units versus K8 based chips that divide SSE
1888 registers to two 64bit halves. This knob promotes all store destinations
1889 to be 128bit to allow register renaming on 128bit SSE units, but usually
1890 results in one extra microop on 64bit SSE units. Experimental results
1891 shows that disabling this option on P4 brings over 20% SPECfp regression,
1892 while enabling it on K8 brings roughly 2.4% regression that can be partly
1893 masked by careful scheduling of moves. */
1894 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
1896 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1897 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
1899 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1902 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1905 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1906 are resolved on SSE register parts instead of whole registers, so we may
1907 maintain just lower part of scalar values in proper format leaving the
1908 upper part undefined. */
1911 /* X86_TUNE_SSE_TYPELESS_STORES */
1914 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1915 m_PPRO
| m_P4_NOCONA
,
1917 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1918 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1920 /* X86_TUNE_PROLOGUE_USING_MOVE */
1921 m_PPRO
| m_ATHLON_K8
,
1923 /* X86_TUNE_EPILOGUE_USING_MOVE */
1924 m_PPRO
| m_ATHLON_K8
,
1926 /* X86_TUNE_SHIFT1 */
1929 /* X86_TUNE_USE_FFREEP */
1932 /* X86_TUNE_INTER_UNIT_MOVES */
1933 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1935 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1936 ~(m_AMDFAM10
| m_BDVER
),
1938 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1939 than 4 branch instructions in the 16 byte window. */
1940 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1942 /* X86_TUNE_SCHEDULE */
1943 m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1945 /* X86_TUNE_USE_BT */
1946 m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1948 /* X86_TUNE_USE_INCDEC */
1949 ~(m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GENERIC
),
1951 /* X86_TUNE_PAD_RETURNS */
1952 m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
,
1954 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
1957 /* X86_TUNE_EXT_80387_CONSTANTS */
1958 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
1960 /* X86_TUNE_AVOID_VECTOR_DECODE */
1961 m_CORE_ALL
| m_K8
| m_GENERIC64
,
1963 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1964 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1967 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1968 vector path on AMD machines. */
1969 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1971 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1973 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1975 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1979 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1980 but one byte longer. */
1983 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1984 operand that cannot be represented using a modRM byte. The XOR
1985 replacement is long decoded, so this split helps here as well. */
1988 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1990 m_CORE_ALL
| m_AMDFAM10
| m_GENERIC
,
1992 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1993 from integer to FP. */
1996 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1997 with a subsequent conditional jump instruction into a single
1998 compare-and-branch uop. */
2001 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2002 will impact LEA instruction selection. */
2005 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2009 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2010 at -O3. For the moment, the prefetching seems badly tuned for Intel
2012 m_K6_GEODE
| m_AMD_MULTIPLE
,
2014 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2015 the auto-vectorizer. */
2018 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2019 during reassociation of integer computation. */
2022 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2023 during reassociation of fp computation. */
2026 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2027 regs instead of memory. */
2030 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2031 a conditional move. */
2035 /* Feature tests against the various architecture variations. */
2036 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2038 /* Feature tests against the various architecture variations, used to create
2039 ix86_arch_features based on the processor mask. */
2040 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2041 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2042 ~(m_386
| m_486
| m_PENT
| m_K6
),
2044 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2047 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2050 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2053 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2057 static const unsigned int x86_accumulate_outgoing_args
2058 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
;
2060 static const unsigned int x86_arch_always_fancy_math_387
2061 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2063 static const unsigned int x86_avx256_split_unaligned_load
2064 = m_COREI7
| m_GENERIC
;
2066 static const unsigned int x86_avx256_split_unaligned_store
2067 = m_COREI7
| m_BDVER
| m_GENERIC
;
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
2074 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2075 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2076 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2077 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2079 /* Array of the smallest class containing reg number REGNO, indexed by
2080 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2082 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2084 /* ax, dx, cx, bx */
2085 AREG
, DREG
, CREG
, BREG
,
2086 /* si, di, bp, sp */
2087 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2089 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2090 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2093 /* flags, fpsr, fpcr, frame */
2094 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2096 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2099 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2102 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2103 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2104 /* SSE REX registers */
2105 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2109 /* The "default" register map used in 32bit mode. */
2111 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2113 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2114 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2115 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2116 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2117 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2118 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2119 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2122 /* The "default" register map used in 64bit mode. */
2124 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2126 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2127 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2128 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2129 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2130 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2131 8,9,10,11,12,13,14,15, /* extended integer registers */
2132 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2135 /* Define the register numbers to be used in Dwarf debugging information.
2136 The SVR4 reference port C compiler uses the following register numbers
2137 in its Dwarf output code:
2138 0 for %eax (gcc regno = 0)
2139 1 for %ecx (gcc regno = 2)
2140 2 for %edx (gcc regno = 1)
2141 3 for %ebx (gcc regno = 3)
2142 4 for %esp (gcc regno = 7)
2143 5 for %ebp (gcc regno = 6)
2144 6 for %esi (gcc regno = 4)
2145 7 for %edi (gcc regno = 5)
2146 The following three DWARF register numbers are never generated by
2147 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2148 believes these numbers have these meanings.
2149 8 for %eip (no gcc equivalent)
2150 9 for %eflags (gcc regno = 17)
2151 10 for %trapno (no gcc equivalent)
2152 It is not at all clear how we should number the FP stack registers
2153 for the x86 architecture. If the version of SDB on x86/svr4 were
2154 a bit less brain dead with respect to floating-point then we would
2155 have a precedent to follow with respect to DWARF register numbers
2156 for x86 FP registers, but the SDB on x86/svr4 is so completely
2157 broken with respect to FP registers that it is hardly worth thinking
2158 of it as something to strive for compatibility with.
2159 The version of x86/svr4 SDB I have at the moment does (partially)
2160 seem to believe that DWARF register number 11 is associated with
2161 the x86 register %st(0), but that's about all. Higher DWARF
2162 register numbers don't seem to be associated with anything in
2163 particular, and even for DWARF regno 11, SDB only seems to under-
2164 stand that it should say that a variable lives in %st(0) (when
2165 asked via an `=' command) if we said it was in DWARF regno 11,
2166 but SDB still prints garbage when asked for the value of the
2167 variable in question (via a `/' command).
2168 (Also note that the labels SDB prints for various FP stack regs
2169 when doing an `x' command are all wrong.)
2170 Note that these problems generally don't affect the native SVR4
2171 C compiler because it doesn't allow the use of -O with -g and
2172 because when it is *not* optimizing, it allocates a memory
2173 location for each floating-point variable, and the memory
2174 location is what gets described in the DWARF AT_location
2175 attribute for the variable in question.
2176 Regardless of the severe mental illness of the x86/svr4 SDB, we
2177 do something sensible here and we use the following DWARF
2178 register numbers. Note that these are all stack-top-relative
2180 11 for %st(0) (gcc regno = 8)
2181 12 for %st(1) (gcc regno = 9)
2182 13 for %st(2) (gcc regno = 10)
2183 14 for %st(3) (gcc regno = 11)
2184 15 for %st(4) (gcc regno = 12)
2185 16 for %st(5) (gcc regno = 13)
2186 17 for %st(6) (gcc regno = 14)
2187 18 for %st(7) (gcc regno = 15)
2189 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2191 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2192 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2193 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2194 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2195 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2196 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2197 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2200 /* Define parameter passing and return registers. */
2202 static int const x86_64_int_parameter_registers
[6] =
2204 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2207 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2209 CX_REG
, DX_REG
, R8_REG
, R9_REG
2212 static int const x86_64_int_return_registers
[4] =
2214 AX_REG
, DX_REG
, DI_REG
, SI_REG
2217 /* Define the structure for the machine field in struct function. */
2219 struct GTY(()) stack_local_entry
{
2220 unsigned short mode
;
2223 struct stack_local_entry
*next
;
2226 /* Structure describing stack frame layout.
2227 Stack grows downward:
2233 saved static chain if ix86_static_chain_on_stack
2235 saved frame pointer if frame_pointer_needed
2236 <- HARD_FRAME_POINTER
2242 <- sse_regs_save_offset
2245 [va_arg registers] |
2249 [padding2] | = to_allocate
2258 int outgoing_arguments_size
;
2260 /* The offsets relative to ARG_POINTER. */
2261 HOST_WIDE_INT frame_pointer_offset
;
2262 HOST_WIDE_INT hard_frame_pointer_offset
;
2263 HOST_WIDE_INT stack_pointer_offset
;
2264 HOST_WIDE_INT hfp_save_offset
;
2265 HOST_WIDE_INT reg_save_offset
;
2266 HOST_WIDE_INT sse_reg_save_offset
;
2268 /* When save_regs_using_mov is set, emit prologue using
2269 move instead of push instructions. */
2270 bool save_regs_using_mov
;
2273 /* Which cpu are we scheduling for. */
2274 enum attr_cpu ix86_schedule
;
2276 /* Which cpu are we optimizing for. */
2277 enum processor_type ix86_tune
;
2279 /* Which instruction set architecture to use. */
2280 enum processor_type ix86_arch
;
2282 /* True if processor has SSE prefetch instruction. */
2283 unsigned char x86_prefetch_sse
;
2285 /* -mstackrealign option */
2286 static const char ix86_force_align_arg_pointer_string
[]
2287 = "force_align_arg_pointer";
2289 static rtx (*ix86_gen_leave
) (void);
2290 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2291 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2292 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2293 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2294 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2295 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2296 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2297 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2298 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2299 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2300 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2302 /* Preferred alignment for stack boundary in bits. */
2303 unsigned int ix86_preferred_stack_boundary
;
2305 /* Alignment for incoming stack boundary in bits specified at
2307 static unsigned int ix86_user_incoming_stack_boundary
;
2309 /* Default alignment for incoming stack boundary in bits. */
2310 static unsigned int ix86_default_incoming_stack_boundary
;
2312 /* Alignment for incoming stack boundary in bits. */
2313 unsigned int ix86_incoming_stack_boundary
;
2315 /* Calling abi specific va_list type nodes. */
2316 static GTY(()) tree sysv_va_list_type_node
;
2317 static GTY(()) tree ms_va_list_type_node
;
2319 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2320 char internal_label_prefix
[16];
2321 int internal_label_prefix_len
;
2323 /* Fence to use after loop using movnt. */
2326 /* Register class used for passing given 64bit part of the argument.
2327 These represent classes as documented by the PS ABI, with the exception
2328 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2329 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2331 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2332 whenever possible (upper half does contain padding). */
2333 enum x86_64_reg_class
2336 X86_64_INTEGER_CLASS
,
2337 X86_64_INTEGERSI_CLASS
,
2344 X86_64_COMPLEX_X87_CLASS
,
2348 #define MAX_CLASSES 4
2350 /* Table of constants used by fldpi, fldln2, etc.... */
2351 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2352 static bool ext_80387_constants_init
= 0;
2355 static struct machine_function
* ix86_init_machine_status (void);
2356 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2357 static bool ix86_function_value_regno_p (const unsigned int);
2358 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2360 static rtx
ix86_static_chain (const_tree
, bool);
2361 static int ix86_function_regparm (const_tree
, const_tree
);
2362 static void ix86_compute_frame_layout (struct ix86_frame
*);
2363 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2365 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2366 static tree
ix86_canonical_va_list_type (tree
);
2367 static void predict_jump (int);
2368 static unsigned int split_stack_prologue_scratch_regno (void);
2369 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2371 enum ix86_function_specific_strings
2373 IX86_FUNCTION_SPECIFIC_ARCH
,
2374 IX86_FUNCTION_SPECIFIC_TUNE
,
2375 IX86_FUNCTION_SPECIFIC_MAX
2378 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2379 const char *, enum fpmath_unit
, bool);
2380 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2381 static void ix86_function_specific_save (struct cl_target_option
*);
2382 static void ix86_function_specific_restore (struct cl_target_option
*);
2383 static void ix86_function_specific_print (FILE *, int,
2384 struct cl_target_option
*);
2385 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2386 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2387 struct gcc_options
*);
2388 static bool ix86_can_inline_p (tree
, tree
);
2389 static void ix86_set_current_function (tree
);
2390 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2392 static enum calling_abi
ix86_function_abi (const_tree
);
2395 #ifndef SUBTARGET32_DEFAULT_CPU
2396 #define SUBTARGET32_DEFAULT_CPU "i386"
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2403 /* Vectorization library interface and handlers. */
2404 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2406 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2407 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2409 /* Processor target table, indexed by processor number */
2412 const struct processor_costs
*cost
; /* Processor costs */
2413 const int align_loop
; /* Default alignments. */
2414 const int align_loop_max_skip
;
2415 const int align_jump
;
2416 const int align_jump_max_skip
;
2417 const int align_func
;
2420 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2422 {&i386_cost
, 4, 3, 4, 3, 4},
2423 {&i486_cost
, 16, 15, 16, 15, 16},
2424 {&pentium_cost
, 16, 7, 16, 7, 16},
2425 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2426 {&geode_cost
, 0, 0, 0, 0, 0},
2427 {&k6_cost
, 32, 7, 32, 7, 32},
2428 {&athlon_cost
, 16, 7, 16, 7, 16},
2429 {&pentium4_cost
, 0, 0, 0, 0, 0},
2430 {&k8_cost
, 16, 7, 16, 7, 16},
2431 {&nocona_cost
, 0, 0, 0, 0, 0},
2433 {&core_cost
, 16, 10, 16, 10, 16},
2435 {&core_cost
, 16, 10, 16, 10, 16},
2437 {&core_cost
, 16, 10, 16, 10, 16},
2438 {&generic32_cost
, 16, 7, 16, 7, 16},
2439 {&generic64_cost
, 16, 10, 16, 10, 16},
2440 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2441 {&bdver1_cost
, 32, 24, 32, 7, 32},
2442 {&bdver2_cost
, 32, 24, 32, 7, 32},
2443 {&bdver3_cost
, 32, 24, 32, 7, 32},
2444 {&btver1_cost
, 32, 24, 32, 7, 32},
2445 {&btver2_cost
, 32, 24, 32, 7, 32},
2446 {&atom_cost
, 16, 15, 16, 7, 16}
2449 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2483 gate_insert_vzeroupper (void)
2485 return TARGET_VZEROUPPER
;
2489 rest_of_handle_insert_vzeroupper (void)
2493 /* vzeroupper instructions are inserted immediately after reload to
2494 account for possible spills from 256bit registers. The pass
2495 reuses mode switching infrastructure by re-running mode insertion
2496 pass, so disable entities that have already been processed. */
2497 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2498 ix86_optimize_mode_switching
[i
] = 0;
2500 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2502 /* Call optimize_mode_switching. */
2503 pass_mode_switching
.pass
.execute ();
2507 struct rtl_opt_pass pass_insert_vzeroupper
=
2511 "vzeroupper", /* name */
2512 OPTGROUP_NONE
, /* optinfo_flags */
2513 gate_insert_vzeroupper
, /* gate */
2514 rest_of_handle_insert_vzeroupper
, /* execute */
2517 0, /* static_pass_number */
2518 TV_NONE
, /* tv_id */
2519 0, /* properties_required */
2520 0, /* properties_provided */
2521 0, /* properties_destroyed */
2522 0, /* todo_flags_start */
2523 TODO_df_finish
| TODO_verify_rtl_sharing
|
2524 0, /* todo_flags_finish */
2528 /* Return true if a red-zone is in use. */
2531 ix86_using_red_zone (void)
2533 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2536 /* Return a string that documents the current -m options. The caller is
2537 responsible for freeing the string. */
2540 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2541 const char *tune
, enum fpmath_unit fpmath
,
2544 struct ix86_target_opts
2546 const char *option
; /* option string */
2547 HOST_WIDE_INT mask
; /* isa mask options */
2550 /* This table is ordered so that options like -msse4.2 that imply
2551 preceding options while match those first. */
2552 static struct ix86_target_opts isa_opts
[] =
2554 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2555 { "-mfma", OPTION_MASK_ISA_FMA
},
2556 { "-mxop", OPTION_MASK_ISA_XOP
},
2557 { "-mlwp", OPTION_MASK_ISA_LWP
},
2558 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2559 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2560 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2561 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2562 { "-msse3", OPTION_MASK_ISA_SSE3
},
2563 { "-msse2", OPTION_MASK_ISA_SSE2
},
2564 { "-msse", OPTION_MASK_ISA_SSE
},
2565 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2566 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2567 { "-mmmx", OPTION_MASK_ISA_MMX
},
2568 { "-mabm", OPTION_MASK_ISA_ABM
},
2569 { "-mbmi", OPTION_MASK_ISA_BMI
},
2570 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2571 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2572 { "-mhle", OPTION_MASK_ISA_HLE
},
2573 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2574 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2575 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2576 { "-madx", OPTION_MASK_ISA_ADX
},
2577 { "-mtbm", OPTION_MASK_ISA_TBM
},
2578 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2579 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2580 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2581 { "-maes", OPTION_MASK_ISA_AES
},
2582 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2583 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2584 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2585 { "-mf16c", OPTION_MASK_ISA_F16C
},
2586 { "-mrtm", OPTION_MASK_ISA_RTM
},
2587 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2588 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2592 static struct ix86_target_opts flag_opts
[] =
2594 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2595 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2596 { "-m80387", MASK_80387
},
2597 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2598 { "-malign-double", MASK_ALIGN_DOUBLE
},
2599 { "-mcld", MASK_CLD
},
2600 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2601 { "-mieee-fp", MASK_IEEE_FP
},
2602 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2603 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2604 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2605 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2606 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2607 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2608 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2609 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2610 { "-mrecip", MASK_RECIP
},
2611 { "-mrtd", MASK_RTD
},
2612 { "-msseregparm", MASK_SSEREGPARM
},
2613 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2614 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2615 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2616 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2617 { "-mvzeroupper", MASK_VZEROUPPER
},
2618 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2619 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2620 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2623 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2626 char target_other
[40];
2636 memset (opts
, '\0', sizeof (opts
));
2638 /* Add -march= option. */
2641 opts
[num
][0] = "-march=";
2642 opts
[num
++][1] = arch
;
2645 /* Add -mtune= option. */
2648 opts
[num
][0] = "-mtune=";
2649 opts
[num
++][1] = tune
;
2652 /* Add -m32/-m64/-mx32. */
2653 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2655 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2659 isa
&= ~ (OPTION_MASK_ISA_64BIT
2660 | OPTION_MASK_ABI_64
2661 | OPTION_MASK_ABI_X32
);
2665 opts
[num
++][0] = abi
;
2667 /* Pick out the options in isa options. */
2668 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2670 if ((isa
& isa_opts
[i
].mask
) != 0)
2672 opts
[num
++][0] = isa_opts
[i
].option
;
2673 isa
&= ~ isa_opts
[i
].mask
;
2677 if (isa
&& add_nl_p
)
2679 opts
[num
++][0] = isa_other
;
2680 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2684 /* Add flag options. */
2685 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2687 if ((flags
& flag_opts
[i
].mask
) != 0)
2689 opts
[num
++][0] = flag_opts
[i
].option
;
2690 flags
&= ~ flag_opts
[i
].mask
;
2694 if (flags
&& add_nl_p
)
2696 opts
[num
++][0] = target_other
;
2697 sprintf (target_other
, "(other flags: %#x)", flags
);
2700 /* Add -fpmath= option. */
2703 opts
[num
][0] = "-mfpmath=";
2704 switch ((int) fpmath
)
2707 opts
[num
++][1] = "387";
2711 opts
[num
++][1] = "sse";
2714 case FPMATH_387
| FPMATH_SSE
:
2715 opts
[num
++][1] = "sse+387";
2727 gcc_assert (num
< ARRAY_SIZE (opts
));
2729 /* Size the string. */
2731 sep_len
= (add_nl_p
) ? 3 : 1;
2732 for (i
= 0; i
< num
; i
++)
2735 for (j
= 0; j
< 2; j
++)
2737 len
+= strlen (opts
[i
][j
]);
2740 /* Build the string. */
2741 ret
= ptr
= (char *) xmalloc (len
);
2744 for (i
= 0; i
< num
; i
++)
2748 for (j
= 0; j
< 2; j
++)
2749 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2756 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2764 for (j
= 0; j
< 2; j
++)
2767 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2769 line_len
+= len2
[j
];
2774 gcc_assert (ret
+ len
>= ptr
);
2779 /* Return true, if profiling code should be emitted before
2780 prologue. Otherwise it returns false.
2781 Note: For x86 with "hotfix" it is sorried. */
2783 ix86_profile_before_prologue (void)
2785 return flag_fentry
!= 0;
2788 /* Function that is callable from the debugger to print the current
2791 ix86_debug_options (void)
2793 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2794 ix86_arch_string
, ix86_tune_string
,
2799 fprintf (stderr
, "%s\n\n", opts
);
2803 fputs ("<no options>\n\n", stderr
);
2808 /* Override various settings based on options. If MAIN_ARGS_P, the
2809 options are from the command line, otherwise they are from
2813 ix86_option_override_internal (bool main_args_p
)
2816 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2817 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2822 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2823 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2824 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2825 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2826 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2827 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2828 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2829 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2830 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2831 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2832 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2833 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2834 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2835 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2836 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2837 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2838 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2839 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2840 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2841 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2842 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2843 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2844 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2845 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2846 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2847 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2848 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2849 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2850 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2851 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2852 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2853 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2854 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2855 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2856 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2857 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2858 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2859 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2860 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2861 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2863 /* if this reaches 64, need to widen struct pta flags below */
2867 const char *const name
; /* processor name or nickname. */
2868 const enum processor_type processor
;
2869 const enum attr_cpu schedule
;
2870 const unsigned HOST_WIDE_INT flags
;
2872 const processor_alias_table
[] =
2874 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2875 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2876 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2877 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2878 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2879 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2880 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2881 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2882 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2883 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2884 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2885 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2886 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2887 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2888 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2889 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2890 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2891 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2892 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2893 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2894 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2895 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2896 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2897 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2898 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2899 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2900 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2901 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2902 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2903 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2904 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
2905 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2906 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2907 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
2908 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2909 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2910 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2911 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2912 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
2913 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2914 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2915 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2916 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2917 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
2918 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2919 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2920 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2921 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2922 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2924 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2925 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2926 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2927 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2928 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2929 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2930 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2931 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2932 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2933 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2934 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2935 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2936 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2937 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2938 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2939 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2940 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2941 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2942 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2943 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2944 {"k8", PROCESSOR_K8
, CPU_K8
,
2945 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2946 | PTA_SSE2
| PTA_NO_SAHF
},
2947 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2948 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2949 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2950 {"opteron", PROCESSOR_K8
, CPU_K8
,
2951 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2952 | PTA_SSE2
| PTA_NO_SAHF
},
2953 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2954 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2955 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2956 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2957 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2958 | PTA_SSE2
| PTA_NO_SAHF
},
2959 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2960 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2961 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2962 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2963 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2964 | PTA_SSE2
| PTA_NO_SAHF
},
2965 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2966 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2967 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2968 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2969 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2970 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2971 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2974 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2975 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2976 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2977 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2978 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2979 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2980 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2981 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2982 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2983 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2984 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2985 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2986 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2987 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2989 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2990 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2991 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2992 | PTA_FXSR
| PTA_XSAVE
},
2993 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
2994 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2995 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2996 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2997 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2998 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3000 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3001 PTA_HLE
/* flags are only used for -march switch. */ },
3002 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3004 | PTA_HLE
/* flags are only used for -march switch. */ },
3007 /* -mrecip options. */
3010 const char *string
; /* option name */
3011 unsigned int mask
; /* mask bits to set */
3013 const recip_options
[] =
3015 { "all", RECIP_MASK_ALL
},
3016 { "none", RECIP_MASK_NONE
},
3017 { "div", RECIP_MASK_DIV
},
3018 { "sqrt", RECIP_MASK_SQRT
},
3019 { "vec-div", RECIP_MASK_VEC_DIV
},
3020 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3023 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3025 /* Set up prefix/suffix so the error messages refer to either the command
3026 line argument, or the attribute(target). */
3035 prefix
= "option(\"";
3040 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3041 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3042 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3043 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3044 #ifdef TARGET_BI_ARCH
3047 #if TARGET_BI_ARCH == 1
3048 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3049 is on and OPTION_MASK_ABI_X32 is off. We turn off
3050 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3053 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3055 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3056 on and OPTION_MASK_ABI_64 is off. We turn off
3057 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3060 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3067 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3068 OPTION_MASK_ABI_64 for TARGET_X32. */
3069 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3070 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3072 else if (TARGET_LP64
)
3074 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3075 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3076 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3077 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3080 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3081 SUBTARGET_OVERRIDE_OPTIONS
;
3084 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3085 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3088 /* -fPIC is the default for x86_64. */
3089 if (TARGET_MACHO
&& TARGET_64BIT
)
3092 /* Need to check -mtune=generic first. */
3093 if (ix86_tune_string
)
3095 if (!strcmp (ix86_tune_string
, "generic")
3096 || !strcmp (ix86_tune_string
, "i686")
3097 /* As special support for cross compilers we read -mtune=native
3098 as -mtune=generic. With native compilers we won't see the
3099 -mtune=native, as it was changed by the driver. */
3100 || !strcmp (ix86_tune_string
, "native"))
3103 ix86_tune_string
= "generic64";
3105 ix86_tune_string
= "generic32";
3107 /* If this call is for setting the option attribute, allow the
3108 generic32/generic64 that was previously set. */
3109 else if (!main_args_p
3110 && (!strcmp (ix86_tune_string
, "generic32")
3111 || !strcmp (ix86_tune_string
, "generic64")))
3113 else if (!strncmp (ix86_tune_string
, "generic", 7))
3114 error ("bad value (%s) for %stune=%s %s",
3115 ix86_tune_string
, prefix
, suffix
, sw
);
3116 else if (!strcmp (ix86_tune_string
, "x86-64"))
3117 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3118 "%stune=k8%s or %stune=generic%s instead as appropriate",
3119 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3123 if (ix86_arch_string
)
3124 ix86_tune_string
= ix86_arch_string
;
3125 if (!ix86_tune_string
)
3127 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3128 ix86_tune_defaulted
= 1;
3131 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3132 need to use a sensible tune option. */
3133 if (!strcmp (ix86_tune_string
, "generic")
3134 || !strcmp (ix86_tune_string
, "x86-64")
3135 || !strcmp (ix86_tune_string
, "i686"))
3138 ix86_tune_string
= "generic64";
3140 ix86_tune_string
= "generic32";
3144 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3146 /* rep; movq isn't available in 32-bit code. */
3147 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3148 ix86_stringop_alg
= no_stringop
;
3151 if (!ix86_arch_string
)
3152 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3154 ix86_arch_specified
= 1;
3156 if (global_options_set
.x_ix86_pmode
)
3158 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3159 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3160 error ("address mode %qs not supported in the %s bit mode",
3161 TARGET_64BIT
? "short" : "long",
3162 TARGET_64BIT
? "64" : "32");
3165 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3167 if (!global_options_set
.x_ix86_abi
)
3168 ix86_abi
= DEFAULT_ABI
;
3170 if (global_options_set
.x_ix86_cmodel
)
3172 switch (ix86_cmodel
)
3177 ix86_cmodel
= CM_SMALL_PIC
;
3179 error ("code model %qs not supported in the %s bit mode",
3186 ix86_cmodel
= CM_MEDIUM_PIC
;
3188 error ("code model %qs not supported in the %s bit mode",
3190 else if (TARGET_X32
)
3191 error ("code model %qs not supported in x32 mode",
3198 ix86_cmodel
= CM_LARGE_PIC
;
3200 error ("code model %qs not supported in the %s bit mode",
3202 else if (TARGET_X32
)
3203 error ("code model %qs not supported in x32 mode",
3209 error ("code model %s does not support PIC mode", "32");
3211 error ("code model %qs not supported in the %s bit mode",
3218 error ("code model %s does not support PIC mode", "kernel");
3219 ix86_cmodel
= CM_32
;
3222 error ("code model %qs not supported in the %s bit mode",
3232 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3233 use of rip-relative addressing. This eliminates fixups that
3234 would otherwise be needed if this object is to be placed in a
3235 DLL, and is essentially just as efficient as direct addressing. */
3236 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3237 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3238 else if (TARGET_64BIT
&& TARGET_RDOS
)
3239 ix86_cmodel
= CM_MEDIUM_PIC
, flag_pic
= 1;
3240 else if (TARGET_64BIT
)
3241 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3243 ix86_cmodel
= CM_32
;
3245 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3247 error ("-masm=intel not supported in this configuration");
3248 ix86_asm_dialect
= ASM_ATT
;
3250 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3251 sorry ("%i-bit mode not compiled in",
3252 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3254 for (i
= 0; i
< pta_size
; i
++)
3255 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3257 ix86_schedule
= processor_alias_table
[i
].schedule
;
3258 ix86_arch
= processor_alias_table
[i
].processor
;
3259 /* Default cpu tuning to the architecture. */
3260 ix86_tune
= ix86_arch
;
3262 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3263 error ("CPU you selected does not support x86-64 "
3266 if (processor_alias_table
[i
].flags
& PTA_MMX
3267 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3268 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3269 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3270 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3271 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3272 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3274 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3275 if (processor_alias_table
[i
].flags
& PTA_SSE
3276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3277 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3278 if (processor_alias_table
[i
].flags
& PTA_SSE2
3279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3280 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3281 if (processor_alias_table
[i
].flags
& PTA_SSE3
3282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3284 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3286 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3287 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3289 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3290 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3292 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3293 if (processor_alias_table
[i
].flags
& PTA_AVX
3294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3295 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3296 if (processor_alias_table
[i
].flags
& PTA_AVX2
3297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3298 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3299 if (processor_alias_table
[i
].flags
& PTA_FMA
3300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3301 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3302 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3304 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3305 if (processor_alias_table
[i
].flags
& PTA_FMA4
3306 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3307 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3308 if (processor_alias_table
[i
].flags
& PTA_XOP
3309 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3310 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3311 if (processor_alias_table
[i
].flags
& PTA_LWP
3312 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3313 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3314 if (processor_alias_table
[i
].flags
& PTA_ABM
3315 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3316 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3317 if (processor_alias_table
[i
].flags
& PTA_BMI
3318 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3319 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3320 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3321 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3322 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3323 if (processor_alias_table
[i
].flags
& PTA_TBM
3324 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3325 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3326 if (processor_alias_table
[i
].flags
& PTA_BMI2
3327 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3328 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3329 if (processor_alias_table
[i
].flags
& PTA_CX16
3330 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3331 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3332 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3333 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3334 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3335 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3336 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3337 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3338 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3339 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3340 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3341 if (processor_alias_table
[i
].flags
& PTA_AES
3342 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3343 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3344 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3345 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3346 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3347 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3348 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3349 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3350 if (processor_alias_table
[i
].flags
& PTA_RDRND
3351 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3352 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3353 if (processor_alias_table
[i
].flags
& PTA_F16C
3354 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3355 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3356 if (processor_alias_table
[i
].flags
& PTA_RTM
3357 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3358 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3359 if (processor_alias_table
[i
].flags
& PTA_HLE
3360 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3361 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3362 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3363 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3364 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3365 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3366 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3367 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3368 if (processor_alias_table
[i
].flags
& PTA_ADX
3369 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3370 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3371 if (processor_alias_table
[i
].flags
& PTA_FXSR
3372 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3373 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3374 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3375 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3376 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3377 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3378 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3379 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3380 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3381 x86_prefetch_sse
= true;
3386 if (!strcmp (ix86_arch_string
, "generic"))
3387 error ("generic CPU can be used only for %stune=%s %s",
3388 prefix
, suffix
, sw
);
3389 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3390 error ("bad value (%s) for %sarch=%s %s",
3391 ix86_arch_string
, prefix
, suffix
, sw
);
3393 ix86_arch_mask
= 1u << ix86_arch
;
3394 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3395 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3397 for (i
= 0; i
< pta_size
; i
++)
3398 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3400 ix86_schedule
= processor_alias_table
[i
].schedule
;
3401 ix86_tune
= processor_alias_table
[i
].processor
;
3404 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3406 if (ix86_tune_defaulted
)
3408 ix86_tune_string
= "x86-64";
3409 for (i
= 0; i
< pta_size
; i
++)
3410 if (! strcmp (ix86_tune_string
,
3411 processor_alias_table
[i
].name
))
3413 ix86_schedule
= processor_alias_table
[i
].schedule
;
3414 ix86_tune
= processor_alias_table
[i
].processor
;
3417 error ("CPU you selected does not support x86-64 "
3423 /* Adjust tuning when compiling for 32-bit ABI. */
3426 case PROCESSOR_GENERIC64
:
3427 ix86_tune
= PROCESSOR_GENERIC32
;
3428 ix86_schedule
= CPU_PENTIUMPRO
;
3435 /* Intel CPUs have always interpreted SSE prefetch instructions as
3436 NOPs; so, we can enable SSE prefetch instructions even when
3437 -mtune (rather than -march) points us to a processor that has them.
3438 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3439 higher processors. */
3441 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3442 x86_prefetch_sse
= true;
3446 if (ix86_tune_specified
&& i
== pta_size
)
3447 error ("bad value (%s) for %stune=%s %s",
3448 ix86_tune_string
, prefix
, suffix
, sw
);
3450 ix86_tune_mask
= 1u << ix86_tune
;
3451 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3452 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3454 #ifndef USE_IX86_FRAME_POINTER
3455 #define USE_IX86_FRAME_POINTER 0
3458 #ifndef USE_X86_64_FRAME_POINTER
3459 #define USE_X86_64_FRAME_POINTER 0
3462 /* Set the default values for switches whose default depends on TARGET_64BIT
3463 in case they weren't overwritten by command line options. */
3466 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3467 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3468 if (flag_asynchronous_unwind_tables
== 2)
3469 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3470 if (flag_pcc_struct_return
== 2)
3471 flag_pcc_struct_return
= 0;
3475 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3476 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3477 if (flag_asynchronous_unwind_tables
== 2)
3478 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3479 if (flag_pcc_struct_return
== 2)
3480 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3483 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3485 ix86_cost
= &ix86_size_cost
;
3487 ix86_cost
= ix86_tune_cost
;
3489 /* Arrange to set up i386_stack_locals for all functions. */
3490 init_machine_status
= ix86_init_machine_status
;
3492 /* Validate -mregparm= value. */
3493 if (global_options_set
.x_ix86_regparm
)
3496 warning (0, "-mregparm is ignored in 64-bit mode");
3497 if (ix86_regparm
> REGPARM_MAX
)
3499 error ("-mregparm=%d is not between 0 and %d",
3500 ix86_regparm
, REGPARM_MAX
);
3505 ix86_regparm
= REGPARM_MAX
;
3507 /* Default align_* from the processor table. */
3508 if (align_loops
== 0)
3510 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3511 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3513 if (align_jumps
== 0)
3515 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3516 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3518 if (align_functions
== 0)
3520 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3523 /* Provide default for -mbranch-cost= value. */
3524 if (!global_options_set
.x_ix86_branch_cost
)
3525 ix86_branch_cost
= ix86_cost
->branch_cost
;
3529 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3531 /* Enable by default the SSE and MMX builtins. Do allow the user to
3532 explicitly disable any of these. In particular, disabling SSE and
3533 MMX for kernel code is extremely useful. */
3534 if (!ix86_arch_specified
)
3536 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3537 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3540 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3544 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3546 if (!ix86_arch_specified
)
3548 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3550 /* i386 ABI does not specify red zone. It still makes sense to use it
3551 when programmer takes care to stack from being destroyed. */
3552 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3553 target_flags
|= MASK_NO_RED_ZONE
;
3556 /* Keep nonleaf frame pointers. */
3557 if (flag_omit_frame_pointer
)
3558 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3559 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3560 flag_omit_frame_pointer
= 1;
3562 /* If we're doing fast math, we don't care about comparison order
3563 wrt NaNs. This lets us use a shorter comparison sequence. */
3564 if (flag_finite_math_only
)
3565 target_flags
&= ~MASK_IEEE_FP
;
3567 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3568 since the insns won't need emulation. */
3569 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3570 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3572 /* Likewise, if the target doesn't have a 387, or we've specified
3573 software floating point, don't use 387 inline intrinsics. */
3575 target_flags
|= MASK_NO_FANCY_MATH_387
;
3577 /* Turn on MMX builtins for -msse. */
3579 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3581 /* Enable SSE prefetch. */
3582 if (TARGET_SSE
|| TARGET_PRFCHW
)
3583 x86_prefetch_sse
= true;
3585 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3586 if (TARGET_SSE4_2
|| TARGET_ABM
)
3587 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3589 /* Turn on lzcnt instruction for -mabm. */
3591 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3593 /* Validate -mpreferred-stack-boundary= value or default it to
3594 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3595 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3596 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3598 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3599 int max
= (TARGET_SEH
? 4 : 12);
3601 if (ix86_preferred_stack_boundary_arg
< min
3602 || ix86_preferred_stack_boundary_arg
> max
)
3605 error ("-mpreferred-stack-boundary is not supported "
3608 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3609 ix86_preferred_stack_boundary_arg
, min
, max
);
3612 ix86_preferred_stack_boundary
3613 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3616 /* Set the default value for -mstackrealign. */
3617 if (ix86_force_align_arg_pointer
== -1)
3618 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3620 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3622 /* Validate -mincoming-stack-boundary= value or default it to
3623 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3624 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3625 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3627 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3628 || ix86_incoming_stack_boundary_arg
> 12)
3629 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3630 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3633 ix86_user_incoming_stack_boundary
3634 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3635 ix86_incoming_stack_boundary
3636 = ix86_user_incoming_stack_boundary
;
3640 /* Accept -msseregparm only if at least SSE support is enabled. */
3641 if (TARGET_SSEREGPARM
3643 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3645 if (global_options_set
.x_ix86_fpmath
)
3647 if (ix86_fpmath
& FPMATH_SSE
)
3651 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3652 ix86_fpmath
= FPMATH_387
;
3654 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3656 warning (0, "387 instruction set disabled, using SSE arithmetics");
3657 ix86_fpmath
= FPMATH_SSE
;
3662 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3664 /* If the i387 is disabled, then do not return values in it. */
3666 target_flags
&= ~MASK_FLOAT_RETURNS
;
3668 /* Use external vectorized library in vectorizing intrinsics. */
3669 if (global_options_set
.x_ix86_veclibabi_type
)
3670 switch (ix86_veclibabi_type
)
3672 case ix86_veclibabi_type_svml
:
3673 ix86_veclib_handler
= ix86_veclibabi_svml
;
3676 case ix86_veclibabi_type_acml
:
3677 ix86_veclib_handler
= ix86_veclibabi_acml
;
3684 if ((!USE_IX86_FRAME_POINTER
3685 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3686 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3688 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3690 /* ??? Unwind info is not correct around the CFG unless either a frame
3691 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3692 unwind info generation to be aware of the CFG and propagating states
3694 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3695 || flag_exceptions
|| flag_non_call_exceptions
)
3696 && flag_omit_frame_pointer
3697 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3699 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3700 warning (0, "unwind tables currently require either a frame pointer "
3701 "or %saccumulate-outgoing-args%s for correctness",
3703 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3706 /* If stack probes are required, the space used for large function
3707 arguments on the stack must also be probed, so enable
3708 -maccumulate-outgoing-args so this happens in the prologue. */
3709 if (TARGET_STACK_PROBE
3710 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3712 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3713 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3714 "for correctness", prefix
, suffix
);
3715 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3718 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3721 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3722 p
= strchr (internal_label_prefix
, 'X');
3723 internal_label_prefix_len
= p
- internal_label_prefix
;
3727 /* When scheduling description is not available, disable scheduler pass
3728 so it won't slow down the compilation and make x87 code slower. */
3729 if (!TARGET_SCHEDULE
)
3730 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3732 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3733 ix86_tune_cost
->simultaneous_prefetches
,
3734 global_options
.x_param_values
,
3735 global_options_set
.x_param_values
);
3736 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3737 ix86_tune_cost
->prefetch_block
,
3738 global_options
.x_param_values
,
3739 global_options_set
.x_param_values
);
3740 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3741 ix86_tune_cost
->l1_cache_size
,
3742 global_options
.x_param_values
,
3743 global_options_set
.x_param_values
);
3744 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3745 ix86_tune_cost
->l2_cache_size
,
3746 global_options
.x_param_values
,
3747 global_options_set
.x_param_values
);
3749 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3750 if (flag_prefetch_loop_arrays
< 0
3752 && (optimize
>= 3 || flag_profile_use
)
3753 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3754 flag_prefetch_loop_arrays
= 1;
3756 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3757 can be optimized to ap = __builtin_next_arg (0). */
3758 if (!TARGET_64BIT
&& !flag_split_stack
)
3759 targetm
.expand_builtin_va_start
= NULL
;
3763 ix86_gen_leave
= gen_leave_rex64
;
3764 if (Pmode
== DImode
)
3766 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3767 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3768 ix86_gen_tls_local_dynamic_base_64
3769 = gen_tls_local_dynamic_base_64_di
;
3773 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3774 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3775 ix86_gen_tls_local_dynamic_base_64
3776 = gen_tls_local_dynamic_base_64_si
;
3781 ix86_gen_leave
= gen_leave
;
3782 ix86_gen_monitor
= gen_sse3_monitor
;
3785 if (Pmode
== DImode
)
3787 ix86_gen_add3
= gen_adddi3
;
3788 ix86_gen_sub3
= gen_subdi3
;
3789 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3790 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3791 ix86_gen_andsp
= gen_anddi3
;
3792 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3793 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3794 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3798 ix86_gen_add3
= gen_addsi3
;
3799 ix86_gen_sub3
= gen_subsi3
;
3800 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3801 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3802 ix86_gen_andsp
= gen_andsi3
;
3803 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3804 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3805 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3809 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3811 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3814 if (!TARGET_64BIT
&& flag_pic
)
3816 if (flag_fentry
> 0)
3817 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3821 else if (TARGET_SEH
)
3823 if (flag_fentry
== 0)
3824 sorry ("-mno-fentry isn%'t compatible with SEH");
3827 else if (flag_fentry
< 0)
3829 #if defined(PROFILE_BEFORE_PROLOGUE)
3838 /* When not optimize for size, enable vzeroupper optimization for
3839 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3840 AVX unaligned load/store. */
3843 if (flag_expensive_optimizations
3844 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3845 target_flags
|= MASK_VZEROUPPER
;
3846 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3847 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3848 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3849 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3850 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3851 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3852 /* Enable 128-bit AVX instruction generation
3853 for the auto-vectorizer. */
3854 if (TARGET_AVX128_OPTIMAL
3855 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3856 target_flags
|= MASK_PREFER_AVX128
;
3861 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3862 target_flags
&= ~MASK_VZEROUPPER
;
3865 if (ix86_recip_name
)
3867 char *p
= ASTRDUP (ix86_recip_name
);
3869 unsigned int mask
, i
;
3872 while ((q
= strtok (p
, ",")) != NULL
)
3883 if (!strcmp (q
, "default"))
3884 mask
= RECIP_MASK_ALL
;
3887 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3888 if (!strcmp (q
, recip_options
[i
].string
))
3890 mask
= recip_options
[i
].mask
;
3894 if (i
== ARRAY_SIZE (recip_options
))
3896 error ("unknown option for -mrecip=%s", q
);
3898 mask
= RECIP_MASK_NONE
;
3902 recip_mask_explicit
|= mask
;
3904 recip_mask
&= ~mask
;
3911 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3912 else if (target_flags_explicit
& MASK_RECIP
)
3913 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3915 /* Default long double to 64-bit for Bionic. */
3916 if (TARGET_HAS_BIONIC
3917 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
3918 target_flags
|= MASK_LONG_DOUBLE_64
;
3920 /* Save the initial options in case the user does function specific
3923 target_option_default_node
= target_option_current_node
3924 = build_target_option_node ();
3927 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3930 ix86_option_override (void)
3932 static struct register_pass_info insert_vzeroupper_info
3933 = { &pass_insert_vzeroupper
.pass
, "reload",
3934 1, PASS_POS_INSERT_AFTER
3937 ix86_option_override_internal (true);
3940 /* This needs to be done at start up. It's convenient to do it here. */
3941 register_pass (&insert_vzeroupper_info
);
3944 /* Update register usage after having seen the compiler flags. */
3947 ix86_conditional_register_usage (void)
3952 /* The PIC register, if it exists, is fixed. */
3953 j
= PIC_OFFSET_TABLE_REGNUM
;
3954 if (j
!= INVALID_REGNUM
)
3955 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3957 /* For 32-bit targets, squash the REX registers. */
3960 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3961 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3962 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3963 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3966 /* See the definition of CALL_USED_REGISTERS in i386.h. */
3967 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
3968 : TARGET_64BIT
? (1 << 2)
3971 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
3973 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3975 /* Set/reset conditionally defined registers from
3976 CALL_USED_REGISTERS initializer. */
3977 if (call_used_regs
[i
] > 1)
3978 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
3980 /* Calculate registers of CLOBBERED_REGS register set
3981 as call used registers from GENERAL_REGS register set. */
3982 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3983 && call_used_regs
[i
])
3984 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3987 /* If MMX is disabled, squash the registers. */
3989 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3990 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3991 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3993 /* If SSE is disabled, squash the registers. */
3995 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3996 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3997 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3999 /* If the FPU is disabled, squash the registers. */
4000 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4001 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4002 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4003 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4007 /* Save the current options */
4010 ix86_function_specific_save (struct cl_target_option
*ptr
)
4012 ptr
->arch
= ix86_arch
;
4013 ptr
->schedule
= ix86_schedule
;
4014 ptr
->tune
= ix86_tune
;
4015 ptr
->branch_cost
= ix86_branch_cost
;
4016 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4017 ptr
->arch_specified
= ix86_arch_specified
;
4018 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4019 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4020 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4022 /* The fields are char but the variables are not; make sure the
4023 values fit in the fields. */
4024 gcc_assert (ptr
->arch
== ix86_arch
);
4025 gcc_assert (ptr
->schedule
== ix86_schedule
);
4026 gcc_assert (ptr
->tune
== ix86_tune
);
4027 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4030 /* Restore the current options */
4033 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4035 enum processor_type old_tune
= ix86_tune
;
4036 enum processor_type old_arch
= ix86_arch
;
4037 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4040 ix86_arch
= (enum processor_type
) ptr
->arch
;
4041 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4042 ix86_tune
= (enum processor_type
) ptr
->tune
;
4043 ix86_branch_cost
= ptr
->branch_cost
;
4044 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4045 ix86_arch_specified
= ptr
->arch_specified
;
4046 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4047 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4048 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4050 /* Recreate the arch feature tests if the arch changed */
4051 if (old_arch
!= ix86_arch
)
4053 ix86_arch_mask
= 1u << ix86_arch
;
4054 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4055 ix86_arch_features
[i
]
4056 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4059 /* Recreate the tune optimization tests */
4060 if (old_tune
!= ix86_tune
)
4062 ix86_tune_mask
= 1u << ix86_tune
;
4063 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4064 ix86_tune_features
[i
]
4065 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4069 /* Print the current options */
4072 ix86_function_specific_print (FILE *file
, int indent
,
4073 struct cl_target_option
*ptr
)
4076 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4077 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4079 fprintf (file
, "%*sarch = %d (%s)\n",
4082 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4083 ? cpu_names
[ptr
->arch
]
4086 fprintf (file
, "%*stune = %d (%s)\n",
4089 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4090 ? cpu_names
[ptr
->tune
]
4093 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4097 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4098 free (target_string
);
4103 /* Inner function to process the attribute((target(...))), take an argument and
4104 set the current options from the argument. If we have a list, recursively go
over the list and handle each element.  */
4108 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4109 struct gcc_options
*enum_opts_set
)
4114 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4115 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4116 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4117 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4118 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4134 enum ix86_opt_type type
;
4139 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4140 IX86_ATTR_ISA ("abm", OPT_mabm
),
4141 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4142 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4143 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4144 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4145 IX86_ATTR_ISA ("aes", OPT_maes
),
4146 IX86_ATTR_ISA ("avx", OPT_mavx
),
4147 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4148 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4149 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4150 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4151 IX86_ATTR_ISA ("sse", OPT_msse
),
4152 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4153 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4154 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4155 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4156 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4157 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4158 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4159 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4160 IX86_ATTR_ISA ("fma", OPT_mfma
),
4161 IX86_ATTR_ISA ("xop", OPT_mxop
),
4162 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4163 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4164 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4165 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4166 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4167 IX86_ATTR_ISA ("hle", OPT_mhle
),
4168 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4169 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4170 IX86_ATTR_ISA ("adx", OPT_madx
),
4171 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4172 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4173 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4176 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4178 /* string options */
4179 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4180 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4183 IX86_ATTR_YES ("cld",
4187 IX86_ATTR_NO ("fancy-math-387",
4188 OPT_mfancy_math_387
,
4189 MASK_NO_FANCY_MATH_387
),
4191 IX86_ATTR_YES ("ieee-fp",
4195 IX86_ATTR_YES ("inline-all-stringops",
4196 OPT_minline_all_stringops
,
4197 MASK_INLINE_ALL_STRINGOPS
),
4199 IX86_ATTR_YES ("inline-stringops-dynamically",
4200 OPT_minline_stringops_dynamically
,
4201 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4203 IX86_ATTR_NO ("align-stringops",
4204 OPT_mno_align_stringops
,
4205 MASK_NO_ALIGN_STRINGOPS
),
4207 IX86_ATTR_YES ("recip",
4213 /* If this is a list, recurse to get the options. */
4214 if (TREE_CODE (args
) == TREE_LIST
)
4218 for (; args
; args
= TREE_CHAIN (args
))
4219 if (TREE_VALUE (args
)
4220 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4221 p_strings
, enum_opts_set
))
4227 else if (TREE_CODE (args
) != STRING_CST
)
4229 error ("attribute %<target%> argument not a string");
4233 /* Handle multiple arguments separated by commas. */
4234 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4236 while (next_optstr
&& *next_optstr
!= '\0')
4238 char *p
= next_optstr
;
4240 char *comma
= strchr (next_optstr
, ',');
4241 const char *opt_string
;
4242 size_t len
, opt_len
;
4247 enum ix86_opt_type type
= ix86_opt_unknown
;
4253 len
= comma
- next_optstr
;
4254 next_optstr
= comma
+ 1;
4262 /* Recognize no-xxx. */
4263 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4272 /* Find the option. */
4275 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4277 type
= attrs
[i
].type
;
4278 opt_len
= attrs
[i
].len
;
4279 if (ch
== attrs
[i
].string
[0]
4280 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4283 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4286 mask
= attrs
[i
].mask
;
4287 opt_string
= attrs
[i
].string
;
4292 /* Process the option. */
4295 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4299 else if (type
== ix86_opt_isa
)
4301 struct cl_decoded_option decoded
;
4303 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4304 ix86_handle_option (&global_options
, &global_options_set
,
4305 &decoded
, input_location
);
4308 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4310 if (type
== ix86_opt_no
)
4311 opt_set_p
= !opt_set_p
;
4314 target_flags
|= mask
;
4316 target_flags
&= ~mask
;
4319 else if (type
== ix86_opt_str
)
4323 error ("option(\"%s\") was already specified", opt_string
);
4327 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4330 else if (type
== ix86_opt_enum
)
4335 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4337 set_option (&global_options
, enum_opts_set
, opt
, value
,
4338 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4342 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4354 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4357 ix86_valid_target_attribute_tree (tree args
)
4359 const char *orig_arch_string
= ix86_arch_string
;
4360 const char *orig_tune_string
= ix86_tune_string
;
4361 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4362 int orig_tune_defaulted
= ix86_tune_defaulted
;
4363 int orig_arch_specified
= ix86_arch_specified
;
4364 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4367 struct cl_target_option
*def
4368 = TREE_TARGET_OPTION (target_option_default_node
);
4369 struct gcc_options enum_opts_set
;
4371 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4373 /* Process each of the options on the chain. */
4374 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4376 return error_mark_node
;
4378 /* If the changed options are different from the default, rerun
4379 ix86_option_override_internal, and then save the options away.
4380 The string options are are attribute options, and will be undone
4381 when we copy the save structure. */
4382 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4383 || target_flags
!= def
->x_target_flags
4384 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4385 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4386 || enum_opts_set
.x_ix86_fpmath
)
4388 /* If we are using the default tune= or arch=, undo the string assigned,
4389 and use the default. */
4390 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4391 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4392 else if (!orig_arch_specified
)
4393 ix86_arch_string
= NULL
;
4395 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4396 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4397 else if (orig_tune_defaulted
)
4398 ix86_tune_string
= NULL
;
4400 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4401 if (enum_opts_set
.x_ix86_fpmath
)
4402 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4403 else if (!TARGET_64BIT
&& TARGET_SSE
)
4405 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4406 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4409 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4410 ix86_option_override_internal (false);
4412 /* Add any builtin functions with the new isa if any. */
4413 ix86_add_new_builtins (ix86_isa_flags
);
4415 /* Save the current options unless we are validating options for
4417 t
= build_target_option_node ();
4419 ix86_arch_string
= orig_arch_string
;
4420 ix86_tune_string
= orig_tune_string
;
4421 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4423 /* Free up memory allocated to hold the strings */
4424 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4425 free (option_strings
[i
]);
4431 /* Hook to validate attribute((target("string"))). */
4434 ix86_valid_target_attribute_p (tree fndecl
,
4435 tree
ARG_UNUSED (name
),
4437 int ARG_UNUSED (flags
))
4439 struct cl_target_option cur_target
;
4442 /* attribute((target("default"))) does nothing, beyond
4443 affecting multi-versioning. */
4444 if (TREE_VALUE (args
)
4445 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4446 && TREE_CHAIN (args
) == NULL_TREE
4447 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4450 tree old_optimize
= build_optimization_node ();
4451 tree new_target
, new_optimize
;
4452 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4454 /* If the function changed the optimization levels as well as setting target
4455 options, start with the optimizations specified. */
4456 if (func_optimize
&& func_optimize
!= old_optimize
)
4457 cl_optimization_restore (&global_options
,
4458 TREE_OPTIMIZATION (func_optimize
));
4460 /* The target attributes may also change some optimization flags, so update
4461 the optimization options if necessary. */
4462 cl_target_option_save (&cur_target
, &global_options
);
4463 new_target
= ix86_valid_target_attribute_tree (args
);
4464 new_optimize
= build_optimization_node ();
4466 if (new_target
== error_mark_node
)
4469 else if (fndecl
&& new_target
)
4471 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4473 if (old_optimize
!= new_optimize
)
4474 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4477 cl_target_option_restore (&global_options
, &cur_target
);
4479 if (old_optimize
!= new_optimize
)
4480 cl_optimization_restore (&global_options
,
4481 TREE_OPTIMIZATION (old_optimize
));
4487 /* Hook to determine if one function can safely inline another. */
4490 ix86_can_inline_p (tree caller
, tree callee
)
4493 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4494 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4496 /* If callee has no option attributes, then it is ok to inline. */
4500 /* If caller has no option attributes, but callee does then it is not ok to
4502 else if (!caller_tree
)
4507 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4508 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4510 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4511 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4513 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4514 != callee_opts
->x_ix86_isa_flags
)
4517 /* See if we have the same non-isa options. */
4518 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4521 /* See if arch, tune, etc. are the same. */
4522 else if (caller_opts
->arch
!= callee_opts
->arch
)
4525 else if (caller_opts
->tune
!= callee_opts
->tune
)
4528 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4531 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4542 /* Remember the last target of ix86_set_current_function. */
4543 static GTY(()) tree ix86_previous_fndecl
;
4545 /* Establish appropriate back-end context for processing the function
4546 FNDECL. The argument might be NULL to indicate processing at top
4547 level, outside of any function scope. */
4549 ix86_set_current_function (tree fndecl
)
4551 /* Only change the context if the function changes. This hook is called
4552 several times in the course of compiling a function, and we don't want to
4553 slow things down too much or call target_reinit when it isn't safe. */
4554 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4556 tree old_tree
= (ix86_previous_fndecl
4557 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4560 tree new_tree
= (fndecl
4561 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4564 ix86_previous_fndecl
= fndecl
;
4565 if (old_tree
== new_tree
)
4570 cl_target_option_restore (&global_options
,
4571 TREE_TARGET_OPTION (new_tree
));
4577 struct cl_target_option
*def
4578 = TREE_TARGET_OPTION (target_option_current_node
);
4580 cl_target_option_restore (&global_options
, def
);
4587 /* Return true if this goes in large data/bss. */
4590 ix86_in_large_data_p (tree exp
)
4592 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4595 /* Functions are never large data. */
4596 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4599 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4601 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4602 if (strcmp (section
, ".ldata") == 0
4603 || strcmp (section
, ".lbss") == 0)
4609 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4611 /* If this is an incomplete type with size 0, then we can't put it
4612 in data because it might be too big when completed. */
4613 if (!size
|| size
> ix86_section_threshold
)
4620 /* Switch to the appropriate section for output of DECL.
4621 DECL is either a `VAR_DECL' node or a constant of some sort.
4622 RELOC indicates whether forming the initial value of DECL requires
4623 link-time relocations. */
4625 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4629 x86_64_elf_select_section (tree decl
, int reloc
,
4630 unsigned HOST_WIDE_INT align
)
4632 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4633 && ix86_in_large_data_p (decl
))
4635 const char *sname
= NULL
;
4636 unsigned int flags
= SECTION_WRITE
;
4637 switch (categorize_decl_for_section (decl
, reloc
))
4642 case SECCAT_DATA_REL
:
4643 sname
= ".ldata.rel";
4645 case SECCAT_DATA_REL_LOCAL
:
4646 sname
= ".ldata.rel.local";
4648 case SECCAT_DATA_REL_RO
:
4649 sname
= ".ldata.rel.ro";
4651 case SECCAT_DATA_REL_RO_LOCAL
:
4652 sname
= ".ldata.rel.ro.local";
4656 flags
|= SECTION_BSS
;
4659 case SECCAT_RODATA_MERGE_STR
:
4660 case SECCAT_RODATA_MERGE_STR_INIT
:
4661 case SECCAT_RODATA_MERGE_CONST
:
4665 case SECCAT_SRODATA
:
4672 /* We don't split these for medium model. Place them into
4673 default sections and hope for best. */
4678 /* We might get called with string constants, but get_named_section
4679 doesn't like them as they are not DECLs. Also, we need to set
4680 flags in that case. */
4682 return get_section (sname
, flags
, NULL
);
4683 return get_named_section (decl
, sname
, reloc
);
4686 return default_elf_select_section (decl
, reloc
, align
);
4689 /* Build up a unique section name, expressed as a
4690 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4691 RELOC indicates whether the initial value of EXP requires
4692 link-time relocations. */
4694 static void ATTRIBUTE_UNUSED
4695 x86_64_elf_unique_section (tree decl
, int reloc
)
4697 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4698 && ix86_in_large_data_p (decl
))
4700 const char *prefix
= NULL
;
4701 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4702 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4704 switch (categorize_decl_for_section (decl
, reloc
))
4707 case SECCAT_DATA_REL
:
4708 case SECCAT_DATA_REL_LOCAL
:
4709 case SECCAT_DATA_REL_RO
:
4710 case SECCAT_DATA_REL_RO_LOCAL
:
4711 prefix
= one_only
? ".ld" : ".ldata";
4714 prefix
= one_only
? ".lb" : ".lbss";
4717 case SECCAT_RODATA_MERGE_STR
:
4718 case SECCAT_RODATA_MERGE_STR_INIT
:
4719 case SECCAT_RODATA_MERGE_CONST
:
4720 prefix
= one_only
? ".lr" : ".lrodata";
4722 case SECCAT_SRODATA
:
4729 /* We don't split these for medium model. Place them into
4730 default sections and hope for best. */
4735 const char *name
, *linkonce
;
4738 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4739 name
= targetm
.strip_name_encoding (name
);
4741 /* If we're using one_only, then there needs to be a .gnu.linkonce
4742 prefix to the section name. */
4743 linkonce
= one_only
? ".gnu.linkonce" : "";
4745 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4747 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4751 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  /* Objects above the -mlarge-data-threshold in the medium code models
     must use .largecomm so they are placed in the large data area.  */
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4776 /* Utility function for targets to use in implementing
4777 ASM_OUTPUT_ALIGNED_BSS. */
4780 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4781 const char *name
, unsigned HOST_WIDE_INT size
,
4784 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4785 && size
> (unsigned int)ix86_section_threshold
)
4786 switch_to_section (get_named_section (decl
, ".lbss", 0));
4788 switch_to_section (bss_section
);
4789 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4790 #ifdef ASM_DECLARE_OBJECT_NAME
4791 last_assemble_variable_decl
= decl
;
4792 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4794 /* Standard thing is just output label for the object. */
4795 ASM_OUTPUT_LABEL (file
, name
);
4796 #endif /* ASM_DECLARE_OBJECT_NAME */
4797 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4800 /* Decide whether we must probe the stack before any space allocation
4801 on this target. It's essentially TARGET_STACK_PROBE except when
4802 -fstack-check causes the stack to be already probed differently. */
4805 ix86_target_stack_probe (void)
4807 /* Do not probe the stack twice if static stack checking is enabled. */
4808 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4811 return TARGET_STACK_PROBE
;
4814 /* Decide whether we can make a sibling call to a function. DECL is the
4815 declaration of the function being targeted by the call and EXP is the
4816 CALL_EXPR representing the call. */
4819 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4821 tree type
, decl_or_type
;
4824 /* If we are generating position-independent code, we cannot sibcall
4825 optimize any indirect call, or a direct call to a global function,
4826 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4830 && (!decl
|| !targetm
.binds_local_p (decl
)))
4833 /* If we need to align the outgoing stack, then sibcalling would
4834 unalign the stack, which may break the called function. */
4835 if (ix86_minimum_incoming_stack_boundary (true)
4836 < PREFERRED_STACK_BOUNDARY
)
4841 decl_or_type
= decl
;
4842 type
= TREE_TYPE (decl
);
4846 /* We're looking at the CALL_EXPR, we need the type of the function. */
4847 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4848 type
= TREE_TYPE (type
); /* pointer type */
4849 type
= TREE_TYPE (type
); /* function type */
4850 decl_or_type
= type
;
4853 /* Check that the return value locations are the same. Like
4854 if we are returning floats on the 80387 register stack, we cannot
4855 make a sibcall from a function that doesn't return a float to a
4856 function that does or, conversely, from a function that does return
4857 a float to a function that doesn't; the necessary stack adjustment
4858 would not be executed. This is also the place we notice
4859 differences in the return value ABI. Note that it is ok for one
4860 of the functions to have void return type as long as the return
4861 value of the other is passed in a register. */
4862 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4863 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4865 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4867 if (!rtx_equal_p (a
, b
))
4870 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4872 else if (!rtx_equal_p (a
, b
))
4877 /* The SYSV ABI has more call-clobbered registers;
4878 disallow sibcalls from MS to SYSV. */
4879 if (cfun
->machine
->call_abi
== MS_ABI
4880 && ix86_function_type_abi (type
) == SYSV_ABI
)
4885 /* If this call is indirect, we'll need to be able to use a
4886 call-clobbered register for the address of the target function.
4887 Make sure that all such registers are not used for passing
4888 parameters. Note that DLLIMPORT functions are indirect. */
4890 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4892 if (ix86_function_regparm (type
, NULL
) >= 3)
4894 /* ??? Need to count the actual number of registers to be used,
4895 not the possible number of registers. Fix later. */
4901 /* Otherwise okay. That also includes certain types of indirect calls. */
4905 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4906 and "sseregparm" calling convention attributes;
4907 arguments as in struct attribute_spec.handler. */
4910 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4912 int flags ATTRIBUTE_UNUSED
,
4915 if (TREE_CODE (*node
) != FUNCTION_TYPE
4916 && TREE_CODE (*node
) != METHOD_TYPE
4917 && TREE_CODE (*node
) != FIELD_DECL
4918 && TREE_CODE (*node
) != TYPE_DECL
)
4920 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4922 *no_add_attrs
= true;
4926 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4927 if (is_attribute_p ("regparm", name
))
4931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4933 error ("fastcall and regparm attributes are not compatible");
4936 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4938 error ("regparam and thiscall attributes are not compatible");
4941 cst
= TREE_VALUE (args
);
4942 if (TREE_CODE (cst
) != INTEGER_CST
)
4944 warning (OPT_Wattributes
,
4945 "%qE attribute requires an integer constant argument",
4947 *no_add_attrs
= true;
4949 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4951 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4953 *no_add_attrs
= true;
4961 /* Do not warn when emulating the MS ABI. */
4962 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4963 && TREE_CODE (*node
) != METHOD_TYPE
)
4964 || ix86_function_type_abi (*node
) != MS_ABI
)
4965 warning (OPT_Wattributes
, "%qE attribute ignored",
4967 *no_add_attrs
= true;
4971 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4972 if (is_attribute_p ("fastcall", name
))
4974 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4976 error ("fastcall and cdecl attributes are not compatible");
4978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4980 error ("fastcall and stdcall attributes are not compatible");
4982 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4984 error ("fastcall and regparm attributes are not compatible");
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4988 error ("fastcall and thiscall attributes are not compatible");
4992 /* Can combine stdcall with fastcall (redundant), regparm and
4994 else if (is_attribute_p ("stdcall", name
))
4996 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4998 error ("stdcall and cdecl attributes are not compatible");
5000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5002 error ("stdcall and fastcall attributes are not compatible");
5004 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5006 error ("stdcall and thiscall attributes are not compatible");
5010 /* Can combine cdecl with regparm and sseregparm. */
5011 else if (is_attribute_p ("cdecl", name
))
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5015 error ("stdcall and cdecl attributes are not compatible");
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5019 error ("fastcall and cdecl attributes are not compatible");
5021 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5023 error ("cdecl and thiscall attributes are not compatible");
5026 else if (is_attribute_p ("thiscall", name
))
5028 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5029 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5031 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5033 error ("stdcall and thiscall attributes are not compatible");
5035 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5037 error ("fastcall and thiscall attributes are not compatible");
5039 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5041 error ("cdecl and thiscall attributes are not compatible");
5045 /* Can combine sseregparm with all attributes. */
5050 /* The transactional memory builtins are implicitly regparm or fastcall
5051 depending on the ABI. Override the generic do-nothing attribute that
5052 these builtins were declared with, and replace it with one of the two
5053 attributes that we expect elsewhere. */
5056 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5057 tree args ATTRIBUTE_UNUSED
,
5058 int flags ATTRIBUTE_UNUSED
,
5063 /* In no case do we want to add the placeholder attribute. */
5064 *no_add_attrs
= true;
5066 /* The 64-bit ABI is unchanged for transactional memory. */
5070 /* ??? Is there a better way to validate 32-bit windows? We have
5071 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5072 if (CHECK_STACK_LIMIT
> 0)
5073 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5076 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5077 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5079 decl_attributes (node
, alt
, flags
);
5084 /* This function determines from TYPE the calling-convention. */
5087 ix86_get_callcvt (const_tree type
)
5089 unsigned int ret
= 0;
5094 return IX86_CALLCVT_CDECL
;
5096 attrs
= TYPE_ATTRIBUTES (type
);
5097 if (attrs
!= NULL_TREE
)
5099 if (lookup_attribute ("cdecl", attrs
))
5100 ret
|= IX86_CALLCVT_CDECL
;
5101 else if (lookup_attribute ("stdcall", attrs
))
5102 ret
|= IX86_CALLCVT_STDCALL
;
5103 else if (lookup_attribute ("fastcall", attrs
))
5104 ret
|= IX86_CALLCVT_FASTCALL
;
5105 else if (lookup_attribute ("thiscall", attrs
))
5106 ret
|= IX86_CALLCVT_THISCALL
;
5108 /* Regparam isn't allowed for thiscall and fastcall. */
5109 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5111 if (lookup_attribute ("regparm", attrs
))
5112 ret
|= IX86_CALLCVT_REGPARM
;
5113 if (lookup_attribute ("sseregparm", attrs
))
5114 ret
|= IX86_CALLCVT_SSEREGPARM
;
5117 if (IX86_BASE_CALLCVT(ret
) != 0)
5121 is_stdarg
= stdarg_p (type
);
5122 if (TARGET_RTD
&& !is_stdarg
)
5123 return IX86_CALLCVT_STDCALL
| ret
;
5127 || TREE_CODE (type
) != METHOD_TYPE
5128 || ix86_function_type_abi (type
) != MS_ABI
)
5129 return IX86_CALLCVT_CDECL
| ret
;
5131 return IX86_CALLCVT_THISCALL
;
5134 /* Return 0 if the attributes for two types are incompatible, 1 if they
5135 are compatible, and 2 if they are nearly compatible (which causes a
5136 warning to be generated). */
5139 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5141 unsigned int ccvt1
, ccvt2
;
5143 if (TREE_CODE (type1
) != FUNCTION_TYPE
5144 && TREE_CODE (type1
) != METHOD_TYPE
)
5147 ccvt1
= ix86_get_callcvt (type1
);
5148 ccvt2
= ix86_get_callcvt (type2
);
5151 if (ix86_function_regparm (type1
, NULL
)
5152 != ix86_function_regparm (type2
, NULL
))
5158 /* Return the regparm value for a function with the indicated TYPE and DECL.
5159 DECL may be NULL when calling function indirectly
5160 or considering a libcall. */
5163 ix86_function_regparm (const_tree type
, const_tree decl
)
5170 return (ix86_function_type_abi (type
) == SYSV_ABI
5171 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5172 ccvt
= ix86_get_callcvt (type
);
5173 regparm
= ix86_regparm
;
5175 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5177 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5180 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5184 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5186 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5189 /* Use register calling convention for local functions when possible. */
5191 && TREE_CODE (decl
) == FUNCTION_DECL
5193 && !(profile_flag
&& !flag_fentry
))
5195 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5196 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5197 if (i
&& i
->local
&& i
->can_change_signature
)
5199 int local_regparm
, globals
= 0, regno
;
5201 /* Make sure no regparm register is taken by a
5202 fixed register variable. */
5203 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5204 if (fixed_regs
[local_regparm
])
5207 /* We don't want to use regparm(3) for nested functions as
5208 these use a static chain pointer in the third argument. */
5209 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5212 /* In 32-bit mode save a register for the split stack. */
5213 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5216 /* Each fixed register usage increases register pressure,
5217 so less registers should be used for argument passing.
5218 This functionality can be overriden by an explicit
5220 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5221 if (fixed_regs
[regno
])
5225 = globals
< local_regparm
? local_regparm
- globals
: 0;
5227 if (local_regparm
> regparm
)
5228 regparm
= local_regparm
;
5235 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5236 DFmode (2) arguments in SSE registers for a function with the
5237 indicated TYPE and DECL. DECL may be NULL when calling function
5238 indirectly or considering a libcall. Otherwise return 0. */
5241 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5243 gcc_assert (!TARGET_64BIT
);
5245 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5246 by the sseregparm attribute. */
5247 if (TARGET_SSEREGPARM
5248 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5255 error ("calling %qD with attribute sseregparm without "
5256 "SSE/SSE2 enabled", decl
);
5258 error ("calling %qT with attribute sseregparm without "
5259 "SSE/SSE2 enabled", type
);
5267 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5268 (and DFmode for SSE2) arguments in SSE registers. */
5269 if (decl
&& TARGET_SSE_MATH
&& optimize
5270 && !(profile_flag
&& !flag_fentry
))
5272 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5273 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5274 if (i
&& i
->local
&& i
->can_change_signature
)
5275 return TARGET_SSE2
? 2 : 1;
5281 /* Return true if EAX is live at the start of the function. Used by
5282 ix86_expand_prologue to determine if we need special help before
5283 calling allocate_stack_worker. */
5286 ix86_eax_live_at_start_p (void)
5288 /* Cheat. Don't bother working forward from ix86_function_regparm
5289 to the function type to whether an actual argument is located in
5290 eax. Instead just look at cfg info, which is still close enough
5291 to correct at this point. This gives false positives for broken
5292 functions that might use uninitialized data that happens to be
5293 allocated in eax, but who cares? */
5294 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5298 ix86_keep_aggregate_return_pointer (tree fntype
)
5304 attr
= lookup_attribute ("callee_pop_aggregate_return",
5305 TYPE_ATTRIBUTES (fntype
));
5307 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5309 /* For 32-bit MS-ABI the default is to keep aggregate
5311 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5314 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5317 /* Value is the number of bytes of arguments automatically
5318 popped when returning from a subroutine call.
5319 FUNDECL is the declaration node of the function (as a tree),
5320 FUNTYPE is the data type of the function (as a tree),
5321 or for a library call it is an identifier node for the subroutine name.
5322 SIZE is the number of bytes of arguments passed on the stack.
5324 On the 80386, the RTD insn may be used to pop them if the number
5325 of args is fixed, but if the number is variable then the caller
5326 must pop them all. RTD can't be used for library calls now
5327 because the library is compiled with the Unix compiler.
5328 Use of RTD is a selectable option, since it is incompatible with
5329 standard Unix calling sequences. If the option is not selected,
5330 the caller must always pop the args.
5332 The attribute stdcall is equivalent to RTD on a per module basis. */
5335 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5339 /* None of the 64-bit ABIs pop arguments. */
5343 ccvt
= ix86_get_callcvt (funtype
);
5345 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5346 | IX86_CALLCVT_THISCALL
)) != 0
5347 && ! stdarg_p (funtype
))
5350 /* Lose any fake structure return argument if it is passed on the stack. */
5351 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5352 && !ix86_keep_aggregate_return_pointer (funtype
))
5354 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5356 return GET_MODE_SIZE (Pmode
);
5362 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5365 ix86_legitimate_combined_insn (rtx insn
)
5367 /* Check operand constraints in case hard registers were propagated
5368 into insn pattern. This check prevents combine pass from
5369 generating insn patterns with invalid hard register operands.
5370 These invalid insns can eventually confuse reload to error out
5371 with a spill failure. See also PRs 46829 and 46843. */
5372 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5376 extract_insn (insn
);
5377 preprocess_constraints ();
5379 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5381 rtx op
= recog_data
.operand
[i
];
5382 enum machine_mode mode
= GET_MODE (op
);
5383 struct operand_alternative
*op_alt
;
5388 /* A unary operator may be accepted by the predicate, but it
5389 is irrelevant for matching constraints. */
5393 if (GET_CODE (op
) == SUBREG
)
5395 if (REG_P (SUBREG_REG (op
))
5396 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5397 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5398 GET_MODE (SUBREG_REG (op
)),
5401 op
= SUBREG_REG (op
);
5404 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5407 op_alt
= recog_op_alt
[i
];
5409 /* Operand has no constraints, anything is OK. */
5410 win
= !recog_data
.n_alternatives
;
5412 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5414 if (op_alt
[j
].anything_ok
5415 || (op_alt
[j
].matches
!= -1
5417 (recog_data
.operand
[i
],
5418 recog_data
.operand
[op_alt
[j
].matches
]))
5419 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5434 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5436 static unsigned HOST_WIDE_INT
5437 ix86_asan_shadow_offset (void)
5439 return TARGET_LP64
? (HOST_WIDE_INT_1
<< 44)
5440 : (HOST_WIDE_INT_1
<< 29);
5443 /* Argument support functions. */
5445 /* Return true when register may be used to pass function parameters. */
5447 ix86_function_arg_regno_p (int regno
)
5450 const int *parm_regs
;
5455 return (regno
< REGPARM_MAX
5456 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5458 return (regno
< REGPARM_MAX
5459 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5460 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5461 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5462 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5467 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5472 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5473 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5477 /* TODO: The function should depend on current function ABI but
5478 builtins.c would need updating then. Therefore we use the
5481 /* RAX is used as hidden argument to va_arg functions. */
5482 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5485 if (ix86_abi
== MS_ABI
)
5486 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5488 parm_regs
= x86_64_int_parameter_registers
;
5489 for (i
= 0; i
< (ix86_abi
== MS_ABI
5490 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5491 if (regno
== parm_regs
[i
])
5496 /* Return if we do not know how to pass TYPE solely in registers. */
5499 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5501 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5504 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5505 The layout_type routine is crafty and tries to trick us into passing
5506 currently unsupported vector types on the stack by using TImode. */
5507 return (!TARGET_64BIT
&& mode
== TImode
5508 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5511 /* It returns the size, in bytes, of the area reserved for arguments passed
5512 in registers for the function represented by fndecl dependent to the used
5515 ix86_reg_parm_stack_space (const_tree fndecl
)
5517 enum calling_abi call_abi
= SYSV_ABI
;
5518 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5519 call_abi
= ix86_function_abi (fndecl
);
5521 call_abi
= ix86_function_type_abi (fndecl
);
5522 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5527 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5530 ix86_function_type_abi (const_tree fntype
)
5532 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5534 enum calling_abi abi
= ix86_abi
;
5535 if (abi
== SYSV_ABI
)
5537 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5540 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5548 ix86_function_ms_hook_prologue (const_tree fn
)
5550 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5552 if (decl_function_context (fn
) != NULL_TREE
)
5553 error_at (DECL_SOURCE_LOCATION (fn
),
5554 "ms_hook_prologue is not compatible with nested function");
5561 static enum calling_abi
5562 ix86_function_abi (const_tree fndecl
)
5566 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5569 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5572 ix86_cfun_abi (void)
5576 return cfun
->machine
->call_abi
;
5579 /* Write the extra assembler code needed to declare a function properly. */
5582 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5585 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5589 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5590 unsigned int filler_cc
= 0xcccccccc;
5592 for (i
= 0; i
< filler_count
; i
+= 4)
5593 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5596 #ifdef SUBTARGET_ASM_UNWIND_INIT
5597 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5600 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5602 /* Output magic byte marker, if hot-patch attribute is set. */
5607 /* leaq [%rsp + 0], %rsp */
5608 asm_fprintf (asm_out_file
, ASM_BYTE
5609 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5613 /* movl.s %edi, %edi
5615 movl.s %esp, %ebp */
5616 asm_fprintf (asm_out_file
, ASM_BYTE
5617 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5623 extern void init_regs (void);
5625 /* Implementation of call abi switching target hook. Specific to FNDECL
5626 the specific call register sets are set. See also
5627 ix86_conditional_register_usage for more details. */
5629 ix86_call_abi_override (const_tree fndecl
)
5631 if (fndecl
== NULL_TREE
)
5632 cfun
->machine
->call_abi
= ix86_abi
;
5634 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5637 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5638 expensive re-initialization of init_regs each time we switch function context
5639 since this is needed only during RTL expansion. */
5641 ix86_maybe_switch_abi (void)
5644 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5648 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5649 for a call to a function whose data type is FNTYPE.
5650 For a library call, FNTYPE is 0. */
5653 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5654 tree fntype
, /* tree ptr for function decl */
5655 rtx libname
, /* SYMBOL_REF of library name or 0 */
5659 struct cgraph_local_info
*i
;
5661 memset (cum
, 0, sizeof (*cum
));
5665 i
= cgraph_local_info (fndecl
);
5666 cum
->call_abi
= ix86_function_abi (fndecl
);
5671 cum
->call_abi
= ix86_function_type_abi (fntype
);
5674 cum
->caller
= caller
;
5676 /* Set up the number of registers to use for passing arguments. */
5678 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5679 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5680 "or subtarget optimization implying it");
5681 cum
->nregs
= ix86_regparm
;
5684 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5685 ? X86_64_REGPARM_MAX
5686 : X86_64_MS_REGPARM_MAX
);
5690 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5693 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5694 ? X86_64_SSE_REGPARM_MAX
5695 : X86_64_MS_SSE_REGPARM_MAX
);
5699 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5700 cum
->warn_avx
= true;
5701 cum
->warn_sse
= true;
5702 cum
->warn_mmx
= true;
5704 /* Because type might mismatch in between caller and callee, we need to
5705 use actual type of function for local calls.
5706 FIXME: cgraph_analyze can be told to actually record if function uses
5707 va_start so for local functions maybe_vaarg can be made aggressive
5709 FIXME: once typesytem is fixed, we won't need this code anymore. */
5710 if (i
&& i
->local
&& i
->can_change_signature
)
5711 fntype
= TREE_TYPE (fndecl
);
5712 cum
->maybe_vaarg
= (fntype
5713 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5718 /* If there are variable arguments, then we won't pass anything
5719 in registers in 32-bit mode. */
5720 if (stdarg_p (fntype
))
5731 /* Use ecx and edx registers if function has fastcall attribute,
5732 else look for regparm information. */
5735 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5736 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5739 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5741 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5747 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5750 /* Set up the number of SSE registers used for passing SFmode
5751 and DFmode arguments. Warn for mismatching ABI. */
5752 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5756 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5757 But in the case of vector types, it is some vector mode.
5759 When we have only some of our vector isa extensions enabled, then there
5760 are some modes for which vector_mode_supported_p is false. For these
5761 modes, the generic vector support in gcc will choose some non-vector mode
5762 in order to implement the type. By computing the natural mode, we'll
5763 select the proper ABI location for the operand and not depend on whatever
5764 the middle-end decides to do with these vector types.
5766 The midde-end can't deal with the vector types > 16 bytes. In this
5767 case, we return the original mode and warn ABI change if CUM isn't
5770 static enum machine_mode
5771 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5773 enum machine_mode mode
= TYPE_MODE (type
);
5775 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5777 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5778 if ((size
== 8 || size
== 16 || size
== 32)
5779 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5780 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5782 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5784 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5785 mode
= MIN_MODE_VECTOR_FLOAT
;
5787 mode
= MIN_MODE_VECTOR_INT
;
5789 /* Get the mode which has this inner mode and number of units. */
5790 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5791 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5792 && GET_MODE_INNER (mode
) == innermode
)
5794 if (size
== 32 && !TARGET_AVX
)
5796 static bool warnedavx
;
5803 warning (0, "AVX vector argument without AVX "
5804 "enabled changes the ABI");
5806 return TYPE_MODE (type
);
5808 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5810 static bool warnedsse
;
5817 warning (0, "SSE vector argument without SSE "
5818 "enabled changes the ABI");
5833 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5834 this may not agree with the mode that the type system has chosen for the
5835 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5836 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5839 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5844 if (orig_mode
!= BLKmode
)
5845 tmp
= gen_rtx_REG (orig_mode
, regno
);
5848 tmp
= gen_rtx_REG (mode
, regno
);
5849 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5850 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5856 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5857 of this code is to classify each 8bytes of incoming argument by the register
5858 class and assign registers accordingly. */
5860 /* Return the union class of CLASS1 and CLASS2.
5861 See the x86-64 PS ABI for details. */
5863 static enum x86_64_reg_class
5864 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5866 /* Rule #1: If both classes are equal, this is the resulting class. */
5867 if (class1
== class2
)
5870 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5872 if (class1
== X86_64_NO_CLASS
)
5874 if (class2
== X86_64_NO_CLASS
)
5877 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5878 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5879 return X86_64_MEMORY_CLASS
;
5881 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5882 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5883 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5884 return X86_64_INTEGERSI_CLASS
;
5885 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5886 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5887 return X86_64_INTEGER_CLASS
;
5889 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5891 if (class1
== X86_64_X87_CLASS
5892 || class1
== X86_64_X87UP_CLASS
5893 || class1
== X86_64_COMPLEX_X87_CLASS
5894 || class2
== X86_64_X87_CLASS
5895 || class2
== X86_64_X87UP_CLASS
5896 || class2
== X86_64_COMPLEX_X87_CLASS
)
5897 return X86_64_MEMORY_CLASS
;
5899 /* Rule #6: Otherwise class SSE is used. */
5900 return X86_64_SSE_CLASS
;
5903 /* Classify the argument of type TYPE and mode MODE.
5904 CLASSES will be filled by the register class used to pass each word
5905 of the operand. The number of words is returned. In case the parameter
5906 should be passed in memory, 0 is returned. As a special case for zero
5907 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5909 BIT_OFFSET is used internally for handling records and specifies offset
5910 of the offset in bits modulo 256 to avoid overflow cases.
5912 See the x86-64 PS ABI for details.
5916 classify_argument (enum machine_mode mode
, const_tree type
,
5917 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5919 HOST_WIDE_INT bytes
=
5920 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5922 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5924 /* Variable sized entities are always passed/returned in memory. */
5928 if (mode
!= VOIDmode
5929 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5932 if (type
&& AGGREGATE_TYPE_P (type
))
5936 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5938 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5942 for (i
= 0; i
< words
; i
++)
5943 classes
[i
] = X86_64_NO_CLASS
;
5945 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5946 signalize memory class, so handle it as special case. */
5949 classes
[0] = X86_64_NO_CLASS
;
5953 /* Classify each field of record and merge classes. */
5954 switch (TREE_CODE (type
))
5957 /* And now merge the fields of structure. */
5958 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5960 if (TREE_CODE (field
) == FIELD_DECL
)
5964 if (TREE_TYPE (field
) == error_mark_node
)
5967 /* Bitfields are always classified as integer. Handle them
5968 early, since later code would consider them to be
5969 misaligned integers. */
5970 if (DECL_BIT_FIELD (field
))
5972 for (i
= (int_bit_position (field
)
5973 + (bit_offset
% 64)) / 8 / 8;
5974 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5975 + tree_low_cst (DECL_SIZE (field
), 0)
5978 merge_classes (X86_64_INTEGER_CLASS
,
5985 type
= TREE_TYPE (field
);
5987 /* Flexible array member is ignored. */
5988 if (TYPE_MODE (type
) == BLKmode
5989 && TREE_CODE (type
) == ARRAY_TYPE
5990 && TYPE_SIZE (type
) == NULL_TREE
5991 && TYPE_DOMAIN (type
) != NULL_TREE
5992 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5997 if (!warned
&& warn_psabi
)
6000 inform (input_location
,
6001 "the ABI of passing struct with"
6002 " a flexible array member has"
6003 " changed in GCC 4.4");
6007 num
= classify_argument (TYPE_MODE (type
), type
,
6009 (int_bit_position (field
)
6010 + bit_offset
) % 256);
6013 pos
= (int_bit_position (field
)
6014 + (bit_offset
% 64)) / 8 / 8;
6015 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6017 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6024 /* Arrays are handled as small records. */
6027 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6028 TREE_TYPE (type
), subclasses
, bit_offset
);
6032 /* The partial classes are now full classes. */
6033 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6034 subclasses
[0] = X86_64_SSE_CLASS
;
6035 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6036 && !((bit_offset
% 64) == 0 && bytes
== 4))
6037 subclasses
[0] = X86_64_INTEGER_CLASS
;
6039 for (i
= 0; i
< words
; i
++)
6040 classes
[i
] = subclasses
[i
% num
];
6045 case QUAL_UNION_TYPE
:
6046 /* Unions are similar to RECORD_TYPE but offset is always 0.
6048 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6050 if (TREE_CODE (field
) == FIELD_DECL
)
6054 if (TREE_TYPE (field
) == error_mark_node
)
6057 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6058 TREE_TYPE (field
), subclasses
,
6062 for (i
= 0; i
< num
; i
++)
6063 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6074 /* When size > 16 bytes, if the first one isn't
6075 X86_64_SSE_CLASS or any other ones aren't
6076 X86_64_SSEUP_CLASS, everything should be passed in
6078 if (classes
[0] != X86_64_SSE_CLASS
)
6081 for (i
= 1; i
< words
; i
++)
6082 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6086 /* Final merger cleanup. */
6087 for (i
= 0; i
< words
; i
++)
6089 /* If one class is MEMORY, everything should be passed in
6091 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6094 /* The X86_64_SSEUP_CLASS should be always preceded by
6095 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6096 if (classes
[i
] == X86_64_SSEUP_CLASS
6097 && classes
[i
- 1] != X86_64_SSE_CLASS
6098 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6100 /* The first one should never be X86_64_SSEUP_CLASS. */
6101 gcc_assert (i
!= 0);
6102 classes
[i
] = X86_64_SSE_CLASS
;
6105 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6106 everything should be passed in memory. */
6107 if (classes
[i
] == X86_64_X87UP_CLASS
6108 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6112 /* The first one should never be X86_64_X87UP_CLASS. */
6113 gcc_assert (i
!= 0);
6114 if (!warned
&& warn_psabi
)
6117 inform (input_location
,
6118 "the ABI of passing union with long double"
6119 " has changed in GCC 4.4");
6127 /* Compute alignment needed. We align all types to natural boundaries with
6128 exception of XFmode that is aligned to 64bits. */
6129 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6131 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6134 mode_alignment
= 128;
6135 else if (mode
== XCmode
)
6136 mode_alignment
= 256;
6137 if (COMPLEX_MODE_P (mode
))
6138 mode_alignment
/= 2;
6139 /* Misaligned fields are always returned in memory. */
6140 if (bit_offset
% mode_alignment
)
6144 /* for V1xx modes, just use the base mode */
6145 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6146 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6147 mode
= GET_MODE_INNER (mode
);
6149 /* Classification of atomic types. */
6154 classes
[0] = X86_64_SSE_CLASS
;
6157 classes
[0] = X86_64_SSE_CLASS
;
6158 classes
[1] = X86_64_SSEUP_CLASS
;
6168 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6172 classes
[0] = X86_64_INTEGERSI_CLASS
;
6175 else if (size
<= 64)
6177 classes
[0] = X86_64_INTEGER_CLASS
;
6180 else if (size
<= 64+32)
6182 classes
[0] = X86_64_INTEGER_CLASS
;
6183 classes
[1] = X86_64_INTEGERSI_CLASS
;
6186 else if (size
<= 64+64)
6188 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6196 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6200 /* OImode shouldn't be used directly. */
6205 if (!(bit_offset
% 64))
6206 classes
[0] = X86_64_SSESF_CLASS
;
6208 classes
[0] = X86_64_SSE_CLASS
;
6211 classes
[0] = X86_64_SSEDF_CLASS
;
6214 classes
[0] = X86_64_X87_CLASS
;
6215 classes
[1] = X86_64_X87UP_CLASS
;
6218 classes
[0] = X86_64_SSE_CLASS
;
6219 classes
[1] = X86_64_SSEUP_CLASS
;
6222 classes
[0] = X86_64_SSE_CLASS
;
6223 if (!(bit_offset
% 64))
6229 if (!warned
&& warn_psabi
)
6232 inform (input_location
,
6233 "the ABI of passing structure with complex float"
6234 " member has changed in GCC 4.4");
6236 classes
[1] = X86_64_SSESF_CLASS
;
6240 classes
[0] = X86_64_SSEDF_CLASS
;
6241 classes
[1] = X86_64_SSEDF_CLASS
;
6244 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6247 /* This modes is larger than 16 bytes. */
6255 classes
[0] = X86_64_SSE_CLASS
;
6256 classes
[1] = X86_64_SSEUP_CLASS
;
6257 classes
[2] = X86_64_SSEUP_CLASS
;
6258 classes
[3] = X86_64_SSEUP_CLASS
;
6266 classes
[0] = X86_64_SSE_CLASS
;
6267 classes
[1] = X86_64_SSEUP_CLASS
;
6275 classes
[0] = X86_64_SSE_CLASS
;
6281 gcc_assert (VECTOR_MODE_P (mode
));
6286 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6288 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6289 classes
[0] = X86_64_INTEGERSI_CLASS
;
6291 classes
[0] = X86_64_INTEGER_CLASS
;
6292 classes
[1] = X86_64_INTEGER_CLASS
;
6293 return 1 + (bytes
> 8);
6297 /* Examine the argument and return set number of register required in each
6298 class. Return 0 iff parameter should be passed in memory. */
6300 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6301 int *int_nregs
, int *sse_nregs
)
6303 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6304 int n
= classify_argument (mode
, type
, regclass
, 0);
6310 for (n
--; n
>= 0; n
--)
6311 switch (regclass
[n
])
6313 case X86_64_INTEGER_CLASS
:
6314 case X86_64_INTEGERSI_CLASS
:
6317 case X86_64_SSE_CLASS
:
6318 case X86_64_SSESF_CLASS
:
6319 case X86_64_SSEDF_CLASS
:
6322 case X86_64_NO_CLASS
:
6323 case X86_64_SSEUP_CLASS
:
6325 case X86_64_X87_CLASS
:
6326 case X86_64_X87UP_CLASS
:
6330 case X86_64_COMPLEX_X87_CLASS
:
6331 return in_return
? 2 : 0;
6332 case X86_64_MEMORY_CLASS
:
6338 /* Construct container for the argument used by GCC interface. See
6339 FUNCTION_ARG for the detailed description. */
6342 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6343 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6344 const int *intreg
, int sse_regno
)
6346 /* The following variables hold the static issued_error state. */
6347 static bool issued_sse_arg_error
;
6348 static bool issued_sse_ret_error
;
6349 static bool issued_x87_ret_error
;
6351 enum machine_mode tmpmode
;
6353 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6354 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6358 int needed_sseregs
, needed_intregs
;
6359 rtx exp
[MAX_CLASSES
];
6362 n
= classify_argument (mode
, type
, regclass
, 0);
6365 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6368 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6371 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6372 some less clueful developer tries to use floating-point anyway. */
6373 if (needed_sseregs
&& !TARGET_SSE
)
6377 if (!issued_sse_ret_error
)
6379 error ("SSE register return with SSE disabled");
6380 issued_sse_ret_error
= true;
6383 else if (!issued_sse_arg_error
)
6385 error ("SSE register argument with SSE disabled");
6386 issued_sse_arg_error
= true;
6391 /* Likewise, error if the ABI requires us to return values in the
6392 x87 registers and the user specified -mno-80387. */
6393 if (!TARGET_80387
&& in_return
)
6394 for (i
= 0; i
< n
; i
++)
6395 if (regclass
[i
] == X86_64_X87_CLASS
6396 || regclass
[i
] == X86_64_X87UP_CLASS
6397 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6399 if (!issued_x87_ret_error
)
6401 error ("x87 register return with x87 disabled");
6402 issued_x87_ret_error
= true;
6407 /* First construct simple cases. Avoid SCmode, since we want to use
6408 single register to pass this type. */
6409 if (n
== 1 && mode
!= SCmode
)
6410 switch (regclass
[0])
6412 case X86_64_INTEGER_CLASS
:
6413 case X86_64_INTEGERSI_CLASS
:
6414 return gen_rtx_REG (mode
, intreg
[0]);
6415 case X86_64_SSE_CLASS
:
6416 case X86_64_SSESF_CLASS
:
6417 case X86_64_SSEDF_CLASS
:
6418 if (mode
!= BLKmode
)
6419 return gen_reg_or_parallel (mode
, orig_mode
,
6420 SSE_REGNO (sse_regno
));
6422 case X86_64_X87_CLASS
:
6423 case X86_64_COMPLEX_X87_CLASS
:
6424 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6425 case X86_64_NO_CLASS
:
6426 /* Zero sized array, struct or class. */
6432 && regclass
[0] == X86_64_SSE_CLASS
6433 && regclass
[1] == X86_64_SSEUP_CLASS
6435 return gen_reg_or_parallel (mode
, orig_mode
,
6436 SSE_REGNO (sse_regno
));
6438 && regclass
[0] == X86_64_SSE_CLASS
6439 && regclass
[1] == X86_64_SSEUP_CLASS
6440 && regclass
[2] == X86_64_SSEUP_CLASS
6441 && regclass
[3] == X86_64_SSEUP_CLASS
6443 return gen_reg_or_parallel (mode
, orig_mode
,
6444 SSE_REGNO (sse_regno
));
6446 && regclass
[0] == X86_64_X87_CLASS
6447 && regclass
[1] == X86_64_X87UP_CLASS
)
6448 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6451 && regclass
[0] == X86_64_INTEGER_CLASS
6452 && regclass
[1] == X86_64_INTEGER_CLASS
6453 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6454 && intreg
[0] + 1 == intreg
[1])
6455 return gen_rtx_REG (mode
, intreg
[0]);
6457 /* Otherwise figure out the entries of the PARALLEL. */
6458 for (i
= 0; i
< n
; i
++)
6462 switch (regclass
[i
])
6464 case X86_64_NO_CLASS
:
6466 case X86_64_INTEGER_CLASS
:
6467 case X86_64_INTEGERSI_CLASS
:
6468 /* Merge TImodes on aligned occasions here too. */
6469 if (i
* 8 + 8 > bytes
)
6471 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6472 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6476 /* We've requested 24 bytes we
6477 don't have mode for. Use DImode. */
6478 if (tmpmode
== BLKmode
)
6481 = gen_rtx_EXPR_LIST (VOIDmode
,
6482 gen_rtx_REG (tmpmode
, *intreg
),
6486 case X86_64_SSESF_CLASS
:
6488 = gen_rtx_EXPR_LIST (VOIDmode
,
6489 gen_rtx_REG (SFmode
,
6490 SSE_REGNO (sse_regno
)),
6494 case X86_64_SSEDF_CLASS
:
6496 = gen_rtx_EXPR_LIST (VOIDmode
,
6497 gen_rtx_REG (DFmode
,
6498 SSE_REGNO (sse_regno
)),
6502 case X86_64_SSE_CLASS
:
6510 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6520 && regclass
[1] == X86_64_SSEUP_CLASS
6521 && regclass
[2] == X86_64_SSEUP_CLASS
6522 && regclass
[3] == X86_64_SSEUP_CLASS
);
6530 = gen_rtx_EXPR_LIST (VOIDmode
,
6531 gen_rtx_REG (tmpmode
,
6532 SSE_REGNO (sse_regno
)),
6541 /* Empty aligned struct, union or class. */
6545 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6546 for (i
= 0; i
< nexps
; i
++)
6547 XVECEXP (ret
, 0, i
) = exp
[i
];
6551 /* Update the data in CUM to advance over an argument of mode MODE
6552 and data type TYPE. (TYPE is null for libcalls where that information
6553 may not be available.) */
6556 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6557 const_tree type
, HOST_WIDE_INT bytes
,
6558 HOST_WIDE_INT words
)
6574 cum
->words
+= words
;
6575 cum
->nregs
-= words
;
6576 cum
->regno
+= words
;
6578 if (cum
->nregs
<= 0)
6586 /* OImode shouldn't be used directly. */
6590 if (cum
->float_in_sse
< 2)
6593 if (cum
->float_in_sse
< 1)
6610 if (!type
|| !AGGREGATE_TYPE_P (type
))
6612 cum
->sse_words
+= words
;
6613 cum
->sse_nregs
-= 1;
6614 cum
->sse_regno
+= 1;
6615 if (cum
->sse_nregs
<= 0)
6629 if (!type
|| !AGGREGATE_TYPE_P (type
))
6631 cum
->mmx_words
+= words
;
6632 cum
->mmx_nregs
-= 1;
6633 cum
->mmx_regno
+= 1;
6634 if (cum
->mmx_nregs
<= 0)
6645 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6646 const_tree type
, HOST_WIDE_INT words
, bool named
)
6648 int int_nregs
, sse_nregs
;
6650 /* Unnamed 256bit vector mode parameters are passed on stack. */
6651 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6654 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6655 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6657 cum
->nregs
-= int_nregs
;
6658 cum
->sse_nregs
-= sse_nregs
;
6659 cum
->regno
+= int_nregs
;
6660 cum
->sse_regno
+= sse_nregs
;
6664 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6665 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6666 cum
->words
+= words
;
6671 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6672 HOST_WIDE_INT words
)
6674 /* Otherwise, this should be passed indirect. */
6675 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6677 cum
->words
+= words
;
6685 /* Update the data in CUM to advance over an argument of mode MODE and
6686 data type TYPE. (TYPE is null for libcalls where that information
6687 may not be available.) */
6690 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6691 const_tree type
, bool named
)
6693 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6694 HOST_WIDE_INT bytes
, words
;
6696 if (mode
== BLKmode
)
6697 bytes
= int_size_in_bytes (type
);
6699 bytes
= GET_MODE_SIZE (mode
);
6700 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6703 mode
= type_natural_mode (type
, NULL
);
6705 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6706 function_arg_advance_ms_64 (cum
, bytes
, words
);
6707 else if (TARGET_64BIT
)
6708 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6710 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6713 /* Define where to put the arguments to a function.
6714 Value is zero to push the argument on the stack,
6715 or a hard register in which to store the argument.
6717 MODE is the argument's machine mode.
6718 TYPE is the data type of the argument (as a tree).
6719 This is null for libcalls where that information may
6721 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6722 the preceding args and about the function being called.
6723 NAMED is nonzero if this argument is a named parameter
6724 (otherwise it is an extra parameter matching an ellipsis). */
6727 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6728 enum machine_mode orig_mode
, const_tree type
,
6729 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6731 static bool warnedsse
, warnedmmx
;
6733 /* Avoid the AL settings for the Unix64 ABI. */
6734 if (mode
== VOIDmode
)
6750 if (words
<= cum
->nregs
)
6752 int regno
= cum
->regno
;
6754 /* Fastcall allocates the first two DWORD (SImode) or
6755 smaller arguments to ECX and EDX if it isn't an
6761 || (type
&& AGGREGATE_TYPE_P (type
)))
6764 /* ECX not EAX is the first allocated register. */
6765 if (regno
== AX_REG
)
6768 return gen_rtx_REG (mode
, regno
);
6773 if (cum
->float_in_sse
< 2)
6776 if (cum
->float_in_sse
< 1)
6780 /* In 32bit, we pass TImode in xmm registers. */
6787 if (!type
|| !AGGREGATE_TYPE_P (type
))
6789 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6792 warning (0, "SSE vector argument without SSE enabled "
6796 return gen_reg_or_parallel (mode
, orig_mode
,
6797 cum
->sse_regno
+ FIRST_SSE_REG
);
6802 /* OImode shouldn't be used directly. */
6811 if (!type
|| !AGGREGATE_TYPE_P (type
))
6814 return gen_reg_or_parallel (mode
, orig_mode
,
6815 cum
->sse_regno
+ FIRST_SSE_REG
);
6825 if (!type
|| !AGGREGATE_TYPE_P (type
))
6827 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6830 warning (0, "MMX vector argument without MMX enabled "
6834 return gen_reg_or_parallel (mode
, orig_mode
,
6835 cum
->mmx_regno
+ FIRST_MMX_REG
);
6844 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6845 enum machine_mode orig_mode
, const_tree type
, bool named
)
6847 /* Handle a hidden AL argument containing number of registers
6848 for varargs x86-64 functions. */
6849 if (mode
== VOIDmode
)
6850 return GEN_INT (cum
->maybe_vaarg
6851 ? (cum
->sse_nregs
< 0
6852 ? X86_64_SSE_REGPARM_MAX
6867 /* Unnamed 256bit vector mode parameters are passed on stack. */
6873 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6875 &x86_64_int_parameter_registers
[cum
->regno
],
6880 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6881 enum machine_mode orig_mode
, bool named
,
6882 HOST_WIDE_INT bytes
)
6886 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6887 We use value of -2 to specify that current function call is MSABI. */
6888 if (mode
== VOIDmode
)
6889 return GEN_INT (-2);
6891 /* If we've run out of registers, it goes on the stack. */
6892 if (cum
->nregs
== 0)
6895 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6897 /* Only floating point modes are passed in anything but integer regs. */
6898 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6901 regno
= cum
->regno
+ FIRST_SSE_REG
;
6906 /* Unnamed floating parameters are passed in both the
6907 SSE and integer registers. */
6908 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6909 t2
= gen_rtx_REG (mode
, regno
);
6910 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6911 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6912 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6915 /* Handle aggregated types passed in register. */
6916 if (orig_mode
== BLKmode
)
6918 if (bytes
> 0 && bytes
<= 8)
6919 mode
= (bytes
> 4 ? DImode
: SImode
);
6920 if (mode
== BLKmode
)
6924 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6927 /* Return where to put the arguments to a function.
6928 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6930 MODE is the argument's machine mode. TYPE is the data type of the
6931 argument. It is null for libcalls where that information may not be
6932 available. CUM gives information about the preceding args and about
6933 the function being called. NAMED is nonzero if this argument is a
6934 named parameter (otherwise it is an extra parameter matching an
6938 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6939 const_tree type
, bool named
)
6941 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6942 enum machine_mode mode
= omode
;
6943 HOST_WIDE_INT bytes
, words
;
6946 if (mode
== BLKmode
)
6947 bytes
= int_size_in_bytes (type
);
6949 bytes
= GET_MODE_SIZE (mode
);
6950 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6952 /* To simplify the code below, represent vector types with a vector mode
6953 even if MMX/SSE are not active. */
6954 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6955 mode
= type_natural_mode (type
, cum
);
6957 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6958 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6959 else if (TARGET_64BIT
)
6960 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6962 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6967 /* A C expression that indicates when an argument must be passed by
6968 reference. If nonzero for an argument, a copy of that argument is
6969 made in memory and a pointer to the argument is passed instead of
6970 the argument itself. The pointer is passed in whatever way is
6971 appropriate for passing a pointer to that type. */
6974 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6975 enum machine_mode mode ATTRIBUTE_UNUSED
,
6976 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6978 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6980 /* See Windows x64 Software Convention. */
6981 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6983 int msize
= (int) GET_MODE_SIZE (mode
);
6986 /* Arrays are passed by reference. */
6987 if (TREE_CODE (type
) == ARRAY_TYPE
)
6990 if (AGGREGATE_TYPE_P (type
))
6992 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6993 are passed by reference. */
6994 msize
= int_size_in_bytes (type
);
6998 /* __m128 is passed by reference. */
7000 case 1: case 2: case 4: case 8:
7006 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7012 /* Return true when TYPE should be 128bit aligned for 32bit argument
7013 passing ABI. XXX: This function is obsolete and is only used for
7014 checking psABI compatibility with previous versions of GCC. */
7017 ix86_compat_aligned_value_p (const_tree type
)
7019 enum machine_mode mode
= TYPE_MODE (type
);
7020 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7024 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7026 if (TYPE_ALIGN (type
) < 128)
7029 if (AGGREGATE_TYPE_P (type
))
7031 /* Walk the aggregates recursively. */
7032 switch (TREE_CODE (type
))
7036 case QUAL_UNION_TYPE
:
7040 /* Walk all the structure fields. */
7041 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7043 if (TREE_CODE (field
) == FIELD_DECL
7044 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7051 /* Just for use if some languages passes arrays by value. */
7052 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7063 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7064 XXX: This function is obsolete and is only used for checking psABI
7065 compatibility with previous versions of GCC. */
7068 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7069 const_tree type
, unsigned int align
)
7071 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7072 natural boundaries. */
7073 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7075 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7076 make an exception for SSE modes since these require 128bit
7079 The handling here differs from field_alignment. ICC aligns MMX
7080 arguments to 4 byte boundaries, while structure fields are aligned
7081 to 8 byte boundaries. */
7084 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7085 align
= PARM_BOUNDARY
;
7089 if (!ix86_compat_aligned_value_p (type
))
7090 align
= PARM_BOUNDARY
;
7093 if (align
> BIGGEST_ALIGNMENT
)
7094 align
= BIGGEST_ALIGNMENT
;
7098 /* Return true when TYPE should be 128bit aligned for 32bit argument
7102 ix86_contains_aligned_value_p (const_tree type
)
7104 enum machine_mode mode
= TYPE_MODE (type
);
7106 if (mode
== XFmode
|| mode
== XCmode
)
7109 if (TYPE_ALIGN (type
) < 128)
7112 if (AGGREGATE_TYPE_P (type
))
7114 /* Walk the aggregates recursively. */
7115 switch (TREE_CODE (type
))
7119 case QUAL_UNION_TYPE
:
7123 /* Walk all the structure fields. */
7124 for (field
= TYPE_FIELDS (type
);
7126 field
= DECL_CHAIN (field
))
7128 if (TREE_CODE (field
) == FIELD_DECL
7129 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7136 /* Just for use if some languages passes arrays by value. */
7137 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7146 return TYPE_ALIGN (type
) >= 128;
7151 /* Gives the alignment boundary, in bits, of an argument with the
7152 specified mode and type. */
7155 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7160 /* Since the main variant type is used for call, we convert it to
7161 the main variant type. */
7162 type
= TYPE_MAIN_VARIANT (type
);
7163 align
= TYPE_ALIGN (type
);
7166 align
= GET_MODE_ALIGNMENT (mode
);
7167 if (align
< PARM_BOUNDARY
)
7168 align
= PARM_BOUNDARY
;
7172 unsigned int saved_align
= align
;
7176 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7179 if (mode
== XFmode
|| mode
== XCmode
)
7180 align
= PARM_BOUNDARY
;
7182 else if (!ix86_contains_aligned_value_p (type
))
7183 align
= PARM_BOUNDARY
;
7186 align
= PARM_BOUNDARY
;
7191 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7195 inform (input_location
,
7196 "The ABI for passing parameters with %d-byte"
7197 " alignment has changed in GCC 4.6",
7198 align
/ BITS_PER_UNIT
);
7205 /* Return true if N is a possible register number of function value. */
7208 ix86_function_value_regno_p (const unsigned int regno
)
7215 case FIRST_FLOAT_REG
:
7216 /* TODO: The function should depend on current function ABI but
7217 builtins.c would need updating then. Therefore we use the
7219 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7221 return TARGET_FLOAT_RETURNS_IN_80387
;
7227 if (TARGET_MACHO
|| TARGET_64BIT
)
7235 /* Define how to find the value returned by a function.
7236 VALTYPE is the data type of the value (as a tree).
7237 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7238 otherwise, FUNC is 0. */
7241 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7242 const_tree fntype
, const_tree fn
)
7246 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7247 we normally prevent this case when mmx is not available. However
7248 some ABIs may require the result to be returned like DImode. */
7249 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7250 regno
= FIRST_MMX_REG
;
7252 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7253 we prevent this case when sse is not available. However some ABIs
7254 may require the result to be returned like integer TImode. */
7255 else if (mode
== TImode
7256 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7257 regno
= FIRST_SSE_REG
;
7259 /* 32-byte vector modes in %ymm0. */
7260 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7261 regno
= FIRST_SSE_REG
;
7263 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7264 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7265 regno
= FIRST_FLOAT_REG
;
7267 /* Most things go in %eax. */
7270 /* Override FP return register with %xmm0 for local functions when
7271 SSE math is enabled or for functions with sseregparm attribute. */
7272 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7274 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7275 if ((sse_level
>= 1 && mode
== SFmode
)
7276 || (sse_level
== 2 && mode
== DFmode
))
7277 regno
= FIRST_SSE_REG
;
7280 /* OImode shouldn't be used directly. */
7281 gcc_assert (mode
!= OImode
);
7283 return gen_rtx_REG (orig_mode
, regno
);
7287 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7292 /* Handle libcalls, which don't provide a type node. */
7293 if (valtype
== NULL
)
7307 regno
= FIRST_SSE_REG
;
7311 regno
= FIRST_FLOAT_REG
;
7319 return gen_rtx_REG (mode
, regno
);
7321 else if (POINTER_TYPE_P (valtype
))
7323 /* Pointers are always returned in word_mode. */
7327 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7328 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7329 x86_64_int_return_registers
, 0);
7331 /* For zero sized structures, construct_container returns NULL, but we
7332 need to keep rest of compiler happy by returning meaningful value. */
7334 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7340 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7343 unsigned int regno
= AX_REG
;
7347 switch (GET_MODE_SIZE (mode
))
7350 if (valtype
!= NULL_TREE
7351 && !VECTOR_INTEGER_TYPE_P (valtype
)
7352 && !VECTOR_INTEGER_TYPE_P (valtype
)
7353 && !INTEGRAL_TYPE_P (valtype
)
7354 && !VECTOR_FLOAT_TYPE_P (valtype
))
7356 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7357 && !COMPLEX_MODE_P (mode
))
7358 regno
= FIRST_SSE_REG
;
7362 if (mode
== SFmode
|| mode
== DFmode
)
7363 regno
= FIRST_SSE_REG
;
7369 return gen_rtx_REG (orig_mode
, regno
);
7373 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7374 enum machine_mode orig_mode
, enum machine_mode mode
)
7376 const_tree fn
, fntype
;
7379 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7380 fn
= fntype_or_decl
;
7381 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7383 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7384 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7385 else if (TARGET_64BIT
)
7386 return function_value_64 (orig_mode
, mode
, valtype
);
7388 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7392 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7393 bool outgoing ATTRIBUTE_UNUSED
)
7395 enum machine_mode mode
, orig_mode
;
7397 orig_mode
= TYPE_MODE (valtype
);
7398 mode
= type_natural_mode (valtype
, NULL
);
7399 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7402 /* Pointer function arguments and return values are promoted to
7405 static enum machine_mode
7406 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7407 int *punsignedp
, const_tree fntype
,
7410 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7412 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7415 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7419 /* Return true if a structure, union or array with MODE containing FIELD
7420 should be accessed using BLKmode. */
7423 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7425 /* Union with XFmode must be in BLKmode. */
7426 return (mode
== XFmode
7427 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7428 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7432 ix86_libcall_value (enum machine_mode mode
)
7434 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7437 /* Return true iff type is returned in memory. */
7439 static bool ATTRIBUTE_UNUSED
7440 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7444 if (mode
== BLKmode
)
7447 size
= int_size_in_bytes (type
);
7449 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7452 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7454 /* User-created vectors small enough to fit in EAX. */
7458 /* MMX/3dNow values are returned in MM0,
7459 except when it doesn't exits or the ABI prescribes otherwise. */
7461 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7463 /* SSE values are returned in XMM0, except when it doesn't exist. */
7467 /* AVX values are returned in YMM0, except when it doesn't exist. */
7478 /* OImode shouldn't be used directly. */
7479 gcc_assert (mode
!= OImode
);
7484 static bool ATTRIBUTE_UNUSED
7485 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7487 int needed_intregs
, needed_sseregs
;
7488 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7491 static bool ATTRIBUTE_UNUSED
7492 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7494 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7496 /* __m128 is returned in xmm0. */
7497 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7498 || VECTOR_FLOAT_TYPE_P (type
))
7499 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7500 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7503 /* Otherwise, the size must be exactly in [1248]. */
7504 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7508 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7510 #ifdef SUBTARGET_RETURN_IN_MEMORY
7511 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7513 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7517 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7518 return return_in_memory_ms_64 (type
, mode
);
7520 return return_in_memory_64 (type
, mode
);
7523 return return_in_memory_32 (type
, mode
);
7527 /* When returning SSE vector types, we have a choice of either
7528 (1) being abi incompatible with a -march switch, or
7529 (2) generating an error.
7530 Given no good solution, I think the safest thing is one warning.
7531 The user won't be able to use -Werror, but....
7533 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7534 called in response to actually generating a caller or callee that
7535 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7536 via aggregate_value_p for general type probing from tree-ssa. */
7539 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7541 static bool warnedsse
, warnedmmx
;
7543 if (!TARGET_64BIT
&& type
)
7545 /* Look at the return type of the function, not the function type. */
7546 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7548 if (!TARGET_SSE
&& !warnedsse
)
7551 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7554 warning (0, "SSE vector return without SSE enabled "
7559 if (!TARGET_MMX
&& !warnedmmx
)
7561 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7564 warning (0, "MMX vector return without MMX enabled "
7574 /* Create the va_list data type. */
7576 /* Returns the calling convention specific va_list date type.
7577 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7580 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7582 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7584 /* For i386 we use plain pointer to argument area. */
7585 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7586 return build_pointer_type (char_type_node
);
7588 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7589 type_decl
= build_decl (BUILTINS_LOCATION
,
7590 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7592 f_gpr
= build_decl (BUILTINS_LOCATION
,
7593 FIELD_DECL
, get_identifier ("gp_offset"),
7594 unsigned_type_node
);
7595 f_fpr
= build_decl (BUILTINS_LOCATION
,
7596 FIELD_DECL
, get_identifier ("fp_offset"),
7597 unsigned_type_node
);
7598 f_ovf
= build_decl (BUILTINS_LOCATION
,
7599 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7601 f_sav
= build_decl (BUILTINS_LOCATION
,
7602 FIELD_DECL
, get_identifier ("reg_save_area"),
7605 va_list_gpr_counter_field
= f_gpr
;
7606 va_list_fpr_counter_field
= f_fpr
;
7608 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7609 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7610 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7611 DECL_FIELD_CONTEXT (f_sav
) = record
;
7613 TYPE_STUB_DECL (record
) = type_decl
;
7614 TYPE_NAME (record
) = type_decl
;
7615 TYPE_FIELDS (record
) = f_gpr
;
7616 DECL_CHAIN (f_gpr
) = f_fpr
;
7617 DECL_CHAIN (f_fpr
) = f_ovf
;
7618 DECL_CHAIN (f_ovf
) = f_sav
;
7620 layout_type (record
);
7622 /* The correct type is an array type of one element. */
7623 return build_array_type (record
, build_index_type (size_zero_node
));
7626 /* Setup the builtin va_list data type and for 64-bit the additional
7627 calling convention specific va_list data types. */
7630 ix86_build_builtin_va_list (void)
7632 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7634 /* Initialize abi specific va_list builtin types. */
7638 if (ix86_abi
== MS_ABI
)
7640 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7641 if (TREE_CODE (t
) != RECORD_TYPE
)
7642 t
= build_variant_type_copy (t
);
7643 sysv_va_list_type_node
= t
;
7648 if (TREE_CODE (t
) != RECORD_TYPE
)
7649 t
= build_variant_type_copy (t
);
7650 sysv_va_list_type_node
= t
;
7652 if (ix86_abi
!= MS_ABI
)
7654 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7655 if (TREE_CODE (t
) != RECORD_TYPE
)
7656 t
= build_variant_type_copy (t
);
7657 ms_va_list_type_node
= t
;
7662 if (TREE_CODE (t
) != RECORD_TYPE
)
7663 t
= build_variant_type_copy (t
);
7664 ms_va_list_type_node
= t
;
7671 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7674 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7680 /* GPR size of varargs save area. */
7681 if (cfun
->va_list_gpr_size
)
7682 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7684 ix86_varargs_gpr_size
= 0;
7686 /* FPR size of varargs save area. We don't need it if we don't pass
7687 anything in SSE registers. */
7688 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7689 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7691 ix86_varargs_fpr_size
= 0;
7693 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7696 save_area
= frame_pointer_rtx
;
7697 set
= get_varargs_alias_set ();
7699 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7700 if (max
> X86_64_REGPARM_MAX
)
7701 max
= X86_64_REGPARM_MAX
;
7703 for (i
= cum
->regno
; i
< max
; i
++)
7705 mem
= gen_rtx_MEM (word_mode
,
7706 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7707 MEM_NOTRAP_P (mem
) = 1;
7708 set_mem_alias_set (mem
, set
);
7709 emit_move_insn (mem
,
7710 gen_rtx_REG (word_mode
,
7711 x86_64_int_parameter_registers
[i
]));
7714 if (ix86_varargs_fpr_size
)
7716 enum machine_mode smode
;
7719 /* Now emit code to save SSE registers. The AX parameter contains number
7720 of SSE parameter registers used to call this function, though all we
7721 actually check here is the zero/non-zero status. */
7723 label
= gen_label_rtx ();
7724 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7725 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7728 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7729 we used movdqa (i.e. TImode) instead? Perhaps even better would
7730 be if we could determine the real mode of the data, via a hook
7731 into pass_stdarg. Ignore all that for now. */
7733 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7734 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7736 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7737 if (max
> X86_64_SSE_REGPARM_MAX
)
7738 max
= X86_64_SSE_REGPARM_MAX
;
7740 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7742 mem
= plus_constant (Pmode
, save_area
,
7743 i
* 16 + ix86_varargs_gpr_size
);
7744 mem
= gen_rtx_MEM (smode
, mem
);
7745 MEM_NOTRAP_P (mem
) = 1;
7746 set_mem_alias_set (mem
, set
);
7747 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7749 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7757 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7759 alias_set_type set
= get_varargs_alias_set ();
7762 /* Reset to zero, as there might be a sysv vaarg used
7764 ix86_varargs_gpr_size
= 0;
7765 ix86_varargs_fpr_size
= 0;
7767 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7771 mem
= gen_rtx_MEM (Pmode
,
7772 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7773 i
* UNITS_PER_WORD
));
7774 MEM_NOTRAP_P (mem
) = 1;
7775 set_mem_alias_set (mem
, set
);
7777 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7778 emit_move_insn (mem
, reg
);
7783 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7784 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7787 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7788 CUMULATIVE_ARGS next_cum
;
7791 /* This argument doesn't appear to be used anymore. Which is good,
7792 because the old code here didn't suppress rtl generation. */
7793 gcc_assert (!no_rtl
);
7798 fntype
= TREE_TYPE (current_function_decl
);
7800 /* For varargs, we do not want to skip the dummy va_dcl argument.
7801 For stdargs, we do want to skip the last named argument. */
7803 if (stdarg_p (fntype
))
7804 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7807 if (cum
->call_abi
== MS_ABI
)
7808 setup_incoming_varargs_ms_64 (&next_cum
);
7810 setup_incoming_varargs_64 (&next_cum
);
7813 /* Checks if TYPE is of kind va_list char *. */
7816 is_va_list_char_pointer (tree type
)
7820 /* For 32-bit it is always true. */
7823 canonic
= ix86_canonical_va_list_type (type
);
7824 return (canonic
== ms_va_list_type_node
7825 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7828 /* Implement va_start. */
7831 ix86_va_start (tree valist
, rtx nextarg
)
7833 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7834 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7835 tree gpr
, fpr
, ovf
, sav
, t
;
7839 if (flag_split_stack
7840 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7842 unsigned int scratch_regno
;
7844 /* When we are splitting the stack, we can't refer to the stack
7845 arguments using internal_arg_pointer, because they may be on
7846 the old stack. The split stack prologue will arrange to
7847 leave a pointer to the old stack arguments in a scratch
7848 register, which we here copy to a pseudo-register. The split
7849 stack prologue can't set the pseudo-register directly because
7850 it (the prologue) runs before any registers have been saved. */
7852 scratch_regno
= split_stack_prologue_scratch_regno ();
7853 if (scratch_regno
!= INVALID_REGNUM
)
7857 reg
= gen_reg_rtx (Pmode
);
7858 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7861 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7865 push_topmost_sequence ();
7866 emit_insn_after (seq
, entry_of_function ());
7867 pop_topmost_sequence ();
7871 /* Only 64bit target needs something special. */
7872 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7874 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7875 std_expand_builtin_va_start (valist
, nextarg
);
7880 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7881 next
= expand_binop (ptr_mode
, add_optab
,
7882 cfun
->machine
->split_stack_varargs_pointer
,
7883 crtl
->args
.arg_offset_rtx
,
7884 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7885 convert_move (va_r
, next
, 0);
7890 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7891 f_fpr
= DECL_CHAIN (f_gpr
);
7892 f_ovf
= DECL_CHAIN (f_fpr
);
7893 f_sav
= DECL_CHAIN (f_ovf
);
7895 valist
= build_simple_mem_ref (valist
);
7896 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7897 /* The following should be folded into the MEM_REF offset. */
7898 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7900 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7902 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7904 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7907 /* Count number of gp and fp argument registers used. */
7908 words
= crtl
->args
.info
.words
;
7909 n_gpr
= crtl
->args
.info
.regno
;
7910 n_fpr
= crtl
->args
.info
.sse_regno
;
7912 if (cfun
->va_list_gpr_size
)
7914 type
= TREE_TYPE (gpr
);
7915 t
= build2 (MODIFY_EXPR
, type
,
7916 gpr
, build_int_cst (type
, n_gpr
* 8));
7917 TREE_SIDE_EFFECTS (t
) = 1;
7918 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7921 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7923 type
= TREE_TYPE (fpr
);
7924 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7925 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7926 TREE_SIDE_EFFECTS (t
) = 1;
7927 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7930 /* Find the overflow area. */
7931 type
= TREE_TYPE (ovf
);
7932 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7933 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7935 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7936 t
= make_tree (type
, ovf_rtx
);
7938 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7939 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7940 TREE_SIDE_EFFECTS (t
) = 1;
7941 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7943 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7945 /* Find the register save area.
7946 Prologue of the function save it right above stack frame. */
7947 type
= TREE_TYPE (sav
);
7948 t
= make_tree (type
, frame_pointer_rtx
);
7949 if (!ix86_varargs_gpr_size
)
7950 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7951 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7952 TREE_SIDE_EFFECTS (t
) = 1;
7953 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7957 /* Implement va_arg. */
7960 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7963 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7964 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7965 tree gpr
, fpr
, ovf
, sav
, t
;
7967 tree lab_false
, lab_over
= NULL_TREE
;
7972 enum machine_mode nat_mode
;
7973 unsigned int arg_boundary
;
7975 /* Only 64bit target needs something special. */
7976 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7977 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7979 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7980 f_fpr
= DECL_CHAIN (f_gpr
);
7981 f_ovf
= DECL_CHAIN (f_fpr
);
7982 f_sav
= DECL_CHAIN (f_ovf
);
7984 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7985 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7986 valist
= build_va_arg_indirect_ref (valist
);
7987 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7988 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7989 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7991 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7993 type
= build_pointer_type (type
);
7994 size
= int_size_in_bytes (type
);
7995 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7997 nat_mode
= type_natural_mode (type
, NULL
);
8006 /* Unnamed 256bit vector mode parameters are passed on stack. */
8007 if (!TARGET_64BIT_MS_ABI
)
8014 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8015 type
, 0, X86_64_REGPARM_MAX
,
8016 X86_64_SSE_REGPARM_MAX
, intreg
,
8021 /* Pull the value out of the saved registers. */
8023 addr
= create_tmp_var (ptr_type_node
, "addr");
8027 int needed_intregs
, needed_sseregs
;
8029 tree int_addr
, sse_addr
;
8031 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8032 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8034 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8036 need_temp
= (!REG_P (container
)
8037 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8038 || TYPE_ALIGN (type
) > 128));
8040 /* In case we are passing structure, verify that it is consecutive block
8041 on the register save area. If not we need to do moves. */
8042 if (!need_temp
&& !REG_P (container
))
8044 /* Verify that all registers are strictly consecutive */
8045 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8049 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8051 rtx slot
= XVECEXP (container
, 0, i
);
8052 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8053 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8061 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8063 rtx slot
= XVECEXP (container
, 0, i
);
8064 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8065 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8077 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8078 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8081 /* First ensure that we fit completely in registers. */
8084 t
= build_int_cst (TREE_TYPE (gpr
),
8085 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8086 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8087 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8088 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8089 gimplify_and_add (t
, pre_p
);
8093 t
= build_int_cst (TREE_TYPE (fpr
),
8094 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8095 + X86_64_REGPARM_MAX
* 8);
8096 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8097 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8098 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8099 gimplify_and_add (t
, pre_p
);
8102 /* Compute index to start of area used for integer regs. */
8105 /* int_addr = gpr + sav; */
8106 t
= fold_build_pointer_plus (sav
, gpr
);
8107 gimplify_assign (int_addr
, t
, pre_p
);
8111 /* sse_addr = fpr + sav; */
8112 t
= fold_build_pointer_plus (sav
, fpr
);
8113 gimplify_assign (sse_addr
, t
, pre_p
);
8117 int i
, prev_size
= 0;
8118 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8121 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8122 gimplify_assign (addr
, t
, pre_p
);
8124 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8126 rtx slot
= XVECEXP (container
, 0, i
);
8127 rtx reg
= XEXP (slot
, 0);
8128 enum machine_mode mode
= GET_MODE (reg
);
8134 tree dest_addr
, dest
;
8135 int cur_size
= GET_MODE_SIZE (mode
);
8137 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8138 prev_size
= INTVAL (XEXP (slot
, 1));
8139 if (prev_size
+ cur_size
> size
)
8141 cur_size
= size
- prev_size
;
8142 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8143 if (mode
== BLKmode
)
8146 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8147 if (mode
== GET_MODE (reg
))
8148 addr_type
= build_pointer_type (piece_type
);
8150 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8152 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8155 if (SSE_REGNO_P (REGNO (reg
)))
8157 src_addr
= sse_addr
;
8158 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8162 src_addr
= int_addr
;
8163 src_offset
= REGNO (reg
) * 8;
8165 src_addr
= fold_convert (addr_type
, src_addr
);
8166 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8168 dest_addr
= fold_convert (daddr_type
, addr
);
8169 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8170 if (cur_size
== GET_MODE_SIZE (mode
))
8172 src
= build_va_arg_indirect_ref (src_addr
);
8173 dest
= build_va_arg_indirect_ref (dest_addr
);
8175 gimplify_assign (dest
, src
, pre_p
);
8180 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8181 3, dest_addr
, src_addr
,
8182 size_int (cur_size
));
8183 gimplify_and_add (copy
, pre_p
);
8185 prev_size
+= cur_size
;
8191 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8192 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8193 gimplify_assign (gpr
, t
, pre_p
);
8198 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8199 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8200 gimplify_assign (fpr
, t
, pre_p
);
8203 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8205 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8208 /* ... otherwise out of the overflow area. */
8210 /* When we align parameter on stack for caller, if the parameter
8211 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8212 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8213 here with caller. */
8214 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8215 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8216 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8218 /* Care for on-stack alignment if needed. */
8219 if (arg_boundary
<= 64 || size
== 0)
8223 HOST_WIDE_INT align
= arg_boundary
/ 8;
8224 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8225 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8226 build_int_cst (TREE_TYPE (t
), -align
));
8229 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8230 gimplify_assign (addr
, t
, pre_p
);
8232 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8233 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8236 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8238 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8239 addr
= fold_convert (ptrtype
, addr
);
8242 addr
= build_va_arg_indirect_ref (addr
);
8243 return build_va_arg_indirect_ref (addr
);
8246 /* Return true if OPNUM's MEM should be matched
8247 in movabs* patterns. */
8250 ix86_check_movabs (rtx insn
, int opnum
)
8254 set
= PATTERN (insn
);
8255 if (GET_CODE (set
) == PARALLEL
)
8256 set
= XVECEXP (set
, 0, 0);
8257 gcc_assert (GET_CODE (set
) == SET
);
8258 mem
= XEXP (set
, opnum
);
8259 while (GET_CODE (mem
) == SUBREG
)
8260 mem
= SUBREG_REG (mem
);
8261 gcc_assert (MEM_P (mem
));
8262 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8265 /* Initialize the table of extra 80387 mathematical constants. */
8268 init_ext_80387_constants (void)
8270 static const char * cst
[5] =
8272 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8273 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8274 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8275 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8276 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8280 for (i
= 0; i
< 5; i
++)
8282 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8283 /* Ensure each constant is rounded to XFmode precision. */
8284 real_convert (&ext_80387_constants_table
[i
],
8285 XFmode
, &ext_80387_constants_table
[i
]);
8288 ext_80387_constants_init
= 1;
8291 /* Return non-zero if the constant is something that
8292 can be loaded with a special instruction. */
8295 standard_80387_constant_p (rtx x
)
8297 enum machine_mode mode
= GET_MODE (x
);
8301 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8304 if (x
== CONST0_RTX (mode
))
8306 if (x
== CONST1_RTX (mode
))
8309 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8311 /* For XFmode constants, try to find a special 80387 instruction when
8312 optimizing for size or on those CPUs that benefit from them. */
8314 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8318 if (! ext_80387_constants_init
)
8319 init_ext_80387_constants ();
8321 for (i
= 0; i
< 5; i
++)
8322 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8326 /* Load of the constant -0.0 or -1.0 will be split as
8327 fldz;fchs or fld1;fchs sequence. */
8328 if (real_isnegzero (&r
))
8330 if (real_identical (&r
, &dconstm1
))
8336 /* Return the opcode of the special instruction to be used to load
8340 standard_80387_constant_opcode (rtx x
)
8342 switch (standard_80387_constant_p (x
))
8366 /* Return the CONST_DOUBLE representing the 80387 constant that is
8367 loaded by the specified special instruction. The argument IDX
8368 matches the return value from standard_80387_constant_p. */
8371 standard_80387_constant_rtx (int idx
)
8375 if (! ext_80387_constants_init
)
8376 init_ext_80387_constants ();
8392 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8396 /* Return 1 if X is all 0s and 2 if x is all 1s
8397 in supported SSE/AVX vector mode. */
8400 standard_sse_constant_p (rtx x
)
8402 enum machine_mode mode
= GET_MODE (x
);
8404 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8406 if (vector_all_ones_operand (x
, mode
))
8428 /* Return the opcode of the special instruction to be used to load
8432 standard_sse_constant_opcode (rtx insn
, rtx x
)
8434 switch (standard_sse_constant_p (x
))
8437 switch (get_attr_mode (insn
))
8440 return "%vpxor\t%0, %d0";
8442 return "%vxorpd\t%0, %d0";
8444 return "%vxorps\t%0, %d0";
8447 return "vpxor\t%x0, %x0, %x0";
8449 return "vxorpd\t%x0, %x0, %x0";
8451 return "vxorps\t%x0, %x0, %x0";
8459 return "vpcmpeqd\t%0, %0, %0";
8461 return "pcmpeqd\t%0, %0";
8469 /* Returns true if OP contains a symbol reference */
8472 symbolic_reference_mentioned_p (rtx op
)
8477 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8480 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8481 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8487 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8488 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8492 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8499 /* Return true if it is appropriate to emit `ret' instructions in the
8500 body of a function. Do this only if the epilogue is simple, needing a
8501 couple of insns. Prior to reloading, we can't tell how many registers
8502 must be saved, so return false then. Return false if there is no frame
8503 marker to de-allocate. */
8506 ix86_can_use_return_insn_p (void)
8508 struct ix86_frame frame
;
8510 if (! reload_completed
|| frame_pointer_needed
)
8513 /* Don't allow more than 32k pop, since that's all we can do
8514 with one instruction. */
8515 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8518 ix86_compute_frame_layout (&frame
);
8519 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8520 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8523 /* Value should be nonzero if functions must have frame pointers.
8524 Zero means the frame pointer need not be set up (and parms may
8525 be accessed via the stack pointer) in functions that seem suitable. */
8528 ix86_frame_pointer_required (void)
8530 /* If we accessed previous frames, then the generated code expects
8531 to be able to access the saved ebp value in our frame. */
8532 if (cfun
->machine
->accesses_prev_frame
)
8535 /* Several x86 os'es need a frame pointer for other reasons,
8536 usually pertaining to setjmp. */
8537 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8540 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8541 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8544 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8545 allocation is 4GB. */
8546 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8549 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8550 turns off the frame pointer by default. Turn it back on now if
8551 we've not got a leaf function. */
8552 if (TARGET_OMIT_LEAF_FRAME_POINTER
8554 || ix86_current_function_calls_tls_descriptor
))
8557 if (crtl
->profile
&& !flag_fentry
)
8563 /* Record that the current function accesses previous call frames. */
8566 ix86_setup_frame_addresses (void)
8568 cfun
->machine
->accesses_prev_frame
= 1;
8571 #ifndef USE_HIDDEN_LINKONCE
8572 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8573 # define USE_HIDDEN_LINKONCE 1
8575 # define USE_HIDDEN_LINKONCE 0
8579 static int pic_labels_used
;
8581 /* Fills in the label name that should be used for a pc thunk for
8582 the given register. */
8585 get_pc_thunk_name (char name
[32], unsigned int regno
)
8587 gcc_assert (!TARGET_64BIT
);
8589 if (USE_HIDDEN_LINKONCE
)
8590 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8592 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8596 /* This function generates code for -fpic that loads %ebx with
8597 the return address of the caller and then returns. */
8600 ix86_code_end (void)
8605 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8610 if (!(pic_labels_used
& (1 << regno
)))
8613 get_pc_thunk_name (name
, regno
);
8615 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8616 get_identifier (name
),
8617 build_function_type_list (void_type_node
, NULL_TREE
));
8618 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8619 NULL_TREE
, void_type_node
);
8620 TREE_PUBLIC (decl
) = 1;
8621 TREE_STATIC (decl
) = 1;
8622 DECL_IGNORED_P (decl
) = 1;
8627 switch_to_section (darwin_sections
[text_coal_section
]);
8628 fputs ("\t.weak_definition\t", asm_out_file
);
8629 assemble_name (asm_out_file
, name
);
8630 fputs ("\n\t.private_extern\t", asm_out_file
);
8631 assemble_name (asm_out_file
, name
);
8632 putc ('\n', asm_out_file
);
8633 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8634 DECL_WEAK (decl
) = 1;
8638 if (USE_HIDDEN_LINKONCE
)
8640 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8642 targetm
.asm_out
.unique_section (decl
, 0);
8643 switch_to_section (get_named_section (decl
, NULL
, 0));
8645 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8646 fputs ("\t.hidden\t", asm_out_file
);
8647 assemble_name (asm_out_file
, name
);
8648 putc ('\n', asm_out_file
);
8649 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8653 switch_to_section (text_section
);
8654 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8657 DECL_INITIAL (decl
) = make_node (BLOCK
);
8658 current_function_decl
= decl
;
8659 init_function_start (decl
);
8660 first_function_block_is_cold
= false;
8661 /* Make sure unwind info is emitted for the thunk if needed. */
8662 final_start_function (emit_barrier (), asm_out_file
, 1);
8664 /* Pad stack IP move with 4 instructions (two NOPs count
8665 as one instruction). */
8666 if (TARGET_PAD_SHORT_FUNCTION
)
8671 fputs ("\tnop\n", asm_out_file
);
8674 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8675 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8676 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8677 fputs ("\tret\n", asm_out_file
);
8678 final_end_function ();
8679 init_insn_lengths ();
8680 free_after_compilation (cfun
);
8682 current_function_decl
= NULL
;
8685 if (flag_split_stack
)
8686 file_end_indicate_split_stack ();
8689 /* Emit code for the SET_GOT patterns. */
8692 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8698 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8700 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8701 xops
[2] = gen_rtx_MEM (Pmode
,
8702 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8703 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8705 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8706 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8707 an unadorned address. */
8708 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8709 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8710 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8714 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8718 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8720 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8723 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8724 is what will be referenced by the Mach-O PIC subsystem. */
8726 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8729 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8730 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8735 get_pc_thunk_name (name
, REGNO (dest
));
8736 pic_labels_used
|= 1 << REGNO (dest
);
8738 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8739 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8740 output_asm_insn ("call\t%X2", xops
);
8741 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8742 is what will be referenced by the Mach-O PIC subsystem. */
8745 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8747 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8748 CODE_LABEL_NUMBER (label
));
8753 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8758 /* Generate an "push" pattern for input ARG. */
8763 struct machine_function
*m
= cfun
->machine
;
8765 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8766 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8767 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8769 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8770 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8772 return gen_rtx_SET (VOIDmode
,
8773 gen_rtx_MEM (word_mode
,
8774 gen_rtx_PRE_DEC (Pmode
,
8775 stack_pointer_rtx
)),
8779 /* Generate an "pop" pattern for input ARG. */
8784 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8785 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8787 return gen_rtx_SET (VOIDmode
,
8789 gen_rtx_MEM (word_mode
,
8790 gen_rtx_POST_INC (Pmode
,
8791 stack_pointer_rtx
)));
8794 /* Return >= 0 if there is an unused call-clobbered register available
8795 for the entire function. */
8798 ix86_select_alt_pic_regnum (void)
8802 && !ix86_current_function_calls_tls_descriptor
)
8805 /* Can't use the same register for both PIC and DRAP. */
8807 drap
= REGNO (crtl
->drap_reg
);
8810 for (i
= 2; i
>= 0; --i
)
8811 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8815 return INVALID_REGNUM
;
8818 /* Return TRUE if we need to save REGNO. */
8821 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8823 if (pic_offset_table_rtx
8824 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8825 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8827 || crtl
->calls_eh_return
8828 || crtl
->uses_const_pool
))
8829 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8831 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8836 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8837 if (test
== INVALID_REGNUM
)
8844 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8847 return (df_regs_ever_live_p (regno
)
8848 && !call_used_regs
[regno
]
8849 && !fixed_regs
[regno
]
8850 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8853 /* Return number of saved general prupose registers. */
8856 ix86_nsaved_regs (void)
8861 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8862 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8867 /* Return number of saved SSE registrers. */
8870 ix86_nsaved_sseregs (void)
8875 if (!TARGET_64BIT_MS_ABI
)
8877 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8878 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8883 /* Given FROM and TO register numbers, say whether this elimination is
8884 allowed. If stack alignment is needed, we can only replace argument
8885 pointer with hard frame pointer, or replace frame pointer with stack
8886 pointer. Otherwise, frame pointer elimination is automatically
8887 handled and all other eliminations are valid. */
8890 ix86_can_eliminate (const int from
, const int to
)
8892 if (stack_realign_fp
)
8893 return ((from
== ARG_POINTER_REGNUM
8894 && to
== HARD_FRAME_POINTER_REGNUM
)
8895 || (from
== FRAME_POINTER_REGNUM
8896 && to
== STACK_POINTER_REGNUM
));
8898 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8901 /* Return the offset between two registers, one to be eliminated, and the other
8902 its replacement, at the start of a routine. */
8905 ix86_initial_elimination_offset (int from
, int to
)
8907 struct ix86_frame frame
;
8908 ix86_compute_frame_layout (&frame
);
8910 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8911 return frame
.hard_frame_pointer_offset
;
8912 else if (from
== FRAME_POINTER_REGNUM
8913 && to
== HARD_FRAME_POINTER_REGNUM
)
8914 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8917 gcc_assert (to
== STACK_POINTER_REGNUM
);
8919 if (from
== ARG_POINTER_REGNUM
)
8920 return frame
.stack_pointer_offset
;
8922 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8923 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8927 /* In a dynamically-aligned function, we can't know the offset from
8928 stack pointer to frame pointer, so we must ensure that setjmp
8929 eliminates fp against the hard fp (%ebp) rather than trying to
8930 index from %esp up to the top of the frame across a gap that is
8931 of unknown (at compile-time) size. */
8933 ix86_builtin_setjmp_frame_value (void)
8935 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8938 /* When using -fsplit-stack, the allocation routines set a field in
8939 the TCB to the bottom of the stack plus this much space, measured
8942 #define SPLIT_STACK_AVAILABLE 256
8944 /* Fill structure ix86_frame about frame of currently computed function. */
8947 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8949 unsigned HOST_WIDE_INT stack_alignment_needed
;
8950 HOST_WIDE_INT offset
;
8951 unsigned HOST_WIDE_INT preferred_alignment
;
8952 HOST_WIDE_INT size
= get_frame_size ();
8953 HOST_WIDE_INT to_allocate
;
8955 frame
->nregs
= ix86_nsaved_regs ();
8956 frame
->nsseregs
= ix86_nsaved_sseregs ();
8958 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8959 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8961 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8962 function prologues and leaf. */
8963 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8964 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
8965 || ix86_current_function_calls_tls_descriptor
))
8967 preferred_alignment
= 16;
8968 stack_alignment_needed
= 16;
8969 crtl
->preferred_stack_boundary
= 128;
8970 crtl
->stack_alignment_needed
= 128;
8973 gcc_assert (!size
|| stack_alignment_needed
);
8974 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8975 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8977 /* For SEH we have to limit the amount of code movement into the prologue.
8978 At present we do this via a BLOCKAGE, at which point there's very little
8979 scheduling that can be done, which means that there's very little point
8980 in doing anything except PUSHs. */
8982 cfun
->machine
->use_fast_prologue_epilogue
= false;
8984 /* During reload iteration the amount of registers saved can change.
8985 Recompute the value as needed. Do not recompute when amount of registers
8986 didn't change as reload does multiple calls to the function and does not
8987 expect the decision to change within single iteration. */
8988 else if (!optimize_function_for_size_p (cfun
)
8989 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
8991 int count
= frame
->nregs
;
8992 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
8994 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
8996 /* The fast prologue uses move instead of push to save registers. This
8997 is significantly longer, but also executes faster as modern hardware
8998 can execute the moves in parallel, but can't do that for push/pop.
9000 Be careful about choosing what prologue to emit: When function takes
9001 many instructions to execute we may use slow version as well as in
9002 case function is known to be outside hot spot (this is known with
9003 feedback only). Weight the size of function by number of registers
9004 to save as it is cheap to use one or two push instructions but very
9005 slow to use many of them. */
9007 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9008 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9009 || (flag_branch_probabilities
9010 && node
->frequency
< NODE_FREQUENCY_HOT
))
9011 cfun
->machine
->use_fast_prologue_epilogue
= false;
9013 cfun
->machine
->use_fast_prologue_epilogue
9014 = !expensive_function_p (count
);
9017 frame
->save_regs_using_mov
9018 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9019 /* If static stack checking is enabled and done with probes,
9020 the registers need to be saved before allocating the frame. */
9021 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9023 /* Skip return address. */
9024 offset
= UNITS_PER_WORD
;
9026 /* Skip pushed static chain. */
9027 if (ix86_static_chain_on_stack
)
9028 offset
+= UNITS_PER_WORD
;
9030 /* Skip saved base pointer. */
9031 if (frame_pointer_needed
)
9032 offset
+= UNITS_PER_WORD
;
9033 frame
->hfp_save_offset
= offset
;
9035 /* The traditional frame pointer location is at the top of the frame. */
9036 frame
->hard_frame_pointer_offset
= offset
;
9038 /* Register save area */
9039 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9040 frame
->reg_save_offset
= offset
;
9042 /* On SEH target, registers are pushed just before the frame pointer
9045 frame
->hard_frame_pointer_offset
= offset
;
9047 /* Align and set SSE register save area. */
9048 if (frame
->nsseregs
)
9050 /* The only ABI that has saved SSE registers (Win64) also has a
9051 16-byte aligned default stack, and thus we don't need to be
9052 within the re-aligned local stack frame to save them. */
9053 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9054 offset
= (offset
+ 16 - 1) & -16;
9055 offset
+= frame
->nsseregs
* 16;
9057 frame
->sse_reg_save_offset
= offset
;
9059 /* The re-aligned stack starts here. Values before this point are not
9060 directly comparable with values below this point. In order to make
9061 sure that no value happens to be the same before and after, force
9062 the alignment computation below to add a non-zero value. */
9063 if (stack_realign_fp
)
9064 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9067 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9068 offset
+= frame
->va_arg_size
;
9070 /* Align start of frame for local function. */
9071 if (stack_realign_fp
9072 || offset
!= frame
->sse_reg_save_offset
9075 || cfun
->calls_alloca
9076 || ix86_current_function_calls_tls_descriptor
)
9077 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9079 /* Frame pointer points here. */
9080 frame
->frame_pointer_offset
= offset
;
9084 /* Add outgoing arguments area. Can be skipped if we eliminated
9085 all the function calls as dead code.
9086 Skipping is however impossible when function calls alloca. Alloca
9087 expander assumes that last crtl->outgoing_args_size
9088 of stack frame are unused. */
9089 if (ACCUMULATE_OUTGOING_ARGS
9090 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9091 || ix86_current_function_calls_tls_descriptor
))
9093 offset
+= crtl
->outgoing_args_size
;
9094 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9097 frame
->outgoing_arguments_size
= 0;
9099 /* Align stack boundary. Only needed if we're calling another function
9101 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9102 || ix86_current_function_calls_tls_descriptor
)
9103 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9105 /* We've reached end of stack frame. */
9106 frame
->stack_pointer_offset
= offset
;
9108 /* Size prologue needs to allocate. */
9109 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9111 if ((!to_allocate
&& frame
->nregs
<= 1)
9112 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9113 frame
->save_regs_using_mov
= false;
9115 if (ix86_using_red_zone ()
9116 && crtl
->sp_is_unchanging
9118 && !ix86_current_function_calls_tls_descriptor
)
9120 frame
->red_zone_size
= to_allocate
;
9121 if (frame
->save_regs_using_mov
)
9122 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9123 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9124 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9127 frame
->red_zone_size
= 0;
9128 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9130 /* The SEH frame pointer location is near the bottom of the frame.
9131 This is enforced by the fact that the difference between the
9132 stack pointer and the frame pointer is limited to 240 bytes in
9133 the unwind data structure. */
9138 /* If we can leave the frame pointer where it is, do so. Also, returns
9139 the establisher frame for __builtin_frame_address (0). */
9140 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9141 if (diff
<= SEH_MAX_FRAME_SIZE
9142 && (diff
> 240 || (diff
& 15) != 0)
9143 && !crtl
->accesses_prior_frames
)
9145 /* Ideally we'd determine what portion of the local stack frame
9146 (within the constraint of the lowest 240) is most heavily used.
9147 But without that complication, simply bias the frame pointer
9148 by 128 bytes so as to maximize the amount of the local stack
9149 frame that is addressable with 8-bit offsets. */
9150 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9155 /* This is semi-inlined memory_address_length, but simplified
9156 since we know that we're always dealing with reg+offset, and
9157 to avoid having to create and discard all that rtl. */
9160 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9166 /* EBP and R13 cannot be encoded without an offset. */
9167 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9169 else if (IN_RANGE (offset
, -128, 127))
9172 /* ESP and R12 must be encoded with a SIB byte. */
9173 if (regno
== SP_REG
|| regno
== R12_REG
)
9179 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9180 The valid base registers are taken from CFUN->MACHINE->FS. */
9183 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9185 const struct machine_function
*m
= cfun
->machine
;
9186 rtx base_reg
= NULL
;
9187 HOST_WIDE_INT base_offset
= 0;
9189 if (m
->use_fast_prologue_epilogue
)
9191 /* Choose the base register most likely to allow the most scheduling
9192 opportunities. Generally FP is valid throughout the function,
9193 while DRAP must be reloaded within the epilogue. But choose either
9194 over the SP due to increased encoding size. */
9198 base_reg
= hard_frame_pointer_rtx
;
9199 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9201 else if (m
->fs
.drap_valid
)
9203 base_reg
= crtl
->drap_reg
;
9204 base_offset
= 0 - cfa_offset
;
9206 else if (m
->fs
.sp_valid
)
9208 base_reg
= stack_pointer_rtx
;
9209 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9214 HOST_WIDE_INT toffset
;
9217 /* Choose the base register with the smallest address encoding.
9218 With a tie, choose FP > DRAP > SP. */
9221 base_reg
= stack_pointer_rtx
;
9222 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9223 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9225 if (m
->fs
.drap_valid
)
9227 toffset
= 0 - cfa_offset
;
9228 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9231 base_reg
= crtl
->drap_reg
;
9232 base_offset
= toffset
;
9238 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9239 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9242 base_reg
= hard_frame_pointer_rtx
;
9243 base_offset
= toffset
;
9248 gcc_assert (base_reg
!= NULL
);
9250 return plus_constant (Pmode
, base_reg
, base_offset
);
9253 /* Emit code to save registers in the prologue. */
9256 ix86_emit_save_regs (void)
9261 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9262 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9264 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9265 RTX_FRAME_RELATED_P (insn
) = 1;
9269 /* Emit a single register save at CFA - CFA_OFFSET. */
9272 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9273 HOST_WIDE_INT cfa_offset
)
9275 struct machine_function
*m
= cfun
->machine
;
9276 rtx reg
= gen_rtx_REG (mode
, regno
);
9277 rtx mem
, addr
, base
, insn
;
9279 addr
= choose_baseaddr (cfa_offset
);
9280 mem
= gen_frame_mem (mode
, addr
);
9282 /* For SSE saves, we need to indicate the 128-bit alignment. */
9283 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9285 insn
= emit_move_insn (mem
, reg
);
9286 RTX_FRAME_RELATED_P (insn
) = 1;
9289 if (GET_CODE (base
) == PLUS
)
9290 base
= XEXP (base
, 0);
9291 gcc_checking_assert (REG_P (base
));
9293 /* When saving registers into a re-aligned local stack frame, avoid
9294 any tricky guessing by dwarf2out. */
9295 if (m
->fs
.realigned
)
9297 gcc_checking_assert (stack_realign_drap
);
9299 if (regno
== REGNO (crtl
->drap_reg
))
9301 /* A bit of a hack. We force the DRAP register to be saved in
9302 the re-aligned stack frame, which provides us with a copy
9303 of the CFA that will last past the prologue. Install it. */
9304 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9305 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9306 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9307 mem
= gen_rtx_MEM (mode
, addr
);
9308 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9312 /* The frame pointer is a stable reference within the
9313 aligned frame. Use it. */
9314 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9315 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9316 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9317 mem
= gen_rtx_MEM (mode
, addr
);
9318 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9319 gen_rtx_SET (VOIDmode
, mem
, reg
));
9323 /* The memory may not be relative to the current CFA register,
9324 which means that we may need to generate a new pattern for
9325 use by the unwind info. */
9326 else if (base
!= m
->fs
.cfa_reg
)
9328 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9329 m
->fs
.cfa_offset
- cfa_offset
);
9330 mem
= gen_rtx_MEM (mode
, addr
);
9331 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9335 /* Emit code to save registers using MOV insns.
9336 First register is stored at CFA - CFA_OFFSET. */
9338 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9342 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9343 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9345 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9346 cfa_offset
-= UNITS_PER_WORD
;
9350 /* Emit code to save SSE registers using MOV insns.
9351 First register is stored at CFA - CFA_OFFSET. */
9353 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9357 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9358 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9360 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9365 static GTY(()) rtx queued_cfa_restores
;
9367 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9368 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9369 Don't add the note if the previously saved value will be left untouched
9370 within stack red-zone till return, as unwinders can find the same value
9371 in the register and on the stack. */
9374 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9376 if (!crtl
->shrink_wrapped
9377 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9382 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9383 RTX_FRAME_RELATED_P (insn
) = 1;
9387 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9390 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9393 ix86_add_queued_cfa_restore_notes (rtx insn
)
9396 if (!queued_cfa_restores
)
9398 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9400 XEXP (last
, 1) = REG_NOTES (insn
);
9401 REG_NOTES (insn
) = queued_cfa_restores
;
9402 queued_cfa_restores
= NULL_RTX
;
9403 RTX_FRAME_RELATED_P (insn
) = 1;
9406 /* Expand prologue or epilogue stack adjustment.
9407 The pattern exist to put a dependency on all ebp-based memory accesses.
9408 STYLE should be negative if instructions should be marked as frame related,
9409 zero if %r11 register is live and cannot be freely used and positive
9413 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9414 int style
, bool set_cfa
)
9416 struct machine_function
*m
= cfun
->machine
;
9418 bool add_frame_related_expr
= false;
9420 if (Pmode
== SImode
)
9421 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9422 else if (x86_64_immediate_operand (offset
, DImode
))
9423 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9427 /* r11 is used by indirect sibcall return as well, set before the
9428 epilogue and used after the epilogue. */
9430 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9433 gcc_assert (src
!= hard_frame_pointer_rtx
9434 && dest
!= hard_frame_pointer_rtx
);
9435 tmp
= hard_frame_pointer_rtx
;
9437 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9439 add_frame_related_expr
= true;
9441 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9444 insn
= emit_insn (insn
);
9446 ix86_add_queued_cfa_restore_notes (insn
);
9452 gcc_assert (m
->fs
.cfa_reg
== src
);
9453 m
->fs
.cfa_offset
+= INTVAL (offset
);
9454 m
->fs
.cfa_reg
= dest
;
9456 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9457 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9458 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9459 RTX_FRAME_RELATED_P (insn
) = 1;
9463 RTX_FRAME_RELATED_P (insn
) = 1;
9464 if (add_frame_related_expr
)
9466 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9467 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9468 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9472 if (dest
== stack_pointer_rtx
)
9474 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9475 bool valid
= m
->fs
.sp_valid
;
9477 if (src
== hard_frame_pointer_rtx
)
9479 valid
= m
->fs
.fp_valid
;
9480 ooffset
= m
->fs
.fp_offset
;
9482 else if (src
== crtl
->drap_reg
)
9484 valid
= m
->fs
.drap_valid
;
9489 /* Else there are two possibilities: SP itself, which we set
9490 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9491 taken care of this by hand along the eh_return path. */
9492 gcc_checking_assert (src
== stack_pointer_rtx
9493 || offset
== const0_rtx
);
9496 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9497 m
->fs
.sp_valid
= valid
;
9501 /* Find an available register to be used as dynamic realign argument
9502 pointer regsiter. Such a register will be written in prologue and
9503 used in begin of body, so it must not be
9504 1. parameter passing register.
9506 We reuse static-chain register if it is available. Otherwise, we
9507 use DI for i386 and R13 for x86-64. We chose R13 since it has
9510 Return: the regno of chosen register. */
9513 find_drap_reg (void)
9515 tree decl
= cfun
->decl
;
9519 /* Use R13 for nested function or function need static chain.
9520 Since function with tail call may use any caller-saved
9521 registers in epilogue, DRAP must not use caller-saved
9522 register in such case. */
9523 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9530 /* Use DI for nested function or function need static chain.
9531 Since function with tail call may use any caller-saved
9532 registers in epilogue, DRAP must not use caller-saved
9533 register in such case. */
9534 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9537 /* Reuse static chain register if it isn't used for parameter
9539 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9541 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9542 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9549 /* Return minimum incoming stack alignment. */
9552 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9554 unsigned int incoming_stack_boundary
;
9556 /* Prefer the one specified at command line. */
9557 if (ix86_user_incoming_stack_boundary
)
9558 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9559 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9560 if -mstackrealign is used, it isn't used for sibcall check and
9561 estimated stack alignment is 128bit. */
9564 && ix86_force_align_arg_pointer
9565 && crtl
->stack_alignment_estimated
== 128)
9566 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9568 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9570 /* Incoming stack alignment can be changed on individual functions
9571 via force_align_arg_pointer attribute. We use the smallest
9572 incoming stack boundary. */
9573 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9574 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9575 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9576 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9578 /* The incoming stack frame has to be aligned at least at
9579 parm_stack_boundary. */
9580 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9581 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9583 /* Stack at entrance of main is aligned by runtime. We use the
9584 smallest incoming stack boundary. */
9585 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9586 && DECL_NAME (current_function_decl
)
9587 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9588 && DECL_FILE_SCOPE_P (current_function_decl
))
9589 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9591 return incoming_stack_boundary
;
9594 /* Update incoming stack boundary and estimated stack alignment. */
9597 ix86_update_stack_boundary (void)
9599 ix86_incoming_stack_boundary
9600 = ix86_minimum_incoming_stack_boundary (false);
9602 /* x86_64 vararg needs 16byte stack alignment for register save
9606 && crtl
->stack_alignment_estimated
< 128)
9607 crtl
->stack_alignment_estimated
= 128;
9610 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9611 needed or an rtx for DRAP otherwise. */
9614 ix86_get_drap_rtx (void)
9616 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9617 crtl
->need_drap
= true;
9619 if (stack_realign_drap
)
9621 /* Assign DRAP to vDRAP and returns vDRAP */
9622 unsigned int regno
= find_drap_reg ();
9627 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9628 crtl
->drap_reg
= arg_ptr
;
9631 drap_vreg
= copy_to_reg (arg_ptr
);
9635 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9638 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9639 RTX_FRAME_RELATED_P (insn
) = 1;
9647 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9650 ix86_internal_arg_pointer (void)
9652 return virtual_incoming_args_rtx
;
9655 struct scratch_reg
{
9660 /* Return a short-lived scratch register for use on function entry.
9661 In 32-bit mode, it is valid only after the registers are saved
9662 in the prologue. This register must be released by means of
9663 release_scratch_register_on_entry once it is dead. */
9666 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9674 /* We always use R11 in 64-bit mode. */
9679 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9681 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9683 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9684 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9685 int regparm
= ix86_function_regparm (fntype
, decl
);
9687 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9689 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9690 for the static chain register. */
9691 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9692 && drap_regno
!= AX_REG
)
9694 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9695 for the static chain register. */
9696 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9698 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9700 /* ecx is the static chain register. */
9701 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9703 && drap_regno
!= CX_REG
)
9705 else if (ix86_save_reg (BX_REG
, true))
9707 /* esi is the static chain register. */
9708 else if (!(regparm
== 3 && static_chain_p
)
9709 && ix86_save_reg (SI_REG
, true))
9711 else if (ix86_save_reg (DI_REG
, true))
9715 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9720 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9723 rtx insn
= emit_insn (gen_push (sr
->reg
));
9724 RTX_FRAME_RELATED_P (insn
) = 1;
9728 /* Release a scratch register obtained from the preceding function. */
9731 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9735 struct machine_function
*m
= cfun
->machine
;
9736 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9738 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9739 RTX_FRAME_RELATED_P (insn
) = 1;
9740 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9741 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9742 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9743 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9747 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9749 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9752 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9754 /* We skip the probe for the first interval + a small dope of 4 words and
9755 probe that many bytes past the specified size to maintain a protection
9756 area at the botton of the stack. */
9757 const int dope
= 4 * UNITS_PER_WORD
;
9758 rtx size_rtx
= GEN_INT (size
), last
;
9760 /* See if we have a constant small number of probes to generate. If so,
9761 that's the easy case. The run-time loop is made up of 11 insns in the
9762 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9763 for n # of intervals. */
9764 if (size
<= 5 * PROBE_INTERVAL
)
9766 HOST_WIDE_INT i
, adjust
;
9767 bool first_probe
= true;
9769 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9770 values of N from 1 until it exceeds SIZE. If only one probe is
9771 needed, this will not generate any code. Then adjust and probe
9772 to PROBE_INTERVAL + SIZE. */
9773 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9777 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9778 first_probe
= false;
9781 adjust
= PROBE_INTERVAL
;
9783 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9784 plus_constant (Pmode
, stack_pointer_rtx
,
9786 emit_stack_probe (stack_pointer_rtx
);
9790 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9792 adjust
= size
+ PROBE_INTERVAL
- i
;
9794 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9795 plus_constant (Pmode
, stack_pointer_rtx
,
9797 emit_stack_probe (stack_pointer_rtx
);
9799 /* Adjust back to account for the additional first interval. */
9800 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9801 plus_constant (Pmode
, stack_pointer_rtx
,
9802 PROBE_INTERVAL
+ dope
)));
9805 /* Otherwise, do the same as above, but in a loop. Note that we must be
9806 extra careful with variables wrapping around because we might be at
9807 the very top (or the very bottom) of the address space and we have
9808 to be able to handle this case properly; in particular, we use an
9809 equality test for the loop condition. */
9812 HOST_WIDE_INT rounded_size
;
9813 struct scratch_reg sr
;
9815 get_scratch_register_on_entry (&sr
);
9818 /* Step 1: round SIZE to the previous multiple of the interval. */
9820 rounded_size
= size
& -PROBE_INTERVAL
;
9823 /* Step 2: compute initial and final value of the loop counter. */
9825 /* SP = SP_0 + PROBE_INTERVAL. */
9826 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9827 plus_constant (Pmode
, stack_pointer_rtx
,
9828 - (PROBE_INTERVAL
+ dope
))));
9830 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9831 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9832 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9833 gen_rtx_PLUS (Pmode
, sr
.reg
,
9834 stack_pointer_rtx
)));
9839 while (SP != LAST_ADDR)
9841 SP = SP + PROBE_INTERVAL
9845 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9846 values of N from 1 until it is equal to ROUNDED_SIZE. */
9848 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9851 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9852 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9854 if (size
!= rounded_size
)
9856 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9857 plus_constant (Pmode
, stack_pointer_rtx
,
9858 rounded_size
- size
)));
9859 emit_stack_probe (stack_pointer_rtx
);
9862 /* Adjust back to account for the additional first interval. */
9863 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9864 plus_constant (Pmode
, stack_pointer_rtx
,
9865 PROBE_INTERVAL
+ dope
)));
9867 release_scratch_register_on_entry (&sr
);
9870 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9872 /* Even if the stack pointer isn't the CFA register, we need to correctly
9873 describe the adjustments made to it, in particular differentiate the
9874 frame-related ones from the frame-unrelated ones. */
9877 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9878 XVECEXP (expr
, 0, 0)
9879 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9880 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9881 XVECEXP (expr
, 0, 1)
9882 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9883 plus_constant (Pmode
, stack_pointer_rtx
,
9884 PROBE_INTERVAL
+ dope
+ size
));
9885 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9886 RTX_FRAME_RELATED_P (last
) = 1;
9888 cfun
->machine
->fs
.sp_offset
+= size
;
9891 /* Make sure nothing is scheduled before we are done. */
9892 emit_insn (gen_blockage ());
9895 /* Adjust the stack pointer up to REG while probing it. */
9898 output_adjust_stack_and_probe (rtx reg
)
9900 static int labelno
= 0;
9901 char loop_lab
[32], end_lab
[32];
9904 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9905 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9907 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9909 /* Jump to END_LAB if SP == LAST_ADDR. */
9910 xops
[0] = stack_pointer_rtx
;
9912 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9913 fputs ("\tje\t", asm_out_file
);
9914 assemble_name_raw (asm_out_file
, end_lab
);
9915 fputc ('\n', asm_out_file
);
9917 /* SP = SP + PROBE_INTERVAL. */
9918 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9919 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9922 xops
[1] = const0_rtx
;
9923 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9925 fprintf (asm_out_file
, "\tjmp\t");
9926 assemble_name_raw (asm_out_file
, loop_lab
);
9927 fputc ('\n', asm_out_file
);
9929 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9934 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9935 inclusive. These are offsets from the current stack pointer. */
9938 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9940 /* See if we have a constant small number of probes to generate. If so,
9941 that's the easy case. The run-time loop is made up of 7 insns in the
9942 generic case while the compile-time loop is made up of n insns for n #
9944 if (size
<= 7 * PROBE_INTERVAL
)
9948 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9949 it exceeds SIZE. If only one probe is needed, this will not
9950 generate any code. Then probe at FIRST + SIZE. */
9951 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9952 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9955 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9959 /* Otherwise, do the same as above, but in a loop. Note that we must be
9960 extra careful with variables wrapping around because we might be at
9961 the very top (or the very bottom) of the address space and we have
9962 to be able to handle this case properly; in particular, we use an
9963 equality test for the loop condition. */
9966 HOST_WIDE_INT rounded_size
, last
;
9967 struct scratch_reg sr
;
9969 get_scratch_register_on_entry (&sr
);
9972 /* Step 1: round SIZE to the previous multiple of the interval. */
9974 rounded_size
= size
& -PROBE_INTERVAL
;
9977 /* Step 2: compute initial and final value of the loop counter. */
9979 /* TEST_OFFSET = FIRST. */
9980 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9982 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9983 last
= first
+ rounded_size
;
9988 while (TEST_ADDR != LAST_ADDR)
9990 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9994 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9995 until it is equal to ROUNDED_SIZE. */
9997 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10000 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10001 that SIZE is equal to ROUNDED_SIZE. */
10003 if (size
!= rounded_size
)
10004 emit_stack_probe (plus_constant (Pmode
,
10005 gen_rtx_PLUS (Pmode
,
10008 rounded_size
- size
));
10010 release_scratch_register_on_entry (&sr
);
10013 /* Make sure nothing is scheduled before we are done. */
10014 emit_insn (gen_blockage ());
10017 /* Probe a range of stack addresses from REG to END, inclusive. These are
10018 offsets from the current stack pointer. */
10021 output_probe_stack_range (rtx reg
, rtx end
)
10023 static int labelno
= 0;
10024 char loop_lab
[32], end_lab
[32];
10027 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10028 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10030 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10032 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10035 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10036 fputs ("\tje\t", asm_out_file
);
10037 assemble_name_raw (asm_out_file
, end_lab
);
10038 fputc ('\n', asm_out_file
);
10040 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10041 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10042 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10044 /* Probe at TEST_ADDR. */
10045 xops
[0] = stack_pointer_rtx
;
10047 xops
[2] = const0_rtx
;
10048 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10050 fprintf (asm_out_file
, "\tjmp\t");
10051 assemble_name_raw (asm_out_file
, loop_lab
);
10052 fputc ('\n', asm_out_file
);
10054 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10059 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10060 to be generated in correct form. */
10062 ix86_finalize_stack_realign_flags (void)
10064 /* Check if stack realign is really needed after reload, and
10065 stores result in cfun */
10066 unsigned int incoming_stack_boundary
10067 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10068 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10069 unsigned int stack_realign
= (incoming_stack_boundary
10071 ? crtl
->max_used_stack_slot_alignment
10072 : crtl
->stack_alignment_needed
));
10074 if (crtl
->stack_realign_finalized
)
10076 /* After stack_realign_needed is finalized, we can't no longer
10078 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10082 /* If the only reason for frame_pointer_needed is that we conservatively
10083 assumed stack realignment might be needed, but in the end nothing that
10084 needed the stack alignment had been spilled, clear frame_pointer_needed
10085 and say we don't need stack realignment. */
10087 && !crtl
->need_drap
10088 && frame_pointer_needed
10090 && flag_omit_frame_pointer
10091 && crtl
->sp_is_unchanging
10092 && !ix86_current_function_calls_tls_descriptor
10093 && !crtl
->accesses_prior_frames
10094 && !cfun
->calls_alloca
10095 && !crtl
->calls_eh_return
10096 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10097 && !ix86_frame_pointer_required ()
10098 && get_frame_size () == 0
10099 && ix86_nsaved_sseregs () == 0
10100 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10102 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10105 CLEAR_HARD_REG_SET (prologue_used
);
10106 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10107 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10108 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10109 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10110 HARD_FRAME_POINTER_REGNUM
);
10114 FOR_BB_INSNS (bb
, insn
)
10115 if (NONDEBUG_INSN_P (insn
)
10116 && requires_stack_frame_p (insn
, prologue_used
,
10117 set_up_by_prologue
))
10119 crtl
->stack_realign_needed
= stack_realign
;
10120 crtl
->stack_realign_finalized
= true;
10125 frame_pointer_needed
= false;
10126 stack_realign
= false;
10127 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10128 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10129 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10130 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10131 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10132 df_finish_pass (true);
10133 df_scan_alloc (NULL
);
10135 df_compute_regs_ever_live (true);
10139 crtl
->stack_realign_needed
= stack_realign
;
10140 crtl
->stack_realign_finalized
= true;
10143 /* Expand the prologue into a bunch of separate insns. */
10146 ix86_expand_prologue (void)
10148 struct machine_function
*m
= cfun
->machine
;
10151 struct ix86_frame frame
;
10152 HOST_WIDE_INT allocate
;
10153 bool int_registers_saved
;
10154 bool sse_registers_saved
;
10156 ix86_finalize_stack_realign_flags ();
10158 /* DRAP should not coexist with stack_realign_fp */
10159 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10161 memset (&m
->fs
, 0, sizeof (m
->fs
));
10163 /* Initialize CFA state for before the prologue. */
10164 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10165 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10167 /* Track SP offset to the CFA. We continue tracking this after we've
10168 swapped the CFA register away from SP. In the case of re-alignment
10169 this is fudged; we're interested to offsets within the local frame. */
10170 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10171 m
->fs
.sp_valid
= true;
10173 ix86_compute_frame_layout (&frame
);
10175 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10177 /* We should have already generated an error for any use of
10178 ms_hook on a nested function. */
10179 gcc_checking_assert (!ix86_static_chain_on_stack
);
10181 /* Check if profiling is active and we shall use profiling before
10182 prologue variant. If so sorry. */
10183 if (crtl
->profile
&& flag_fentry
!= 0)
10184 sorry ("ms_hook_prologue attribute isn%'t compatible "
10185 "with -mfentry for 32-bit");
10187 /* In ix86_asm_output_function_label we emitted:
10188 8b ff movl.s %edi,%edi
10190 8b ec movl.s %esp,%ebp
10192 This matches the hookable function prologue in Win32 API
10193 functions in Microsoft Windows XP Service Pack 2 and newer.
10194 Wine uses this to enable Windows apps to hook the Win32 API
10195 functions provided by Wine.
10197 What that means is that we've already set up the frame pointer. */
10199 if (frame_pointer_needed
10200 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10204 /* We've decided to use the frame pointer already set up.
10205 Describe this to the unwinder by pretending that both
10206 push and mov insns happen right here.
10208 Putting the unwind info here at the end of the ms_hook
10209 is done so that we can make absolutely certain we get
10210 the required byte sequence at the start of the function,
10211 rather than relying on an assembler that can produce
10212 the exact encoding required.
10214 However it does mean (in the unpatched case) that we have
10215 a 1 insn window where the asynchronous unwind info is
10216 incorrect. However, if we placed the unwind info at
10217 its correct location we would have incorrect unwind info
10218 in the patched case. Which is probably all moot since
10219 I don't expect Wine generates dwarf2 unwind info for the
10220 system libraries that use this feature. */
10222 insn
= emit_insn (gen_blockage ());
10224 push
= gen_push (hard_frame_pointer_rtx
);
10225 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10226 stack_pointer_rtx
);
10227 RTX_FRAME_RELATED_P (push
) = 1;
10228 RTX_FRAME_RELATED_P (mov
) = 1;
10230 RTX_FRAME_RELATED_P (insn
) = 1;
10231 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10232 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10234 /* Note that gen_push incremented m->fs.cfa_offset, even
10235 though we didn't emit the push insn here. */
10236 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10237 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10238 m
->fs
.fp_valid
= true;
10242 /* The frame pointer is not needed so pop %ebp again.
10243 This leaves us with a pristine state. */
10244 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10248 /* The first insn of a function that accepts its static chain on the
10249 stack is to push the register that would be filled in by a direct
10250 call. This insn will be skipped by the trampoline. */
10251 else if (ix86_static_chain_on_stack
)
10253 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10254 emit_insn (gen_blockage ());
10256 /* We don't want to interpret this push insn as a register save,
10257 only as a stack adjustment. The real copy of the register as
10258 a save will be done later, if needed. */
10259 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10260 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10261 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10262 RTX_FRAME_RELATED_P (insn
) = 1;
10265 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10266 of DRAP is needed and stack realignment is really needed after reload */
10267 if (stack_realign_drap
)
10269 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10271 /* Only need to push parameter pointer reg if it is caller saved. */
10272 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10274 /* Push arg pointer reg */
10275 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10276 RTX_FRAME_RELATED_P (insn
) = 1;
10279 /* Grab the argument pointer. */
10280 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10281 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10282 RTX_FRAME_RELATED_P (insn
) = 1;
10283 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10284 m
->fs
.cfa_offset
= 0;
10286 /* Align the stack. */
10287 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10289 GEN_INT (-align_bytes
)));
10290 RTX_FRAME_RELATED_P (insn
) = 1;
10292 /* Replicate the return address on the stack so that return
10293 address can be reached via (argp - 1) slot. This is needed
10294 to implement macro RETURN_ADDR_RTX and intrinsic function
10295 expand_builtin_return_addr etc. */
10296 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10297 t
= gen_frame_mem (word_mode
, t
);
10298 insn
= emit_insn (gen_push (t
));
10299 RTX_FRAME_RELATED_P (insn
) = 1;
10301 /* For the purposes of frame and register save area addressing,
10302 we've started over with a new frame. */
10303 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10304 m
->fs
.realigned
= true;
10307 int_registers_saved
= (frame
.nregs
== 0);
10308 sse_registers_saved
= (frame
.nsseregs
== 0);
10310 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10312 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10313 slower on all targets. Also sdb doesn't like it. */
10314 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10315 RTX_FRAME_RELATED_P (insn
) = 1;
10317 /* Push registers now, before setting the frame pointer
10319 if (!int_registers_saved
10321 && !frame
.save_regs_using_mov
)
10323 ix86_emit_save_regs ();
10324 int_registers_saved
= true;
10325 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10328 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10330 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10331 RTX_FRAME_RELATED_P (insn
) = 1;
10333 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10334 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10335 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10336 m
->fs
.fp_valid
= true;
10340 if (!int_registers_saved
)
10342 /* If saving registers via PUSH, do so now. */
10343 if (!frame
.save_regs_using_mov
)
10345 ix86_emit_save_regs ();
10346 int_registers_saved
= true;
10347 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10350 /* When using red zone we may start register saving before allocating
10351 the stack frame saving one cycle of the prologue. However, avoid
10352 doing this if we have to probe the stack; at least on x86_64 the
10353 stack probe can turn into a call that clobbers a red zone location. */
10354 else if (ix86_using_red_zone ()
10355 && (! TARGET_STACK_PROBE
10356 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10358 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10359 int_registers_saved
= true;
10363 if (stack_realign_fp
)
10365 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10366 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10368 /* The computation of the size of the re-aligned stack frame means
10369 that we must allocate the size of the register save area before
10370 performing the actual alignment. Otherwise we cannot guarantee
10371 that there's enough storage above the realignment point. */
10372 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10373 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10374 GEN_INT (m
->fs
.sp_offset
10375 - frame
.sse_reg_save_offset
),
10378 /* Align the stack. */
10379 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10381 GEN_INT (-align_bytes
)));
10383 /* For the purposes of register save area addressing, the stack
10384 pointer is no longer valid. As for the value of sp_offset,
10385 see ix86_compute_frame_layout, which we need to match in order
10386 to pass verification of stack_pointer_offset at the end. */
10387 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10388 m
->fs
.sp_valid
= false;
10391 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10393 if (flag_stack_usage_info
)
10395 /* We start to count from ARG_POINTER. */
10396 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10398 /* If it was realigned, take into account the fake frame. */
10399 if (stack_realign_drap
)
10401 if (ix86_static_chain_on_stack
)
10402 stack_size
+= UNITS_PER_WORD
;
10404 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10405 stack_size
+= UNITS_PER_WORD
;
10407 /* This over-estimates by 1 minimal-stack-alignment-unit but
10408 mitigates that by counting in the new return address slot. */
10409 current_function_dynamic_stack_size
10410 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10413 current_function_static_stack_size
= stack_size
;
10416 /* On SEH target with very large frame size, allocate an area to save
10417 SSE registers (as the very large allocation won't be described). */
10419 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10420 && !sse_registers_saved
)
10422 HOST_WIDE_INT sse_size
=
10423 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10425 gcc_assert (int_registers_saved
);
10427 /* No need to do stack checking as the area will be immediately
10429 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10430 GEN_INT (-sse_size
), -1,
10431 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10432 allocate
-= sse_size
;
10433 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10434 sse_registers_saved
= true;
10437 /* The stack has already been decremented by the instruction calling us
10438 so probe if the size is non-negative to preserve the protection area. */
10439 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10441 /* We expect the registers to be saved when probes are used. */
10442 gcc_assert (int_registers_saved
);
10444 if (STACK_CHECK_MOVING_SP
)
10446 ix86_adjust_stack_and_probe (allocate
);
10451 HOST_WIDE_INT size
= allocate
;
10453 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10454 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10456 if (TARGET_STACK_PROBE
)
10457 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10459 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10465 else if (!ix86_target_stack_probe ()
10466 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10468 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10469 GEN_INT (-allocate
), -1,
10470 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10474 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10476 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10477 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10478 bool eax_live
= false;
10479 bool r10_live
= false;
10482 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10483 if (!TARGET_64BIT_MS_ABI
)
10484 eax_live
= ix86_eax_live_at_start_p ();
10486 /* Note that SEH directives need to continue tracking the stack
10487 pointer even after the frame pointer has been set up. */
10490 insn
= emit_insn (gen_push (eax
));
10491 allocate
-= UNITS_PER_WORD
;
10492 if (sp_is_cfa_reg
|| TARGET_SEH
)
10495 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10496 RTX_FRAME_RELATED_P (insn
) = 1;
10502 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10503 insn
= emit_insn (gen_push (r10
));
10504 allocate
-= UNITS_PER_WORD
;
10505 if (sp_is_cfa_reg
|| TARGET_SEH
)
10508 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10509 RTX_FRAME_RELATED_P (insn
) = 1;
10513 emit_move_insn (eax
, GEN_INT (allocate
));
10514 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10516 /* Use the fact that AX still contains ALLOCATE. */
10517 adjust_stack_insn
= (Pmode
== DImode
10518 ? gen_pro_epilogue_adjust_stack_di_sub
10519 : gen_pro_epilogue_adjust_stack_si_sub
);
10521 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10522 stack_pointer_rtx
, eax
));
10524 if (sp_is_cfa_reg
|| TARGET_SEH
)
10527 m
->fs
.cfa_offset
+= allocate
;
10528 RTX_FRAME_RELATED_P (insn
) = 1;
10529 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10530 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10531 plus_constant (Pmode
, stack_pointer_rtx
,
10534 m
->fs
.sp_offset
+= allocate
;
10536 if (r10_live
&& eax_live
)
10538 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10539 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10540 gen_frame_mem (word_mode
, t
));
10541 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10542 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10543 gen_frame_mem (word_mode
, t
));
10545 else if (eax_live
|| r10_live
)
10547 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10548 emit_move_insn (gen_rtx_REG (word_mode
,
10549 (eax_live
? AX_REG
: R10_REG
)),
10550 gen_frame_mem (word_mode
, t
));
10553 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10555 /* If we havn't already set up the frame pointer, do so now. */
10556 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10558 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10559 GEN_INT (frame
.stack_pointer_offset
10560 - frame
.hard_frame_pointer_offset
));
10561 insn
= emit_insn (insn
);
10562 RTX_FRAME_RELATED_P (insn
) = 1;
10563 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10565 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10566 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10567 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10568 m
->fs
.fp_valid
= true;
10571 if (!int_registers_saved
)
10572 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10573 if (!sse_registers_saved
)
10574 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10576 pic_reg_used
= false;
10577 if (pic_offset_table_rtx
10578 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10581 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10583 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10584 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10586 pic_reg_used
= true;
10593 if (ix86_cmodel
== CM_LARGE_PIC
)
10595 rtx label
, tmp_reg
;
10597 gcc_assert (Pmode
== DImode
);
10598 label
= gen_label_rtx ();
10599 emit_label (label
);
10600 LABEL_PRESERVE_P (label
) = 1;
10601 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10602 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10603 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10605 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10606 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10607 pic_offset_table_rtx
, tmp_reg
));
10610 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10614 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10615 RTX_FRAME_RELATED_P (insn
) = 1;
10616 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10620 /* In the pic_reg_used case, make sure that the got load isn't deleted
10621 when mcount needs it. Blockage to avoid call movement across mcount
10622 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10624 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10625 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10627 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10629 /* vDRAP is setup but after reload it turns out stack realign
10630 isn't necessary, here we will emit prologue to setup DRAP
10631 without stack realign adjustment */
10632 t
= choose_baseaddr (0);
10633 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10636 /* Prevent instructions from being scheduled into register save push
10637 sequence when access to the redzone area is done through frame pointer.
10638 The offset between the frame pointer and the stack pointer is calculated
10639 relative to the value of the stack pointer at the end of the function
10640 prologue, and moving instructions that access redzone area via frame
10641 pointer inside push sequence violates this assumption. */
10642 if (frame_pointer_needed
&& frame
.red_zone_size
)
10643 emit_insn (gen_memory_blockage ());
10645 /* Emit cld instruction if stringops are used in the function. */
10646 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10647 emit_insn (gen_cld ());
10649 /* SEH requires that the prologue end within 256 bytes of the start of
10650 the function. Prevent instruction schedules that would extend that.
10651 Further, prevent alloca modifications to the stack pointer from being
10652 combined with prologue modifications. */
10654 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10657 /* Emit code to restore REG using a POP insn. */
10660 ix86_emit_restore_reg_using_pop (rtx reg
)
10662 struct machine_function
*m
= cfun
->machine
;
10663 rtx insn
= emit_insn (gen_pop (reg
));
10665 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10666 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10668 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10669 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10671 /* Previously we'd represented the CFA as an expression
10672 like *(%ebp - 8). We've just popped that value from
10673 the stack, which means we need to reset the CFA to
10674 the drap register. This will remain until we restore
10675 the stack pointer. */
10676 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10677 RTX_FRAME_RELATED_P (insn
) = 1;
10679 /* This means that the DRAP register is valid for addressing too. */
10680 m
->fs
.drap_valid
= true;
10684 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10686 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10687 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10688 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10689 RTX_FRAME_RELATED_P (insn
) = 1;
10691 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10694 /* When the frame pointer is the CFA, and we pop it, we are
10695 swapping back to the stack pointer as the CFA. This happens
10696 for stack frames that don't allocate other data, so we assume
10697 the stack pointer is now pointing at the return address, i.e.
10698 the function entry state, which makes the offset be 1 word. */
10699 if (reg
== hard_frame_pointer_rtx
)
10701 m
->fs
.fp_valid
= false;
10702 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10704 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10705 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10707 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10708 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10709 GEN_INT (m
->fs
.cfa_offset
)));
10710 RTX_FRAME_RELATED_P (insn
) = 1;
10715 /* Emit code to restore saved registers using POP insns. */
10718 ix86_emit_restore_regs_using_pop (void)
10720 unsigned int regno
;
10722 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10723 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10724 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10727 /* Emit code and notes for the LEAVE instruction. */
10730 ix86_emit_leave (void)
10732 struct machine_function
*m
= cfun
->machine
;
10733 rtx insn
= emit_insn (ix86_gen_leave ());
10735 ix86_add_queued_cfa_restore_notes (insn
);
10737 gcc_assert (m
->fs
.fp_valid
);
10738 m
->fs
.sp_valid
= true;
10739 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10740 m
->fs
.fp_valid
= false;
10742 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10744 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10745 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10747 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10748 plus_constant (Pmode
, stack_pointer_rtx
,
10750 RTX_FRAME_RELATED_P (insn
) = 1;
10752 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10756 /* Emit code to restore saved registers using MOV insns.
10757 First register is restored from CFA - CFA_OFFSET. */
10759 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10760 bool maybe_eh_return
)
10762 struct machine_function
*m
= cfun
->machine
;
10763 unsigned int regno
;
10765 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10766 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10768 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10771 mem
= choose_baseaddr (cfa_offset
);
10772 mem
= gen_frame_mem (word_mode
, mem
);
10773 insn
= emit_move_insn (reg
, mem
);
10775 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10777 /* Previously we'd represented the CFA as an expression
10778 like *(%ebp - 8). We've just popped that value from
10779 the stack, which means we need to reset the CFA to
10780 the drap register. This will remain until we restore
10781 the stack pointer. */
10782 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10783 RTX_FRAME_RELATED_P (insn
) = 1;
10785 /* This means that the DRAP register is valid for addressing. */
10786 m
->fs
.drap_valid
= true;
10789 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10791 cfa_offset
-= UNITS_PER_WORD
;
10795 /* Emit code to restore saved registers using MOV insns.
10796 First register is restored from CFA - CFA_OFFSET. */
10798 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10799 bool maybe_eh_return
)
10801 unsigned int regno
;
10803 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10804 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10806 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10809 mem
= choose_baseaddr (cfa_offset
);
10810 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10811 set_mem_align (mem
, 128);
10812 emit_move_insn (reg
, mem
);
10814 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10820 /* Restore function stack, frame, and registers. */
10823 ix86_expand_epilogue (int style
)
10825 struct machine_function
*m
= cfun
->machine
;
10826 struct machine_frame_state frame_state_save
= m
->fs
;
10827 struct ix86_frame frame
;
10828 bool restore_regs_via_mov
;
10831 ix86_finalize_stack_realign_flags ();
10832 ix86_compute_frame_layout (&frame
);
10834 m
->fs
.sp_valid
= (!frame_pointer_needed
10835 || (crtl
->sp_is_unchanging
10836 && !stack_realign_fp
));
10837 gcc_assert (!m
->fs
.sp_valid
10838 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10840 /* The FP must be valid if the frame pointer is present. */
10841 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10842 gcc_assert (!m
->fs
.fp_valid
10843 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10845 /* We must have *some* valid pointer to the stack frame. */
10846 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10848 /* The DRAP is never valid at this point. */
10849 gcc_assert (!m
->fs
.drap_valid
);
10851 /* See the comment about red zone and frame
10852 pointer usage in ix86_expand_prologue. */
10853 if (frame_pointer_needed
&& frame
.red_zone_size
)
10854 emit_insn (gen_memory_blockage ());
10856 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10857 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10859 /* Determine the CFA offset of the end of the red-zone. */
10860 m
->fs
.red_zone_offset
= 0;
10861 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10863 /* The red-zone begins below the return address. */
10864 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10866 /* When the register save area is in the aligned portion of
10867 the stack, determine the maximum runtime displacement that
10868 matches up with the aligned frame. */
10869 if (stack_realign_drap
)
10870 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10874 /* Special care must be taken for the normal return case of a function
10875 using eh_return: the eax and edx registers are marked as saved, but
10876 not restored along this path. Adjust the save location to match. */
10877 if (crtl
->calls_eh_return
&& style
!= 2)
10878 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10880 /* EH_RETURN requires the use of moves to function properly. */
10881 if (crtl
->calls_eh_return
)
10882 restore_regs_via_mov
= true;
10883 /* SEH requires the use of pops to identify the epilogue. */
10884 else if (TARGET_SEH
)
10885 restore_regs_via_mov
= false;
10886 /* If we're only restoring one register and sp is not valid then
10887 using a move instruction to restore the register since it's
10888 less work than reloading sp and popping the register. */
10889 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10890 restore_regs_via_mov
= true;
10891 else if (TARGET_EPILOGUE_USING_MOVE
10892 && cfun
->machine
->use_fast_prologue_epilogue
10893 && (frame
.nregs
> 1
10894 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10895 restore_regs_via_mov
= true;
10896 else if (frame_pointer_needed
10898 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10899 restore_regs_via_mov
= true;
10900 else if (frame_pointer_needed
10901 && TARGET_USE_LEAVE
10902 && cfun
->machine
->use_fast_prologue_epilogue
10903 && frame
.nregs
== 1)
10904 restore_regs_via_mov
= true;
10906 restore_regs_via_mov
= false;
10908 if (restore_regs_via_mov
|| frame
.nsseregs
)
10910 /* Ensure that the entire register save area is addressable via
10911 the stack pointer, if we will restore via sp. */
10913 && m
->fs
.sp_offset
> 0x7fffffff
10914 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10915 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10917 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10918 GEN_INT (m
->fs
.sp_offset
10919 - frame
.sse_reg_save_offset
),
10921 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10925 /* If there are any SSE registers to restore, then we have to do it
10926 via moves, since there's obviously no pop for SSE regs. */
10927 if (frame
.nsseregs
)
10928 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10931 if (restore_regs_via_mov
)
10936 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10938 /* eh_return epilogues need %ecx added to the stack pointer. */
10941 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10943 /* Stack align doesn't work with eh_return. */
10944 gcc_assert (!stack_realign_drap
);
10945 /* Neither does regparm nested functions. */
10946 gcc_assert (!ix86_static_chain_on_stack
);
10948 if (frame_pointer_needed
)
10950 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10951 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10952 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10954 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10955 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10957 /* Note that we use SA as a temporary CFA, as the return
10958 address is at the proper place relative to it. We
10959 pretend this happens at the FP restore insn because
10960 prior to this insn the FP would be stored at the wrong
10961 offset relative to SA, and after this insn we have no
10962 other reasonable register to use for the CFA. We don't
10963 bother resetting the CFA to the SP for the duration of
10964 the return insn. */
10965 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10966 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10967 ix86_add_queued_cfa_restore_notes (insn
);
10968 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10969 RTX_FRAME_RELATED_P (insn
) = 1;
10971 m
->fs
.cfa_reg
= sa
;
10972 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10973 m
->fs
.fp_valid
= false;
10975 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10976 const0_rtx
, style
, false);
10980 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10981 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10982 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10983 ix86_add_queued_cfa_restore_notes (insn
);
10985 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10986 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
10988 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10989 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10990 plus_constant (Pmode
, stack_pointer_rtx
,
10992 RTX_FRAME_RELATED_P (insn
) = 1;
10995 m
->fs
.sp_offset
= UNITS_PER_WORD
;
10996 m
->fs
.sp_valid
= true;
11001 /* SEH requires that the function end with (1) a stack adjustment
11002 if necessary, (2) a sequence of pops, and (3) a return or
11003 jump instruction. Prevent insns from the function body from
11004 being scheduled into this sequence. */
11007 /* Prevent a catch region from being adjacent to the standard
11008 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11009 several other flags that would be interesting to test are
11011 if (flag_non_call_exceptions
)
11012 emit_insn (gen_nops (const1_rtx
));
11014 emit_insn (gen_blockage ());
11017 /* First step is to deallocate the stack frame so that we can
11018 pop the registers. Also do it on SEH target for very large
11019 frame as the emitted instructions aren't allowed by the ABI in
11021 if (!m
->fs
.sp_valid
11023 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11024 >= SEH_MAX_FRAME_SIZE
)))
11026 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11027 GEN_INT (m
->fs
.fp_offset
11028 - frame
.reg_save_offset
),
11031 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11033 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11034 GEN_INT (m
->fs
.sp_offset
11035 - frame
.reg_save_offset
),
11037 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11040 ix86_emit_restore_regs_using_pop ();
11043 /* If we used a stack pointer and haven't already got rid of it,
11045 if (m
->fs
.fp_valid
)
11047 /* If the stack pointer is valid and pointing at the frame
11048 pointer store address, then we only need a pop. */
11049 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11050 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11051 /* Leave results in shorter dependency chains on CPUs that are
11052 able to grok it fast. */
11053 else if (TARGET_USE_LEAVE
11054 || optimize_function_for_size_p (cfun
)
11055 || !cfun
->machine
->use_fast_prologue_epilogue
)
11056 ix86_emit_leave ();
11059 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11060 hard_frame_pointer_rtx
,
11061 const0_rtx
, style
, !using_drap
);
11062 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11068 int param_ptr_offset
= UNITS_PER_WORD
;
11071 gcc_assert (stack_realign_drap
);
11073 if (ix86_static_chain_on_stack
)
11074 param_ptr_offset
+= UNITS_PER_WORD
;
11075 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11076 param_ptr_offset
+= UNITS_PER_WORD
;
11078 insn
= emit_insn (gen_rtx_SET
11079 (VOIDmode
, stack_pointer_rtx
,
11080 gen_rtx_PLUS (Pmode
,
11082 GEN_INT (-param_ptr_offset
))));
11083 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11084 m
->fs
.cfa_offset
= param_ptr_offset
;
11085 m
->fs
.sp_offset
= param_ptr_offset
;
11086 m
->fs
.realigned
= false;
11088 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11089 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11090 GEN_INT (param_ptr_offset
)));
11091 RTX_FRAME_RELATED_P (insn
) = 1;
11093 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11094 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11097 /* At this point the stack pointer must be valid, and we must have
11098 restored all of the registers. We may not have deallocated the
11099 entire stack frame. We've delayed this until now because it may
11100 be possible to merge the local stack deallocation with the
11101 deallocation forced by ix86_static_chain_on_stack. */
11102 gcc_assert (m
->fs
.sp_valid
);
11103 gcc_assert (!m
->fs
.fp_valid
);
11104 gcc_assert (!m
->fs
.realigned
);
11105 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11107 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11108 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11112 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11114 /* Sibcall epilogues don't want a return instruction. */
11117 m
->fs
= frame_state_save
;
11121 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11123 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11125 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11126 address, do explicit add, and jump indirectly to the caller. */
11128 if (crtl
->args
.pops_args
>= 65536)
11130 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11133 /* There is no "pascal" calling convention in any 64bit ABI. */
11134 gcc_assert (!TARGET_64BIT
);
11136 insn
= emit_insn (gen_pop (ecx
));
11137 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11138 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11140 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11141 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11142 add_reg_note (insn
, REG_CFA_REGISTER
,
11143 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11144 RTX_FRAME_RELATED_P (insn
) = 1;
11146 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11148 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11151 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11154 emit_jump_insn (gen_simple_return_internal ());
11156 /* Restore the state back to the state from the prologue,
11157 so that it's correct for the next epilogue. */
11158 m
->fs
= frame_state_save
;
11161 /* Reset from the function's potential modifications. */
11164 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11165 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11167 if (pic_offset_table_rtx
)
11168 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11170 /* Mach-O doesn't support labels at the end of objects, so if
11171 it looks like we might want one, insert a NOP. */
11173 rtx insn
= get_last_insn ();
11174 rtx deleted_debug_label
= NULL_RTX
;
11177 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11179 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11180 notes only, instead set their CODE_LABEL_NUMBER to -1,
11181 otherwise there would be code generation differences
11182 in between -g and -g0. */
11183 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11184 deleted_debug_label
= insn
;
11185 insn
= PREV_INSN (insn
);
11190 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11191 fputs ("\tnop\n", file
);
11192 else if (deleted_debug_label
)
11193 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11194 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11195 CODE_LABEL_NUMBER (insn
) = -1;
11201 /* Return a scratch register to use in the split stack prologue. The
11202 split stack prologue is used for -fsplit-stack. It is the first
11203 instructions in the function, even before the regular prologue.
11204 The scratch register can be any caller-saved register which is not
11205 used for parameters or for the static chain. */
/* NOTE(review): damaged extraction -- embedded line numbers jump
   (11208 -> 11214, 11231 -> 11235, ...), so braces, the 64-bit early
   return, and some register choices are missing from this view.
   Code left byte-identical; only comments added.  */
11207 static unsigned int
11208 split_stack_prologue_scratch_regno (void)
11214 bool is_fastcall
, is_thiscall
;
/* Decide which caller-saved registers are free based on the calling
   convention attributes of the current function's type.  */
11217 is_fastcall
= (lookup_attribute ("fastcall",
11218 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11220 is_thiscall
= (lookup_attribute ("thiscall",
11221 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11223 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
/* fastcall + static chain leaves no register free: report and give up.  */
11227 if (DECL_STATIC_CHAIN (cfun
->decl
))
11229 sorry ("-fsplit-stack does not support fastcall with "
11230 "nested function");
11231 return INVALID_REGNUM
;
11235 else if (is_thiscall
)
11237 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11241 else if (regparm
< 3)
11243 if (!DECL_STATIC_CHAIN (cfun
->decl
))
/* Two register parameters plus a static chain also exhausts the
   caller-saved registers.  */
11249 sorry ("-fsplit-stack does not support 2 register "
11250 " parameters for a nested function");
11251 return INVALID_REGNUM
;
11258 /* FIXME: We could make this work by pushing a register
11259 around the addition and comparison. */
11260 sorry ("-fsplit-stack does not support 3 register parameters");
11261 return INVALID_REGNUM
;
11266 /* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack: "__morestack".  Created lazily in
   ix86_expand_split_stack_prologue; GTY(()) so the GC roots it.  */
11269 static GTY(()) rtx split_stack_fn
;
11271 /* A SYMBOL_REF for the more stack function when using the large
   code model: "__morestack_large_model" (see
   ix86_expand_split_stack_prologue).  */
11274 static GTY(()) rtx split_stack_fn_large
;
11276 /* Handle -fsplit-stack. These are the first instructions in the
11277 function, even before the regular prologue. */
/* NOTE(review): damaged extraction -- embedded line numbers jump
   throughout (11287 -> 11290, 11332 -> 11337, ...), so the return type,
   braces, some declarations (offset, fn, reg10, reg11, rax, x, frame_reg,
   words, i) and parts of conditions are missing from this view.  Code
   left byte-identical; only comments added.
   Emits: compare sp-minus-frame against the TCB stack bound, branch to
   LABEL if enough stack, otherwise call __morestack, then fall through.  */
11280 ix86_expand_split_stack_prologue (void)
11282 struct ix86_frame frame
;
11283 HOST_WIDE_INT allocate
;
11284 unsigned HOST_WIDE_INT args_size
;
11285 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11286 rtx scratch_reg
= NULL_RTX
;
11287 rtx varargs_label
= NULL_RTX
;
11290 gcc_assert (flag_split_stack
&& reload_completed
);
11292 ix86_finalize_stack_realign_flags ();
11293 ix86_compute_frame_layout (&frame
);
/* Total bytes this function needs below the incoming stack pointer.  */
11294 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11296 /* This is the label we will branch to if we have enough stack
11297 space. We expect the basic block reordering pass to reverse this
11298 branch if optimizing, so that we branch in the unlikely case. */
11299 label
= gen_label_rtx ();
11301 /* We need to compare the stack pointer minus the frame size with
11302 the stack boundary in the TCB. The stack boundary always gives
11303 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11304 can compare directly. Otherwise we need to do an addition. */
/* The stack bound lives in thread-local storage; UNSPEC_STACK_CHECK
   wrapped in a MEM is how the TCB slot is addressed.  */
11306 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11307 UNSPEC_STACK_CHECK
);
11308 limit
= gen_rtx_CONST (Pmode
, limit
);
11309 limit
= gen_rtx_MEM (Pmode
, limit
);
11310 if (allocate
< SPLIT_STACK_AVAILABLE
)
11311 current
= stack_pointer_rtx
;
11314 unsigned int scratch_regno
;
11317 /* We need a scratch register to hold the stack pointer minus
11318 the required frame size. Since this is the very start of the
11319 function, the scratch register can be any caller-saved
11320 register which is not used for parameters. */
11321 offset
= GEN_INT (- allocate
);
11322 scratch_regno
= split_stack_prologue_scratch_regno ();
11323 if (scratch_regno
== INVALID_REGNUM
)
11325 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11326 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11328 /* We don't use ix86_gen_add3 in this case because it will
11329 want to split to lea, but when not optimizing the insn
11330 will not be split after this point. */
11331 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11332 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
/* Offset too big for an immediate on x86-64: load it first, then add.  */
11337 emit_move_insn (scratch_reg
, offset
);
11338 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11339 stack_pointer_rtx
));
11341 current
= scratch_reg
;
/* Branch to LABEL (the "enough stack" path) when current >= limit.  */
11344 ix86_expand_branch (GEU
, current
, limit
, label
);
11345 jump_insn
= get_last_insn ();
11346 JUMP_LABEL (jump_insn
) = label
;
11348 /* Mark the jump as very likely to be taken. */
11349 add_reg_note (jump_insn
, REG_BR_PROB
,
11350 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
/* Lazily create the __morestack symbol (cached across functions).  */
11352 if (split_stack_fn
== NULL_RTX
)
11353 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11354 fn
= split_stack_fn
;
11356 /* Get more stack space. We pass in the desired stack space and the
11357 size of the arguments to copy to the new stack. In 32-bit mode
11358 we push the parameters; __morestack will return on a new stack
11359 anyhow. In 64-bit mode we pass the parameters in r10 and
11361 allocate_rtx
= GEN_INT (allocate
);
11362 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11363 call_fusage
= NULL_RTX
;
11368 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11369 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11371 /* If this function uses a static chain, it will be in %r10.
11372 Preserve it across the call to __morestack. */
11373 if (DECL_STATIC_CHAIN (cfun
->decl
))
11377 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11378 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11379 use_reg (&call_fusage
, rax
);
11382 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11384 HOST_WIDE_INT argval
;
11386 gcc_assert (Pmode
== DImode
);
11387 /* When using the large model we need to load the address
11388 into a register, and we've run out of registers. So we
11389 switch to a different calling convention, and we call a
11390 different function: __morestack_large. We pass the
11391 argument size in the upper 32 bits of r10 and pass the
11392 frame size in the lower 32 bits. */
11393 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11394 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11396 if (split_stack_fn_large
== NULL_RTX
)
11397 split_stack_fn_large
=
11398 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11400 if (ix86_cmodel
== CM_LARGE_PIC
)
/* Large PIC: materialize the GOT base (rip-relative label + GOT
   offset) and load the callee address from the GOT into %r11.  */
11404 label
= gen_label_rtx ();
11405 emit_label (label
);
11406 LABEL_PRESERVE_P (label
) = 1;
11407 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11408 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11409 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11410 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11412 x
= gen_rtx_CONST (Pmode
, x
);
11413 emit_move_insn (reg11
, x
);
11414 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11415 x
= gen_const_mem (Pmode
, x
);
11416 emit_move_insn (reg11
, x
);
11419 emit_move_insn (reg11
, split_stack_fn_large
);
/* Pack args_size in the high 32 bits of %r10, frame size in the low
   32 bits (the large-model __morestack convention noted above).  */
11423 argval
= ((args_size
<< 16) << 16) + allocate
;
11424 emit_move_insn (reg10
, GEN_INT (argval
));
/* Normal 64-bit convention: frame size in %r10, args size in %r11.  */
11428 emit_move_insn (reg10
, allocate_rtx
);
11429 emit_move_insn (reg11
, GEN_INT (args_size
));
11430 use_reg (&call_fusage
, reg11
);
11433 use_reg (&call_fusage
, reg10
);
/* 32-bit mode: pass both parameters on the stack.  */
11437 emit_insn (gen_push (GEN_INT (args_size
)));
11438 emit_insn (gen_push (allocate_rtx
));
11440 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11441 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11443 add_function_usage_to (call_insn
, call_fusage
);
11445 /* In order to make call/return prediction work right, we now need
11446 to execute a return instruction. See
11447 libgcc/config/i386/morestack.S for the details on how this works.
11449 For flow purposes gcc must not see this as a return
11450 instruction--we need control flow to continue at the subsequent
11451 label. Therefore, we use an unspec. */
11452 gcc_assert (crtl
->args
.pops_args
< 65536);
11453 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11455 /* If we are in 64-bit mode and this function uses a static chain,
11456 we saved %r10 in %rax before calling _morestack. */
11457 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11458 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11459 gen_rtx_REG (word_mode
, AX_REG
));
11461 /* If this function calls va_start, we need to store a pointer to
11462 the arguments on the old stack, because they may not have been
11463 all copied to the new stack. At this point the old stack can be
11464 found at the frame pointer value used by __morestack, because
11465 __morestack has set that up before calling back to us. Here we
11466 store that pointer in a scratch register, and in
11467 ix86_expand_prologue we store the scratch register in a stack
11469 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11471 unsigned int scratch_regno
;
11475 scratch_regno
= split_stack_prologue_scratch_regno ();
11476 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11477 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* Stack layout (per the surrounding text) below the frame pointer
   __morestack established; the word counts differ per mode:  */
11481 return address within this function
11482 return address of caller of this function
11484 So we add three words to get to the stack arguments.
11488 return address within this function
11489 first argument to __morestack
11490 second argument to __morestack
11491 return address of caller of this function
11493 So we add five words to get to the stack arguments.
11495 words
= TARGET_64BIT
? 3 : 5;
11496 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11497 gen_rtx_PLUS (Pmode
, frame_reg
,
11498 GEN_INT (words
* UNITS_PER_WORD
))));
/* Jump over the "enough stack" setup to the common join point.  */
11500 varargs_label
= gen_label_rtx ();
11501 emit_jump_insn (gen_jump (varargs_label
));
11502 JUMP_LABEL (get_last_insn ()) = varargs_label
;
/* "Enough stack" path joins here.  */
11507 emit_label (label
);
11508 LABEL_NUSES (label
) = 1;
11510 /* If this function calls va_start, we now have to set the scratch
11511 register for the case where we do not call __morestack. In this
11512 case we need to set it based on the stack pointer. */
11513 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11515 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11516 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11517 GEN_INT (UNITS_PER_WORD
))));
11519 emit_label (varargs_label
);
11520 LABEL_NUSES (varargs_label
) = 1;
11524 /* We may have to tell the dataflow pass that the split stack prologue
11525 is initializing a scratch register. */
/* NOTE(review): return type and braces were dropped by the extraction
   (line numbers jump 11525 -> 11528).  Code left byte-identical.
   Marks the split-stack scratch register live on function entry so
   dataflow does not consider it dead before the prologue sets it.  */
11528 ix86_live_on_entry (bitmap regs
)
11530 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11532 gcc_assert (flag_split_stack
);
11533 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11537 /* Determine if op is suitable SUBREG RTX for address. */
/* NOTE(review): damaged extraction -- the return type, braces and the
   early "return false" bodies of the two guard conditions are missing
   (line numbers jump 11542 -> 11547, 11549 -> 11552).  Code left
   byte-identical; only comments added.  */
11540 ix86_address_subreg_operand (rtx op
)
11542 enum machine_mode mode
;
11547 mode
= GET_MODE (op
);
/* Addresses are integers; reject FP/vector-mode subregs.  */
11549 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11552 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11553 failures when the register is one word out of a two word structure. */
11554 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11557 /* Allow only SUBREGs of non-eliminable hard registers. */
11558 return register_no_elim_operand (op
, mode
);
11561 /* Extract the parts of an RTL expression that is a valid memory address
11562 for an instruction. Return 0 if the structure of the address is
11563 grossly off. Return -1 if the address contains ASHIFT, so it is not
11564 strictly valid, but still used for computing length of lea instruction. */
/* NOTE(review): damaged extraction -- embedded line numbers jump all
   through this function (11585 -> 11588, 11617 -> 11622, 11675 -> 11681,
   ...), so braces, case labels, early returns and several assignments
   (base handling, out->base / out->disp / out->seg stores) are missing
   from this view.  Code left byte-identical; only comments added.
   Decomposes ADDR into base + index*scale + disp with optional segment,
   filling *OUT.  */
11567 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11569 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11570 rtx base_reg
, index_reg
;
11571 HOST_WIDE_INT scale
= 1;
11572 rtx scale_rtx
= NULL_RTX
;
11575 enum ix86_address_seg seg
= SEG_DEFAULT
;
11577 /* Allow zero-extended SImode addresses,
11578 they will be emitted with addr32 prefix. */
11579 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11581 if (GET_CODE (addr
) == ZERO_EXTEND
11582 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11584 addr
= XEXP (addr
, 0);
11585 if (CONST_INT_P (addr
))
/* (and ADDR 0xffffffff) is an alternate zero-extension form; strip
   it down to the SImode low part.  */
11588 else if (GET_CODE (addr
) == AND
11589 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11591 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11592 if (addr
== NULL_RTX
)
11595 if (CONST_INT_P (addr
))
11600 /* Allow SImode subregs of DImode addresses,
11601 they will be emitted with addr32 prefix. */
11602 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11604 if (GET_CODE (addr
) == SUBREG
11605 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11607 addr
= SUBREG_REG (addr
);
11608 if (CONST_INT_P (addr
))
/* Dispatch on the outermost rtx code of the (possibly stripped) address.  */
11615 else if (GET_CODE (addr
) == SUBREG
)
11617 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11622 else if (GET_CODE (addr
) == PLUS
)
/* Flatten a (possibly nested) PLUS into up to 4 addends, then classify
   each addend as base / index*scale / displacement / segment unspec.  */
11624 rtx addends
[4], op
;
11632 addends
[n
++] = XEXP (op
, 1);
11635 while (GET_CODE (op
) == PLUS
);
11640 for (i
= n
; i
>= 0; --i
)
11643 switch (GET_CODE (op
))
11648 index
= XEXP (op
, 0);
11649 scale_rtx
= XEXP (op
, 1);
/* ASHIFT addend: index shifted left; only shifts 0..3 encode as
   scales 1/2/4/8.  */
11655 index
= XEXP (op
, 0);
11656 tmp
= XEXP (op
, 1);
11657 if (!CONST_INT_P (tmp
))
11659 scale
= INTVAL (tmp
);
11660 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11662 scale
= 1 << scale
;
11667 if (GET_CODE (op
) != UNSPEC
)
/* UNSPEC_TP marks a thread-pointer reference: map it to the %fs
   (64-bit) or %gs (32-bit) segment.  */
11672 if (XINT (op
, 1) == UNSPEC_TP
11673 && TARGET_TLS_DIRECT_SEG_REFS
11674 && seg
== SEG_DEFAULT
)
11675 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11681 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11708 else if (GET_CODE (addr
) == MULT
)
11710 index
= XEXP (addr
, 0); /* index*scale */
11711 scale_rtx
= XEXP (addr
, 1);
11713 else if (GET_CODE (addr
) == ASHIFT
)
11715 /* We're called for lea too, which implements ashift on occasion. */
11716 index
= XEXP (addr
, 0);
11717 tmp
= XEXP (addr
, 1);
11718 if (!CONST_INT_P (tmp
))
11720 scale
= INTVAL (tmp
);
11721 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11723 scale
= 1 << scale
;
11726 else if (CONST_INT_P (addr
))
11728 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11731 /* Constant addresses are sign extended to 64bit, we have to
11732 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11734 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11740 disp
= addr
; /* displacement */
11746 else if (GET_CODE (index
) == SUBREG
11747 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11753 /* Address override works only on the (%reg) part of %fs:(%reg). */
11754 if (seg
!= SEG_DEFAULT
11755 && ((base
&& GET_MODE (base
) != word_mode
)
11756 || (index
&& GET_MODE (index
) != word_mode
)))
11759 /* Extract the integral value of scale. */
11762 if (!CONST_INT_P (scale_rtx
))
11764 scale
= INTVAL (scale_rtx
);
/* Strip SUBREGs so the special-case tests below see the hard regs.  */
11767 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11768 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11770 /* Avoid useless 0 displacement. */
11771 if (disp
== const0_rtx
&& (base
|| index
))
11774 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11775 if (base_reg
&& index_reg
&& scale
== 1
11776 && (index_reg
== arg_pointer_rtx
11777 || index_reg
== frame_pointer_rtx
11778 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
/* %esp cannot be an index; swap base and index so it becomes the base.  */
11781 tmp
= base
, base
= index
, index
= tmp
;
11782 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11785 /* Special case: %ebp cannot be encoded as a base without a displacement.
11789 && (base_reg
== hard_frame_pointer_rtx
11790 || base_reg
== frame_pointer_rtx
11791 || base_reg
== arg_pointer_rtx
11792 || (REG_P (base_reg
)
11793 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11794 || REGNO (base_reg
) == R13_REG
))))
11797 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11798 Avoid this by transforming to [%esi+0].
11799 Reload calls address legitimization without cfun defined, so we need
11800 to test cfun for being non-NULL. */
11801 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11802 && base_reg
&& !index_reg
&& !disp
11803 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11806 /* Special case: encode reg+reg instead of reg*2. */
11807 if (!base
&& index
&& scale
== 2)
11808 base
= index
, base_reg
= index_reg
, scale
= 1;
11810 /* Special case: scaling cannot be encoded without base or displacement. */
11811 if (!base
&& !disp
&& index
&& scale
!= 1)
/* Fill in the output structure (out->base/disp/seg stores are among
   the lines dropped by the extraction).  */
11815 out
->index
= index
;
11817 out
->scale
= scale
;
11823 /* Return cost of the memory address x.
11824 For i386, it is better to use a complex address than let gcc copy
11825 the address into a reg and make a new pseudo. But not if the address
11826 requires to two regs - that would mean more pseudos with longer
11829 ix86_address_cost (rtx x
/* NOTE(review): damaged extraction -- the return type, braces, the
   cost variable, the gcc_assert on OK, the leading halves of the
   two-register conditions and all return statements are missing here
   (line numbers jump 11835 -> 11839, 11849 -> 11853, ...).  Code left
   byte-identical; only comments added.  */
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11830 addr_space_t as ATTRIBUTE_UNUSED
,
11831 bool speed ATTRIBUTE_UNUSED
)
11833 struct ix86_address parts
;
11835 int ok
= ix86_decompose_address (x
, &parts
);
/* Look through SUBREGs so the REG tests below apply to the real regs.  */
11839 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11840 parts
.base
= SUBREG_REG (parts
.base
);
11841 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11842 parts
.index
= SUBREG_REG (parts
.index
);
11844 /* Attempt to minimize number of registers in the address. */
/* Penalize addresses whose base/index are still pseudos (regno >=
   FIRST_PSEUDO_REGISTER) -- each distinct pseudo costs a register.  */
11846 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11848 && (!REG_P (parts
.index
)
11849 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11853 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11855 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11856 && parts
.base
!= parts
.index
)
11859 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11860 since it's predecode logic can't detect the length of instructions
11861 and it degenerates to vector decoded. Increase cost of such
11862 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11863 to split such addresses or even refuse such addresses at all.
11865 Following addressing modes are affected:
11870 The first and last case may be avoidable by explicitly coding the zero in
11871 memory address, but I don't have AMD-K6 machine handy to check this
/* The three affected shapes: [base+index*scale] with no disp,
   [disp+index*scale] with no base, and [base+index] with no disp.  */
11875 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11876 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11877 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
11883 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11884 this is used for to form addresses to local data when -fPIC is in
/* NOTE(review): the comment terminator, return type and braces were
   dropped by the extraction (line numbers jump 11884 -> 11888).  Code
   left byte-identical.  Predicate: true iff DISP is the Mach-O
   pic-base-relative unspec.  */
11888 darwin_local_data_pic (rtx disp
)
11890 return (GET_CODE (disp
) == UNSPEC
11891 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
11894 /* Determine if a given RTX is a valid constant. We already know this
11895 satisfies CONSTANT_P. */
/* NOTE(review): damaged extraction -- case labels (CONST, SYMBOL_REF,
   CONST_DOUBLE, ...), braces and many return true/false lines are
   missing (line numbers jump 11900 -> 11905, 11931 -> 11936, ...).
   Code left byte-identical; only comments added.
   Target hook: whether X may appear as an immediate/constant operand.  */
11898 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11900 switch (GET_CODE (x
))
/* CONST: strip an outer (plus sym int) wrapper, then vet the inner rtx.  */
11905 if (GET_CODE (x
) == PLUS
)
11907 if (!CONST_INT_P (XEXP (x
, 1)))
11912 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11915 /* Only some unspecs are valid as "constants". */
11916 if (GET_CODE (x
) == UNSPEC
)
11917 switch (XINT (x
, 1))
11920 case UNSPEC_GOTOFF
:
11921 case UNSPEC_PLTOFF
:
11922 return TARGET_64BIT
;
11924 case UNSPEC_NTPOFF
:
/* Local-exec TLS offset: valid only for a local-exec SYMBOL_REF.  */
11925 x
= XVECEXP (x
, 0, 0);
11926 return (GET_CODE (x
) == SYMBOL_REF
11927 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11928 case UNSPEC_DTPOFF
:
11929 x
= XVECEXP (x
, 0, 0);
11930 return (GET_CODE (x
) == SYMBOL_REF
11931 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11936 /* We must have drilled down to a symbol. */
11937 if (GET_CODE (x
) == LABEL_REF
)
11939 if (GET_CODE (x
) != SYMBOL_REF
)
11944 /* TLS symbols are never valid. */
11945 if (SYMBOL_REF_TLS_MODEL (x
))
11948 /* DLLIMPORT symbols are never valid. */
11949 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11950 && SYMBOL_REF_DLLIMPORT_P (x
))
11954 /* mdynamic-no-pic */
11955 if (MACHO_DYNAMIC_NO_PIC_P
)
11956 return machopic_symbol_defined_p (x
);
/* Wide (TImode) constants other than zero need further screening.  */
11961 if (GET_MODE (x
) == TImode
11962 && x
!= CONST0_RTX (TImode
)
11968 if (!standard_sse_constant_p (x
))
11975 /* Otherwise we handle everything else in the move patterns. */
11979 /* Determine if it's legal to put X into the constant pool. This
11980 is not possible for the address of thread-local symbols, which
11981 is checked above. */
/* NOTE(review): return type, braces and the switch's integral/vector
   case labels are missing (line numbers jump 11987 -> 11997).  Code
   left byte-identical.  Returns nonzero when X must NOT be forced
   into the constant pool.  */
11984 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
11986 /* We can always put integral constants and vectors in memory. */
11987 switch (GET_CODE (x
))
/* Default: forcing to memory is allowed exactly when X is a
   legitimate constant.  */
11997 return !ix86_legitimate_constant_p (mode
, x
);
12001 /* Nonzero if the constant value X is a legitimate general operand
12002 when generating PIC code. It is given that flag_pic is on and
12003 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): damaged extraction -- return type, braces, the `inner'
   declaration, several case labels (CONST, SYMBOL_REF/LABEL_REF, the
   UNSPEC_NTPOFF label before 12027) and default returns are missing
   (line numbers jump 12006 -> 12010, 12025 -> 12027, ...).  Code left
   byte-identical; only comments added.  */
12006 legitimate_pic_operand_p (rtx x
)
12010 switch (GET_CODE (x
))
/* CONST: strip (plus inner int) then inspect the inner expression.  */
12013 inner
= XEXP (x
, 0);
12014 if (GET_CODE (inner
) == PLUS
12015 && CONST_INT_P (XEXP (inner
, 1)))
12016 inner
= XEXP (inner
, 0);
12018 /* Only some unspecs are valid as "constants". */
12019 if (GET_CODE (inner
) == UNSPEC
)
12020 switch (XINT (inner
, 1))
12023 case UNSPEC_GOTOFF
:
12024 case UNSPEC_PLTOFF
:
12025 return TARGET_64BIT
;
/* (TLS unspec whose case label was dropped by the extraction:)
   valid only for a local-exec SYMBOL_REF.  */
12027 x
= XVECEXP (inner
, 0, 0);
12028 return (GET_CODE (x
) == SYMBOL_REF
12029 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12030 case UNSPEC_MACHOPIC_OFFSET
:
12031 return legitimate_pic_address_disp_p (x
);
/* Symbols and labels fall back to the displacement check.  */
12039 return legitimate_pic_address_disp_p (x
);
12046 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): damaged extraction -- the comment terminator, return
   type, braces, outer case labels (LABEL_REF/SYMBOL_REF/CONST/PLUS),
   and many return 0/1 lines are missing (line numbers jump 12050 ->
   12054, 12092 -> 12100, ...).  Code left byte-identical; only
   comments added.  Checks a CONST rtx for validity as a PIC memory
   displacement.  */
12050 legitimate_pic_address_disp_p (rtx disp
)
12054 /* In 64bit mode we can allow direct addresses of symbols and labels
12055 when they are not dynamic symbols. */
12058 rtx op0
= disp
, op1
;
12060 switch (GET_CODE (disp
))
/* CONST: accept sym+ofs with |ofs| < 16MB (RIP-relative reach margin).  */
12066 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12068 op0
= XEXP (XEXP (disp
, 0), 0);
12069 op1
= XEXP (XEXP (disp
, 0), 1);
12070 if (!CONST_INT_P (op1
)
12071 || INTVAL (op1
) >= 16*1024*1024
12072 || INTVAL (op1
) < -16*1024*1024)
12074 if (GET_CODE (op0
) == LABEL_REF
)
12076 if (GET_CODE (op0
) == CONST
12077 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12078 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12080 if (GET_CODE (op0
) == UNSPEC
12081 && XINT (op0
, 1) == UNSPEC_PCREL
)
12083 if (GET_CODE (op0
) != SYMBOL_REF
)
12088 /* TLS references should always be enclosed in UNSPEC. */
12089 if (SYMBOL_REF_TLS_MODEL (op0
))
/* Direct references are OK only for local, near symbols outside the
   large PIC model.  */
12091 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12092 && ix86_cmodel
!= CM_LARGE_PIC
)
/* 64-bit path: strip CONST, require a recognized GOT/PLT/PCREL unspec.  */
12100 if (GET_CODE (disp
) != CONST
)
12102 disp
= XEXP (disp
, 0);
12106 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12107 of GOT tables. We should not need these anyway. */
12108 if (GET_CODE (disp
) != UNSPEC
12109 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12110 && XINT (disp
, 1) != UNSPEC_GOTOFF
12111 && XINT (disp
, 1) != UNSPEC_PCREL
12112 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12115 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12116 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
/* 32-bit path: strip an optional (plus disp int) wrapper.  */
12122 if (GET_CODE (disp
) == PLUS
)
12124 if (!CONST_INT_P (XEXP (disp
, 1)))
12126 disp
= XEXP (disp
, 0);
12130 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12133 if (GET_CODE (disp
) != UNSPEC
)
12136 switch (XINT (disp
, 1))
12141 /* We need to check for both symbols and labels because VxWorks loads
12142 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12144 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12145 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12146 case UNSPEC_GOTOFF
:
12147 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12148 While ABI specify also 32bit relocation but we don't produce it in
12149 small PIC model at all. */
12150 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12151 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12153 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12155 case UNSPEC_GOTTPOFF
:
12156 case UNSPEC_GOTNTPOFF
:
12157 case UNSPEC_INDNTPOFF
:
/* Initial-exec TLS forms: valid only for an initial-exec SYMBOL_REF.  */
12160 disp
= XVECEXP (disp
, 0, 0);
12161 return (GET_CODE (disp
) == SYMBOL_REF
12162 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12163 case UNSPEC_NTPOFF
:
12164 disp
= XVECEXP (disp
, 0, 0);
12165 return (GET_CODE (disp
) == SYMBOL_REF
12166 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12167 case UNSPEC_DTPOFF
:
12168 disp
= XVECEXP (disp
, 0, 0);
12169 return (GET_CODE (disp
) == SYMBOL_REF
12170 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12176 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12177 replace the input X, or the original X if no replacement is called for.
12178 The output parameter *WIN is 1 if the calling macro should goto WIN,
12179 0 if it should not. */
12182 ix86_legitimize_reload_address (rtx x
,
12183 enum machine_mode mode ATTRIBUTE_UNUSED
,
12184 int opnum
, int type
,
12185 int ind_levels ATTRIBUTE_UNUSED
)
12187 /* Reload can generate:
12189 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12193 This RTX is rejected from ix86_legitimate_address_p due to
12194 non-strictness of base register 97. Following this rejection,
12195 reload pushes all three components into separate registers,
12196 creating invalid memory address RTX.
12198 Following code reloads only the invalid part of the
12199 memory address RTX. */
12201 if (GET_CODE (x
) == PLUS
12202 && REG_P (XEXP (x
, 1))
12203 && GET_CODE (XEXP (x
, 0)) == PLUS
12204 && REG_P (XEXP (XEXP (x
, 0), 1)))
12207 bool something_reloaded
= false;
12209 base
= XEXP (XEXP (x
, 0), 1);
12210 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12212 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12213 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12214 opnum
, (enum reload_type
) type
);
12215 something_reloaded
= true;
12218 index
= XEXP (x
, 1);
12219 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12221 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12222 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12223 opnum
, (enum reload_type
) type
);
12224 something_reloaded
= true;
12227 gcc_assert (something_reloaded
);
12234 /* Recognizes RTL expressions that are valid memory addresses for an
12235 instruction. The MODE argument is the machine mode for the MEM
12236 expression that wants to use this address.
12238 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12239 convert common non-canonical forms to canonical form so that they will
12243 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12244 rtx addr
, bool strict
)
12246 struct ix86_address parts
;
12247 rtx base
, index
, disp
;
12248 HOST_WIDE_INT scale
;
12250 if (ix86_decompose_address (addr
, &parts
) <= 0)
12251 /* Decomposition failed. */
12255 index
= parts
.index
;
12257 scale
= parts
.scale
;
12259 /* Validate base register. */
12266 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12267 reg
= SUBREG_REG (base
);
12269 /* Base is not a register. */
12272 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12275 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12276 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12277 /* Base is not valid. */
12281 /* Validate index register. */
12288 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12289 reg
= SUBREG_REG (index
);
12291 /* Index is not a register. */
12294 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12297 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12298 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12299 /* Index is not valid. */
12303 /* Index and base should have the same mode. */
12305 && GET_MODE (base
) != GET_MODE (index
))
12308 /* Validate scale factor. */
12312 /* Scale without index. */
12315 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12316 /* Scale is not a valid multiplier. */
12320 /* Validate displacement. */
12323 if (GET_CODE (disp
) == CONST
12324 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12325 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12326 switch (XINT (XEXP (disp
, 0), 1))
12328 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12329 used. While ABI specify also 32bit relocations, we don't produce
12330 them at all and use IP relative instead. */
12332 case UNSPEC_GOTOFF
:
12333 gcc_assert (flag_pic
);
12335 goto is_legitimate_pic
;
12337 /* 64bit address unspec. */
12340 case UNSPEC_GOTPCREL
:
12342 gcc_assert (flag_pic
);
12343 goto is_legitimate_pic
;
12345 case UNSPEC_GOTTPOFF
:
12346 case UNSPEC_GOTNTPOFF
:
12347 case UNSPEC_INDNTPOFF
:
12348 case UNSPEC_NTPOFF
:
12349 case UNSPEC_DTPOFF
:
12352 case UNSPEC_STACK_CHECK
:
12353 gcc_assert (flag_split_stack
);
12357 /* Invalid address unspec. */
12361 else if (SYMBOLIC_CONST (disp
)
12365 && MACHOPIC_INDIRECT
12366 && !machopic_operand_p (disp
)
12372 if (TARGET_64BIT
&& (index
|| base
))
12374 /* foo@dtpoff(%rX) is ok. */
12375 if (GET_CODE (disp
) != CONST
12376 || GET_CODE (XEXP (disp
, 0)) != PLUS
12377 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12378 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12379 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12380 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12381 /* Non-constant pic memory reference. */
12384 else if ((!TARGET_MACHO
|| flag_pic
)
12385 && ! legitimate_pic_address_disp_p (disp
))
12386 /* Displacement is an invalid pic construct. */
12389 else if (MACHO_DYNAMIC_NO_PIC_P
12390 && !ix86_legitimate_constant_p (Pmode
, disp
))
12391 /* displacment must be referenced via non_lazy_pointer */
12395 /* This code used to verify that a symbolic pic displacement
12396 includes the pic_offset_table_rtx register.
12398 While this is good idea, unfortunately these constructs may
12399 be created by "adds using lea" optimization for incorrect
12408 This code is nonsensical, but results in addressing
12409 GOT table with pic_offset_table_rtx base. We can't
12410 just refuse it easily, since it gets matched by
12411 "addsi3" pattern, that later gets split to lea in the
12412 case output register differs from input. While this
12413 can be handled by separate addsi pattern for this case
12414 that never results in lea, this seems to be easier and
12415 correct fix for crash to disable this test. */
12417 else if (GET_CODE (disp
) != LABEL_REF
12418 && !CONST_INT_P (disp
)
12419 && (GET_CODE (disp
) != CONST
12420 || !ix86_legitimate_constant_p (Pmode
, disp
))
12421 && (GET_CODE (disp
) != SYMBOL_REF
12422 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12423 /* Displacement is not constant. */
12425 else if (TARGET_64BIT
12426 && !x86_64_immediate_operand (disp
, VOIDmode
))
12427 /* Displacement is out of range. */
12431 /* Everything looks valid. */
12435 /* Determine if a given RTX is a valid constant address. */
12438 constant_address_p (rtx x
)
12440 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12443 /* Return a unique alias set for the GOT. */
12445 static alias_set_type
12446 ix86_GOT_alias_set (void)
12448 static alias_set_type set
= -1;
12450 set
= new_alias_set ();
12454 /* Return a legitimate reference for ORIG (an address) using the
12455 register REG. If REG is 0, a new pseudo is generated.
12457 There are two types of references that must be handled:
12459 1. Global data references must load the address from the GOT, via
12460 the PIC reg. An insn is emitted to do this load, and the reg is
12463 2. Static data references, constant pool addresses, and code labels
12464 compute the address as an offset from the GOT, whose base is in
12465 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12466 differentiate them from global data objects. The returned
12467 address is the PIC reg + an unspec constant.
12469 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12470 reg also appears in the address. */
12473 legitimize_pic_address (rtx orig
, rtx reg
)
12476 rtx new_rtx
= orig
;
12479 if (TARGET_MACHO
&& !TARGET_64BIT
)
12482 reg
= gen_reg_rtx (Pmode
);
12483 /* Use the generic Mach-O PIC machinery. */
12484 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12488 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12490 else if (TARGET_64BIT
12491 && ix86_cmodel
!= CM_SMALL_PIC
12492 && gotoff_operand (addr
, Pmode
))
12495 /* This symbol may be referenced via a displacement from the PIC
12496 base address (@GOTOFF). */
12498 if (reload_in_progress
)
12499 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12500 if (GET_CODE (addr
) == CONST
)
12501 addr
= XEXP (addr
, 0);
12502 if (GET_CODE (addr
) == PLUS
)
12504 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12506 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12509 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12510 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12512 tmpreg
= gen_reg_rtx (Pmode
);
12515 emit_move_insn (tmpreg
, new_rtx
);
12519 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12520 tmpreg
, 1, OPTAB_DIRECT
);
12523 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12525 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12527 /* This symbol may be referenced via a displacement from the PIC
12528 base address (@GOTOFF). */
12530 if (reload_in_progress
)
12531 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12532 if (GET_CODE (addr
) == CONST
)
12533 addr
= XEXP (addr
, 0);
12534 if (GET_CODE (addr
) == PLUS
)
12536 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12538 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12541 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12542 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12543 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12547 emit_move_insn (reg
, new_rtx
);
12551 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12552 /* We can't use @GOTOFF for text labels on VxWorks;
12553 see gotoff_operand. */
12554 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12556 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12558 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12559 return legitimize_dllimport_symbol (addr
, true);
12560 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12561 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12562 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12564 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12565 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12569 /* For x64 PE-COFF there is no GOT table. So we use address
12571 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12573 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12574 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12577 reg
= gen_reg_rtx (Pmode
);
12578 emit_move_insn (reg
, new_rtx
);
12581 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12583 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12584 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12585 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12586 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12589 reg
= gen_reg_rtx (Pmode
);
12590 /* Use directly gen_movsi, otherwise the address is loaded
12591 into register for CSE. We don't want to CSE this addresses,
12592 instead we CSE addresses from the GOT table, so skip this. */
12593 emit_insn (gen_movsi (reg
, new_rtx
));
12598 /* This symbol must be referenced via a load from the
12599 Global Offset Table (@GOT). */
12601 if (reload_in_progress
)
12602 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12603 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12604 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12606 new_rtx
= force_reg (Pmode
, new_rtx
);
12607 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12608 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12609 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12612 reg
= gen_reg_rtx (Pmode
);
12613 emit_move_insn (reg
, new_rtx
);
12619 if (CONST_INT_P (addr
)
12620 && !x86_64_immediate_operand (addr
, VOIDmode
))
12624 emit_move_insn (reg
, addr
);
12628 new_rtx
= force_reg (Pmode
, addr
);
12630 else if (GET_CODE (addr
) == CONST
)
12632 addr
= XEXP (addr
, 0);
12634 /* We must match stuff we generate before. Assume the only
12635 unspecs that can get here are ours. Not that we could do
12636 anything with them anyway.... */
12637 if (GET_CODE (addr
) == UNSPEC
12638 || (GET_CODE (addr
) == PLUS
12639 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12641 gcc_assert (GET_CODE (addr
) == PLUS
);
12643 if (GET_CODE (addr
) == PLUS
)
12645 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12647 /* Check first to see if this is a constant offset from a @GOTOFF
12648 symbol reference. */
12649 if (gotoff_operand (op0
, Pmode
)
12650 && CONST_INT_P (op1
))
12654 if (reload_in_progress
)
12655 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12656 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12658 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12659 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12660 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12664 emit_move_insn (reg
, new_rtx
);
12670 if (INTVAL (op1
) < -16*1024*1024
12671 || INTVAL (op1
) >= 16*1024*1024)
12673 if (!x86_64_immediate_operand (op1
, Pmode
))
12674 op1
= force_reg (Pmode
, op1
);
12675 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12681 rtx base
= legitimize_pic_address (op0
, reg
);
12682 enum machine_mode mode
= GET_MODE (base
);
12684 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12686 if (CONST_INT_P (new_rtx
))
12688 if (INTVAL (new_rtx
) < -16*1024*1024
12689 || INTVAL (new_rtx
) >= 16*1024*1024)
12691 if (!x86_64_immediate_operand (new_rtx
, mode
))
12692 new_rtx
= force_reg (mode
, new_rtx
);
12694 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12697 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
12701 if (GET_CODE (new_rtx
) == PLUS
12702 && CONSTANT_P (XEXP (new_rtx
, 1)))
12704 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12705 new_rtx
= XEXP (new_rtx
, 1);
12707 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12715 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12718 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12720 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12722 if (GET_MODE (tp
) != tp_mode
)
12724 gcc_assert (GET_MODE (tp
) == SImode
);
12725 gcc_assert (tp_mode
== DImode
);
12727 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12731 tp
= copy_to_mode_reg (tp_mode
, tp
);
12736 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12738 static GTY(()) rtx ix86_tls_symbol
;
12741 ix86_tls_get_addr (void)
12743 if (!ix86_tls_symbol
)
12746 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12747 ? "___tls_get_addr" : "__tls_get_addr");
12749 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12752 return ix86_tls_symbol
;
12755 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12757 static GTY(()) rtx ix86_tls_module_base_symbol
;
12760 ix86_tls_module_base (void)
12762 if (!ix86_tls_module_base_symbol
)
12764 ix86_tls_module_base_symbol
12765 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12767 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12768 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12771 return ix86_tls_module_base_symbol
;
12774 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12775 false if we expect this to be used for a memory address and true if
12776 we expect to load the address into a register. */
12779 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12781 rtx dest
, base
, off
;
12782 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12783 enum machine_mode tp_mode
= Pmode
;
12788 case TLS_MODEL_GLOBAL_DYNAMIC
:
12789 dest
= gen_reg_rtx (Pmode
);
12794 pic
= pic_offset_table_rtx
;
12797 pic
= gen_reg_rtx (Pmode
);
12798 emit_insn (gen_set_got (pic
));
12802 if (TARGET_GNU2_TLS
)
12805 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12807 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12809 tp
= get_thread_pointer (Pmode
, true);
12810 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12812 if (GET_MODE (x
) != Pmode
)
12813 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12815 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12819 rtx caddr
= ix86_tls_get_addr ();
12823 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12828 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12829 insns
= get_insns ();
12832 if (GET_MODE (x
) != Pmode
)
12833 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12835 RTL_CONST_CALL_P (insns
) = 1;
12836 emit_libcall_block (insns
, dest
, rax
, x
);
12839 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
12843 case TLS_MODEL_LOCAL_DYNAMIC
:
12844 base
= gen_reg_rtx (Pmode
);
12849 pic
= pic_offset_table_rtx
;
12852 pic
= gen_reg_rtx (Pmode
);
12853 emit_insn (gen_set_got (pic
));
12857 if (TARGET_GNU2_TLS
)
12859 rtx tmp
= ix86_tls_module_base ();
12862 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12864 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12866 tp
= get_thread_pointer (Pmode
, true);
12867 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12868 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12872 rtx caddr
= ix86_tls_get_addr ();
12876 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12881 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
12882 insns
= get_insns ();
12885 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12886 share the LD_BASE result with other LD model accesses. */
12887 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12888 UNSPEC_TLS_LD_BASE
);
12890 RTL_CONST_CALL_P (insns
) = 1;
12891 emit_libcall_block (insns
, base
, rax
, eqv
);
12894 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12897 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12898 off
= gen_rtx_CONST (Pmode
, off
);
12900 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12902 if (TARGET_GNU2_TLS
)
12904 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12906 if (GET_MODE (x
) != Pmode
)
12907 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12909 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
12913 case TLS_MODEL_INITIAL_EXEC
:
12916 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12918 /* The Sun linker took the AMD64 TLS spec literally
12919 and can only handle %rax as destination of the
12920 initial executable code sequence. */
12922 dest
= gen_reg_rtx (DImode
);
12923 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12927 /* Generate DImode references to avoid %fs:(%reg32)
12928 problems and linker IE->LE relaxation bug. */
12931 type
= UNSPEC_GOTNTPOFF
;
12935 if (reload_in_progress
)
12936 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12937 pic
= pic_offset_table_rtx
;
12938 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12940 else if (!TARGET_ANY_GNU_TLS
)
12942 pic
= gen_reg_rtx (Pmode
);
12943 emit_insn (gen_set_got (pic
));
12944 type
= UNSPEC_GOTTPOFF
;
12949 type
= UNSPEC_INDNTPOFF
;
12952 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12953 off
= gen_rtx_CONST (tp_mode
, off
);
12955 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12956 off
= gen_const_mem (tp_mode
, off
);
12957 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12959 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12961 base
= get_thread_pointer (tp_mode
,
12962 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12963 off
= force_reg (tp_mode
, off
);
12964 return gen_rtx_PLUS (tp_mode
, base
, off
);
12968 base
= get_thread_pointer (Pmode
, true);
12969 dest
= gen_reg_rtx (Pmode
);
12970 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12974 case TLS_MODEL_LOCAL_EXEC
:
12975 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12976 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12977 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12978 off
= gen_rtx_CONST (Pmode
, off
);
12980 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12982 base
= get_thread_pointer (Pmode
,
12983 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12984 return gen_rtx_PLUS (Pmode
, base
, off
);
12988 base
= get_thread_pointer (Pmode
, true);
12989 dest
= gen_reg_rtx (Pmode
);
12990 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
12995 gcc_unreachable ();
13001 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13004 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13005 htab_t dllimport_map
;
13008 get_dllimport_decl (tree decl
)
13010 struct tree_map
*h
, in
;
13013 const char *prefix
;
13014 size_t namelen
, prefixlen
;
13019 if (!dllimport_map
)
13020 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13022 in
.hash
= htab_hash_pointer (decl
);
13023 in
.base
.from
= decl
;
13024 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13025 h
= (struct tree_map
*) *loc
;
13029 *loc
= h
= ggc_alloc_tree_map ();
13031 h
->base
.from
= decl
;
13032 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13033 VAR_DECL
, NULL
, ptr_type_node
);
13034 DECL_ARTIFICIAL (to
) = 1;
13035 DECL_IGNORED_P (to
) = 1;
13036 DECL_EXTERNAL (to
) = 1;
13037 TREE_READONLY (to
) = 1;
13039 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13040 name
= targetm
.strip_name_encoding (name
);
13041 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13042 ? "*__imp_" : "*__imp__";
13043 namelen
= strlen (name
);
13044 prefixlen
= strlen (prefix
);
13045 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13046 memcpy (imp_name
, prefix
, prefixlen
);
13047 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13049 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13050 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13051 SET_SYMBOL_REF_DECL (rtl
, to
);
13052 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13054 rtl
= gen_const_mem (Pmode
, rtl
);
13055 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13057 SET_DECL_RTL (to
, rtl
);
13058 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13063 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13064 true if we require the result be a register. */
13067 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13072 gcc_assert (SYMBOL_REF_DECL (symbol
));
13073 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13075 x
= DECL_RTL (imp_decl
);
13077 x
= force_reg (Pmode
, x
);
13081 /* Try machine-dependent ways of modifying an illegitimate address
13082 to be legitimate. If we find one, return the new, valid address.
13083 This macro is used in only one place: `memory_address' in explow.c.
13085 OLDX is the address as it was before break_out_memory_refs was called.
13086 In some cases it is useful to look at this to decide what needs to be done.
13088 It is always safe for this macro to do nothing. It exists to recognize
13089 opportunities to optimize the output.
13091 For the 80386, we handle X+REG by loading X into a register R and
13092 using R+REG. R will go in a general reg and indexing will be used.
13093 However, if REG is a broken-out memory address or multiplication,
13094 nothing needs to be done because REG can certainly go in a general reg.
13096 When -fpic is used, special handling is needed for symbolic references.
13097 See comments by legitimize_pic_address in i386.c for details. */
13100 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13101 enum machine_mode mode
)
13106 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13108 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13109 if (GET_CODE (x
) == CONST
13110 && GET_CODE (XEXP (x
, 0)) == PLUS
13111 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13112 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13114 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13115 (enum tls_model
) log
, false);
13116 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13119 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13121 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13122 return legitimize_dllimport_symbol (x
, true);
13123 if (GET_CODE (x
) == CONST
13124 && GET_CODE (XEXP (x
, 0)) == PLUS
13125 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13126 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13128 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13129 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13133 if (flag_pic
&& SYMBOLIC_CONST (x
))
13134 return legitimize_pic_address (x
, 0);
13137 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13138 return machopic_indirect_data_reference (x
, 0);
13141 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13142 if (GET_CODE (x
) == ASHIFT
13143 && CONST_INT_P (XEXP (x
, 1))
13144 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13147 log
= INTVAL (XEXP (x
, 1));
13148 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13149 GEN_INT (1 << log
));
13152 if (GET_CODE (x
) == PLUS
)
13154 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13156 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13157 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13158 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13161 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13162 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13163 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13164 GEN_INT (1 << log
));
13167 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13168 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13169 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13172 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13173 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13174 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13175 GEN_INT (1 << log
));
13178 /* Put multiply first if it isn't already. */
13179 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13181 rtx tmp
= XEXP (x
, 0);
13182 XEXP (x
, 0) = XEXP (x
, 1);
13187 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13188 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13189 created by virtual register instantiation, register elimination, and
13190 similar optimizations. */
13191 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13194 x
= gen_rtx_PLUS (Pmode
,
13195 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13196 XEXP (XEXP (x
, 1), 0)),
13197 XEXP (XEXP (x
, 1), 1));
13201 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13202 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13203 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13204 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13205 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13206 && CONSTANT_P (XEXP (x
, 1)))
13209 rtx other
= NULL_RTX
;
13211 if (CONST_INT_P (XEXP (x
, 1)))
13213 constant
= XEXP (x
, 1);
13214 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13216 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13218 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13219 other
= XEXP (x
, 1);
13227 x
= gen_rtx_PLUS (Pmode
,
13228 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13229 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13230 plus_constant (Pmode
, other
,
13231 INTVAL (constant
)));
13235 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13238 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13241 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13244 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13247 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13251 && REG_P (XEXP (x
, 1))
13252 && REG_P (XEXP (x
, 0)))
13255 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13258 x
= legitimize_pic_address (x
, 0);
13261 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13264 if (REG_P (XEXP (x
, 0)))
13266 rtx temp
= gen_reg_rtx (Pmode
);
13267 rtx val
= force_operand (XEXP (x
, 1), temp
);
13270 val
= convert_to_mode (Pmode
, val
, 1);
13271 emit_move_insn (temp
, val
);
13274 XEXP (x
, 1) = temp
;
13278 else if (REG_P (XEXP (x
, 1)))
13280 rtx temp
= gen_reg_rtx (Pmode
);
13281 rtx val
= force_operand (XEXP (x
, 0), temp
);
13284 val
= convert_to_mode (Pmode
, val
, 1);
13285 emit_move_insn (temp
, val
);
13288 XEXP (x
, 0) = temp
;
13296 /* Print an integer constant expression in assembler syntax. Addition
13297 and subtraction are the only arithmetic that may appear in these
13298 expressions. FILE is the stdio stream to write to, X is the rtx, and
13299 CODE is the operand print code from the output string. */
13302 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13306 switch (GET_CODE (x
))
13309 gcc_assert (flag_pic
);
13314 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13315 output_addr_const (file
, x
);
13318 const char *name
= XSTR (x
, 0);
13320 /* Mark the decl as referenced so that cgraph will
13321 output the function. */
13322 if (SYMBOL_REF_DECL (x
))
13323 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13326 if (MACHOPIC_INDIRECT
13327 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13328 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13330 assemble_name (file
, name
);
13332 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13333 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13334 fputs ("@PLT", file
);
13341 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13342 assemble_name (asm_out_file
, buf
);
13346 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13350 /* This used to output parentheses around the expression,
13351 but that does not work on the 386 (either ATT or BSD assembler). */
13352 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13356 if (GET_MODE (x
) == VOIDmode
)
13358 /* We can use %d if the number is <32 bits and positive. */
13359 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13360 fprintf (file
, "0x%lx%08lx",
13361 (unsigned long) CONST_DOUBLE_HIGH (x
),
13362 (unsigned long) CONST_DOUBLE_LOW (x
));
13364 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13367 /* We can't handle floating point constants;
13368 TARGET_PRINT_OPERAND must handle them. */
13369 output_operand_lossage ("floating constant misused");
13373 /* Some assemblers need integer constants to appear first. */
13374 if (CONST_INT_P (XEXP (x
, 0)))
13376 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13378 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13382 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13383 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13385 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13391 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13392 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13394 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13396 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13400 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13402 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13407 gcc_assert (XVECLEN (x
, 0) == 1);
13408 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13409 switch (XINT (x
, 1))
13412 fputs ("@GOT", file
);
13414 case UNSPEC_GOTOFF
:
13415 fputs ("@GOTOFF", file
);
13417 case UNSPEC_PLTOFF
:
13418 fputs ("@PLTOFF", file
);
13421 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13422 "(%rip)" : "[rip]", file
);
13424 case UNSPEC_GOTPCREL
:
13425 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13426 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13428 case UNSPEC_GOTTPOFF
:
13429 /* FIXME: This might be @TPOFF in Sun ld too. */
13430 fputs ("@gottpoff", file
);
13433 fputs ("@tpoff", file
);
13435 case UNSPEC_NTPOFF
:
13437 fputs ("@tpoff", file
);
13439 fputs ("@ntpoff", file
);
13441 case UNSPEC_DTPOFF
:
13442 fputs ("@dtpoff", file
);
13444 case UNSPEC_GOTNTPOFF
:
13446 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13447 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13449 fputs ("@gotntpoff", file
);
13451 case UNSPEC_INDNTPOFF
:
13452 fputs ("@indntpoff", file
);
13455 case UNSPEC_MACHOPIC_OFFSET
:
13457 machopic_output_function_base_name (file
);
13461 output_operand_lossage ("invalid UNSPEC as operand");
13467 output_operand_lossage ("invalid expression as operand");
13471 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13472 We need to emit DTP-relative relocations. */
13474 static void ATTRIBUTE_UNUSED
13475 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13477 fputs (ASM_LONG
, file
);
13478 output_addr_const (file
, x
);
13479 fputs ("@dtpoff", file
);
13485 fputs (", 0", file
);
13488 gcc_unreachable ();
13492 /* Return true if X is a representation of the PIC register. This copes
13493 with calls from ix86_find_base_term, where the register might have
13494 been replaced by a cselib value. */
13497 ix86_pic_register_p (rtx x
)
13499 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13500 return (pic_offset_table_rtx
13501 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13503 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13506 /* Helper function for ix86_delegitimize_address.
13507 Attempt to delegitimize TLS local-exec accesses. */
13510 ix86_delegitimize_tls_address (rtx orig_x
)
13512 rtx x
= orig_x
, unspec
;
13513 struct ix86_address addr
;
13515 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13519 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13521 if (ix86_decompose_address (x
, &addr
) == 0
13522 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13523 || addr
.disp
== NULL_RTX
13524 || GET_CODE (addr
.disp
) != CONST
)
13526 unspec
= XEXP (addr
.disp
, 0);
13527 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13528 unspec
= XEXP (unspec
, 0);
13529 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13531 x
= XVECEXP (unspec
, 0, 0);
13532 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13533 if (unspec
!= XEXP (addr
.disp
, 0))
13534 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13537 rtx idx
= addr
.index
;
13538 if (addr
.scale
!= 1)
13539 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13540 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13543 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13544 if (MEM_P (orig_x
))
13545 x
= replace_equiv_address_nv (orig_x
, x
);
13549 /* In the name of slightly smaller debug output, and to cater to
13550 general assembler lossage, recognize PIC+GOTOFF and turn it back
13551 into a direct symbol reference.
13553 On Darwin, this is necessary to avoid a crash, because Darwin
13554 has a different PIC label for each routine but the DWARF debugging
13555 information is not associated with any particular routine, so it's
13556 necessary to remove references to the PIC label from RTL stored by
13557 the DWARF output code. */
13560 ix86_delegitimize_address (rtx x
)
13562 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13563 /* addend is NULL or some rtx if x is something+GOTOFF where
13564 something doesn't include the PIC register. */
13565 rtx addend
= NULL_RTX
;
13566 /* reg_addend is NULL or a multiple of some register. */
13567 rtx reg_addend
= NULL_RTX
;
13568 /* const_addend is NULL or a const_int. */
13569 rtx const_addend
= NULL_RTX
;
13570 /* This is the result, or NULL. */
13571 rtx result
= NULL_RTX
;
13580 if (GET_CODE (x
) == CONST
13581 && GET_CODE (XEXP (x
, 0)) == PLUS
13582 && GET_MODE (XEXP (x
, 0)) == Pmode
13583 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13584 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13585 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13587 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13588 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13589 if (MEM_P (orig_x
))
13590 x
= replace_equiv_address_nv (orig_x
, x
);
13593 if (GET_CODE (x
) != CONST
13594 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13595 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13596 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13597 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13598 return ix86_delegitimize_tls_address (orig_x
);
13599 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13600 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13602 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13610 if (GET_CODE (x
) != PLUS
13611 || GET_CODE (XEXP (x
, 1)) != CONST
)
13612 return ix86_delegitimize_tls_address (orig_x
);
13614 if (ix86_pic_register_p (XEXP (x
, 0)))
13615 /* %ebx + GOT/GOTOFF */
13617 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13619 /* %ebx + %reg * scale + GOT/GOTOFF */
13620 reg_addend
= XEXP (x
, 0);
13621 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13622 reg_addend
= XEXP (reg_addend
, 1);
13623 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13624 reg_addend
= XEXP (reg_addend
, 0);
13627 reg_addend
= NULL_RTX
;
13628 addend
= XEXP (x
, 0);
13632 addend
= XEXP (x
, 0);
13634 x
= XEXP (XEXP (x
, 1), 0);
13635 if (GET_CODE (x
) == PLUS
13636 && CONST_INT_P (XEXP (x
, 1)))
13638 const_addend
= XEXP (x
, 1);
13642 if (GET_CODE (x
) == UNSPEC
13643 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13644 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13645 result
= XVECEXP (x
, 0, 0);
13647 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13648 && !MEM_P (orig_x
))
13649 result
= XVECEXP (x
, 0, 0);
13652 return ix86_delegitimize_tls_address (orig_x
);
13655 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13657 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13660 /* If the rest of original X doesn't involve the PIC register, add
13661 addend and subtract pic_offset_table_rtx. This can happen e.g.
13663 leal (%ebx, %ecx, 4), %ecx
13665 movl foo@GOTOFF(%ecx), %edx
13666 in which case we return (%ecx - %ebx) + foo. */
13667 if (pic_offset_table_rtx
)
13668 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13669 pic_offset_table_rtx
),
13674 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13676 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13677 if (result
== NULL_RTX
)
13683 /* If X is a machine specific address (i.e. a symbol or label being
13684 referenced as a displacement from the GOT implemented using an
13685 UNSPEC), then return the base term. Otherwise return X. */
13688 ix86_find_base_term (rtx x
)
13694 if (GET_CODE (x
) != CONST
)
13696 term
= XEXP (x
, 0);
13697 if (GET_CODE (term
) == PLUS
13698 && (CONST_INT_P (XEXP (term
, 1))
13699 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13700 term
= XEXP (term
, 0);
13701 if (GET_CODE (term
) != UNSPEC
13702 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13703 && XINT (term
, 1) != UNSPEC_PCREL
))
13706 return XVECEXP (term
, 0, 0);
13709 return ix86_delegitimize_address (x
);
13713 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13714 bool fp
, FILE *file
)
13716 const char *suffix
;
13718 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13720 code
= ix86_fp_compare_code_to_integer (code
);
13724 code
= reverse_condition (code
);
13775 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13779 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13780 Those same assemblers have the same but opposite lossage on cmov. */
13781 if (mode
== CCmode
)
13782 suffix
= fp
? "nbe" : "a";
13783 else if (mode
== CCCmode
)
13786 gcc_unreachable ();
13802 gcc_unreachable ();
13806 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13823 gcc_unreachable ();
13827 /* ??? As above. */
13828 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13829 suffix
= fp
? "nb" : "ae";
13832 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13836 /* ??? As above. */
13837 if (mode
== CCmode
)
13839 else if (mode
== CCCmode
)
13840 suffix
= fp
? "nb" : "ae";
13842 gcc_unreachable ();
13845 suffix
= fp
? "u" : "p";
13848 suffix
= fp
? "nu" : "np";
13851 gcc_unreachable ();
13853 fputs (suffix
, file
);
13856 /* Print the name of register X to FILE based on its machine mode and number.
13857 If CODE is 'w', pretend the mode is HImode.
13858 If CODE is 'b', pretend the mode is QImode.
13859 If CODE is 'k', pretend the mode is SImode.
13860 If CODE is 'q', pretend the mode is DImode.
13861 If CODE is 'x', pretend the mode is V4SFmode.
13862 If CODE is 't', pretend the mode is V8SFmode.
13863 If CODE is 'h', pretend the reg is the 'high' byte register.
13864 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13865 If CODE is 'd', duplicate the operand for AVX instruction.
13869 print_reg (rtx x
, int code
, FILE *file
)
13872 unsigned int regno
;
13873 bool duplicated
= code
== 'd' && TARGET_AVX
;
13875 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13880 gcc_assert (TARGET_64BIT
);
13881 fputs ("rip", file
);
13885 regno
= true_regnum (x
);
13886 gcc_assert (regno
!= ARG_POINTER_REGNUM
13887 && regno
!= FRAME_POINTER_REGNUM
13888 && regno
!= FLAGS_REG
13889 && regno
!= FPSR_REG
13890 && regno
!= FPCR_REG
);
13892 if (code
== 'w' || MMX_REG_P (x
))
13894 else if (code
== 'b')
13896 else if (code
== 'k')
13898 else if (code
== 'q')
13900 else if (code
== 'y')
13902 else if (code
== 'h')
13904 else if (code
== 'x')
13906 else if (code
== 't')
13909 code
= GET_MODE_SIZE (GET_MODE (x
));
13911 /* Irritatingly, AMD extended registers use different naming convention
13912 from the normal registers: "r%d[bwd]" */
13913 if (REX_INT_REGNO_P (regno
))
13915 gcc_assert (TARGET_64BIT
);
13917 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
13921 error ("extended registers have no high halves");
13936 error ("unsupported operand size for extended register");
13946 if (STACK_TOP_P (x
))
13955 if (! ANY_FP_REG_P (x
))
13956 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13961 reg
= hi_reg_name
[regno
];
13964 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13966 reg
= qi_reg_name
[regno
];
13969 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13971 reg
= qi_high_reg_name
[regno
];
13976 gcc_assert (!duplicated
);
13978 fputs (hi_reg_name
[regno
] + 1, file
);
13983 gcc_unreachable ();
13989 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13990 fprintf (file
, ", %%%s", reg
);
13992 fprintf (file
, ", %s", reg
);
13996 /* Locate some local-dynamic symbol still in use by this function
13997 so that we can print its name in some tls_local_dynamic_base
14001 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14005 if (GET_CODE (x
) == SYMBOL_REF
14006 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14008 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14015 static const char *
14016 get_some_local_dynamic_name (void)
14020 if (cfun
->machine
->some_ld_name
)
14021 return cfun
->machine
->some_ld_name
;
14023 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14024 if (NONDEBUG_INSN_P (insn
)
14025 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14026 return cfun
->machine
->some_ld_name
;
14031 /* Meaning of CODE:
14032 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14033 C -- print opcode suffix for set/cmov insn.
14034 c -- like C, but print reversed condition
14035 F,f -- likewise, but for floating-point.
14036 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14038 R -- print the prefix for register names.
14039 z -- print the opcode suffix for the size of the current operand.
14040 Z -- likewise, with special suffixes for x87 instructions.
14041 * -- print a star (in certain assembler syntax)
14042 A -- print an absolute memory reference.
14043 E -- print address with DImode register names if TARGET_64BIT.
14044 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14045 s -- print a shift double count, followed by the assemblers argument
14047 b -- print the QImode name of the register for the indicated operand.
14048 %b0 would print %al if operands[0] is reg 0.
14049 w -- likewise, print the HImode name of the register.
14050 k -- likewise, print the SImode name of the register.
14051 q -- likewise, print the DImode name of the register.
14052 x -- likewise, print the V4SFmode name of the register.
14053 t -- likewise, print the V8SFmode name of the register.
14054 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14055 y -- print "st(0)" instead of "st" as a register.
14056 d -- print duplicated register operand for AVX instruction.
14057 D -- print condition for SSE cmp instruction.
14058 P -- if PIC, print an @PLT suffix.
14059 p -- print raw symbol name.
14060 X -- don't print any sort of PIC '@' suffix for a symbol.
14061 & -- print some in-use local-dynamic symbol name.
14062 H -- print a memory address offset by 8; used for sse high-parts
14063 Y -- print condition for XOP pcom* instruction.
14064 + -- print a branch hint as 'cs' or 'ds' prefix
14065 ; -- print a semicolon (after prefixes due to bug in older gas).
14066 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14067 @ -- print a segment register of thread base pointer load
14068 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14072 ix86_print_operand (FILE *file
, rtx x
, int code
)
14079 switch (ASSEMBLER_DIALECT
)
14086 /* Intel syntax. For absolute addresses, registers should not
14087 be surrounded by braces. */
14091 ix86_print_operand (file
, x
, 0);
14098 gcc_unreachable ();
14101 ix86_print_operand (file
, x
, 0);
14105 /* Wrap address in an UNSPEC to declare special handling. */
14107 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14109 output_address (x
);
14113 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14118 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14123 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14128 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14133 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14138 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14143 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14144 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14147 switch (GET_MODE_SIZE (GET_MODE (x
)))
14162 output_operand_lossage
14163 ("invalid operand size for operand code 'O'");
14172 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14174 /* Opcodes don't get size suffixes if using Intel opcodes. */
14175 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14178 switch (GET_MODE_SIZE (GET_MODE (x
)))
14197 output_operand_lossage
14198 ("invalid operand size for operand code 'z'");
14203 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14205 (0, "non-integer operand used with operand code 'z'");
14209 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14210 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14213 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14215 switch (GET_MODE_SIZE (GET_MODE (x
)))
14218 #ifdef HAVE_AS_IX86_FILDS
14228 #ifdef HAVE_AS_IX86_FILDQ
14231 fputs ("ll", file
);
14239 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14241 /* 387 opcodes don't get size suffixes
14242 if the operands are registers. */
14243 if (STACK_REG_P (x
))
14246 switch (GET_MODE_SIZE (GET_MODE (x
)))
14267 output_operand_lossage
14268 ("invalid operand type used with operand code 'Z'");
14272 output_operand_lossage
14273 ("invalid operand size for operand code 'Z'");
14291 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14293 ix86_print_operand (file
, x
, 0);
14294 fputs (", ", file
);
14299 switch (GET_CODE (x
))
14302 fputs ("neq", file
);
14305 fputs ("eq", file
);
14309 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14313 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14317 fputs ("le", file
);
14321 fputs ("lt", file
);
14324 fputs ("unord", file
);
14327 fputs ("ord", file
);
14330 fputs ("ueq", file
);
14333 fputs ("nlt", file
);
14336 fputs ("nle", file
);
14339 fputs ("ule", file
);
14342 fputs ("ult", file
);
14345 fputs ("une", file
);
14348 output_operand_lossage ("operand is not a condition code, "
14349 "invalid operand code 'Y'");
14355 /* Little bit of braindamage here. The SSE compare instructions
14356 does use completely different names for the comparisons that the
14357 fp conditional moves. */
14358 switch (GET_CODE (x
))
14363 fputs ("eq_us", file
);
14367 fputs ("eq", file
);
14372 fputs ("nge", file
);
14376 fputs ("lt", file
);
14381 fputs ("ngt", file
);
14385 fputs ("le", file
);
14388 fputs ("unord", file
);
14393 fputs ("neq_oq", file
);
14397 fputs ("neq", file
);
14402 fputs ("ge", file
);
14406 fputs ("nlt", file
);
14411 fputs ("gt", file
);
14415 fputs ("nle", file
);
14418 fputs ("ord", file
);
14421 output_operand_lossage ("operand is not a condition code, "
14422 "invalid operand code 'D'");
14429 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14430 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14436 if (!COMPARISON_P (x
))
14438 output_operand_lossage ("operand is not a condition code, "
14439 "invalid operand code '%c'", code
);
14442 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14443 code
== 'c' || code
== 'f',
14444 code
== 'F' || code
== 'f',
14449 if (!offsettable_memref_p (x
))
14451 output_operand_lossage ("operand is not an offsettable memory "
14452 "reference, invalid operand code 'H'");
14455 /* It doesn't actually matter what mode we use here, as we're
14456 only going to use this for printing. */
14457 x
= adjust_address_nv (x
, DImode
, 8);
14461 gcc_assert (CONST_INT_P (x
));
14463 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14464 #ifdef HAVE_AS_IX86_HLE
14465 fputs ("xacquire ", file
);
14467 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14469 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14470 #ifdef HAVE_AS_IX86_HLE
14471 fputs ("xrelease ", file
);
14473 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14475 /* We do not want to print value of the operand. */
14479 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14485 const char *name
= get_some_local_dynamic_name ();
14487 output_operand_lossage ("'%%&' used without any "
14488 "local dynamic TLS references");
14490 assemble_name (file
, name
);
14499 || optimize_function_for_size_p (cfun
)
14500 || !TARGET_BRANCH_PREDICTION_HINTS
)
14503 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14506 int pred_val
= INTVAL (XEXP (x
, 0));
14508 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14509 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14511 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14513 = final_forward_branch_p (current_output_insn
) == 0;
14515 /* Emit hints only in the case default branch prediction
14516 heuristics would fail. */
14517 if (taken
!= cputaken
)
14519 /* We use 3e (DS) prefix for taken branches and
14520 2e (CS) prefix for not taken branches. */
14522 fputs ("ds ; ", file
);
14524 fputs ("cs ; ", file
);
14532 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14538 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14541 /* The kernel uses a different segment register for performance
14542 reasons; a system call would not have to trash the userspace
14543 segment register, which would be expensive. */
14544 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14545 fputs ("fs", file
);
14547 fputs ("gs", file
);
14551 putc (TARGET_AVX2
? 'i' : 'f', file
);
14555 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14556 fputs ("addr32 ", file
);
14560 output_operand_lossage ("invalid operand code '%c'", code
);
14565 print_reg (x
, code
, file
);
14567 else if (MEM_P (x
))
14569 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14570 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14571 && GET_MODE (x
) != BLKmode
)
14574 switch (GET_MODE_SIZE (GET_MODE (x
)))
14576 case 1: size
= "BYTE"; break;
14577 case 2: size
= "WORD"; break;
14578 case 4: size
= "DWORD"; break;
14579 case 8: size
= "QWORD"; break;
14580 case 12: size
= "TBYTE"; break;
14582 if (GET_MODE (x
) == XFmode
)
14587 case 32: size
= "YMMWORD"; break;
14589 gcc_unreachable ();
14592 /* Check for explicit size override (codes 'b', 'w', 'k',
14596 else if (code
== 'w')
14598 else if (code
== 'k')
14600 else if (code
== 'q')
14602 else if (code
== 'x')
14605 fputs (size
, file
);
14606 fputs (" PTR ", file
);
14610 /* Avoid (%rip) for call operands. */
14611 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14612 && !CONST_INT_P (x
))
14613 output_addr_const (file
, x
);
14614 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14615 output_operand_lossage ("invalid constraints for operand");
14617 output_address (x
);
14620 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14625 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14626 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14628 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14630 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14632 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
14633 (unsigned long long) (int) l
);
14635 fprintf (file
, "0x%08x", (unsigned int) l
);
14638 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14643 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14644 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14646 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14648 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14651 /* These float cases don't actually occur as immediate operands. */
14652 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14656 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14657 fputs (dstr
, file
);
14662 /* We have patterns that allow zero sets of memory, for instance.
14663 In 64-bit mode, we should probably support all 8-byte vectors,
14664 since we can in fact encode that into an immediate. */
14665 if (GET_CODE (x
) == CONST_VECTOR
)
14667 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14671 if (code
!= 'P' && code
!= 'p')
14673 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14675 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14678 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14679 || GET_CODE (x
) == LABEL_REF
)
14681 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14684 fputs ("OFFSET FLAT:", file
);
14687 if (CONST_INT_P (x
))
14688 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14689 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14690 output_pic_addr_const (file
, x
, code
);
14692 output_addr_const (file
, x
);
14697 ix86_print_operand_punct_valid_p (unsigned char code
)
14699 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14700 || code
== ';' || code
== '~' || code
== '^');
14703 /* Print a memory operand whose address is ADDR. */
14706 ix86_print_operand_address (FILE *file
, rtx addr
)
14708 struct ix86_address parts
;
14709 rtx base
, index
, disp
;
14715 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14717 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14718 gcc_assert (parts
.index
== NULL_RTX
);
14719 parts
.index
= XVECEXP (addr
, 0, 1);
14720 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14721 addr
= XVECEXP (addr
, 0, 0);
14724 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14726 gcc_assert (TARGET_64BIT
);
14727 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14731 ok
= ix86_decompose_address (addr
, &parts
);
14736 index
= parts
.index
;
14738 scale
= parts
.scale
;
14746 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14748 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14751 gcc_unreachable ();
14754 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14755 if (TARGET_64BIT
&& !base
&& !index
)
14759 if (GET_CODE (disp
) == CONST
14760 && GET_CODE (XEXP (disp
, 0)) == PLUS
14761 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14762 symbol
= XEXP (XEXP (disp
, 0), 0);
14764 if (GET_CODE (symbol
) == LABEL_REF
14765 || (GET_CODE (symbol
) == SYMBOL_REF
14766 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14769 if (!base
&& !index
)
14771 /* Displacement only requires special attention. */
14773 if (CONST_INT_P (disp
))
14775 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14776 fputs ("ds:", file
);
14777 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14780 output_pic_addr_const (file
, disp
, 0);
14782 output_addr_const (file
, disp
);
14786 /* Print SImode register names to force addr32 prefix. */
14787 if (SImode_address_operand (addr
, VOIDmode
))
14789 #ifdef ENABLE_CHECKING
14790 gcc_assert (TARGET_64BIT
);
14791 switch (GET_CODE (addr
))
14794 gcc_assert (GET_MODE (addr
) == SImode
);
14795 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14799 gcc_assert (GET_MODE (addr
) == DImode
);
14802 gcc_unreachable ();
14805 gcc_assert (!code
);
14811 && CONST_INT_P (disp
)
14812 && INTVAL (disp
) < -16*1024*1024)
14814 /* X32 runs in 64-bit mode, where displacement, DISP, in
14815 address DISP(%r64), is encoded as 32-bit immediate sign-
14816 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14817 address is %r64 + 0xffffffffbffffd00. When %r64 <
14818 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14819 which is invalid for x32. The correct address is %r64
14820 - 0x40000300 == 0xf7ffdd64. To properly encode
14821 -0x40000300(%r64) for x32, we zero-extend negative
14822 displacement by forcing addr32 prefix which truncates
14823 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14824 zero-extend all negative displacements, including -1(%rsp).
14825 However, for small negative displacements, sign-extension
14826 won't cause overflow. We only zero-extend negative
14827 displacements if they < -16*1024*1024, which is also used
14828 to check legitimate address displacements for PIC. */
14832 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14837 output_pic_addr_const (file
, disp
, 0);
14838 else if (GET_CODE (disp
) == LABEL_REF
)
14839 output_asm_label (disp
);
14841 output_addr_const (file
, disp
);
14846 print_reg (base
, code
, file
);
14850 print_reg (index
, vsib
? 0 : code
, file
);
14851 if (scale
!= 1 || vsib
)
14852 fprintf (file
, ",%d", scale
);
14858 rtx offset
= NULL_RTX
;
14862 /* Pull out the offset of a symbol; print any symbol itself. */
14863 if (GET_CODE (disp
) == CONST
14864 && GET_CODE (XEXP (disp
, 0)) == PLUS
14865 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14867 offset
= XEXP (XEXP (disp
, 0), 1);
14868 disp
= gen_rtx_CONST (VOIDmode
,
14869 XEXP (XEXP (disp
, 0), 0));
14873 output_pic_addr_const (file
, disp
, 0);
14874 else if (GET_CODE (disp
) == LABEL_REF
)
14875 output_asm_label (disp
);
14876 else if (CONST_INT_P (disp
))
14879 output_addr_const (file
, disp
);
14885 print_reg (base
, code
, file
);
14888 if (INTVAL (offset
) >= 0)
14890 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14894 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14901 print_reg (index
, vsib
? 0 : code
, file
);
14902 if (scale
!= 1 || vsib
)
14903 fprintf (file
, "*%d", scale
);
14910 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14913 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14917 if (GET_CODE (x
) != UNSPEC
)
14920 op
= XVECEXP (x
, 0, 0);
14921 switch (XINT (x
, 1))
14923 case UNSPEC_GOTTPOFF
:
14924 output_addr_const (file
, op
);
14925 /* FIXME: This might be @TPOFF in Sun ld. */
14926 fputs ("@gottpoff", file
);
14929 output_addr_const (file
, op
);
14930 fputs ("@tpoff", file
);
14932 case UNSPEC_NTPOFF
:
14933 output_addr_const (file
, op
);
14935 fputs ("@tpoff", file
);
14937 fputs ("@ntpoff", file
);
14939 case UNSPEC_DTPOFF
:
14940 output_addr_const (file
, op
);
14941 fputs ("@dtpoff", file
);
14943 case UNSPEC_GOTNTPOFF
:
14944 output_addr_const (file
, op
);
14946 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14947 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14949 fputs ("@gotntpoff", file
);
14951 case UNSPEC_INDNTPOFF
:
14952 output_addr_const (file
, op
);
14953 fputs ("@indntpoff", file
);
14956 case UNSPEC_MACHOPIC_OFFSET
:
14957 output_addr_const (file
, op
);
14959 machopic_output_function_base_name (file
);
14963 case UNSPEC_STACK_CHECK
:
14967 gcc_assert (flag_split_stack
);
14969 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14970 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14972 gcc_unreachable ();
14975 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
14986 /* Split one or more double-mode RTL references into pairs of half-mode
14987 references. The RTL can be REG, offsettable MEM, integer constant, or
14988 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14989 split and "num" is its length. lo_half and hi_half are output arrays
14990 that parallel "operands". */
14993 split_double_mode (enum machine_mode mode
, rtx operands
[],
14994 int num
, rtx lo_half
[], rtx hi_half
[])
14996 enum machine_mode half_mode
;
15002 half_mode
= DImode
;
15005 half_mode
= SImode
;
15008 gcc_unreachable ();
15011 byte
= GET_MODE_SIZE (half_mode
);
15015 rtx op
= operands
[num
];
15017 /* simplify_subreg refuse to split volatile memory addresses,
15018 but we still have to handle it. */
15021 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15022 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15026 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15027 GET_MODE (op
) == VOIDmode
15028 ? mode
: GET_MODE (op
), 0);
15029 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15030 GET_MODE (op
) == VOIDmode
15031 ? mode
: GET_MODE (op
), byte
);
15036 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15037 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15038 is the expression of the binary operation. The output may either be
15039 emitted here, or returned to the caller, like all output_* functions.
15041 There is no guarantee that the operands are the same mode, as they
15042 might be within FLOAT or FLOAT_EXTEND expressions. */
15044 #ifndef SYSV386_COMPAT
15045 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15046 wants to fix the assemblers because that causes incompatibility
15047 with gcc. No-one wants to fix gcc because that causes
15048 incompatibility with assemblers... You can use the option of
15049 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15050 #define SYSV386_COMPAT 1
15054 output_387_binary_op (rtx insn
, rtx
*operands
)
15056 static char buf
[40];
15059 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15061 #ifdef ENABLE_CHECKING
15062 /* Even if we do not want to check the inputs, this documents input
15063 constraints. Which helps in understanding the following code. */
15064 if (STACK_REG_P (operands
[0])
15065 && ((REG_P (operands
[1])
15066 && REGNO (operands
[0]) == REGNO (operands
[1])
15067 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15068 || (REG_P (operands
[2])
15069 && REGNO (operands
[0]) == REGNO (operands
[2])
15070 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15071 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15074 gcc_assert (is_sse
);
15077 switch (GET_CODE (operands
[3]))
15080 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15081 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15089 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15090 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15098 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15099 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15107 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15108 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15116 gcc_unreachable ();
15123 strcpy (buf
, ssep
);
15124 if (GET_MODE (operands
[0]) == SFmode
)
15125 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15127 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15131 strcpy (buf
, ssep
+ 1);
15132 if (GET_MODE (operands
[0]) == SFmode
)
15133 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15135 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15141 switch (GET_CODE (operands
[3]))
15145 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15147 rtx temp
= operands
[2];
15148 operands
[2] = operands
[1];
15149 operands
[1] = temp
;
15152 /* know operands[0] == operands[1]. */
15154 if (MEM_P (operands
[2]))
15160 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15162 if (STACK_TOP_P (operands
[0]))
15163 /* How is it that we are storing to a dead operand[2]?
15164 Well, presumably operands[1] is dead too. We can't
15165 store the result to st(0) as st(0) gets popped on this
15166 instruction. Instead store to operands[2] (which I
15167 think has to be st(1)). st(1) will be popped later.
15168 gcc <= 2.8.1 didn't have this check and generated
15169 assembly code that the Unixware assembler rejected. */
15170 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15172 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15176 if (STACK_TOP_P (operands
[0]))
15177 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15179 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15184 if (MEM_P (operands
[1]))
15190 if (MEM_P (operands
[2]))
15196 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15199 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15200 derived assemblers, confusingly reverse the direction of
15201 the operation for fsub{r} and fdiv{r} when the
15202 destination register is not st(0). The Intel assembler
15203 doesn't have this brain damage. Read !SYSV386_COMPAT to
15204 figure out what the hardware really does. */
15205 if (STACK_TOP_P (operands
[0]))
15206 p
= "{p\t%0, %2|rp\t%2, %0}";
15208 p
= "{rp\t%2, %0|p\t%0, %2}";
15210 if (STACK_TOP_P (operands
[0]))
15211 /* As above for fmul/fadd, we can't store to st(0). */
15212 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15214 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15219 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15222 if (STACK_TOP_P (operands
[0]))
15223 p
= "{rp\t%0, %1|p\t%1, %0}";
15225 p
= "{p\t%1, %0|rp\t%0, %1}";
15227 if (STACK_TOP_P (operands
[0]))
15228 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15230 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15235 if (STACK_TOP_P (operands
[0]))
15237 if (STACK_TOP_P (operands
[1]))
15238 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15240 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15243 else if (STACK_TOP_P (operands
[1]))
15246 p
= "{\t%1, %0|r\t%0, %1}";
15248 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15254 p
= "{r\t%2, %0|\t%0, %2}";
15256 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15262 gcc_unreachable ();
15269 /* Check if a 256bit AVX register is referenced inside of EXP. */
15272 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15276 if (GET_CODE (exp
) == SUBREG
)
15277 exp
= SUBREG_REG (exp
);
15280 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15286 /* Return needed mode for entity in optimize_mode_switching pass. */
15289 ix86_avx_u128_mode_needed (rtx insn
)
15295 /* Needed mode is set to AVX_U128_CLEAN if there are
15296 no 256bit modes used in function arguments. */
15297 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15299 link
= XEXP (link
, 1))
15301 if (GET_CODE (XEXP (link
, 0)) == USE
)
15303 rtx arg
= XEXP (XEXP (link
, 0), 0);
15305 if (ix86_check_avx256_register (&arg
, NULL
))
15306 return AVX_U128_ANY
;
15310 return AVX_U128_CLEAN
;
15313 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15314 changes state only when a 256bit register is written to, but we need
15315 to prevent the compiler from moving optimal insertion point above
15316 eventual read from 256bit register. */
15317 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15318 return AVX_U128_DIRTY
;
15320 return AVX_U128_ANY
;
15323 /* Return mode that i387 must be switched into
15324 prior to the execution of insn. */
15327 ix86_i387_mode_needed (int entity
, rtx insn
)
15329 enum attr_i387_cw mode
;
15331 /* The mode UNINITIALIZED is used to store control word after a
15332 function call or ASM pattern. The mode ANY specify that function
15333 has no requirements on the control word and make no changes in the
15334 bits we are interested in. */
15337 || (NONJUMP_INSN_P (insn
)
15338 && (asm_noperands (PATTERN (insn
)) >= 0
15339 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15340 return I387_CW_UNINITIALIZED
;
15342 if (recog_memoized (insn
) < 0)
15343 return I387_CW_ANY
;
15345 mode
= get_attr_i387_cw (insn
);
15350 if (mode
== I387_CW_TRUNC
)
15355 if (mode
== I387_CW_FLOOR
)
15360 if (mode
== I387_CW_CEIL
)
15365 if (mode
== I387_CW_MASK_PM
)
15370 gcc_unreachable ();
15373 return I387_CW_ANY
;
15376 /* Return mode that entity must be switched into
15377 prior to the execution of insn. */
15380 ix86_mode_needed (int entity
, rtx insn
)
15385 return ix86_avx_u128_mode_needed (insn
);
15390 return ix86_i387_mode_needed (entity
, insn
);
15392 gcc_unreachable ();
15397 /* Check if a 256bit AVX register is referenced in stores. */
15400 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15402 if (ix86_check_avx256_register (&dest
, NULL
))
15404 bool *used
= (bool *) data
;
15409 /* Calculate mode of upper 128bit AVX registers after the insn. */
15412 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15414 rtx pat
= PATTERN (insn
);
15416 if (vzeroupper_operation (pat
, VOIDmode
)
15417 || vzeroall_operation (pat
, VOIDmode
))
15418 return AVX_U128_CLEAN
;
15420 /* We know that state is clean after CALL insn if there are no
15421 256bit registers used in the function return register. */
15424 bool avx_reg256_found
= false;
15425 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15426 if (!avx_reg256_found
)
15427 return AVX_U128_CLEAN
;
15430 /* Otherwise, return current mode. Remember that if insn
15431 references AVX 256bit registers, the mode was already changed
15432 to DIRTY from MODE_NEEDED. */
15436 /* Return the mode that an insn results in. */
15439 ix86_mode_after (int entity
, int mode
, rtx insn
)
15444 return ix86_avx_u128_mode_after (mode
, insn
);
15451 gcc_unreachable ();
15456 ix86_avx_u128_mode_entry (void)
15460 /* Entry mode is set to AVX_U128_DIRTY if there are
15461 256bit modes used in function arguments. */
15462 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15463 arg
= TREE_CHAIN (arg
))
15465 rtx incoming
= DECL_INCOMING_RTL (arg
);
15467 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15468 return AVX_U128_DIRTY
;
15471 return AVX_U128_CLEAN
;
15474 /* Return a mode that ENTITY is assumed to be
15475 switched to at function entry. */
15478 ix86_mode_entry (int entity
)
15483 return ix86_avx_u128_mode_entry ();
15488 return I387_CW_ANY
;
15490 gcc_unreachable ();
15495 ix86_avx_u128_mode_exit (void)
15497 rtx reg
= crtl
->return_rtx
;
15499 /* Exit mode is set to AVX_U128_DIRTY if there are
15500 256bit modes used in the function return register. */
15501 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15502 return AVX_U128_DIRTY
;
15504 return AVX_U128_CLEAN
;
15507 /* Return a mode that ENTITY is assumed to be
15508 switched to at function exit. */
15511 ix86_mode_exit (int entity
)
15516 return ix86_avx_u128_mode_exit ();
15521 return I387_CW_ANY
;
15523 gcc_unreachable ();
15527 /* Output code to initialize control word copies used by trunc?f?i and
15528 rounding patterns. CURRENT_MODE is set to current control word,
15529 while NEW_MODE is set to new control word. */
15532 emit_i387_cw_initialization (int mode
)
15534 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15537 enum ix86_stack_slot slot
;
15539 rtx reg
= gen_reg_rtx (HImode
);
15541 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15542 emit_move_insn (reg
, copy_rtx (stored_mode
));
15544 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15545 || optimize_function_for_size_p (cfun
))
15549 case I387_CW_TRUNC
:
15550 /* round toward zero (truncate) */
15551 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15552 slot
= SLOT_CW_TRUNC
;
15555 case I387_CW_FLOOR
:
15556 /* round down toward -oo */
15557 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15558 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15559 slot
= SLOT_CW_FLOOR
;
15563 /* round up toward +oo */
15564 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15565 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15566 slot
= SLOT_CW_CEIL
;
15569 case I387_CW_MASK_PM
:
15570 /* mask precision exception for nearbyint() */
15571 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15572 slot
= SLOT_CW_MASK_PM
;
15576 gcc_unreachable ();
15583 case I387_CW_TRUNC
:
15584 /* round toward zero (truncate) */
15585 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15586 slot
= SLOT_CW_TRUNC
;
15589 case I387_CW_FLOOR
:
15590 /* round down toward -oo */
15591 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15592 slot
= SLOT_CW_FLOOR
;
15596 /* round up toward +oo */
15597 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15598 slot
= SLOT_CW_CEIL
;
15601 case I387_CW_MASK_PM
:
15602 /* mask precision exception for nearbyint() */
15603 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15604 slot
= SLOT_CW_MASK_PM
;
15608 gcc_unreachable ();
15612 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15614 new_mode
= assign_386_stack_local (HImode
, slot
);
15615 emit_move_insn (new_mode
, reg
);
15618 /* Emit vzeroupper. */
15621 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15625 /* Cancel automatic vzeroupper insertion if there are
15626 live call-saved SSE registers at the insertion point. */
15628 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15629 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15633 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15634 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15637 emit_insn (gen_avx_vzeroupper ());
15640 /* Generate one or more insns to set ENTITY to MODE. */
15643 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15648 if (mode
== AVX_U128_CLEAN
)
15649 ix86_avx_emit_vzeroupper (regs_live
);
15655 if (mode
!= I387_CW_ANY
15656 && mode
!= I387_CW_UNINITIALIZED
)
15657 emit_i387_cw_initialization (mode
);
15660 gcc_unreachable ();
15664 /* Output code for INSN to convert a float to a signed int. OPERANDS
15665 are the insn operands. The output may be [HSD]Imode and the input
15666 operand may be [SDX]Fmode. */
15669 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15671 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15672 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15673 int round_mode
= get_attr_i387_cw (insn
);
15675 /* Jump through a hoop or two for DImode, since the hardware has no
15676 non-popping instruction. We used to do this a different way, but
15677 that was somewhat fragile and broke with post-reload splitters. */
15678 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15679 output_asm_insn ("fld\t%y1", operands
);
15681 gcc_assert (STACK_TOP_P (operands
[1]));
15682 gcc_assert (MEM_P (operands
[0]));
15683 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15686 output_asm_insn ("fisttp%Z0\t%0", operands
);
15689 if (round_mode
!= I387_CW_ANY
)
15690 output_asm_insn ("fldcw\t%3", operands
);
15691 if (stack_top_dies
|| dimode_p
)
15692 output_asm_insn ("fistp%Z0\t%0", operands
);
15694 output_asm_insn ("fist%Z0\t%0", operands
);
15695 if (round_mode
!= I387_CW_ANY
)
15696 output_asm_insn ("fldcw\t%2", operands
);
15702 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15703 have the values zero or one, indicates the ffreep insn's operand
15704 from the OPERANDS array. */
15706 static const char *
15707 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15709 if (TARGET_USE_FFREEP
)
15710 #ifdef HAVE_AS_IX86_FFREEP
15711 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15714 static char retval
[32];
15715 int regno
= REGNO (operands
[opno
]);
15717 gcc_assert (STACK_REGNO_P (regno
));
15719 regno
-= FIRST_STACK_REG
;
15721 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15726 return opno
? "fstp\t%y1" : "fstp\t%y0";
15730 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15731 should be used. UNORDERED_P is true when fucom should be used. */
15734 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15736 int stack_top_dies
;
15737 rtx cmp_op0
, cmp_op1
;
15738 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15742 cmp_op0
= operands
[0];
15743 cmp_op1
= operands
[1];
15747 cmp_op0
= operands
[1];
15748 cmp_op1
= operands
[2];
15753 if (GET_MODE (operands
[0]) == SFmode
)
15755 return "%vucomiss\t{%1, %0|%0, %1}";
15757 return "%vcomiss\t{%1, %0|%0, %1}";
15760 return "%vucomisd\t{%1, %0|%0, %1}";
15762 return "%vcomisd\t{%1, %0|%0, %1}";
15765 gcc_assert (STACK_TOP_P (cmp_op0
));
15767 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15769 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15771 if (stack_top_dies
)
15773 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15774 return output_387_ffreep (operands
, 1);
15777 return "ftst\n\tfnstsw\t%0";
15780 if (STACK_REG_P (cmp_op1
)
15782 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15783 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15785 /* If both the top of the 387 stack dies, and the other operand
15786 is also a stack register that dies, then this must be a
15787 `fcompp' float compare */
15791 /* There is no double popping fcomi variant. Fortunately,
15792 eflags is immune from the fstp's cc clobbering. */
15794 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15796 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15797 return output_387_ffreep (operands
, 0);
15802 return "fucompp\n\tfnstsw\t%0";
15804 return "fcompp\n\tfnstsw\t%0";
15809 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15811 static const char * const alt
[16] =
15813 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15814 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15815 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15816 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15818 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15819 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15823 "fcomi\t{%y1, %0|%0, %y1}",
15824 "fcomip\t{%y1, %0|%0, %y1}",
15825 "fucomi\t{%y1, %0|%0, %y1}",
15826 "fucomip\t{%y1, %0|%0, %y1}",
15837 mask
= eflags_p
<< 3;
15838 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15839 mask
|= unordered_p
<< 1;
15840 mask
|= stack_top_dies
;
15842 gcc_assert (mask
< 16);
15851 ix86_output_addr_vec_elt (FILE *file
, int value
)
15853 const char *directive
= ASM_LONG
;
15857 directive
= ASM_QUAD
;
15859 gcc_assert (!TARGET_64BIT
);
15862 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15866 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15868 const char *directive
= ASM_LONG
;
15871 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15872 directive
= ASM_QUAD
;
15874 gcc_assert (!TARGET_64BIT
);
15876 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15877 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15878 fprintf (file
, "%s%s%d-%s%d\n",
15879 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15880 else if (HAVE_AS_GOTOFF_IN_DATA
)
15881 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15883 else if (TARGET_MACHO
)
15885 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15886 machopic_output_function_base_name (file
);
15891 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15892 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15895 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15899 ix86_expand_clear (rtx dest
)
15903 /* We play register width games, which are only valid after reload. */
15904 gcc_assert (reload_completed
);
15906 /* Avoid HImode and its attendant prefix byte. */
15907 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15908 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15909 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15911 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15912 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15914 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15915 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15921 /* X is an unchanging MEM. If it is a constant pool reference, return
15922 the constant pool rtx, else NULL. */
15925 maybe_get_pool_constant (rtx x
)
15927 x
= ix86_delegitimize_address (XEXP (x
, 0));
15929 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15930 return get_pool_constant (x
);
15936 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15939 enum tls_model model
;
15944 if (GET_CODE (op1
) == SYMBOL_REF
)
15946 model
= SYMBOL_REF_TLS_MODEL (op1
);
15949 op1
= legitimize_tls_address (op1
, model
, true);
15950 op1
= force_operand (op1
, op0
);
15953 op1
= convert_to_mode (mode
, op1
, 1);
15955 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15956 && SYMBOL_REF_DLLIMPORT_P (op1
))
15957 op1
= legitimize_dllimport_symbol (op1
, false);
15959 else if (GET_CODE (op1
) == CONST
15960 && GET_CODE (XEXP (op1
, 0)) == PLUS
15961 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15963 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15964 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15967 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15969 tmp
= legitimize_tls_address (symbol
, model
, true);
15970 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15971 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15972 tmp
= legitimize_dllimport_symbol (symbol
, true);
15976 tmp
= force_operand (tmp
, NULL
);
15977 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15978 op0
, 1, OPTAB_DIRECT
);
15981 op1
= convert_to_mode (mode
, tmp
, 1);
15985 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15986 && symbolic_operand (op1
, mode
))
15988 if (TARGET_MACHO
&& !TARGET_64BIT
)
15991 /* dynamic-no-pic */
15992 if (MACHOPIC_INDIRECT
)
15994 rtx temp
= ((reload_in_progress
15995 || ((op0
&& REG_P (op0
))
15997 ? op0
: gen_reg_rtx (Pmode
));
15998 op1
= machopic_indirect_data_reference (op1
, temp
);
16000 op1
= machopic_legitimize_pic_address (op1
, mode
,
16001 temp
== op1
? 0 : temp
);
16003 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16005 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16009 if (GET_CODE (op0
) == MEM
)
16010 op1
= force_reg (Pmode
, op1
);
16014 if (GET_CODE (temp
) != REG
)
16015 temp
= gen_reg_rtx (Pmode
);
16016 temp
= legitimize_pic_address (op1
, temp
);
16021 /* dynamic-no-pic */
16027 op1
= force_reg (mode
, op1
);
16028 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16030 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16031 op1
= legitimize_pic_address (op1
, reg
);
16034 op1
= convert_to_mode (mode
, op1
, 1);
16041 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16042 || !push_operand (op0
, mode
))
16044 op1
= force_reg (mode
, op1
);
16046 if (push_operand (op0
, mode
)
16047 && ! general_no_elim_operand (op1
, mode
))
16048 op1
= copy_to_mode_reg (mode
, op1
);
16050 /* Force large constants in 64bit compilation into register
16051 to get them CSEed. */
16052 if (can_create_pseudo_p ()
16053 && (mode
== DImode
) && TARGET_64BIT
16054 && immediate_operand (op1
, mode
)
16055 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16056 && !register_operand (op0
, mode
)
16058 op1
= copy_to_mode_reg (mode
, op1
);
16060 if (can_create_pseudo_p ()
16061 && FLOAT_MODE_P (mode
)
16062 && GET_CODE (op1
) == CONST_DOUBLE
)
16064 /* If we are loading a floating point constant to a register,
16065 force the value to memory now, since we'll get better code
16066 out the back end. */
16068 op1
= validize_mem (force_const_mem (mode
, op1
));
16069 if (!register_operand (op0
, mode
))
16071 rtx temp
= gen_reg_rtx (mode
);
16072 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16073 emit_move_insn (op0
, temp
);
16079 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16083 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16085 rtx op0
= operands
[0], op1
= operands
[1];
16086 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16088 /* Force constants other than zero into memory. We do not know how
16089 the instructions used to build constants modify the upper 64 bits
16090 of the register, once we have that information we may be able
16091 to handle some of them more efficiently. */
16092 if (can_create_pseudo_p ()
16093 && register_operand (op0
, mode
)
16094 && (CONSTANT_P (op1
)
16095 || (GET_CODE (op1
) == SUBREG
16096 && CONSTANT_P (SUBREG_REG (op1
))))
16097 && !standard_sse_constant_p (op1
))
16098 op1
= validize_mem (force_const_mem (mode
, op1
));
16100 /* We need to check memory alignment for SSE mode since attribute
16101 can make operands unaligned. */
16102 if (can_create_pseudo_p ()
16103 && SSE_REG_MODE_P (mode
)
16104 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16105 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16109 /* ix86_expand_vector_move_misalign() does not like constants ... */
16110 if (CONSTANT_P (op1
)
16111 || (GET_CODE (op1
) == SUBREG
16112 && CONSTANT_P (SUBREG_REG (op1
))))
16113 op1
= validize_mem (force_const_mem (mode
, op1
));
16115 /* ... nor both arguments in memory. */
16116 if (!register_operand (op0
, mode
)
16117 && !register_operand (op1
, mode
))
16118 op1
= force_reg (mode
, op1
);
16120 tmp
[0] = op0
; tmp
[1] = op1
;
16121 ix86_expand_vector_move_misalign (mode
, tmp
);
16125 /* Make operand1 a register if it isn't already. */
16126 if (can_create_pseudo_p ()
16127 && !register_operand (op0
, mode
)
16128 && !register_operand (op1
, mode
))
16130 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16134 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16137 /* Split 32-byte AVX unaligned load and store if needed. */
16140 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16143 rtx (*extract
) (rtx
, rtx
, rtx
);
16144 rtx (*load_unaligned
) (rtx
, rtx
);
16145 rtx (*store_unaligned
) (rtx
, rtx
);
16146 enum machine_mode mode
;
16148 switch (GET_MODE (op0
))
16151 gcc_unreachable ();
16153 extract
= gen_avx_vextractf128v32qi
;
16154 load_unaligned
= gen_avx_loaddqu256
;
16155 store_unaligned
= gen_avx_storedqu256
;
16159 extract
= gen_avx_vextractf128v8sf
;
16160 load_unaligned
= gen_avx_loadups256
;
16161 store_unaligned
= gen_avx_storeups256
;
16165 extract
= gen_avx_vextractf128v4df
;
16166 load_unaligned
= gen_avx_loadupd256
;
16167 store_unaligned
= gen_avx_storeupd256
;
16174 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16176 rtx r
= gen_reg_rtx (mode
);
16177 m
= adjust_address (op1
, mode
, 0);
16178 emit_move_insn (r
, m
);
16179 m
= adjust_address (op1
, mode
, 16);
16180 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16181 emit_move_insn (op0
, r
);
16184 emit_insn (load_unaligned (op0
, op1
));
16186 else if (MEM_P (op0
))
16188 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16190 m
= adjust_address (op0
, mode
, 0);
16191 emit_insn (extract (m
, op1
, const0_rtx
));
16192 m
= adjust_address (op0
, mode
, 16);
16193 emit_insn (extract (m
, op1
, const1_rtx
));
16196 emit_insn (store_unaligned (op0
, op1
));
16199 gcc_unreachable ();
16202 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16203 straight to ix86_expand_vector_move. */
16204 /* Code generation for scalar reg-reg moves of single and double precision data:
16205 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16209 if (x86_sse_partial_reg_dependency == true)
16214 Code generation for scalar loads of double precision data:
16215 if (x86_sse_split_regs == true)
16216 movlpd mem, reg (gas syntax)
16220 Code generation for unaligned packed loads of single precision data
16221 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16222 if (x86_sse_unaligned_move_optimal)
16225 if (x86_sse_partial_reg_dependency == true)
16237 Code generation for unaligned packed loads of double precision data
16238 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16239 if (x86_sse_unaligned_move_optimal)
16242 if (x86_sse_split_regs == true)
16255 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16263 && GET_MODE_SIZE (mode
) == 32)
16265 switch (GET_MODE_CLASS (mode
))
16267 case MODE_VECTOR_INT
:
16269 op0
= gen_lowpart (V32QImode
, op0
);
16270 op1
= gen_lowpart (V32QImode
, op1
);
16273 case MODE_VECTOR_FLOAT
:
16274 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16278 gcc_unreachable ();
16286 /* ??? If we have typed data, then it would appear that using
16287 movdqu is the only way to get unaligned data loaded with
16289 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16291 op0
= gen_lowpart (V16QImode
, op0
);
16292 op1
= gen_lowpart (V16QImode
, op1
);
16293 /* We will eventually emit movups based on insn attributes. */
16294 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16296 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16301 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16302 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16303 || optimize_function_for_size_p (cfun
))
16305 /* We will eventually emit movups based on insn attributes. */
16306 emit_insn (gen_sse2_loadupd (op0
, op1
));
16310 /* When SSE registers are split into halves, we can avoid
16311 writing to the top half twice. */
16312 if (TARGET_SSE_SPLIT_REGS
)
16314 emit_clobber (op0
);
16319 /* ??? Not sure about the best option for the Intel chips.
16320 The following would seem to satisfy; the register is
16321 entirely cleared, breaking the dependency chain. We
16322 then store to the upper half, with a dependency depth
16323 of one. A rumor has it that Intel recommends two movsd
16324 followed by an unpacklpd, but this is unconfirmed. And
16325 given that the dependency depth of the unpacklpd would
16326 still be one, I'm not sure why this would be better. */
16327 zero
= CONST0_RTX (V2DFmode
);
16330 m
= adjust_address (op1
, DFmode
, 0);
16331 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16332 m
= adjust_address (op1
, DFmode
, 8);
16333 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16338 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16339 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16340 || optimize_function_for_size_p (cfun
))
16342 op0
= gen_lowpart (V4SFmode
, op0
);
16343 op1
= gen_lowpart (V4SFmode
, op1
);
16344 emit_insn (gen_sse_loadups (op0
, op1
));
16348 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16349 emit_move_insn (op0
, CONST0_RTX (mode
));
16351 emit_clobber (op0
);
16353 if (mode
!= V4SFmode
)
16354 op0
= gen_lowpart (V4SFmode
, op0
);
16356 m
= adjust_address (op1
, V2SFmode
, 0);
16357 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16358 m
= adjust_address (op1
, V2SFmode
, 8);
16359 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16362 else if (MEM_P (op0
))
16364 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16366 op0
= gen_lowpart (V16QImode
, op0
);
16367 op1
= gen_lowpart (V16QImode
, op1
);
16368 /* We will eventually emit movups based on insn attributes. */
16369 emit_insn (gen_sse2_storedqu (op0
, op1
));
16371 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16374 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16375 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16376 || optimize_function_for_size_p (cfun
))
16377 /* We will eventually emit movups based on insn attributes. */
16378 emit_insn (gen_sse2_storeupd (op0
, op1
));
16381 m
= adjust_address (op0
, DFmode
, 0);
16382 emit_insn (gen_sse2_storelpd (m
, op1
));
16383 m
= adjust_address (op0
, DFmode
, 8);
16384 emit_insn (gen_sse2_storehpd (m
, op1
));
16389 if (mode
!= V4SFmode
)
16390 op1
= gen_lowpart (V4SFmode
, op1
);
16393 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16394 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16395 || optimize_function_for_size_p (cfun
))
16397 op0
= gen_lowpart (V4SFmode
, op0
);
16398 emit_insn (gen_sse_storeups (op0
, op1
));
16402 m
= adjust_address (op0
, V2SFmode
, 0);
16403 emit_insn (gen_sse_storelps (m
, op1
));
16404 m
= adjust_address (op0
, V2SFmode
, 8);
16405 emit_insn (gen_sse_storehps (m
, op1
));
16410 gcc_unreachable ();
16413 /* Expand a push in MODE. This is some mode for which we do not support
16414 proper push instructions, at least from the registers that we expect
16415 the value to live in. */
16418 ix86_expand_push (enum machine_mode mode
, rtx x
)
16422 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16423 GEN_INT (-GET_MODE_SIZE (mode
)),
16424 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16425 if (tmp
!= stack_pointer_rtx
)
16426 emit_move_insn (stack_pointer_rtx
, tmp
);
16428 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16430 /* When we push an operand onto stack, it has to be aligned at least
16431 at the function argument boundary. However since we don't have
16432 the argument type, we can't determine the actual argument
16434 emit_move_insn (tmp
, x
);
16437 /* Helper function of ix86_fixup_binary_operands to canonicalize
16438 operand order. Returns true if the operands should be swapped. */
16441 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16444 rtx dst
= operands
[0];
16445 rtx src1
= operands
[1];
16446 rtx src2
= operands
[2];
16448 /* If the operation is not commutative, we can't do anything. */
16449 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16452 /* Highest priority is that src1 should match dst. */
16453 if (rtx_equal_p (dst
, src1
))
16455 if (rtx_equal_p (dst
, src2
))
16458 /* Next highest priority is that immediate constants come second. */
16459 if (immediate_operand (src2
, mode
))
16461 if (immediate_operand (src1
, mode
))
16464 /* Lowest priority is that memory references should come second. */
16474 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16475 destination to use for the operation. If different from the true
16476 destination in operands[0], a copy operation will be required. */
16479 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16482 rtx dst
= operands
[0];
16483 rtx src1
= operands
[1];
16484 rtx src2
= operands
[2];
16486 /* Canonicalize operand order. */
16487 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16491 /* It is invalid to swap operands of different modes. */
16492 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16499 /* Both source operands cannot be in memory. */
16500 if (MEM_P (src1
) && MEM_P (src2
))
16502 /* Optimization: Only read from memory once. */
16503 if (rtx_equal_p (src1
, src2
))
16505 src2
= force_reg (mode
, src2
);
16509 src2
= force_reg (mode
, src2
);
16512 /* If the destination is memory, and we do not have matching source
16513 operands, do things in registers. */
16514 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16515 dst
= gen_reg_rtx (mode
);
16517 /* Source 1 cannot be a constant. */
16518 if (CONSTANT_P (src1
))
16519 src1
= force_reg (mode
, src1
);
16521 /* Source 1 cannot be a non-matching memory. */
16522 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16523 src1
= force_reg (mode
, src1
);
16525 /* Improve address combine. */
16527 && GET_MODE_CLASS (mode
) == MODE_INT
16529 src2
= force_reg (mode
, src2
);
16531 operands
[1] = src1
;
16532 operands
[2] = src2
;
16536 /* Similarly, but assume that the destination has already been
16537 set up properly. */
16540 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16541 enum machine_mode mode
, rtx operands
[])
16543 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16544 gcc_assert (dst
== operands
[0]);
16547 /* Attempt to expand a binary operator. Make the expansion closer to the
16548 actual machine, then just general_operand, which will allow 3 separate
16549 memory references (one output, two input) in a single insn. */
16552 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16555 rtx src1
, src2
, dst
, op
, clob
;
16557 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16558 src1
= operands
[1];
16559 src2
= operands
[2];
16561 /* Emit the instruction. */
16563 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16564 if (reload_in_progress
)
16566 /* Reload doesn't know about the flags register, and doesn't know that
16567 it doesn't want to clobber it. We can only do this with PLUS. */
16568 gcc_assert (code
== PLUS
);
16571 else if (reload_completed
16573 && !rtx_equal_p (dst
, src1
))
16575 /* This is going to be an LEA; avoid splitting it later. */
16580 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16581 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16584 /* Fix up the destination if needed. */
16585 if (dst
!= operands
[0])
16586 emit_move_insn (operands
[0], dst
);
16589 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16590 the given OPERANDS. */
16593 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16596 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16597 if (GET_CODE (operands
[1]) == SUBREG
)
16602 else if (GET_CODE (operands
[2]) == SUBREG
)
16607 /* Optimize (__m128i) d | (__m128i) e and similar code
16608 when d and e are float vectors into float vector logical
16609 insn. In C/C++ without using intrinsics there is no other way
16610 to express vector logical operation on float vectors than
16611 to cast them temporarily to integer vectors. */
16613 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16614 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16615 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16616 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16617 && SUBREG_BYTE (op1
) == 0
16618 && (GET_CODE (op2
) == CONST_VECTOR
16619 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16620 && SUBREG_BYTE (op2
) == 0))
16621 && can_create_pseudo_p ())
16624 switch (GET_MODE (SUBREG_REG (op1
)))
16630 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16631 if (GET_CODE (op2
) == CONST_VECTOR
)
16633 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16634 op2
= force_reg (GET_MODE (dst
), op2
);
16639 op2
= SUBREG_REG (operands
[2]);
16640 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16641 op2
= force_reg (GET_MODE (dst
), op2
);
16643 op1
= SUBREG_REG (op1
);
16644 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16645 op1
= force_reg (GET_MODE (dst
), op1
);
16646 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16647 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16649 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
16655 if (!nonimmediate_operand (operands
[1], mode
))
16656 operands
[1] = force_reg (mode
, operands
[1]);
16657 if (!nonimmediate_operand (operands
[2], mode
))
16658 operands
[2] = force_reg (mode
, operands
[2]);
16659 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16660 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16661 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16665 /* Return TRUE or FALSE depending on whether the binary operator meets the
16666 appropriate constraints. */
16669 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16672 rtx dst
= operands
[0];
16673 rtx src1
= operands
[1];
16674 rtx src2
= operands
[2];
16676 /* Both source operands cannot be in memory. */
16677 if (MEM_P (src1
) && MEM_P (src2
))
16680 /* Canonicalize operand order for commutative operators. */
16681 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16688 /* If the destination is memory, we must have a matching source operand. */
16689 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16692 /* Source 1 cannot be a constant. */
16693 if (CONSTANT_P (src1
))
16696 /* Source 1 cannot be a non-matching memory. */
16697 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16698 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16699 return (code
== AND
16702 || (TARGET_64BIT
&& mode
== DImode
))
16703 && satisfies_constraint_L (src2
));
16708 /* Attempt to expand a unary operator. Make the expansion closer to the
16709 actual machine, then just general_operand, which will allow 2 separate
16710 memory references (one output, one input) in a single insn. */
16713 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16716 int matching_memory
;
16717 rtx src
, dst
, op
, clob
;
16722 /* If the destination is memory, and we do not have matching source
16723 operands, do things in registers. */
16724 matching_memory
= 0;
16727 if (rtx_equal_p (dst
, src
))
16728 matching_memory
= 1;
16730 dst
= gen_reg_rtx (mode
);
16733 /* When source operand is memory, destination must match. */
16734 if (MEM_P (src
) && !matching_memory
)
16735 src
= force_reg (mode
, src
);
16737 /* Emit the instruction. */
16739 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16740 if (reload_in_progress
|| code
== NOT
)
16742 /* Reload doesn't know about the flags register, and doesn't know that
16743 it doesn't want to clobber it. */
16744 gcc_assert (code
== NOT
);
16749 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16750 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16753 /* Fix up the destination if needed. */
16754 if (dst
!= operands
[0])
16755 emit_move_insn (operands
[0], dst
);
16758 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16759 divisor are within the range [0-255]. */
16762 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16765 rtx end_label
, qimode_label
;
16766 rtx insn
, div
, mod
;
16767 rtx scratch
, tmp0
, tmp1
, tmp2
;
16768 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16769 rtx (*gen_zero_extend
) (rtx
, rtx
);
16770 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16775 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16776 gen_test_ccno_1
= gen_testsi_ccno_1
;
16777 gen_zero_extend
= gen_zero_extendqisi2
;
16780 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16781 gen_test_ccno_1
= gen_testdi_ccno_1
;
16782 gen_zero_extend
= gen_zero_extendqidi2
;
16785 gcc_unreachable ();
16788 end_label
= gen_label_rtx ();
16789 qimode_label
= gen_label_rtx ();
16791 scratch
= gen_reg_rtx (mode
);
16793 /* Use 8bit unsigned divimod if dividend and divisor are within
16794 the range [0-255]. */
16795 emit_move_insn (scratch
, operands
[2]);
16796 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16797 scratch
, 1, OPTAB_DIRECT
);
16798 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16799 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16800 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16801 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16802 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16804 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16805 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16806 JUMP_LABEL (insn
) = qimode_label
;
16808 /* Generate original signed/unsigned divimod. */
16809 div
= gen_divmod4_1 (operands
[0], operands
[1],
16810 operands
[2], operands
[3]);
16813 /* Branch to the end. */
16814 emit_jump_insn (gen_jump (end_label
));
16817 /* Generate 8bit unsigned divide. */
16818 emit_label (qimode_label
);
16819 /* Don't use operands[0] for result of 8bit divide since not all
16820 registers support QImode ZERO_EXTRACT. */
16821 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16822 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16823 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16824 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16828 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16829 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16833 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16834 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16837 /* Extract remainder from AH. */
16838 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16839 if (REG_P (operands
[1]))
16840 insn
= emit_move_insn (operands
[1], tmp1
);
16843 /* Need a new scratch register since the old one has result
16845 scratch
= gen_reg_rtx (mode
);
16846 emit_move_insn (scratch
, tmp1
);
16847 insn
= emit_move_insn (operands
[1], scratch
);
16849 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16851 /* Zero extend quotient from AL. */
16852 tmp1
= gen_lowpart (QImode
, tmp0
);
16853 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16854 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16856 emit_label (end_label
);
16859 #define LEA_MAX_STALL (3)
16860 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16862 /* Increase given DISTANCE in half-cycles according to
16863 dependencies between PREV and NEXT instructions.
16864 Add 1 half-cycle if there is no dependency and
16865 go to next cycle if there is some dependecy. */
16867 static unsigned int
16868 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16873 if (!prev
|| !next
)
16874 return distance
+ (distance
& 1) + 2;
16876 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16877 return distance
+ 1;
16879 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16880 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16881 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16882 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16883 return distance
+ (distance
& 1) + 2;
16885 return distance
+ 1;
16888 /* Function checks if instruction INSN defines register number
16889 REGNO1 or REGNO2. */
16892 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16897 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16898 if (DF_REF_REG_DEF_P (*def_rec
)
16899 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16900 && (regno1
== DF_REF_REGNO (*def_rec
)
16901 || regno2
== DF_REF_REGNO (*def_rec
)))
16909 /* Function checks if instruction INSN uses register number
16910 REGNO as a part of address expression. */
16913 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16917 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16918 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16924 /* Search backward for non-agu definition of register number REGNO1
16925 or register number REGNO2 in basic block starting from instruction
16926 START up to head of basic block or instruction INSN.
16928 Function puts true value into *FOUND var if definition was found
16929 and false otherwise.
16931 Distance in half-cycles between START and found instruction or head
16932 of BB is added to DISTANCE and returned. */
16935 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16936 rtx insn
, int distance
,
16937 rtx start
, bool *found
)
16939 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16947 && distance
< LEA_SEARCH_THRESHOLD
)
16949 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16951 distance
= increase_distance (prev
, next
, distance
);
16952 if (insn_defines_reg (regno1
, regno2
, prev
))
16954 if (recog_memoized (prev
) < 0
16955 || get_attr_type (prev
) != TYPE_LEA
)
16964 if (prev
== BB_HEAD (bb
))
16967 prev
= PREV_INSN (prev
);
16973 /* Search backward for non-agu definition of register number REGNO1
16974 or register number REGNO2 in INSN's basic block until
16975 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16976 2. Reach neighbour BBs boundary, or
16977 3. Reach agu definition.
16978 Returns the distance between the non-agu definition point and INSN.
16979 If no definition point, returns -1. */
16982 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16985 basic_block bb
= BLOCK_FOR_INSN (insn
);
16987 bool found
= false;
16989 if (insn
!= BB_HEAD (bb
))
16990 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16991 distance
, PREV_INSN (insn
),
16994 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16998 bool simple_loop
= false;
17000 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17003 simple_loop
= true;
17008 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17010 BB_END (bb
), &found
);
17013 int shortest_dist
= -1;
17014 bool found_in_bb
= false;
17016 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17019 = distance_non_agu_define_in_bb (regno1
, regno2
,
17025 if (shortest_dist
< 0)
17026 shortest_dist
= bb_dist
;
17027 else if (bb_dist
> 0)
17028 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17034 distance
= shortest_dist
;
17038 /* get_attr_type may modify recog data. We want to make sure
17039 that recog data is valid for instruction INSN, on which
17040 distance_non_agu_define is called. INSN is unchanged here. */
17041 extract_insn_cached (insn
);
17046 return distance
>> 1;
17049 /* Return the distance in half-cycles between INSN and the next
17050 insn that uses register number REGNO in memory address added
17051 to DISTANCE. Return -1 if REGNO0 is set.
17053 Put true value into *FOUND if register usage was found and
17055 Put true value into *REDEFINED if register redefinition was
17056 found and false otherwise. */
17059 distance_agu_use_in_bb (unsigned int regno
,
17060 rtx insn
, int distance
, rtx start
,
17061 bool *found
, bool *redefined
)
17063 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17068 *redefined
= false;
17072 && distance
< LEA_SEARCH_THRESHOLD
)
17074 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17076 distance
= increase_distance(prev
, next
, distance
);
17077 if (insn_uses_reg_mem (regno
, next
))
17079 /* Return DISTANCE if OP0 is used in memory
17080 address in NEXT. */
17085 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17087 /* Return -1 if OP0 is set in NEXT. */
17095 if (next
== BB_END (bb
))
17098 next
= NEXT_INSN (next
);
17104 /* Return the distance between INSN and the next insn that uses
17105 register number REGNO0 in memory address. Return -1 if no such
17106 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17109 distance_agu_use (unsigned int regno0
, rtx insn
)
17111 basic_block bb
= BLOCK_FOR_INSN (insn
);
17113 bool found
= false;
17114 bool redefined
= false;
17116 if (insn
!= BB_END (bb
))
17117 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17119 &found
, &redefined
);
17121 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17125 bool simple_loop
= false;
17127 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17130 simple_loop
= true;
17135 distance
= distance_agu_use_in_bb (regno0
, insn
,
17136 distance
, BB_HEAD (bb
),
17137 &found
, &redefined
);
17140 int shortest_dist
= -1;
17141 bool found_in_bb
= false;
17142 bool redefined_in_bb
= false;
17144 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17147 = distance_agu_use_in_bb (regno0
, insn
,
17148 distance
, BB_HEAD (e
->dest
),
17149 &found_in_bb
, &redefined_in_bb
);
17152 if (shortest_dist
< 0)
17153 shortest_dist
= bb_dist
;
17154 else if (bb_dist
> 0)
17155 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17161 distance
= shortest_dist
;
17165 if (!found
|| redefined
)
17168 return distance
>> 1;
17171 /* Define this macro to tune LEA priority vs ADD, it take effect when
17172 there is a dilemma of choicing LEA or ADD
17173 Negative value: ADD is more preferred than LEA
17175 Positive value: LEA is more preferred than ADD*/
17176 #define IX86_LEA_PRIORITY 0
17178 /* Return true if usage of lea INSN has performance advantage
17179 over a sequence of instructions. Instructions sequence has
17180 SPLIT_COST cycles higher latency than lea latency. */
17183 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17184 unsigned int regno2
, int split_cost
)
17186 int dist_define
, dist_use
;
17188 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17189 dist_use
= distance_agu_use (regno0
, insn
);
17191 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17193 /* If there is no non AGU operand definition, no AGU
17194 operand usage and split cost is 0 then both lea
17195 and non lea variants have same priority. Currently
17196 we prefer lea for 64 bit code and non lea on 32 bit
17198 if (dist_use
< 0 && split_cost
== 0)
17199 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17204 /* With longer definitions distance lea is more preferable.
17205 Here we change it to take into account splitting cost and
17207 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17209 /* If there is no use in memory addess then we just check
17210 that split cost exceeds AGU stall. */
17212 return dist_define
> LEA_MAX_STALL
;
17214 /* If this insn has both backward non-agu dependence and forward
17215 agu dependence, the one with short distance takes effect. */
17216 return dist_define
>= dist_use
;
17219 /* Return true if it is legal to clobber flags by INSN and
17220 false otherwise. */
17223 ix86_ok_to_clobber_flags (rtx insn
)
17225 basic_block bb
= BLOCK_FOR_INSN (insn
);
17231 if (NONDEBUG_INSN_P (insn
))
17233 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17234 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17237 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17241 if (insn
== BB_END (bb
))
17244 insn
= NEXT_INSN (insn
);
17247 live
= df_get_live_out(bb
);
17248 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17251 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17252 move and add to avoid AGU stalls. */
17255 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17257 unsigned int regno0
, regno1
, regno2
;
17259 /* Check if we need to optimize. */
17260 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17263 /* Check it is correct to split here. */
17264 if (!ix86_ok_to_clobber_flags(insn
))
17267 regno0
= true_regnum (operands
[0]);
17268 regno1
= true_regnum (operands
[1]);
17269 regno2
= true_regnum (operands
[2]);
17271 /* We need to split only adds with non destructive
17272 destination operand. */
17273 if (regno0
== regno1
|| regno0
== regno2
)
17276 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17279 /* Return true if we should emit lea instruction instead of mov
17283 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17285 unsigned int regno0
, regno1
;
17287 /* Check if we need to optimize. */
17288 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17291 /* Use lea for reg to reg moves only. */
17292 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17295 regno0
= true_regnum (operands
[0]);
17296 regno1
= true_regnum (operands
[1]);
17298 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17301 /* Return true if we need to split lea into a sequence of
17302 instructions to avoid AGU stalls. */
17305 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17307 unsigned int regno0
, regno1
, regno2
;
17309 struct ix86_address parts
;
17312 /* Check we need to optimize. */
17313 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17316 /* Check it is correct to split here. */
17317 if (!ix86_ok_to_clobber_flags(insn
))
17320 ok
= ix86_decompose_address (operands
[1], &parts
);
17323 /* There should be at least two components in the address. */
17324 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17325 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17328 /* We should not split into add if non legitimate pic
17329 operand is used as displacement. */
17330 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17333 regno0
= true_regnum (operands
[0]) ;
17334 regno1
= INVALID_REGNUM
;
17335 regno2
= INVALID_REGNUM
;
17338 regno1
= true_regnum (parts
.base
);
17340 regno2
= true_regnum (parts
.index
);
17344 /* Compute how many cycles we will add to execution time
17345 if split lea into a sequence of instructions. */
17346 if (parts
.base
|| parts
.index
)
17348 /* Have to use mov instruction if non desctructive
17349 destination form is used. */
17350 if (regno1
!= regno0
&& regno2
!= regno0
)
17353 /* Have to add index to base if both exist. */
17354 if (parts
.base
&& parts
.index
)
17357 /* Have to use shift and adds if scale is 2 or greater. */
17358 if (parts
.scale
> 1)
17360 if (regno0
!= regno1
)
17362 else if (regno2
== regno0
)
17365 split_cost
+= parts
.scale
;
17368 /* Have to use add instruction with immediate if
17369 disp is non zero. */
17370 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17373 /* Subtract the price of lea. */
17377 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17380 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17381 matches destination. RTX includes clobber of FLAGS_REG. */
17384 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17389 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17390 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17392 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17395 /* Return true if regno1 def is nearest to the insn. */
17398 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17401 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17405 while (prev
&& prev
!= start
)
17407 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17409 prev
= PREV_INSN (prev
);
17412 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17414 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17416 prev
= PREV_INSN (prev
);
17419 /* None of the regs is defined in the bb. */
17423 /* Split lea instructions into a sequence of instructions
17424 which are executed on ALU to avoid AGU stalls.
17425 It is assumed that it is allowed to clobber flags register
17426 at lea position. */
17429 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17431 unsigned int regno0
, regno1
, regno2
;
17432 struct ix86_address parts
;
17436 ok
= ix86_decompose_address (operands
[1], &parts
);
17439 target
= gen_lowpart (mode
, operands
[0]);
17441 regno0
= true_regnum (target
);
17442 regno1
= INVALID_REGNUM
;
17443 regno2
= INVALID_REGNUM
;
17447 parts
.base
= gen_lowpart (mode
, parts
.base
);
17448 regno1
= true_regnum (parts
.base
);
17453 parts
.index
= gen_lowpart (mode
, parts
.index
);
17454 regno2
= true_regnum (parts
.index
);
17458 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17460 if (parts
.scale
> 1)
17462 /* Case r1 = r1 + ... */
17463 if (regno1
== regno0
)
17465 /* If we have a case r1 = r1 + C * r1 then we
17466 should use multiplication which is very
17467 expensive. Assume cost model is wrong if we
17468 have such case here. */
17469 gcc_assert (regno2
!= regno0
);
17471 for (adds
= parts
.scale
; adds
> 0; adds
--)
17472 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17476 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17477 if (regno0
!= regno2
)
17478 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17480 /* Use shift for scaling. */
17481 ix86_emit_binop (ASHIFT
, mode
, target
,
17482 GEN_INT (exact_log2 (parts
.scale
)));
17485 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17487 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17488 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17491 else if (!parts
.base
&& !parts
.index
)
17493 gcc_assert(parts
.disp
);
17494 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17500 if (regno0
!= regno2
)
17501 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17503 else if (!parts
.index
)
17505 if (regno0
!= regno1
)
17506 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17510 if (regno0
== regno1
)
17512 else if (regno0
== regno2
)
17518 /* Find better operand for SET instruction, depending
17519 on which definition is farther from the insn. */
17520 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17521 tmp
= parts
.index
, tmp1
= parts
.base
;
17523 tmp
= parts
.base
, tmp1
= parts
.index
;
17525 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17527 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17528 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17530 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17534 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17537 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17538 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17542 /* Return true if it is ok to optimize an ADD operation to LEA
17543 operation to avoid flag register consumation. For most processors,
17544 ADD is faster than LEA. For the processors like ATOM, if the
17545 destination register of LEA holds an actual address which will be
17546 used soon, LEA is better and otherwise ADD is better. */
17549 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17551 unsigned int regno0
= true_regnum (operands
[0]);
17552 unsigned int regno1
= true_regnum (operands
[1]);
17553 unsigned int regno2
= true_regnum (operands
[2]);
17555 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17556 if (regno0
!= regno1
&& regno0
!= regno2
)
17559 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17562 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17565 /* Return true if destination reg of SET_BODY is shift count of
17569 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17575 /* Retrieve destination of SET_BODY. */
17576 switch (GET_CODE (set_body
))
17579 set_dest
= SET_DEST (set_body
);
17580 if (!set_dest
|| !REG_P (set_dest
))
17584 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17585 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17593 /* Retrieve shift count of USE_BODY. */
17594 switch (GET_CODE (use_body
))
17597 shift_rtx
= XEXP (use_body
, 1);
17600 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17601 if (ix86_dep_by_shift_count_body (set_body
,
17602 XVECEXP (use_body
, 0, i
)))
17610 && (GET_CODE (shift_rtx
) == ASHIFT
17611 || GET_CODE (shift_rtx
) == LSHIFTRT
17612 || GET_CODE (shift_rtx
) == ASHIFTRT
17613 || GET_CODE (shift_rtx
) == ROTATE
17614 || GET_CODE (shift_rtx
) == ROTATERT
))
17616 rtx shift_count
= XEXP (shift_rtx
, 1);
17618 /* Return true if shift count is dest of SET_BODY. */
17619 if (REG_P (shift_count
))
17621 /* Add check since it can be invoked before register
17622 allocation in pre-reload schedule. */
17623 if (reload_completed
17624 && true_regnum (set_dest
) == true_regnum (shift_count
))
17626 else if (REGNO(set_dest
) == REGNO(shift_count
))
17634 /* Return true if destination reg of SET_INSN is shift count of
17638 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17640 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17641 PATTERN (use_insn
));
17644 /* Return TRUE or FALSE depending on whether the unary operator meets the
17645 appropriate constraints. */
17648 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17649 enum machine_mode mode ATTRIBUTE_UNUSED
,
17650 rtx operands
[2] ATTRIBUTE_UNUSED
)
17652 /* If one of operands is memory, source and destination must match. */
17653 if ((MEM_P (operands
[0])
17654 || MEM_P (operands
[1]))
17655 && ! rtx_equal_p (operands
[0], operands
[1]))
17660 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17661 are ok, keeping in mind the possible movddup alternative. */
17664 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17666 if (MEM_P (operands
[0]))
17667 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17668 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17669 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17673 /* Post-reload splitter for converting an SF or DFmode value in an
17674 SSE register into an unsigned SImode. */
17677 ix86_split_convert_uns_si_sse (rtx operands
[])
17679 enum machine_mode vecmode
;
17680 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17682 large
= operands
[1];
17683 zero_or_two31
= operands
[2];
17684 input
= operands
[3];
17685 two31
= operands
[4];
17686 vecmode
= GET_MODE (large
);
17687 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17689 /* Load up the value into the low element. We must ensure that the other
17690 elements are valid floats -- zero is the easiest such value. */
17693 if (vecmode
== V4SFmode
)
17694 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17696 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17700 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17701 emit_move_insn (value
, CONST0_RTX (vecmode
));
17702 if (vecmode
== V4SFmode
)
17703 emit_insn (gen_sse_movss (value
, value
, input
));
17705 emit_insn (gen_sse2_movsd (value
, value
, input
));
17708 emit_move_insn (large
, two31
);
17709 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17711 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17712 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17714 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17715 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17717 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17718 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17720 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17721 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17723 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17724 if (vecmode
== V4SFmode
)
17725 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17727 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17730 emit_insn (gen_xorv4si3 (value
, value
, large
));
17733 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17734 Expects the 64-bit DImode to be supplied in a pair of integral
17735 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17736 -mfpmath=sse, !optimize_size only. */
17739 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17741 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17742 rtx int_xmm
, fp_xmm
;
17743 rtx biases
, exponents
;
17746 int_xmm
= gen_reg_rtx (V4SImode
);
17747 if (TARGET_INTER_UNIT_MOVES
)
17748 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17749 else if (TARGET_SSE_SPLIT_REGS
)
17751 emit_clobber (int_xmm
);
17752 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17756 x
= gen_reg_rtx (V2DImode
);
17757 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17758 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17761 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17762 gen_rtvec (4, GEN_INT (0x43300000UL
),
17763 GEN_INT (0x45300000UL
),
17764 const0_rtx
, const0_rtx
));
17765 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17767 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17768 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17770 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17771 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17772 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17773 (0x1.0p84 + double(fp_value_hi_xmm)).
17774 Note these exponents differ by 32. */
17776 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17778 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17779 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17780 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17781 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17782 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17783 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17784 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17785 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17786 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17788 /* Add the upper and lower DFmode values together. */
17790 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17793 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17794 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17795 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17798 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17801 /* Not used, but eases macroization of patterns. */
17803 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17804 rtx input ATTRIBUTE_UNUSED
)
17806 gcc_unreachable ();
17809 /* Convert an unsigned SImode value into a DFmode. Only currently used
17810 for SSE, but applicable anywhere. */
17813 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17815 REAL_VALUE_TYPE TWO31r
;
17818 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17819 NULL
, 1, OPTAB_DIRECT
);
17821 fp
= gen_reg_rtx (DFmode
);
17822 emit_insn (gen_floatsidf2 (fp
, x
));
17824 real_ldexp (&TWO31r
, &dconst1
, 31);
17825 x
= const_double_from_real_value (TWO31r
, DFmode
);
17827 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17829 emit_move_insn (target
, x
);
17832 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17833 32-bit mode; otherwise we have a direct convert instruction. */
17836 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17838 REAL_VALUE_TYPE TWO32r
;
17839 rtx fp_lo
, fp_hi
, x
;
17841 fp_lo
= gen_reg_rtx (DFmode
);
17842 fp_hi
= gen_reg_rtx (DFmode
);
17844 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17846 real_ldexp (&TWO32r
, &dconst1
, 32);
17847 x
= const_double_from_real_value (TWO32r
, DFmode
);
17848 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17850 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17852 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17855 emit_move_insn (target
, x
);
17858 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17859 For x86_32, -mfpmath=sse, !optimize_size only. */
17861 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17863 REAL_VALUE_TYPE ONE16r
;
17864 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17866 real_ldexp (&ONE16r
, &dconst1
, 16);
17867 x
= const_double_from_real_value (ONE16r
, SFmode
);
17868 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17869 NULL
, 0, OPTAB_DIRECT
);
17870 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17871 NULL
, 0, OPTAB_DIRECT
);
17872 fp_hi
= gen_reg_rtx (SFmode
);
17873 fp_lo
= gen_reg_rtx (SFmode
);
17874 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17875 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17876 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17878 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17880 if (!rtx_equal_p (target
, fp_hi
))
17881 emit_move_insn (target
, fp_hi
);
17884 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17885 a vector of unsigned ints VAL to vector of floats TARGET. */
17888 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17891 REAL_VALUE_TYPE TWO16r
;
17892 enum machine_mode intmode
= GET_MODE (val
);
17893 enum machine_mode fltmode
= GET_MODE (target
);
17894 rtx (*cvt
) (rtx
, rtx
);
17896 if (intmode
== V4SImode
)
17897 cvt
= gen_floatv4siv4sf2
;
17899 cvt
= gen_floatv8siv8sf2
;
17900 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17901 tmp
[0] = force_reg (intmode
, tmp
[0]);
17902 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17904 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17905 NULL_RTX
, 1, OPTAB_DIRECT
);
17906 tmp
[3] = gen_reg_rtx (fltmode
);
17907 emit_insn (cvt (tmp
[3], tmp
[1]));
17908 tmp
[4] = gen_reg_rtx (fltmode
);
17909 emit_insn (cvt (tmp
[4], tmp
[2]));
17910 real_ldexp (&TWO16r
, &dconst1
, 16);
17911 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17912 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17913 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17915 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17917 if (tmp
[7] != target
)
17918 emit_move_insn (target
, tmp
[7]);
17921 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17922 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17923 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17924 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17927 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17929 REAL_VALUE_TYPE TWO31r
;
17930 rtx two31r
, tmp
[4];
17931 enum machine_mode mode
= GET_MODE (val
);
17932 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17933 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17934 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17937 for (i
= 0; i
< 3; i
++)
17938 tmp
[i
] = gen_reg_rtx (mode
);
17939 real_ldexp (&TWO31r
, &dconst1
, 31);
17940 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17941 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17942 two31r
= force_reg (mode
, two31r
);
17945 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17946 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17947 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17948 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17949 default: gcc_unreachable ();
17951 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17952 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17953 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17955 if (intmode
== V4SImode
|| TARGET_AVX2
)
17956 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17957 gen_lowpart (intmode
, tmp
[0]),
17958 GEN_INT (31), NULL_RTX
, 0,
17962 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17963 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17964 *xorp
= expand_simple_binop (intmode
, AND
,
17965 gen_lowpart (intmode
, tmp
[0]),
17966 two31
, NULL_RTX
, 0,
17969 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17973 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17974 then replicate the value for all elements of the vector
17978 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17982 enum machine_mode scalar_mode
;
17999 n_elt
= GET_MODE_NUNITS (mode
);
18000 v
= rtvec_alloc (n_elt
);
18001 scalar_mode
= GET_MODE_INNER (mode
);
18003 RTVEC_ELT (v
, 0) = value
;
18005 for (i
= 1; i
< n_elt
; ++i
)
18006 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18008 return gen_rtx_CONST_VECTOR (mode
, v
);
18011 gcc_unreachable ();
18015 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18016 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18017 for an SSE register. If VECT is true, then replicate the mask for
18018 all elements of the vector register. If INVERT is true, then create
18019 a mask excluding the sign bit. */
18022 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18024 enum machine_mode vec_mode
, imode
;
18025 HOST_WIDE_INT hi
, lo
;
18030 /* Find the sign bit, sign extended to 2*HWI. */
18038 mode
= GET_MODE_INNER (mode
);
18040 lo
= 0x80000000, hi
= lo
< 0;
18048 mode
= GET_MODE_INNER (mode
);
18050 if (HOST_BITS_PER_WIDE_INT
>= 64)
18051 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18053 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18058 vec_mode
= VOIDmode
;
18059 if (HOST_BITS_PER_WIDE_INT
>= 64)
18062 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18069 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18073 lo
= ~lo
, hi
= ~hi
;
18079 mask
= immed_double_const (lo
, hi
, imode
);
18081 vec
= gen_rtvec (2, v
, mask
);
18082 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18083 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18090 gcc_unreachable ();
18094 lo
= ~lo
, hi
= ~hi
;
18096 /* Force this value into the low part of a fp vector constant. */
18097 mask
= immed_double_const (lo
, hi
, imode
);
18098 mask
= gen_lowpart (mode
, mask
);
18100 if (vec_mode
== VOIDmode
)
18101 return force_reg (mode
, mask
);
18103 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18104 return force_reg (vec_mode
, v
);
18107 /* Generate code for floating point ABS or NEG. */
18110 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18113 rtx mask
, set
, dst
, src
;
18114 bool use_sse
= false;
18115 bool vector_mode
= VECTOR_MODE_P (mode
);
18116 enum machine_mode vmode
= mode
;
18120 else if (mode
== TFmode
)
18122 else if (TARGET_SSE_MATH
)
18124 use_sse
= SSE_FLOAT_MODE_P (mode
);
18125 if (mode
== SFmode
)
18127 else if (mode
== DFmode
)
18131 /* NEG and ABS performed with SSE use bitwise mask operations.
18132 Create the appropriate mask now. */
18134 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18141 set
= gen_rtx_fmt_e (code
, mode
, src
);
18142 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18149 use
= gen_rtx_USE (VOIDmode
, mask
);
18151 par
= gen_rtvec (2, set
, use
);
18154 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18155 par
= gen_rtvec (3, set
, use
, clob
);
18157 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18163 /* Expand a copysign operation. Special case operand 0 being a constant. */
18166 ix86_expand_copysign (rtx operands
[])
18168 enum machine_mode mode
, vmode
;
18169 rtx dest
, op0
, op1
, mask
, nmask
;
18171 dest
= operands
[0];
18175 mode
= GET_MODE (dest
);
18177 if (mode
== SFmode
)
18179 else if (mode
== DFmode
)
18184 if (GET_CODE (op0
) == CONST_DOUBLE
)
18186 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18188 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18189 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18191 if (mode
== SFmode
|| mode
== DFmode
)
18193 if (op0
== CONST0_RTX (mode
))
18194 op0
= CONST0_RTX (vmode
);
18197 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18199 op0
= force_reg (vmode
, v
);
18202 else if (op0
!= CONST0_RTX (mode
))
18203 op0
= force_reg (mode
, op0
);
18205 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18207 if (mode
== SFmode
)
18208 copysign_insn
= gen_copysignsf3_const
;
18209 else if (mode
== DFmode
)
18210 copysign_insn
= gen_copysigndf3_const
;
18212 copysign_insn
= gen_copysigntf3_const
;
18214 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18218 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18220 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18221 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18223 if (mode
== SFmode
)
18224 copysign_insn
= gen_copysignsf3_var
;
18225 else if (mode
== DFmode
)
18226 copysign_insn
= gen_copysigndf3_var
;
18228 copysign_insn
= gen_copysigntf3_var
;
18230 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18234 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18235 be a constant, and so has already been expanded into a vector constant. */
18238 ix86_split_copysign_const (rtx operands
[])
18240 enum machine_mode mode
, vmode
;
18241 rtx dest
, op0
, mask
, x
;
18243 dest
= operands
[0];
18245 mask
= operands
[3];
18247 mode
= GET_MODE (dest
);
18248 vmode
= GET_MODE (mask
);
18250 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18251 x
= gen_rtx_AND (vmode
, dest
, mask
);
18252 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18254 if (op0
!= CONST0_RTX (vmode
))
18256 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18257 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18261 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18262 so we have to do two masks. */
18265 ix86_split_copysign_var (rtx operands
[])
18267 enum machine_mode mode
, vmode
;
18268 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18270 dest
= operands
[0];
18271 scratch
= operands
[1];
18274 nmask
= operands
[4];
18275 mask
= operands
[5];
18277 mode
= GET_MODE (dest
);
18278 vmode
= GET_MODE (mask
);
18280 if (rtx_equal_p (op0
, op1
))
18282 /* Shouldn't happen often (it's useless, obviously), but when it does
18283 we'd generate incorrect code if we continue below. */
18284 emit_move_insn (dest
, op0
);
18288 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18290 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18292 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18293 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18296 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18297 x
= gen_rtx_NOT (vmode
, dest
);
18298 x
= gen_rtx_AND (vmode
, x
, op0
);
18299 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18303 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18305 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18307 else /* alternative 2,4 */
18309 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18310 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18311 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18313 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18315 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18317 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18318 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18320 else /* alternative 3,4 */
18322 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18324 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18325 x
= gen_rtx_AND (vmode
, dest
, op0
);
18327 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18330 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18331 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18334 /* Return TRUE or FALSE depending on whether the first SET in INSN
18335 has source and destination with matching CC modes, and that the
18336 CC mode is at least as constrained as REQ_MODE. */
18339 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18342 enum machine_mode set_mode
;
18344 set
= PATTERN (insn
);
18345 if (GET_CODE (set
) == PARALLEL
)
18346 set
= XVECEXP (set
, 0, 0);
18347 gcc_assert (GET_CODE (set
) == SET
);
18348 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18350 set_mode
= GET_MODE (SET_DEST (set
));
18354 if (req_mode
!= CCNOmode
18355 && (req_mode
!= CCmode
18356 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18360 if (req_mode
== CCGCmode
)
18364 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18368 if (req_mode
== CCZmode
)
18378 if (set_mode
!= req_mode
)
18383 gcc_unreachable ();
18386 return GET_MODE (SET_SRC (set
)) == set_mode
;
18389 /* Generate insn patterns to do an integer compare of OPERANDS. */
18392 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18394 enum machine_mode cmpmode
;
18397 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18398 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18400 /* This is very simple, but making the interface the same as in the
18401 FP case makes the rest of the code easier. */
18402 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18403 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18405 /* Return the test that should be put into the flags user, i.e.
18406 the bcc, scc, or cmov instruction. */
18407 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18410 /* Figure out whether to use ordered or unordered fp comparisons.
18411 Return the appropriate mode to use. */
18414 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18416 /* ??? In order to make all comparisons reversible, we do all comparisons
18417 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18418 all forms trapping and nontrapping comparisons, we can make inequality
18419 comparisons trapping again, since it results in better code when using
18420 FCOM based compares. */
18421 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18425 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18427 enum machine_mode mode
= GET_MODE (op0
);
18429 if (SCALAR_FLOAT_MODE_P (mode
))
18431 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18432 return ix86_fp_compare_mode (code
);
18437 /* Only zero flag is needed. */
18438 case EQ
: /* ZF=0 */
18439 case NE
: /* ZF!=0 */
18441 /* Codes needing carry flag. */
18442 case GEU
: /* CF=0 */
18443 case LTU
: /* CF=1 */
18444 /* Detect overflow checks. They need just the carry flag. */
18445 if (GET_CODE (op0
) == PLUS
18446 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18450 case GTU
: /* CF=0 & ZF=0 */
18451 case LEU
: /* CF=1 | ZF=1 */
18452 /* Detect overflow checks. They need just the carry flag. */
18453 if (GET_CODE (op0
) == MINUS
18454 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18458 /* Codes possibly doable only with sign flag when
18459 comparing against zero. */
18460 case GE
: /* SF=OF or SF=0 */
18461 case LT
: /* SF<>OF or SF=1 */
18462 if (op1
== const0_rtx
)
18465 /* For other cases Carry flag is not required. */
18467 /* Codes doable only with sign flag when comparing
18468 against zero, but we miss jump instruction for it
18469 so we need to use relational tests against overflow
18470 that thus needs to be zero. */
18471 case GT
: /* ZF=0 & SF=OF */
18472 case LE
: /* ZF=1 | SF<>OF */
18473 if (op1
== const0_rtx
)
18477 /* strcmp pattern do (use flags) and combine may ask us for proper
18482 gcc_unreachable ();
18486 /* Return the fixed registers used for condition codes. */
18489 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18496 /* If two condition code modes are compatible, return a condition code
18497 mode which is compatible with both. Otherwise, return
18500 static enum machine_mode
18501 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18506 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18509 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18510 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18513 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18515 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18521 gcc_unreachable ();
18551 /* These are only compatible with themselves, which we already
18558 /* Return a comparison we can do and that it is equivalent to
18559 swap_condition (code) apart possibly from orderedness.
18560 But, never change orderedness if TARGET_IEEE_FP, returning
18561 UNKNOWN in that case if necessary. */
18563 static enum rtx_code
18564 ix86_fp_swap_condition (enum rtx_code code
)
18568 case GT
: /* GTU - CF=0 & ZF=0 */
18569 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18570 case GE
: /* GEU - CF=0 */
18571 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18572 case UNLT
: /* LTU - CF=1 */
18573 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18574 case UNLE
: /* LEU - CF=1 | ZF=1 */
18575 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18577 return swap_condition (code
);
18581 /* Return cost of comparison CODE using the best strategy for performance.
18582 All following functions do use number of instructions as a cost metrics.
18583 In future this should be tweaked to compute bytes for optimize_size and
18584 take into account performance of various instructions on various CPUs. */
18587 ix86_fp_comparison_cost (enum rtx_code code
)
18591 /* The cost of code using bit-twiddling on %ah. */
18608 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18612 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18615 gcc_unreachable ();
18618 switch (ix86_fp_comparison_strategy (code
))
18620 case IX86_FPCMP_COMI
:
18621 return arith_cost
> 4 ? 3 : 2;
18622 case IX86_FPCMP_SAHF
:
18623 return arith_cost
> 4 ? 4 : 3;
18629 /* Return strategy to use for floating-point. We assume that fcomi is always
18630 preferrable where available, since that is also true when looking at size
18631 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18633 enum ix86_fpcmp_strategy
18634 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18636 /* Do fcomi/sahf based test when profitable. */
18639 return IX86_FPCMP_COMI
;
18641 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18642 return IX86_FPCMP_SAHF
;
18644 return IX86_FPCMP_ARITH
;
18647 /* Swap, force into registers, or otherwise massage the two operands
18648 to a fp comparison. The operands are updated in place; the new
18649 comparison code is returned. */
18651 static enum rtx_code
18652 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18654 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18655 rtx op0
= *pop0
, op1
= *pop1
;
18656 enum machine_mode op_mode
= GET_MODE (op0
);
18657 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18659 /* All of the unordered compare instructions only work on registers.
18660 The same is true of the fcomi compare instructions. The XFmode
18661 compare instructions require registers except when comparing
18662 against zero or when converting operand 1 from fixed point to
18666 && (fpcmp_mode
== CCFPUmode
18667 || (op_mode
== XFmode
18668 && ! (standard_80387_constant_p (op0
) == 1
18669 || standard_80387_constant_p (op1
) == 1)
18670 && GET_CODE (op1
) != FLOAT
)
18671 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18673 op0
= force_reg (op_mode
, op0
);
18674 op1
= force_reg (op_mode
, op1
);
18678 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18679 things around if they appear profitable, otherwise force op0
18680 into a register. */
18682 if (standard_80387_constant_p (op0
) == 0
18684 && ! (standard_80387_constant_p (op1
) == 0
18687 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18688 if (new_code
!= UNKNOWN
)
18691 tmp
= op0
, op0
= op1
, op1
= tmp
;
18697 op0
= force_reg (op_mode
, op0
);
18699 if (CONSTANT_P (op1
))
18701 int tmp
= standard_80387_constant_p (op1
);
18703 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18707 op1
= force_reg (op_mode
, op1
);
18710 op1
= force_reg (op_mode
, op1
);
18714 /* Try to rearrange the comparison to make it cheaper. */
18715 if (ix86_fp_comparison_cost (code
)
18716 > ix86_fp_comparison_cost (swap_condition (code
))
18717 && (REG_P (op1
) || can_create_pseudo_p ()))
18720 tmp
= op0
, op0
= op1
, op1
= tmp
;
18721 code
= swap_condition (code
);
18723 op0
= force_reg (op_mode
, op0
);
18731 /* Convert comparison codes we use to represent FP comparison to integer
18732 code that will result in proper branch. Return UNKNOWN if no such code
18736 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18765 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18768 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18770 enum machine_mode fpcmp_mode
, intcmp_mode
;
18773 fpcmp_mode
= ix86_fp_compare_mode (code
);
18774 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18776 /* Do fcomi/sahf based test when profitable. */
18777 switch (ix86_fp_comparison_strategy (code
))
18779 case IX86_FPCMP_COMI
:
18780 intcmp_mode
= fpcmp_mode
;
18781 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18782 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18787 case IX86_FPCMP_SAHF
:
18788 intcmp_mode
= fpcmp_mode
;
18789 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18790 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18794 scratch
= gen_reg_rtx (HImode
);
18795 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18796 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18799 case IX86_FPCMP_ARITH
:
18800 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18801 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18802 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18804 scratch
= gen_reg_rtx (HImode
);
18805 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18807 /* In the unordered case, we have to check C2 for NaN's, which
18808 doesn't happen to work out to anything nice combination-wise.
18809 So do some bit twiddling on the value we've got in AH to come
18810 up with an appropriate set of condition codes. */
18812 intcmp_mode
= CCNOmode
;
18817 if (code
== GT
|| !TARGET_IEEE_FP
)
18819 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18824 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18825 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18826 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18827 intcmp_mode
= CCmode
;
18833 if (code
== LT
&& TARGET_IEEE_FP
)
18835 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18836 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18837 intcmp_mode
= CCmode
;
18842 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18848 if (code
== GE
|| !TARGET_IEEE_FP
)
18850 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18855 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18856 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18862 if (code
== LE
&& TARGET_IEEE_FP
)
18864 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18865 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18866 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18867 intcmp_mode
= CCmode
;
18872 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18878 if (code
== EQ
&& TARGET_IEEE_FP
)
18880 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18881 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18882 intcmp_mode
= CCmode
;
18887 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18893 if (code
== NE
&& TARGET_IEEE_FP
)
18895 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18896 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18902 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18908 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18912 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18917 gcc_unreachable ();
18925 /* Return the test that should be put into the flags user, i.e.
18926 the bcc, scc, or cmov instruction. */
18927 return gen_rtx_fmt_ee (code
, VOIDmode
,
18928 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18933 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18937 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18938 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18940 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18942 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18943 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18946 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18952 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18954 enum machine_mode mode
= GET_MODE (op0
);
18966 tmp
= ix86_expand_compare (code
, op0
, op1
);
18967 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18968 gen_rtx_LABEL_REF (VOIDmode
, label
),
18970 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18977 /* Expand DImode branch into multiple compare+branch. */
18979 rtx lo
[2], hi
[2], label2
;
18980 enum rtx_code code1
, code2
, code3
;
18981 enum machine_mode submode
;
18983 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18985 tmp
= op0
, op0
= op1
, op1
= tmp
;
18986 code
= swap_condition (code
);
18989 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18990 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18992 submode
= mode
== DImode
? SImode
: DImode
;
18994 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18995 avoid two branches. This costs one extra insn, so disable when
18996 optimizing for size. */
18998 if ((code
== EQ
|| code
== NE
)
18999 && (!optimize_insn_for_size_p ()
19000 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19005 if (hi
[1] != const0_rtx
)
19006 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19007 NULL_RTX
, 0, OPTAB_WIDEN
);
19010 if (lo
[1] != const0_rtx
)
19011 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19012 NULL_RTX
, 0, OPTAB_WIDEN
);
19014 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19015 NULL_RTX
, 0, OPTAB_WIDEN
);
19017 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19021 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19022 op1 is a constant and the low word is zero, then we can just
19023 examine the high word. Similarly for low word -1 and
19024 less-or-equal-than or greater-than. */
19026 if (CONST_INT_P (hi
[1]))
19029 case LT
: case LTU
: case GE
: case GEU
:
19030 if (lo
[1] == const0_rtx
)
19032 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19036 case LE
: case LEU
: case GT
: case GTU
:
19037 if (lo
[1] == constm1_rtx
)
19039 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19047 /* Otherwise, we need two or three jumps. */
19049 label2
= gen_label_rtx ();
19052 code2
= swap_condition (code
);
19053 code3
= unsigned_condition (code
);
19057 case LT
: case GT
: case LTU
: case GTU
:
19060 case LE
: code1
= LT
; code2
= GT
; break;
19061 case GE
: code1
= GT
; code2
= LT
; break;
19062 case LEU
: code1
= LTU
; code2
= GTU
; break;
19063 case GEU
: code1
= GTU
; code2
= LTU
; break;
19065 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19066 case NE
: code2
= UNKNOWN
; break;
19069 gcc_unreachable ();
19074 * if (hi(a) < hi(b)) goto true;
19075 * if (hi(a) > hi(b)) goto false;
19076 * if (lo(a) < lo(b)) goto true;
19080 if (code1
!= UNKNOWN
)
19081 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19082 if (code2
!= UNKNOWN
)
19083 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19085 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19087 if (code2
!= UNKNOWN
)
19088 emit_label (label2
);
19093 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19098 /* Split branch based on floating point condition. */
19100 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19101 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19106 if (target2
!= pc_rtx
)
19109 code
= reverse_condition_maybe_unordered (code
);
19114 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19117 /* Remove pushed operand from stack. */
19119 ix86_free_from_memory (GET_MODE (pushed
));
19121 i
= emit_jump_insn (gen_rtx_SET
19123 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19124 condition
, target1
, target2
)));
19125 if (split_branch_probability
>= 0)
19126 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19130 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19134 gcc_assert (GET_MODE (dest
) == QImode
);
19136 ret
= ix86_expand_compare (code
, op0
, op1
);
19137 PUT_MODE (ret
, QImode
);
19138 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19141 /* Expand comparison setting or clearing carry flag. Return true when
19142 successful and set pop for the operation. */
19144 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19146 enum machine_mode mode
=
19147 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19149 /* Do not handle double-mode compares that go through special path. */
19150 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19153 if (SCALAR_FLOAT_MODE_P (mode
))
19155 rtx compare_op
, compare_seq
;
19157 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19159 /* Shortcut: following common codes never translate
19160 into carry flag compares. */
19161 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19162 || code
== ORDERED
|| code
== UNORDERED
)
19165 /* These comparisons require zero flag; swap operands so they won't. */
19166 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19167 && !TARGET_IEEE_FP
)
19172 code
= swap_condition (code
);
19175 /* Try to expand the comparison and verify that we end up with
19176 carry flag based comparison. This fails to be true only when
19177 we decide to expand comparison using arithmetic that is not
19178 too common scenario. */
19180 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19181 compare_seq
= get_insns ();
19184 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19185 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19186 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19188 code
= GET_CODE (compare_op
);
19190 if (code
!= LTU
&& code
!= GEU
)
19193 emit_insn (compare_seq
);
19198 if (!INTEGRAL_MODE_P (mode
))
19207 /* Convert a==0 into (unsigned)a<1. */
19210 if (op1
!= const0_rtx
)
19213 code
= (code
== EQ
? LTU
: GEU
);
19216 /* Convert a>b into b<a or a>=b-1. */
19219 if (CONST_INT_P (op1
))
19221 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19222 /* Bail out on overflow. We still can swap operands but that
19223 would force loading of the constant into register. */
19224 if (op1
== const0_rtx
19225 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19227 code
= (code
== GTU
? GEU
: LTU
);
19234 code
= (code
== GTU
? LTU
: GEU
);
19238 /* Convert a>=0 into (unsigned)a<0x80000000. */
19241 if (mode
== DImode
|| op1
!= const0_rtx
)
19243 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19244 code
= (code
== LT
? GEU
: LTU
);
19248 if (mode
== DImode
|| op1
!= constm1_rtx
)
19250 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19251 code
= (code
== LE
? GEU
: LTU
);
19257 /* Swapping operands may cause constant to appear as first operand. */
19258 if (!nonimmediate_operand (op0
, VOIDmode
))
19260 if (!can_create_pseudo_p ())
19262 op0
= force_reg (mode
, op0
);
19264 *pop
= ix86_expand_compare (code
, op0
, op1
);
19265 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19270 ix86_expand_int_movcc (rtx operands
[])
19272 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19273 rtx compare_seq
, compare_op
;
19274 enum machine_mode mode
= GET_MODE (operands
[0]);
19275 bool sign_bit_compare_p
= false;
19276 rtx op0
= XEXP (operands
[1], 0);
19277 rtx op1
= XEXP (operands
[1], 1);
19279 if (GET_MODE (op0
) == TImode
19280 || (GET_MODE (op0
) == DImode
19285 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19286 compare_seq
= get_insns ();
19289 compare_code
= GET_CODE (compare_op
);
19291 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19292 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19293 sign_bit_compare_p
= true;
19295 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19296 HImode insns, we'd be swallowed in word prefix ops. */
19298 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19299 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19300 && CONST_INT_P (operands
[2])
19301 && CONST_INT_P (operands
[3]))
19303 rtx out
= operands
[0];
19304 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19305 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19306 HOST_WIDE_INT diff
;
19309 /* Sign bit compares are better done using shifts than we do by using
19311 if (sign_bit_compare_p
19312 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19314 /* Detect overlap between destination and compare sources. */
19317 if (!sign_bit_compare_p
)
19320 bool fpcmp
= false;
19322 compare_code
= GET_CODE (compare_op
);
19324 flags
= XEXP (compare_op
, 0);
19326 if (GET_MODE (flags
) == CCFPmode
19327 || GET_MODE (flags
) == CCFPUmode
)
19331 = ix86_fp_compare_code_to_integer (compare_code
);
19334 /* To simplify rest of code, restrict to the GEU case. */
19335 if (compare_code
== LTU
)
19337 HOST_WIDE_INT tmp
= ct
;
19340 compare_code
= reverse_condition (compare_code
);
19341 code
= reverse_condition (code
);
19346 PUT_CODE (compare_op
,
19347 reverse_condition_maybe_unordered
19348 (GET_CODE (compare_op
)));
19350 PUT_CODE (compare_op
,
19351 reverse_condition (GET_CODE (compare_op
)));
19355 if (reg_overlap_mentioned_p (out
, op0
)
19356 || reg_overlap_mentioned_p (out
, op1
))
19357 tmp
= gen_reg_rtx (mode
);
19359 if (mode
== DImode
)
19360 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19362 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19363 flags
, compare_op
));
19367 if (code
== GT
|| code
== GE
)
19368 code
= reverse_condition (code
);
19371 HOST_WIDE_INT tmp
= ct
;
19376 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19389 tmp
= expand_simple_binop (mode
, PLUS
,
19391 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19402 tmp
= expand_simple_binop (mode
, IOR
,
19404 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19406 else if (diff
== -1 && ct
)
19416 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19418 tmp
= expand_simple_binop (mode
, PLUS
,
19419 copy_rtx (tmp
), GEN_INT (cf
),
19420 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19428 * andl cf - ct, dest
19438 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19441 tmp
= expand_simple_binop (mode
, AND
,
19443 gen_int_mode (cf
- ct
, mode
),
19444 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19446 tmp
= expand_simple_binop (mode
, PLUS
,
19447 copy_rtx (tmp
), GEN_INT (ct
),
19448 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19451 if (!rtx_equal_p (tmp
, out
))
19452 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19459 enum machine_mode cmp_mode
= GET_MODE (op0
);
19462 tmp
= ct
, ct
= cf
, cf
= tmp
;
19465 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19467 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19469 /* We may be reversing unordered compare to normal compare, that
19470 is not valid in general (we may convert non-trapping condition
19471 to trapping one), however on i386 we currently emit all
19472 comparisons unordered. */
19473 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19474 code
= reverse_condition_maybe_unordered (code
);
19478 compare_code
= reverse_condition (compare_code
);
19479 code
= reverse_condition (code
);
19483 compare_code
= UNKNOWN
;
19484 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19485 && CONST_INT_P (op1
))
19487 if (op1
== const0_rtx
19488 && (code
== LT
|| code
== GE
))
19489 compare_code
= code
;
19490 else if (op1
== constm1_rtx
)
19494 else if (code
== GT
)
19499 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19500 if (compare_code
!= UNKNOWN
19501 && GET_MODE (op0
) == GET_MODE (out
)
19502 && (cf
== -1 || ct
== -1))
19504 /* If lea code below could be used, only optimize
19505 if it results in a 2 insn sequence. */
19507 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19508 || diff
== 3 || diff
== 5 || diff
== 9)
19509 || (compare_code
== LT
&& ct
== -1)
19510 || (compare_code
== GE
&& cf
== -1))
19513 * notl op1 (if necessary)
19521 code
= reverse_condition (code
);
19524 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19526 out
= expand_simple_binop (mode
, IOR
,
19528 out
, 1, OPTAB_DIRECT
);
19529 if (out
!= operands
[0])
19530 emit_move_insn (operands
[0], out
);
19537 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19538 || diff
== 3 || diff
== 5 || diff
== 9)
19539 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19541 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19547 * lea cf(dest*(ct-cf)),dest
19551 * This also catches the degenerate setcc-only case.
19557 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19560 /* On x86_64 the lea instruction operates on Pmode, so we need
19561 to get arithmetics done in proper mode to match. */
19563 tmp
= copy_rtx (out
);
19567 out1
= copy_rtx (out
);
19568 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19572 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19578 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19581 if (!rtx_equal_p (tmp
, out
))
19584 out
= force_operand (tmp
, copy_rtx (out
));
19586 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19588 if (!rtx_equal_p (out
, operands
[0]))
19589 emit_move_insn (operands
[0], copy_rtx (out
));
19595 * General case: Jumpful:
19596 * xorl dest,dest cmpl op1, op2
19597 * cmpl op1, op2 movl ct, dest
19598 * setcc dest jcc 1f
19599 * decl dest movl cf, dest
19600 * andl (cf-ct),dest 1:
19603 * Size 20. Size 14.
19605 * This is reasonably steep, but branch mispredict costs are
19606 * high on modern cpus, so consider failing only if optimizing
19610 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19611 && BRANCH_COST (optimize_insn_for_speed_p (),
19616 enum machine_mode cmp_mode
= GET_MODE (op0
);
19621 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19623 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19625 /* We may be reversing unordered compare to normal compare,
19626 that is not valid in general (we may convert non-trapping
19627 condition to trapping one), however on i386 we currently
19628 emit all comparisons unordered. */
19629 code
= reverse_condition_maybe_unordered (code
);
19633 code
= reverse_condition (code
);
19634 if (compare_code
!= UNKNOWN
)
19635 compare_code
= reverse_condition (compare_code
);
19639 if (compare_code
!= UNKNOWN
)
19641 /* notl op1 (if needed)
19646 For x < 0 (resp. x <= -1) there will be no notl,
19647 so if possible swap the constants to get rid of the
19649 True/false will be -1/0 while code below (store flag
19650 followed by decrement) is 0/-1, so the constants need
19651 to be exchanged once more. */
19653 if (compare_code
== GE
|| !cf
)
19655 code
= reverse_condition (code
);
19660 HOST_WIDE_INT tmp
= cf
;
19665 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19669 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19671 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19673 copy_rtx (out
), 1, OPTAB_DIRECT
);
19676 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19677 gen_int_mode (cf
- ct
, mode
),
19678 copy_rtx (out
), 1, OPTAB_DIRECT
);
19680 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19681 copy_rtx (out
), 1, OPTAB_DIRECT
);
19682 if (!rtx_equal_p (out
, operands
[0]))
19683 emit_move_insn (operands
[0], copy_rtx (out
));
19689 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19691 /* Try a few things more with specific constants and a variable. */
19694 rtx var
, orig_out
, out
, tmp
;
19696 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19699 /* If one of the two operands is an interesting constant, load a
19700 constant with the above and mask it in with a logical operation. */
19702 if (CONST_INT_P (operands
[2]))
19705 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19706 operands
[3] = constm1_rtx
, op
= and_optab
;
19707 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19708 operands
[3] = const0_rtx
, op
= ior_optab
;
19712 else if (CONST_INT_P (operands
[3]))
19715 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19716 operands
[2] = constm1_rtx
, op
= and_optab
;
19717 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19718 operands
[2] = const0_rtx
, op
= ior_optab
;
19725 orig_out
= operands
[0];
19726 tmp
= gen_reg_rtx (mode
);
19729 /* Recurse to get the constant loaded. */
19730 if (ix86_expand_int_movcc (operands
) == 0)
19733 /* Mask in the interesting variable. */
19734 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19736 if (!rtx_equal_p (out
, orig_out
))
19737 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19743 * For comparison with above,
19753 if (! nonimmediate_operand (operands
[2], mode
))
19754 operands
[2] = force_reg (mode
, operands
[2]);
19755 if (! nonimmediate_operand (operands
[3], mode
))
19756 operands
[3] = force_reg (mode
, operands
[3]);
19758 if (! register_operand (operands
[2], VOIDmode
)
19760 || ! register_operand (operands
[3], VOIDmode
)))
19761 operands
[2] = force_reg (mode
, operands
[2]);
19764 && ! register_operand (operands
[3], VOIDmode
))
19765 operands
[3] = force_reg (mode
, operands
[3]);
19767 emit_insn (compare_seq
);
19768 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19769 gen_rtx_IF_THEN_ELSE (mode
,
19770 compare_op
, operands
[2],
19775 /* Swap, force into registers, or otherwise massage the two operands
19776 to an sse comparison with a mask result. Thus we differ a bit from
19777 ix86_prepare_fp_compare_args which expects to produce a flags result.
19779 The DEST operand exists to help determine whether to commute commutative
19780 operators. The POP0/POP1 operands are updated in place. The new
19781 comparison code is returned, or UNKNOWN if not implementable. */
19783 static enum rtx_code
19784 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19785 rtx
*pop0
, rtx
*pop1
)
19793 /* AVX supports all the needed comparisons. */
19796 /* We have no LTGT as an operator. We could implement it with
19797 NE & ORDERED, but this requires an extra temporary. It's
19798 not clear that it's worth it. */
19805 /* These are supported directly. */
19812 /* AVX has 3 operand comparisons, no need to swap anything. */
19815 /* For commutative operators, try to canonicalize the destination
19816 operand to be first in the comparison - this helps reload to
19817 avoid extra moves. */
19818 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19826 /* These are not supported directly before AVX, and furthermore
19827 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19828 comparison operands to transform into something that is
19833 code
= swap_condition (code
);
19837 gcc_unreachable ();
19843 /* Detect conditional moves that exactly match min/max operational
19844 semantics. Note that this is IEEE safe, as long as we don't
19845 interchange the operands.
19847 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19848 and TRUE if the operation is successful and instructions are emitted. */
19851 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19852 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19854 enum machine_mode mode
;
19860 else if (code
== UNGE
)
19863 if_true
= if_false
;
19869 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19871 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19876 mode
= GET_MODE (dest
);
19878 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19879 but MODE may be a vector mode and thus not appropriate. */
19880 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19882 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19885 if_true
= force_reg (mode
, if_true
);
19886 v
= gen_rtvec (2, if_true
, if_false
);
19887 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19891 code
= is_min
? SMIN
: SMAX
;
19892 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19895 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19899 /* Expand an sse vector comparison. Return the register with the result. */
19902 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19903 rtx op_true
, rtx op_false
)
19905 enum machine_mode mode
= GET_MODE (dest
);
19906 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19909 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19910 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19911 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19914 || reg_overlap_mentioned_p (dest
, op_true
)
19915 || reg_overlap_mentioned_p (dest
, op_false
))
19916 dest
= gen_reg_rtx (mode
);
19918 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19919 if (cmp_mode
!= mode
)
19921 x
= force_reg (cmp_mode
, x
);
19922 convert_move (dest
, x
, false);
19925 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19930 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19931 operations. This is used for both scalar and vector conditional moves. */
19934 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19936 enum machine_mode mode
= GET_MODE (dest
);
19939 if (vector_all_ones_operand (op_true
, mode
)
19940 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19942 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19944 else if (op_false
== CONST0_RTX (mode
))
19946 op_true
= force_reg (mode
, op_true
);
19947 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19948 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19950 else if (op_true
== CONST0_RTX (mode
))
19952 op_false
= force_reg (mode
, op_false
);
19953 x
= gen_rtx_NOT (mode
, cmp
);
19954 x
= gen_rtx_AND (mode
, x
, op_false
);
19955 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19957 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19959 op_false
= force_reg (mode
, op_false
);
19960 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19961 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19963 else if (TARGET_XOP
)
19965 op_true
= force_reg (mode
, op_true
);
19967 if (!nonimmediate_operand (op_false
, mode
))
19968 op_false
= force_reg (mode
, op_false
);
19970 emit_insn (gen_rtx_SET (mode
, dest
,
19971 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19977 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19979 if (!nonimmediate_operand (op_true
, mode
))
19980 op_true
= force_reg (mode
, op_true
);
19982 op_false
= force_reg (mode
, op_false
);
19988 gen
= gen_sse4_1_blendvps
;
19992 gen
= gen_sse4_1_blendvpd
;
20000 gen
= gen_sse4_1_pblendvb
;
20001 dest
= gen_lowpart (V16QImode
, dest
);
20002 op_false
= gen_lowpart (V16QImode
, op_false
);
20003 op_true
= gen_lowpart (V16QImode
, op_true
);
20004 cmp
= gen_lowpart (V16QImode
, cmp
);
20009 gen
= gen_avx_blendvps256
;
20013 gen
= gen_avx_blendvpd256
;
20021 gen
= gen_avx2_pblendvb
;
20022 dest
= gen_lowpart (V32QImode
, dest
);
20023 op_false
= gen_lowpart (V32QImode
, op_false
);
20024 op_true
= gen_lowpart (V32QImode
, op_true
);
20025 cmp
= gen_lowpart (V32QImode
, cmp
);
20033 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
20036 op_true
= force_reg (mode
, op_true
);
20038 t2
= gen_reg_rtx (mode
);
20040 t3
= gen_reg_rtx (mode
);
20044 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20045 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20047 x
= gen_rtx_NOT (mode
, cmp
);
20048 x
= gen_rtx_AND (mode
, x
, op_false
);
20049 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20051 x
= gen_rtx_IOR (mode
, t3
, t2
);
20052 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20057 /* Expand a floating-point conditional move. Return true if successful. */
20060 ix86_expand_fp_movcc (rtx operands
[])
20062 enum machine_mode mode
= GET_MODE (operands
[0]);
20063 enum rtx_code code
= GET_CODE (operands
[1]);
20064 rtx tmp
, compare_op
;
20065 rtx op0
= XEXP (operands
[1], 0);
20066 rtx op1
= XEXP (operands
[1], 1);
20068 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20070 enum machine_mode cmode
;
20072 /* Since we've no cmove for sse registers, don't force bad register
20073 allocation just to gain access to it. Deny movcc when the
20074 comparison mode doesn't match the move mode. */
20075 cmode
= GET_MODE (op0
);
20076 if (cmode
== VOIDmode
)
20077 cmode
= GET_MODE (op1
);
20081 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20082 if (code
== UNKNOWN
)
20085 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20086 operands
[2], operands
[3]))
20089 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20090 operands
[2], operands
[3]);
20091 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20095 if (GET_MODE (op0
) == TImode
20096 || (GET_MODE (op0
) == DImode
20100 /* The floating point conditional move instructions don't directly
20101 support conditions resulting from a signed integer comparison. */
20103 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20104 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20106 tmp
= gen_reg_rtx (QImode
);
20107 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20109 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20112 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20113 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20114 operands
[2], operands
[3])));
20119 /* Expand a floating-point vector conditional move; a vcond operation
20120 rather than a movcc operation. */
20123 ix86_expand_fp_vcond (rtx operands
[])
20125 enum rtx_code code
= GET_CODE (operands
[3]);
20128 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20129 &operands
[4], &operands
[5]);
20130 if (code
== UNKNOWN
)
20133 switch (GET_CODE (operands
[3]))
20136 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20137 operands
[5], operands
[0], operands
[0]);
20138 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20139 operands
[5], operands
[1], operands
[2]);
20143 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20144 operands
[5], operands
[0], operands
[0]);
20145 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20146 operands
[5], operands
[1], operands
[2]);
20150 gcc_unreachable ();
20152 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20154 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20158 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20159 operands
[5], operands
[1], operands
[2]))
20162 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20163 operands
[1], operands
[2]);
20164 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20168 /* Expand a signed/unsigned integral vector conditional move. */
20171 ix86_expand_int_vcond (rtx operands
[])
20173 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20174 enum machine_mode mode
= GET_MODE (operands
[4]);
20175 enum rtx_code code
= GET_CODE (operands
[3]);
20176 bool negate
= false;
20179 cop0
= operands
[4];
20180 cop1
= operands
[5];
20182 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20183 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20184 if ((code
== LT
|| code
== GE
)
20185 && data_mode
== mode
20186 && cop1
== CONST0_RTX (mode
)
20187 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20188 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20189 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20190 && (GET_MODE_SIZE (data_mode
) == 16
20191 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20193 rtx negop
= operands
[2 - (code
== LT
)];
20194 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20195 if (negop
== CONST1_RTX (data_mode
))
20197 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20198 operands
[0], 1, OPTAB_DIRECT
);
20199 if (res
!= operands
[0])
20200 emit_move_insn (operands
[0], res
);
20203 else if (GET_MODE_INNER (data_mode
) != DImode
20204 && vector_all_ones_operand (negop
, data_mode
))
20206 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20207 operands
[0], 0, OPTAB_DIRECT
);
20208 if (res
!= operands
[0])
20209 emit_move_insn (operands
[0], res
);
20214 if (!nonimmediate_operand (cop1
, mode
))
20215 cop1
= force_reg (mode
, cop1
);
20216 if (!general_operand (operands
[1], data_mode
))
20217 operands
[1] = force_reg (data_mode
, operands
[1]);
20218 if (!general_operand (operands
[2], data_mode
))
20219 operands
[2] = force_reg (data_mode
, operands
[2]);
20221 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20223 && (mode
== V16QImode
|| mode
== V8HImode
20224 || mode
== V4SImode
|| mode
== V2DImode
))
20228 /* Canonicalize the comparison to EQ, GT, GTU. */
20239 code
= reverse_condition (code
);
20245 code
= reverse_condition (code
);
20251 code
= swap_condition (code
);
20252 x
= cop0
, cop0
= cop1
, cop1
= x
;
20256 gcc_unreachable ();
20259 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20260 if (mode
== V2DImode
)
20265 /* SSE4.1 supports EQ. */
20266 if (!TARGET_SSE4_1
)
20272 /* SSE4.2 supports GT/GTU. */
20273 if (!TARGET_SSE4_2
)
20278 gcc_unreachable ();
20282 /* Unsigned parallel compare is not supported by the hardware.
20283 Play some tricks to turn this into a signed comparison
20287 cop0
= force_reg (mode
, cop0
);
20297 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20301 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20302 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20303 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20304 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20306 gcc_unreachable ();
20308 /* Subtract (-(INT MAX) - 1) from both operands to make
20310 mask
= ix86_build_signbit_mask (mode
, true, false);
20311 t1
= gen_reg_rtx (mode
);
20312 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20314 t2
= gen_reg_rtx (mode
);
20315 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20327 /* Perform a parallel unsigned saturating subtraction. */
20328 x
= gen_reg_rtx (mode
);
20329 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20330 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20333 cop1
= CONST0_RTX (mode
);
20339 gcc_unreachable ();
20344 /* Allow the comparison to be done in one mode, but the movcc to
20345 happen in another mode. */
20346 if (data_mode
== mode
)
20348 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20349 operands
[1+negate
], operands
[2-negate
]);
20353 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20354 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20356 operands
[1+negate
], operands
[2-negate
]);
20357 x
= gen_lowpart (data_mode
, x
);
20360 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20361 operands
[2-negate
]);
20365 /* Expand a variable vector permutation. */
20368 ix86_expand_vec_perm (rtx operands
[])
20370 rtx target
= operands
[0];
20371 rtx op0
= operands
[1];
20372 rtx op1
= operands
[2];
20373 rtx mask
= operands
[3];
20374 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20375 enum machine_mode mode
= GET_MODE (op0
);
20376 enum machine_mode maskmode
= GET_MODE (mask
);
20378 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20380 /* Number of elements in the vector. */
20381 w
= GET_MODE_NUNITS (mode
);
20382 e
= GET_MODE_UNIT_SIZE (mode
);
20383 gcc_assert (w
<= 32);
20387 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20389 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20390 an constant shuffle operand. With a tiny bit of effort we can
20391 use VPERMD instead. A re-interpretation stall for V4DFmode is
20392 unfortunate but there's no avoiding it.
20393 Similarly for V16HImode we don't have instructions for variable
20394 shuffling, while for V32QImode we can use after preparing suitable
20395 masks vpshufb; vpshufb; vpermq; vpor. */
20397 if (mode
== V16HImode
)
20399 maskmode
= mode
= V32QImode
;
20405 maskmode
= mode
= V8SImode
;
20409 t1
= gen_reg_rtx (maskmode
);
20411 /* Replicate the low bits of the V4DImode mask into V8SImode:
20413 t1 = { A A B B C C D D }. */
20414 for (i
= 0; i
< w
/ 2; ++i
)
20415 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20416 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20417 vt
= force_reg (maskmode
, vt
);
20418 mask
= gen_lowpart (maskmode
, mask
);
20419 if (maskmode
== V8SImode
)
20420 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20422 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20424 /* Multiply the shuffle indicies by two. */
20425 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20428 /* Add one to the odd shuffle indicies:
20429 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20430 for (i
= 0; i
< w
/ 2; ++i
)
20432 vec
[i
* 2] = const0_rtx
;
20433 vec
[i
* 2 + 1] = const1_rtx
;
20435 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20436 vt
= force_const_mem (maskmode
, vt
);
20437 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20440 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20441 operands
[3] = mask
= t1
;
20442 target
= gen_lowpart (mode
, target
);
20443 op0
= gen_lowpart (mode
, op0
);
20444 op1
= gen_lowpart (mode
, op1
);
20450 /* The VPERMD and VPERMPS instructions already properly ignore
20451 the high bits of the shuffle elements. No need for us to
20452 perform an AND ourselves. */
20453 if (one_operand_shuffle
)
20454 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20457 t1
= gen_reg_rtx (V8SImode
);
20458 t2
= gen_reg_rtx (V8SImode
);
20459 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20460 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20466 mask
= gen_lowpart (V8SFmode
, mask
);
20467 if (one_operand_shuffle
)
20468 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20471 t1
= gen_reg_rtx (V8SFmode
);
20472 t2
= gen_reg_rtx (V8SFmode
);
20473 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20474 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20480 /* By combining the two 128-bit input vectors into one 256-bit
20481 input vector, we can use VPERMD and VPERMPS for the full
20482 two-operand shuffle. */
20483 t1
= gen_reg_rtx (V8SImode
);
20484 t2
= gen_reg_rtx (V8SImode
);
20485 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20486 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20487 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20488 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20492 t1
= gen_reg_rtx (V8SFmode
);
20493 t2
= gen_reg_rtx (V8SImode
);
20494 mask
= gen_lowpart (V4SImode
, mask
);
20495 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20496 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20497 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20498 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20502 t1
= gen_reg_rtx (V32QImode
);
20503 t2
= gen_reg_rtx (V32QImode
);
20504 t3
= gen_reg_rtx (V32QImode
);
20505 vt2
= GEN_INT (128);
20506 for (i
= 0; i
< 32; i
++)
20508 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20509 vt
= force_reg (V32QImode
, vt
);
20510 for (i
= 0; i
< 32; i
++)
20511 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20512 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20513 vt2
= force_reg (V32QImode
, vt2
);
20514 /* From mask create two adjusted masks, which contain the same
20515 bits as mask in the low 7 bits of each vector element.
20516 The first mask will have the most significant bit clear
20517 if it requests element from the same 128-bit lane
20518 and MSB set if it requests element from the other 128-bit lane.
20519 The second mask will have the opposite values of the MSB,
20520 and additionally will have its 128-bit lanes swapped.
20521 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20522 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20523 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20524 stands for other 12 bytes. */
20525 /* The bit whether element is from the same lane or the other
20526 lane is bit 4, so shift it up by 3 to the MSB position. */
20527 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20528 gen_lowpart (V4DImode
, mask
),
20530 /* Clear MSB bits from the mask just in case it had them set. */
20531 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20532 /* After this t1 will have MSB set for elements from other lane. */
20533 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20534 /* Clear bits other than MSB. */
20535 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20536 /* Or in the lower bits from mask into t3. */
20537 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20538 /* And invert MSB bits in t1, so MSB is set for elements from the same
20540 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20541 /* Swap 128-bit lanes in t3. */
20542 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20543 gen_lowpart (V4DImode
, t3
),
20544 const2_rtx
, GEN_INT (3),
20545 const0_rtx
, const1_rtx
));
20546 /* And or in the lower bits from mask into t1. */
20547 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20548 if (one_operand_shuffle
)
20550 /* Each of these shuffles will put 0s in places where
20551 element from the other 128-bit lane is needed, otherwise
20552 will shuffle in the requested value. */
20553 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20554 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20555 /* For t3 the 128-bit lanes are swapped again. */
20556 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20557 gen_lowpart (V4DImode
, t3
),
20558 const2_rtx
, GEN_INT (3),
20559 const0_rtx
, const1_rtx
));
20560 /* And oring both together leads to the result. */
20561 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20565 t4
= gen_reg_rtx (V32QImode
);
20566 /* Similarly to the above one_operand_shuffle code,
20567 just for repeated twice for each operand. merge_two:
20568 code will merge the two results together. */
20569 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20570 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20571 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20572 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20573 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20574 gen_lowpart (V4DImode
, t4
),
20575 const2_rtx
, GEN_INT (3),
20576 const0_rtx
, const1_rtx
));
20577 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20578 gen_lowpart (V4DImode
, t3
),
20579 const2_rtx
, GEN_INT (3),
20580 const0_rtx
, const1_rtx
));
20581 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20582 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20588 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20595 /* The XOP VPPERM insn supports three inputs. By ignoring the
20596 one_operand_shuffle special case, we avoid creating another
20597 set of constant vectors in memory. */
20598 one_operand_shuffle
= false;
20600 /* mask = mask & {2*w-1, ...} */
20601 vt
= GEN_INT (2*w
- 1);
20605 /* mask = mask & {w-1, ...} */
20606 vt
= GEN_INT (w
- 1);
20609 for (i
= 0; i
< w
; i
++)
20611 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20612 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20613 NULL_RTX
, 0, OPTAB_DIRECT
);
20615 /* For non-QImode operations, convert the word permutation control
20616 into a byte permutation control. */
20617 if (mode
!= V16QImode
)
20619 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20620 GEN_INT (exact_log2 (e
)),
20621 NULL_RTX
, 0, OPTAB_DIRECT
);
20623 /* Convert mask to vector of chars. */
20624 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20626 /* Replicate each of the input bytes into byte positions:
20627 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20628 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20629 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20630 for (i
= 0; i
< 16; ++i
)
20631 vec
[i
] = GEN_INT (i
/e
* e
);
20632 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20633 vt
= force_const_mem (V16QImode
, vt
);
20635 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20637 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20639 /* Convert it into the byte positions by doing
20640 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20641 for (i
= 0; i
< 16; ++i
)
20642 vec
[i
] = GEN_INT (i
% e
);
20643 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20644 vt
= force_const_mem (V16QImode
, vt
);
20645 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20648 /* The actual shuffle operations all operate on V16QImode. */
20649 op0
= gen_lowpart (V16QImode
, op0
);
20650 op1
= gen_lowpart (V16QImode
, op1
);
20651 target
= gen_lowpart (V16QImode
, target
);
20655 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20657 else if (one_operand_shuffle
)
20659 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20666 /* Shuffle the two input vectors independently. */
20667 t1
= gen_reg_rtx (V16QImode
);
20668 t2
= gen_reg_rtx (V16QImode
);
20669 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20670 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20673 /* Then merge them together. The key is whether any given control
20674 element contained a bit set that indicates the second word. */
20675 mask
= operands
[3];
20677 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20679 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20680 more shuffle to convert the V2DI input mask into a V4SI
20681 input mask. At which point the masking that expand_int_vcond
20682 will work as desired. */
20683 rtx t3
= gen_reg_rtx (V4SImode
);
20684 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20685 const0_rtx
, const0_rtx
,
20686 const2_rtx
, const2_rtx
));
20688 maskmode
= V4SImode
;
20692 for (i
= 0; i
< w
; i
++)
20694 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20695 vt
= force_reg (maskmode
, vt
);
20696 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20697 NULL_RTX
, 0, OPTAB_DIRECT
);
20699 xops
[0] = gen_lowpart (mode
, operands
[0]);
20700 xops
[1] = gen_lowpart (mode
, t2
);
20701 xops
[2] = gen_lowpart (mode
, t1
);
20702 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20705 ok
= ix86_expand_int_vcond (xops
);
20710 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20711 true if we should do zero extension, else sign extension. HIGH_P is
20712 true if we want the N/2 high elements, else the low elements. */
20715 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20717 enum machine_mode imode
= GET_MODE (src
);
20722 rtx (*unpack
)(rtx
, rtx
);
20723 rtx (*extract
)(rtx
, rtx
) = NULL
;
20724 enum machine_mode halfmode
= BLKmode
;
20730 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20732 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20733 halfmode
= V16QImode
;
20735 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20739 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20741 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20742 halfmode
= V8HImode
;
20744 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20748 unpack
= gen_avx2_zero_extendv4siv4di2
;
20750 unpack
= gen_avx2_sign_extendv4siv4di2
;
20751 halfmode
= V4SImode
;
20753 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20757 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20759 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20763 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20765 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20769 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20771 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20774 gcc_unreachable ();
20777 if (GET_MODE_SIZE (imode
) == 32)
20779 tmp
= gen_reg_rtx (halfmode
);
20780 emit_insn (extract (tmp
, src
));
20784 /* Shift higher 8 bytes to lower 8 bytes. */
20785 tmp
= gen_reg_rtx (imode
);
20786 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20787 gen_lowpart (V1TImode
, src
),
20793 emit_insn (unpack (dest
, tmp
));
20797 rtx (*unpack
)(rtx
, rtx
, rtx
);
20803 unpack
= gen_vec_interleave_highv16qi
;
20805 unpack
= gen_vec_interleave_lowv16qi
;
20809 unpack
= gen_vec_interleave_highv8hi
;
20811 unpack
= gen_vec_interleave_lowv8hi
;
20815 unpack
= gen_vec_interleave_highv4si
;
20817 unpack
= gen_vec_interleave_lowv4si
;
20820 gcc_unreachable ();
20824 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20826 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20827 src
, pc_rtx
, pc_rtx
);
20829 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20833 /* Expand conditional increment or decrement using adb/sbb instructions.
20834 The default case using setcc followed by the conditional move can be
20835 done by generic code. */
20837 ix86_expand_int_addcc (rtx operands
[])
20839 enum rtx_code code
= GET_CODE (operands
[1]);
20841 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20843 rtx val
= const0_rtx
;
20844 bool fpcmp
= false;
20845 enum machine_mode mode
;
20846 rtx op0
= XEXP (operands
[1], 0);
20847 rtx op1
= XEXP (operands
[1], 1);
20849 if (operands
[3] != const1_rtx
20850 && operands
[3] != constm1_rtx
)
20852 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20854 code
= GET_CODE (compare_op
);
20856 flags
= XEXP (compare_op
, 0);
20858 if (GET_MODE (flags
) == CCFPmode
20859 || GET_MODE (flags
) == CCFPUmode
)
20862 code
= ix86_fp_compare_code_to_integer (code
);
20869 PUT_CODE (compare_op
,
20870 reverse_condition_maybe_unordered
20871 (GET_CODE (compare_op
)));
20873 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20876 mode
= GET_MODE (operands
[0]);
20878 /* Construct either adc or sbb insn. */
20879 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20884 insn
= gen_subqi3_carry
;
20887 insn
= gen_subhi3_carry
;
20890 insn
= gen_subsi3_carry
;
20893 insn
= gen_subdi3_carry
;
20896 gcc_unreachable ();
20904 insn
= gen_addqi3_carry
;
20907 insn
= gen_addhi3_carry
;
20910 insn
= gen_addsi3_carry
;
20913 insn
= gen_adddi3_carry
;
20916 gcc_unreachable ();
20919 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20925 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20926 but works for floating pointer parameters and nonoffsetable memories.
20927 For pushes, it returns just stack offsets; the values will be saved
20928 in the right order. Maximally three parts are generated. */
20931 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20936 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20938 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20940 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20941 gcc_assert (size
>= 2 && size
<= 4);
20943 /* Optimize constant pool reference to immediates. This is used by fp
20944 moves, that force all constants to memory to allow combining. */
20945 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20947 rtx tmp
= maybe_get_pool_constant (operand
);
20952 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20954 /* The only non-offsetable memories we handle are pushes. */
20955 int ok
= push_operand (operand
, VOIDmode
);
20959 operand
= copy_rtx (operand
);
20960 PUT_MODE (operand
, word_mode
);
20961 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20965 if (GET_CODE (operand
) == CONST_VECTOR
)
20967 enum machine_mode imode
= int_mode_for_mode (mode
);
20968 /* Caution: if we looked through a constant pool memory above,
20969 the operand may actually have a different mode now. That's
20970 ok, since we want to pun this all the way back to an integer. */
20971 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20972 gcc_assert (operand
!= NULL
);
20978 if (mode
== DImode
)
20979 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20984 if (REG_P (operand
))
20986 gcc_assert (reload_completed
);
20987 for (i
= 0; i
< size
; i
++)
20988 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20990 else if (offsettable_memref_p (operand
))
20992 operand
= adjust_address (operand
, SImode
, 0);
20993 parts
[0] = operand
;
20994 for (i
= 1; i
< size
; i
++)
20995 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20997 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21002 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21006 real_to_target (l
, &r
, mode
);
21007 parts
[3] = gen_int_mode (l
[3], SImode
);
21008 parts
[2] = gen_int_mode (l
[2], SImode
);
21011 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21012 long double may not be 80-bit. */
21013 real_to_target (l
, &r
, mode
);
21014 parts
[2] = gen_int_mode (l
[2], SImode
);
21017 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21020 gcc_unreachable ();
21022 parts
[1] = gen_int_mode (l
[1], SImode
);
21023 parts
[0] = gen_int_mode (l
[0], SImode
);
21026 gcc_unreachable ();
21031 if (mode
== TImode
)
21032 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21033 if (mode
== XFmode
|| mode
== TFmode
)
21035 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21036 if (REG_P (operand
))
21038 gcc_assert (reload_completed
);
21039 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21040 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21042 else if (offsettable_memref_p (operand
))
21044 operand
= adjust_address (operand
, DImode
, 0);
21045 parts
[0] = operand
;
21046 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21048 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21053 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21054 real_to_target (l
, &r
, mode
);
21056 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21057 if (HOST_BITS_PER_WIDE_INT
>= 64)
21060 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21061 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21064 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21066 if (upper_mode
== SImode
)
21067 parts
[1] = gen_int_mode (l
[2], SImode
);
21068 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21071 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21072 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21075 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21078 gcc_unreachable ();
21085 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21086 Return false when normal moves are needed; true when all required
21087 insns have been emitted. Operands 2-4 contain the input values
21088 int the correct order; operands 5-7 contain the output values. */
21091 ix86_split_long_move (rtx operands
[])
21096 int collisions
= 0;
21097 enum machine_mode mode
= GET_MODE (operands
[0]);
21098 bool collisionparts
[4];
21100 /* The DFmode expanders may ask us to move double.
21101 For 64bit target this is single move. By hiding the fact
21102 here we simplify i386.md splitters. */
21103 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21105 /* Optimize constant pool reference to immediates. This is used by
21106 fp moves, that force all constants to memory to allow combining. */
21108 if (MEM_P (operands
[1])
21109 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21110 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21111 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21112 if (push_operand (operands
[0], VOIDmode
))
21114 operands
[0] = copy_rtx (operands
[0]);
21115 PUT_MODE (operands
[0], word_mode
);
21118 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21119 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21120 emit_move_insn (operands
[0], operands
[1]);
21124 /* The only non-offsettable memory we handle is push. */
21125 if (push_operand (operands
[0], VOIDmode
))
21128 gcc_assert (!MEM_P (operands
[0])
21129 || offsettable_memref_p (operands
[0]));
21131 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21132 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21134 /* When emitting push, take care for source operands on the stack. */
21135 if (push
&& MEM_P (operands
[1])
21136 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21138 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21140 /* Compensate for the stack decrement by 4. */
21141 if (!TARGET_64BIT
&& nparts
== 3
21142 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21143 src_base
= plus_constant (Pmode
, src_base
, 4);
21145 /* src_base refers to the stack pointer and is
21146 automatically decreased by emitted push. */
21147 for (i
= 0; i
< nparts
; i
++)
21148 part
[1][i
] = change_address (part
[1][i
],
21149 GET_MODE (part
[1][i
]), src_base
);
21152 /* We need to do copy in the right order in case an address register
21153 of the source overlaps the destination. */
21154 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21158 for (i
= 0; i
< nparts
; i
++)
21161 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21162 if (collisionparts
[i
])
21166 /* Collision in the middle part can be handled by reordering. */
21167 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21169 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21170 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21172 else if (collisions
== 1
21174 && (collisionparts
[1] || collisionparts
[2]))
21176 if (collisionparts
[1])
21178 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21179 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21183 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21184 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21188 /* If there are more collisions, we can't handle it by reordering.
21189 Do an lea to the last part and use only one colliding move. */
21190 else if (collisions
> 1)
21196 base
= part
[0][nparts
- 1];
21198 /* Handle the case when the last part isn't valid for lea.
21199 Happens in 64-bit mode storing the 12-byte XFmode. */
21200 if (GET_MODE (base
) != Pmode
)
21201 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21203 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21204 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21205 for (i
= 1; i
< nparts
; i
++)
21207 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21208 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21219 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21220 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21221 stack_pointer_rtx
, GEN_INT (-4)));
21222 emit_move_insn (part
[0][2], part
[1][2]);
21224 else if (nparts
== 4)
21226 emit_move_insn (part
[0][3], part
[1][3]);
21227 emit_move_insn (part
[0][2], part
[1][2]);
21232 /* In 64bit mode we don't have 32bit push available. In case this is
21233 register, it is OK - we will just use larger counterpart. We also
21234 retype memory - these comes from attempt to avoid REX prefix on
21235 moving of second half of TFmode value. */
21236 if (GET_MODE (part
[1][1]) == SImode
)
21238 switch (GET_CODE (part
[1][1]))
21241 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21245 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21249 gcc_unreachable ();
21252 if (GET_MODE (part
[1][0]) == SImode
)
21253 part
[1][0] = part
[1][1];
21256 emit_move_insn (part
[0][1], part
[1][1]);
21257 emit_move_insn (part
[0][0], part
[1][0]);
21261 /* Choose correct order to not overwrite the source before it is copied. */
21262 if ((REG_P (part
[0][0])
21263 && REG_P (part
[1][1])
21264 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21266 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21268 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21270 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21272 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21274 operands
[2 + i
] = part
[0][j
];
21275 operands
[6 + i
] = part
[1][j
];
21280 for (i
= 0; i
< nparts
; i
++)
21282 operands
[2 + i
] = part
[0][i
];
21283 operands
[6 + i
] = part
[1][i
];
21287 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21288 if (optimize_insn_for_size_p ())
21290 for (j
= 0; j
< nparts
- 1; j
++)
21291 if (CONST_INT_P (operands
[6 + j
])
21292 && operands
[6 + j
] != const0_rtx
21293 && REG_P (operands
[2 + j
]))
21294 for (i
= j
; i
< nparts
- 1; i
++)
21295 if (CONST_INT_P (operands
[7 + i
])
21296 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21297 operands
[7 + i
] = operands
[2 + j
];
21300 for (i
= 0; i
< nparts
; i
++)
21301 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21306 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21307 left shift by a constant, either using a single shift or
21308 a sequence of add instructions. */
21311 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21313 rtx (*insn
)(rtx
, rtx
, rtx
);
21316 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21317 && !optimize_insn_for_size_p ()))
21319 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21320 while (count
-- > 0)
21321 emit_insn (insn (operand
, operand
, operand
));
21325 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21326 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21331 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21333 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21334 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21335 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21337 rtx low
[2], high
[2];
21340 if (CONST_INT_P (operands
[2]))
21342 split_double_mode (mode
, operands
, 2, low
, high
);
21343 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21345 if (count
>= half_width
)
21347 emit_move_insn (high
[0], low
[1]);
21348 emit_move_insn (low
[0], const0_rtx
);
21350 if (count
> half_width
)
21351 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21355 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21357 if (!rtx_equal_p (operands
[0], operands
[1]))
21358 emit_move_insn (operands
[0], operands
[1]);
21360 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21361 ix86_expand_ashl_const (low
[0], count
, mode
);
21366 split_double_mode (mode
, operands
, 1, low
, high
);
21368 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21370 if (operands
[1] == const1_rtx
)
21372 /* Assuming we've chosen a QImode capable registers, then 1 << N
21373 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21374 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21376 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21378 ix86_expand_clear (low
[0]);
21379 ix86_expand_clear (high
[0]);
21380 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21382 d
= gen_lowpart (QImode
, low
[0]);
21383 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21384 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21385 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21387 d
= gen_lowpart (QImode
, high
[0]);
21388 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21389 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21390 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21393 /* Otherwise, we can get the same results by manually performing
21394 a bit extract operation on bit 5/6, and then performing the two
21395 shifts. The two methods of getting 0/1 into low/high are exactly
21396 the same size. Avoiding the shift in the bit extract case helps
21397 pentium4 a bit; no one else seems to care much either way. */
21400 enum machine_mode half_mode
;
21401 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21402 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21403 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21404 HOST_WIDE_INT bits
;
21407 if (mode
== DImode
)
21409 half_mode
= SImode
;
21410 gen_lshr3
= gen_lshrsi3
;
21411 gen_and3
= gen_andsi3
;
21412 gen_xor3
= gen_xorsi3
;
21417 half_mode
= DImode
;
21418 gen_lshr3
= gen_lshrdi3
;
21419 gen_and3
= gen_anddi3
;
21420 gen_xor3
= gen_xordi3
;
21424 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21425 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21427 x
= gen_lowpart (half_mode
, operands
[2]);
21428 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21430 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21431 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21432 emit_move_insn (low
[0], high
[0]);
21433 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21436 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21437 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21441 if (operands
[1] == constm1_rtx
)
21443 /* For -1 << N, we can avoid the shld instruction, because we
21444 know that we're shifting 0...31/63 ones into a -1. */
21445 emit_move_insn (low
[0], constm1_rtx
);
21446 if (optimize_insn_for_size_p ())
21447 emit_move_insn (high
[0], low
[0]);
21449 emit_move_insn (high
[0], constm1_rtx
);
21453 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21455 if (!rtx_equal_p (operands
[0], operands
[1]))
21456 emit_move_insn (operands
[0], operands
[1]);
21458 split_double_mode (mode
, operands
, 1, low
, high
);
21459 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21462 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21464 if (TARGET_CMOVE
&& scratch
)
21466 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21467 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21469 ix86_expand_clear (scratch
);
21470 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21474 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21475 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21477 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21482 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21484 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21485 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21486 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21487 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21489 rtx low
[2], high
[2];
21492 if (CONST_INT_P (operands
[2]))
21494 split_double_mode (mode
, operands
, 2, low
, high
);
21495 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21497 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21499 emit_move_insn (high
[0], high
[1]);
21500 emit_insn (gen_ashr3 (high
[0], high
[0],
21501 GEN_INT (half_width
- 1)));
21502 emit_move_insn (low
[0], high
[0]);
21505 else if (count
>= half_width
)
21507 emit_move_insn (low
[0], high
[1]);
21508 emit_move_insn (high
[0], low
[0]);
21509 emit_insn (gen_ashr3 (high
[0], high
[0],
21510 GEN_INT (half_width
- 1)));
21512 if (count
> half_width
)
21513 emit_insn (gen_ashr3 (low
[0], low
[0],
21514 GEN_INT (count
- half_width
)));
21518 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21520 if (!rtx_equal_p (operands
[0], operands
[1]))
21521 emit_move_insn (operands
[0], operands
[1]);
21523 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21524 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21529 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21531 if (!rtx_equal_p (operands
[0], operands
[1]))
21532 emit_move_insn (operands
[0], operands
[1]);
21534 split_double_mode (mode
, operands
, 1, low
, high
);
21536 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21537 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21539 if (TARGET_CMOVE
&& scratch
)
21541 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21542 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21544 emit_move_insn (scratch
, high
[0]);
21545 emit_insn (gen_ashr3 (scratch
, scratch
,
21546 GEN_INT (half_width
- 1)));
21547 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21552 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21553 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21555 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21561 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21563 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21564 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21565 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21566 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21568 rtx low
[2], high
[2];
21571 if (CONST_INT_P (operands
[2]))
21573 split_double_mode (mode
, operands
, 2, low
, high
);
21574 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21576 if (count
>= half_width
)
21578 emit_move_insn (low
[0], high
[1]);
21579 ix86_expand_clear (high
[0]);
21581 if (count
> half_width
)
21582 emit_insn (gen_lshr3 (low
[0], low
[0],
21583 GEN_INT (count
- half_width
)));
21587 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21589 if (!rtx_equal_p (operands
[0], operands
[1]))
21590 emit_move_insn (operands
[0], operands
[1]);
21592 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21593 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21598 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21600 if (!rtx_equal_p (operands
[0], operands
[1]))
21601 emit_move_insn (operands
[0], operands
[1]);
21603 split_double_mode (mode
, operands
, 1, low
, high
);
21605 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21606 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21608 if (TARGET_CMOVE
&& scratch
)
21610 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21611 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21613 ix86_expand_clear (scratch
);
21614 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21619 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21620 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21622 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21627 /* Predict just emitted jump instruction to be taken with probability PROB. */
21629 predict_jump (int prob
)
21631 rtx insn
= get_last_insn ();
21632 gcc_assert (JUMP_P (insn
));
21633 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21636 /* Helper function for the string operations below. Dest VARIABLE whether
21637 it is aligned to VALUE bytes. If true, jump to the label. */
21639 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21641 rtx label
= gen_label_rtx ();
21642 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21643 if (GET_MODE (variable
) == DImode
)
21644 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21646 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21647 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21650 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21652 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21656 /* Adjust COUNTER by the VALUE. */
21658 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21660 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21661 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21663 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21666 /* Zero extend possibly SImode EXP to Pmode register. */
21668 ix86_zero_extend_to_Pmode (rtx exp
)
21670 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
21673 /* Divide COUNTREG by SCALE. */
21675 scale_counter (rtx countreg
, int scale
)
21681 if (CONST_INT_P (countreg
))
21682 return GEN_INT (INTVAL (countreg
) / scale
);
21683 gcc_assert (REG_P (countreg
));
21685 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21686 GEN_INT (exact_log2 (scale
)),
21687 NULL
, 1, OPTAB_DIRECT
);
21691 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21692 DImode for constant loop counts. */
21694 static enum machine_mode
21695 counter_mode (rtx count_exp
)
21697 if (GET_MODE (count_exp
) != VOIDmode
)
21698 return GET_MODE (count_exp
);
21699 if (!CONST_INT_P (count_exp
))
21701 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21706 /* When SRCPTR is non-NULL, output simple loop to move memory
21707 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21708 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21709 equivalent loop to set memory by VALUE (supposed to be in MODE).
21711 The size is rounded down to whole number of chunk size moved at once.
21712 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21716 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21717 rtx destptr
, rtx srcptr
, rtx value
,
21718 rtx count
, enum machine_mode mode
, int unroll
,
21721 rtx out_label
, top_label
, iter
, tmp
;
21722 enum machine_mode iter_mode
= counter_mode (count
);
21723 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21724 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21730 top_label
= gen_label_rtx ();
21731 out_label
= gen_label_rtx ();
21732 iter
= gen_reg_rtx (iter_mode
);
21734 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21735 NULL
, 1, OPTAB_DIRECT
);
21736 /* Those two should combine. */
21737 if (piece_size
== const1_rtx
)
21739 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21741 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21743 emit_move_insn (iter
, const0_rtx
);
21745 emit_label (top_label
);
21747 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21748 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21749 destmem
= change_address (destmem
, mode
, x_addr
);
21753 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21754 srcmem
= change_address (srcmem
, mode
, y_addr
);
21756 /* When unrolling for chips that reorder memory reads and writes,
21757 we can save registers by using single temporary.
21758 Also using 4 temporaries is overkill in 32bit mode. */
21759 if (!TARGET_64BIT
&& 0)
21761 for (i
= 0; i
< unroll
; i
++)
21766 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21768 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21770 emit_move_insn (destmem
, srcmem
);
21776 gcc_assert (unroll
<= 4);
21777 for (i
= 0; i
< unroll
; i
++)
21779 tmpreg
[i
] = gen_reg_rtx (mode
);
21783 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21785 emit_move_insn (tmpreg
[i
], srcmem
);
21787 for (i
= 0; i
< unroll
; i
++)
21792 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21794 emit_move_insn (destmem
, tmpreg
[i
]);
21799 for (i
= 0; i
< unroll
; i
++)
21803 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21804 emit_move_insn (destmem
, value
);
21807 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21808 true, OPTAB_LIB_WIDEN
);
21810 emit_move_insn (iter
, tmp
);
21812 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21814 if (expected_size
!= -1)
21816 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21817 if (expected_size
== 0)
21819 else if (expected_size
> REG_BR_PROB_BASE
)
21820 predict_jump (REG_BR_PROB_BASE
- 1);
21822 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21825 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21826 iter
= ix86_zero_extend_to_Pmode (iter
);
21827 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21828 true, OPTAB_LIB_WIDEN
);
21829 if (tmp
!= destptr
)
21830 emit_move_insn (destptr
, tmp
);
21833 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21834 true, OPTAB_LIB_WIDEN
);
21836 emit_move_insn (srcptr
, tmp
);
21838 emit_label (out_label
);
21841 /* Output "rep; mov" instruction.
21842 Arguments have same meaning as for previous function */
21844 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21845 rtx destptr
, rtx srcptr
,
21847 enum machine_mode mode
)
21852 HOST_WIDE_INT rounded_count
;
21854 /* If the size is known, it is shorter to use rep movs. */
21855 if (mode
== QImode
&& CONST_INT_P (count
)
21856 && !(INTVAL (count
) & 3))
21859 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21860 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21861 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21862 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21863 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21864 if (mode
!= QImode
)
21866 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21867 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21868 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21869 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21870 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21871 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21875 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21876 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21878 if (CONST_INT_P (count
))
21880 rounded_count
= (INTVAL (count
)
21881 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21882 destmem
= shallow_copy_rtx (destmem
);
21883 srcmem
= shallow_copy_rtx (srcmem
);
21884 set_mem_size (destmem
, rounded_count
);
21885 set_mem_size (srcmem
, rounded_count
);
21889 if (MEM_SIZE_KNOWN_P (destmem
))
21890 clear_mem_size (destmem
);
21891 if (MEM_SIZE_KNOWN_P (srcmem
))
21892 clear_mem_size (srcmem
);
21894 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21898 /* Output "rep; stos" instruction.
21899 Arguments have same meaning as for previous function */
21901 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21902 rtx count
, enum machine_mode mode
,
21907 HOST_WIDE_INT rounded_count
;
21909 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21910 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21911 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21912 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21913 if (mode
!= QImode
)
21915 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21916 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21917 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21920 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21921 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21923 rounded_count
= (INTVAL (count
)
21924 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21925 destmem
= shallow_copy_rtx (destmem
);
21926 set_mem_size (destmem
, rounded_count
);
21928 else if (MEM_SIZE_KNOWN_P (destmem
))
21929 clear_mem_size (destmem
);
21930 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21934 emit_strmov (rtx destmem
, rtx srcmem
,
21935 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21937 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21938 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21939 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21942 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21944 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21945 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21948 if (CONST_INT_P (count
))
21950 HOST_WIDE_INT countval
= INTVAL (count
);
21953 if ((countval
& 0x10) && max_size
> 16)
21957 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21958 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21961 gcc_unreachable ();
21964 if ((countval
& 0x08) && max_size
> 8)
21967 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21970 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21971 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21975 if ((countval
& 0x04) && max_size
> 4)
21977 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21980 if ((countval
& 0x02) && max_size
> 2)
21982 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21985 if ((countval
& 0x01) && max_size
> 1)
21987 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21994 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21995 count
, 1, OPTAB_DIRECT
);
21996 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21997 count
, QImode
, 1, 4);
22001 /* When there are stringops, we can cheaply increase dest and src pointers.
22002 Otherwise we save code size by maintaining offset (zero is readily
22003 available from preceding rep operation) and using x86 addressing modes.
22005 if (TARGET_SINGLE_STRINGOP
)
22009 rtx label
= ix86_expand_aligntest (count
, 4, true);
22010 src
= change_address (srcmem
, SImode
, srcptr
);
22011 dest
= change_address (destmem
, SImode
, destptr
);
22012 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22013 emit_label (label
);
22014 LABEL_NUSES (label
) = 1;
22018 rtx label
= ix86_expand_aligntest (count
, 2, true);
22019 src
= change_address (srcmem
, HImode
, srcptr
);
22020 dest
= change_address (destmem
, HImode
, destptr
);
22021 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22022 emit_label (label
);
22023 LABEL_NUSES (label
) = 1;
22027 rtx label
= ix86_expand_aligntest (count
, 1, true);
22028 src
= change_address (srcmem
, QImode
, srcptr
);
22029 dest
= change_address (destmem
, QImode
, destptr
);
22030 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22031 emit_label (label
);
22032 LABEL_NUSES (label
) = 1;
22037 rtx offset
= force_reg (Pmode
, const0_rtx
);
22042 rtx label
= ix86_expand_aligntest (count
, 4, true);
22043 src
= change_address (srcmem
, SImode
, srcptr
);
22044 dest
= change_address (destmem
, SImode
, destptr
);
22045 emit_move_insn (dest
, src
);
22046 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22047 true, OPTAB_LIB_WIDEN
);
22049 emit_move_insn (offset
, tmp
);
22050 emit_label (label
);
22051 LABEL_NUSES (label
) = 1;
22055 rtx label
= ix86_expand_aligntest (count
, 2, true);
22056 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22057 src
= change_address (srcmem
, HImode
, tmp
);
22058 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22059 dest
= change_address (destmem
, HImode
, tmp
);
22060 emit_move_insn (dest
, src
);
22061 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22062 true, OPTAB_LIB_WIDEN
);
22064 emit_move_insn (offset
, tmp
);
22065 emit_label (label
);
22066 LABEL_NUSES (label
) = 1;
22070 rtx label
= ix86_expand_aligntest (count
, 1, true);
22071 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22072 src
= change_address (srcmem
, QImode
, tmp
);
22073 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22074 dest
= change_address (destmem
, QImode
, tmp
);
22075 emit_move_insn (dest
, src
);
22076 emit_label (label
);
22077 LABEL_NUSES (label
) = 1;
22082 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22084 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22085 rtx count
, int max_size
)
22088 expand_simple_binop (counter_mode (count
), AND
, count
,
22089 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22090 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22091 gen_lowpart (QImode
, value
), count
, QImode
,
22095 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22097 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22101 if (CONST_INT_P (count
))
22103 HOST_WIDE_INT countval
= INTVAL (count
);
22106 if ((countval
& 0x10) && max_size
> 16)
22110 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22111 emit_insn (gen_strset (destptr
, dest
, value
));
22112 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22113 emit_insn (gen_strset (destptr
, dest
, value
));
22116 gcc_unreachable ();
22119 if ((countval
& 0x08) && max_size
> 8)
22123 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22124 emit_insn (gen_strset (destptr
, dest
, value
));
22128 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22129 emit_insn (gen_strset (destptr
, dest
, value
));
22130 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22131 emit_insn (gen_strset (destptr
, dest
, value
));
22135 if ((countval
& 0x04) && max_size
> 4)
22137 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22138 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22141 if ((countval
& 0x02) && max_size
> 2)
22143 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22144 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22147 if ((countval
& 0x01) && max_size
> 1)
22149 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22150 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22157 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22162 rtx label
= ix86_expand_aligntest (count
, 16, true);
22165 dest
= change_address (destmem
, DImode
, destptr
);
22166 emit_insn (gen_strset (destptr
, dest
, value
));
22167 emit_insn (gen_strset (destptr
, dest
, value
));
22171 dest
= change_address (destmem
, SImode
, destptr
);
22172 emit_insn (gen_strset (destptr
, dest
, value
));
22173 emit_insn (gen_strset (destptr
, dest
, value
));
22174 emit_insn (gen_strset (destptr
, dest
, value
));
22175 emit_insn (gen_strset (destptr
, dest
, value
));
22177 emit_label (label
);
22178 LABEL_NUSES (label
) = 1;
22182 rtx label
= ix86_expand_aligntest (count
, 8, true);
22185 dest
= change_address (destmem
, DImode
, destptr
);
22186 emit_insn (gen_strset (destptr
, dest
, value
));
22190 dest
= change_address (destmem
, SImode
, destptr
);
22191 emit_insn (gen_strset (destptr
, dest
, value
));
22192 emit_insn (gen_strset (destptr
, dest
, value
));
22194 emit_label (label
);
22195 LABEL_NUSES (label
) = 1;
22199 rtx label
= ix86_expand_aligntest (count
, 4, true);
22200 dest
= change_address (destmem
, SImode
, destptr
);
22201 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22202 emit_label (label
);
22203 LABEL_NUSES (label
) = 1;
22207 rtx label
= ix86_expand_aligntest (count
, 2, true);
22208 dest
= change_address (destmem
, HImode
, destptr
);
22209 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22210 emit_label (label
);
22211 LABEL_NUSES (label
) = 1;
22215 rtx label
= ix86_expand_aligntest (count
, 1, true);
22216 dest
= change_address (destmem
, QImode
, destptr
);
22217 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22218 emit_label (label
);
22219 LABEL_NUSES (label
) = 1;
22223 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22224 DESIRED_ALIGNMENT. */
22226 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22227 rtx destptr
, rtx srcptr
, rtx count
,
22228 int align
, int desired_alignment
)
22230 if (align
<= 1 && desired_alignment
> 1)
22232 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22233 srcmem
= change_address (srcmem
, QImode
, srcptr
);
22234 destmem
= change_address (destmem
, QImode
, destptr
);
22235 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22236 ix86_adjust_counter (count
, 1);
22237 emit_label (label
);
22238 LABEL_NUSES (label
) = 1;
22240 if (align
<= 2 && desired_alignment
> 2)
22242 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22243 srcmem
= change_address (srcmem
, HImode
, srcptr
);
22244 destmem
= change_address (destmem
, HImode
, destptr
);
22245 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22246 ix86_adjust_counter (count
, 2);
22247 emit_label (label
);
22248 LABEL_NUSES (label
) = 1;
22250 if (align
<= 4 && desired_alignment
> 4)
22252 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22253 srcmem
= change_address (srcmem
, SImode
, srcptr
);
22254 destmem
= change_address (destmem
, SImode
, destptr
);
22255 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22256 ix86_adjust_counter (count
, 4);
22257 emit_label (label
);
22258 LABEL_NUSES (label
) = 1;
22260 gcc_assert (desired_alignment
<= 8);
22263 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22264 ALIGN_BYTES is how many bytes need to be copied. */
22266 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22267 int desired_align
, int align_bytes
)
22270 rtx orig_dst
= dst
;
22271 rtx orig_src
= src
;
22273 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22274 if (src_align_bytes
>= 0)
22275 src_align_bytes
= desired_align
- src_align_bytes
;
22276 if (align_bytes
& 1)
22278 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22279 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22281 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22283 if (align_bytes
& 2)
22285 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22286 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22287 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22288 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22289 if (src_align_bytes
>= 0
22290 && (src_align_bytes
& 1) == (align_bytes
& 1)
22291 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22292 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22294 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22296 if (align_bytes
& 4)
22298 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22299 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22300 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22301 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22302 if (src_align_bytes
>= 0)
22304 unsigned int src_align
= 0;
22305 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22307 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22309 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22310 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22313 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22315 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22316 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22317 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22318 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22319 if (src_align_bytes
>= 0)
22321 unsigned int src_align
= 0;
22322 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22324 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22326 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22328 if (src_align
> (unsigned int) desired_align
)
22329 src_align
= desired_align
;
22330 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22331 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22333 if (MEM_SIZE_KNOWN_P (orig_dst
))
22334 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22335 if (MEM_SIZE_KNOWN_P (orig_src
))
22336 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22341 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22342 DESIRED_ALIGNMENT. */
22344 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22345 int align
, int desired_alignment
)
22347 if (align
<= 1 && desired_alignment
> 1)
22349 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22350 destmem
= change_address (destmem
, QImode
, destptr
);
22351 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22352 ix86_adjust_counter (count
, 1);
22353 emit_label (label
);
22354 LABEL_NUSES (label
) = 1;
22356 if (align
<= 2 && desired_alignment
> 2)
22358 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22359 destmem
= change_address (destmem
, HImode
, destptr
);
22360 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22361 ix86_adjust_counter (count
, 2);
22362 emit_label (label
);
22363 LABEL_NUSES (label
) = 1;
22365 if (align
<= 4 && desired_alignment
> 4)
22367 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22368 destmem
= change_address (destmem
, SImode
, destptr
);
22369 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22370 ix86_adjust_counter (count
, 4);
22371 emit_label (label
);
22372 LABEL_NUSES (label
) = 1;
22374 gcc_assert (desired_alignment
<= 8);
22377 /* Set enough from DST to align DST known to by aligned by ALIGN to
22378 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22380 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22381 int desired_align
, int align_bytes
)
22384 rtx orig_dst
= dst
;
22385 if (align_bytes
& 1)
22387 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22389 emit_insn (gen_strset (destreg
, dst
,
22390 gen_lowpart (QImode
, value
)));
22392 if (align_bytes
& 2)
22394 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22395 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22396 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22398 emit_insn (gen_strset (destreg
, dst
,
22399 gen_lowpart (HImode
, value
)));
22401 if (align_bytes
& 4)
22403 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22404 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22405 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22407 emit_insn (gen_strset (destreg
, dst
,
22408 gen_lowpart (SImode
, value
)));
22410 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22411 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22412 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22413 if (MEM_SIZE_KNOWN_P (orig_dst
))
22414 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22418 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22419 static enum stringop_alg
22420 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22421 int *dynamic_check
, bool *noalign
)
22423 const struct stringop_algs
* algs
;
22424 bool optimize_for_speed
;
22425 /* Algorithms using the rep prefix want at least edi and ecx;
22426 additionally, memset wants eax and memcpy wants esi. Don't
22427 consider such algorithms if the user has appropriated those
22428 registers for their own purposes. */
22429 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22431 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22434 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22435 || (alg != rep_prefix_1_byte \
22436 && alg != rep_prefix_4_byte \
22437 && alg != rep_prefix_8_byte))
22438 const struct processor_costs
*cost
;
22440 /* Even if the string operation call is cold, we still might spend a lot
22441 of time processing large blocks. */
22442 if (optimize_function_for_size_p (cfun
)
22443 || (optimize_insn_for_size_p ()
22444 && expected_size
!= -1 && expected_size
< 256))
22445 optimize_for_speed
= false;
22447 optimize_for_speed
= true;
22449 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22451 *dynamic_check
= -1;
22453 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22455 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22456 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22457 return ix86_stringop_alg
;
22458 /* rep; movq or rep; movl is the smallest variant. */
22459 else if (!optimize_for_speed
)
22461 if (!count
|| (count
& 3))
22462 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22464 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22466 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22468 else if (expected_size
!= -1 && expected_size
< 4)
22469 return loop_1_byte
;
22470 else if (expected_size
!= -1)
22473 enum stringop_alg alg
= libcall
;
22474 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22476 /* We get here if the algorithms that were not libcall-based
22477 were rep-prefix based and we are unable to use rep prefixes
22478 based on global register usage. Break out of the loop and
22479 use the heuristic below. */
22480 if (algs
->size
[i
].max
== 0)
22482 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22484 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22486 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22488 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22489 last non-libcall inline algorithm. */
22490 if (TARGET_INLINE_ALL_STRINGOPS
)
22492 /* When the current size is best to be copied by a libcall,
22493 but we are still forced to inline, run the heuristic below
22494 that will pick code for medium sized blocks. */
22495 if (alg
!= libcall
)
22499 else if (ALG_USABLE_P (candidate
))
22501 *noalign
= algs
->size
[i
].noalign
;
22506 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22508 /* When asked to inline the call anyway, try to pick meaningful choice.
22509 We look for maximal size of block that is faster to copy by hand and
22510 take blocks of at most of that size guessing that average size will
22511 be roughly half of the block.
22513 If this turns out to be bad, we might simply specify the preferred
22514 choice in ix86_costs. */
22515 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22516 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22519 enum stringop_alg alg
;
22521 bool any_alg_usable_p
= true;
22523 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22525 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22526 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22528 if (candidate
!= libcall
&& candidate
22529 && ALG_USABLE_P (candidate
))
22530 max
= algs
->size
[i
].max
;
22532 /* If there aren't any usable algorithms, then recursing on
22533 smaller sizes isn't going to find anything. Just return the
22534 simple byte-at-a-time copy loop. */
22535 if (!any_alg_usable_p
)
22537 /* Pick something reasonable. */
22538 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22539 *dynamic_check
= 128;
22540 return loop_1_byte
;
22544 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
22545 gcc_assert (*dynamic_check
== -1);
22546 gcc_assert (alg
!= libcall
);
22547 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22548 *dynamic_check
= max
;
22551 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22552 #undef ALG_USABLE_P
22555 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22556 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22558 decide_alignment (int align
,
22559 enum stringop_alg alg
,
22562 int desired_align
= 0;
22566 gcc_unreachable ();
22568 case unrolled_loop
:
22569 desired_align
= GET_MODE_SIZE (Pmode
);
22571 case rep_prefix_8_byte
:
22574 case rep_prefix_4_byte
:
22575 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22576 copying whole cacheline at once. */
22577 if (TARGET_PENTIUMPRO
)
22582 case rep_prefix_1_byte
:
22583 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22584 copying whole cacheline at once. */
22585 if (TARGET_PENTIUMPRO
)
22599 if (desired_align
< align
)
22600 desired_align
= align
;
22601 if (expected_size
!= -1 && expected_size
< 4)
22602 desired_align
= align
;
22603 return desired_align
;
/* Return the smallest power of 2 strictly greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
22616 /* Expand string move (memcpy) operation. Use i386 string operations
22617 when profitable. expand_setmem contains similar code. The code
22618 depends upon architecture, block size and alignment, but always has
22619 the same overall structure:
22621 1) Prologue guard: Conditional that jumps up to epilogues for small
22622 blocks that can be handled by epilogue alone. This is faster
22623 but also needed for correctness, since prologue assume the block
22624 is larger than the desired alignment.
22626 Optional dynamic check for size and libcall for large
22627 blocks is emitted here too, with -minline-stringops-dynamically.
22629 2) Prologue: copy first few bytes in order to get destination
22630 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22631 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22632 copied. We emit either a jump tree on power of two sized
22633 blocks, or a byte loop.
22635 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22636 with specified algorithm.
22638 4) Epilogue: code copying tail of the block that is too small to be
22639 handled by main body (or up to size guarded by prologue guard). */
22642 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22643 rtx expected_align_exp
, rtx expected_size_exp
)
22649 rtx jump_around_label
= NULL
;
22650 HOST_WIDE_INT align
= 1;
22651 unsigned HOST_WIDE_INT count
= 0;
22652 HOST_WIDE_INT expected_size
= -1;
22653 int size_needed
= 0, epilogue_size_needed
;
22654 int desired_align
= 0, align_bytes
= 0;
22655 enum stringop_alg alg
;
22657 bool need_zero_guard
= false;
22660 if (CONST_INT_P (align_exp
))
22661 align
= INTVAL (align_exp
);
22662 /* i386 can do misaligned access on reasonably increased cost. */
22663 if (CONST_INT_P (expected_align_exp
)
22664 && INTVAL (expected_align_exp
) > align
)
22665 align
= INTVAL (expected_align_exp
);
22666 /* ALIGN is the minimum of destination and source alignment, but we care here
22667 just about destination alignment. */
22668 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22669 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22671 if (CONST_INT_P (count_exp
))
22672 count
= expected_size
= INTVAL (count_exp
);
22673 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22674 expected_size
= INTVAL (expected_size_exp
);
22676 /* Make sure we don't need to care about overflow later on. */
22677 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22680 /* Step 0: Decide on preferred algorithm, desired alignment and
22681 size of chunks to be copied by main loop. */
22683 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
22684 desired_align
= decide_alignment (align
, alg
, expected_size
);
22686 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
22687 align
= desired_align
;
22689 if (alg
== libcall
)
22691 gcc_assert (alg
!= no_stringop
);
22693 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22694 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22695 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22700 gcc_unreachable ();
22702 need_zero_guard
= true;
22703 size_needed
= GET_MODE_SIZE (word_mode
);
22705 case unrolled_loop
:
22706 need_zero_guard
= true;
22707 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22709 case rep_prefix_8_byte
:
22712 case rep_prefix_4_byte
:
22715 case rep_prefix_1_byte
:
22719 need_zero_guard
= true;
22724 epilogue_size_needed
= size_needed
;
22726 /* Step 1: Prologue guard. */
22728 /* Alignment code needs count to be in register. */
22729 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22731 if (INTVAL (count_exp
) > desired_align
22732 && INTVAL (count_exp
) > size_needed
)
22735 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22736 if (align_bytes
<= 0)
22739 align_bytes
= desired_align
- align_bytes
;
22741 if (align_bytes
== 0)
22742 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22744 gcc_assert (desired_align
>= 1 && align
>= 1);
22746 /* Ensure that alignment prologue won't copy past end of block. */
22747 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22749 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22750 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22751 Make sure it is power of 2. */
22752 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22756 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22758 /* If main algorithm works on QImode, no epilogue is needed.
22759 For small sizes just don't align anything. */
22760 if (size_needed
== 1)
22761 desired_align
= align
;
22768 label
= gen_label_rtx ();
22769 emit_cmp_and_jump_insns (count_exp
,
22770 GEN_INT (epilogue_size_needed
),
22771 LTU
, 0, counter_mode (count_exp
), 1, label
);
22772 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22773 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22775 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22779 /* Emit code to decide on runtime whether library call or inline should be
22781 if (dynamic_check
!= -1)
22783 if (CONST_INT_P (count_exp
))
22785 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22787 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22788 count_exp
= const0_rtx
;
22794 rtx hot_label
= gen_label_rtx ();
22795 jump_around_label
= gen_label_rtx ();
22796 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22797 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22798 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22799 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22800 emit_jump (jump_around_label
);
22801 emit_label (hot_label
);
22805 /* Step 2: Alignment prologue. */
22807 if (desired_align
> align
)
22809 if (align_bytes
== 0)
22811 /* Except for the first move in epilogue, we no longer know
22812 constant offset in aliasing info. It don't seems to worth
22813 the pain to maintain it for the first move, so throw away
22815 src
= change_address (src
, BLKmode
, srcreg
);
22816 dst
= change_address (dst
, BLKmode
, destreg
);
22817 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22822 /* If we know how many bytes need to be stored before dst is
22823 sufficiently aligned, maintain aliasing info accurately. */
22824 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22825 desired_align
, align_bytes
);
22826 count_exp
= plus_constant (counter_mode (count_exp
),
22827 count_exp
, -align_bytes
);
22828 count
-= align_bytes
;
22830 if (need_zero_guard
22831 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22832 || (align_bytes
== 0
22833 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22834 + desired_align
- align
))))
22836 /* It is possible that we copied enough so the main loop will not
22838 gcc_assert (size_needed
> 1);
22839 if (label
== NULL_RTX
)
22840 label
= gen_label_rtx ();
22841 emit_cmp_and_jump_insns (count_exp
,
22842 GEN_INT (size_needed
),
22843 LTU
, 0, counter_mode (count_exp
), 1, label
);
22844 if (expected_size
== -1
22845 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22846 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22848 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22851 if (label
&& size_needed
== 1)
22853 emit_label (label
);
22854 LABEL_NUSES (label
) = 1;
22856 epilogue_size_needed
= 1;
22858 else if (label
== NULL_RTX
)
22859 epilogue_size_needed
= size_needed
;
22861 /* Step 3: Main loop. */
22867 gcc_unreachable ();
22869 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22870 count_exp
, QImode
, 1, expected_size
);
22873 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22874 count_exp
, word_mode
, 1, expected_size
);
22876 case unrolled_loop
:
22877 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22878 registers for 4 temporaries anyway. */
22879 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22880 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22883 case rep_prefix_8_byte
:
22884 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22887 case rep_prefix_4_byte
:
22888 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22891 case rep_prefix_1_byte
:
22892 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22896 /* Adjust properly the offset of src and dest memory for aliasing. */
22897 if (CONST_INT_P (count_exp
))
22899 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22900 (count
/ size_needed
) * size_needed
);
22901 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22902 (count
/ size_needed
) * size_needed
);
22906 src
= change_address (src
, BLKmode
, srcreg
);
22907 dst
= change_address (dst
, BLKmode
, destreg
);
22910 /* Step 4: Epilogue to copy the remaining bytes. */
22914 /* When the main loop is done, COUNT_EXP might hold original count,
22915 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22916 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22917 bytes. Compensate if needed. */
22919 if (size_needed
< epilogue_size_needed
)
22922 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22923 GEN_INT (size_needed
- 1), count_exp
, 1,
22925 if (tmp
!= count_exp
)
22926 emit_move_insn (count_exp
, tmp
);
22928 emit_label (label
);
22929 LABEL_NUSES (label
) = 1;
22932 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22933 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22934 epilogue_size_needed
);
22935 if (jump_around_label
)
22936 emit_label (jump_around_label
);
22940 /* Helper function for memcpy. For QImode value 0xXY produce
22941 0xXYXYXYXY of wide specified by MODE. This is essentially
22942 a * 0x10101010, but we can do slightly better than
22943 synth_mult by unwinding the sequence by hand on CPUs with
22946 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22948 enum machine_mode valmode
= GET_MODE (val
);
22950 int nops
= mode
== DImode
? 3 : 2;
22952 gcc_assert (mode
== SImode
|| mode
== DImode
);
22953 if (val
== const0_rtx
)
22954 return copy_to_mode_reg (mode
, const0_rtx
);
22955 if (CONST_INT_P (val
))
22957 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22961 if (mode
== DImode
)
22962 v
|= (v
<< 16) << 16;
22963 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22966 if (valmode
== VOIDmode
)
22968 if (valmode
!= QImode
)
22969 val
= gen_lowpart (QImode
, val
);
22970 if (mode
== QImode
)
22972 if (!TARGET_PARTIAL_REG_STALL
)
22974 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22975 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22976 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22977 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22979 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22980 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22981 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22986 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22988 if (!TARGET_PARTIAL_REG_STALL
)
22989 if (mode
== SImode
)
22990 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22992 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22995 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22996 NULL
, 1, OPTAB_DIRECT
);
22998 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23000 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23001 NULL
, 1, OPTAB_DIRECT
);
23002 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23003 if (mode
== SImode
)
23005 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23006 NULL
, 1, OPTAB_DIRECT
);
23007 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23012 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23013 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23014 alignment from ALIGN to DESIRED_ALIGN. */
23016 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23021 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23022 promoted_val
= promote_duplicated_reg (DImode
, val
);
23023 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23024 promoted_val
= promote_duplicated_reg (SImode
, val
);
23025 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23026 promoted_val
= promote_duplicated_reg (HImode
, val
);
23028 promoted_val
= val
;
23030 return promoted_val
;
23033 /* Expand string clear operation (bzero). Use i386 string operations when
23034 profitable. See expand_movmem comment for explanation of individual
23035 steps performed. */
23037 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23038 rtx expected_align_exp
, rtx expected_size_exp
)
23043 rtx jump_around_label
= NULL
;
23044 HOST_WIDE_INT align
= 1;
23045 unsigned HOST_WIDE_INT count
= 0;
23046 HOST_WIDE_INT expected_size
= -1;
23047 int size_needed
= 0, epilogue_size_needed
;
23048 int desired_align
= 0, align_bytes
= 0;
23049 enum stringop_alg alg
;
23050 rtx promoted_val
= NULL
;
23051 bool force_loopy_epilogue
= false;
23053 bool need_zero_guard
= false;
23056 if (CONST_INT_P (align_exp
))
23057 align
= INTVAL (align_exp
);
23058 /* i386 can do misaligned access on reasonably increased cost. */
23059 if (CONST_INT_P (expected_align_exp
)
23060 && INTVAL (expected_align_exp
) > align
)
23061 align
= INTVAL (expected_align_exp
);
23062 if (CONST_INT_P (count_exp
))
23063 count
= expected_size
= INTVAL (count_exp
);
23064 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23065 expected_size
= INTVAL (expected_size_exp
);
23067 /* Make sure we don't need to care about overflow later on. */
23068 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23071 /* Step 0: Decide on preferred algorithm, desired alignment and
23072 size of chunks to be copied by main loop. */
23074 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23075 desired_align
= decide_alignment (align
, alg
, expected_size
);
23077 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23078 align
= desired_align
;
23080 if (alg
== libcall
)
23082 gcc_assert (alg
!= no_stringop
);
23084 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23085 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23090 gcc_unreachable ();
23092 need_zero_guard
= true;
23093 size_needed
= GET_MODE_SIZE (word_mode
);
23095 case unrolled_loop
:
23096 need_zero_guard
= true;
23097 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
23099 case rep_prefix_8_byte
:
23102 case rep_prefix_4_byte
:
23105 case rep_prefix_1_byte
:
23109 need_zero_guard
= true;
23113 epilogue_size_needed
= size_needed
;
23115 /* Step 1: Prologue guard. */
23117 /* Alignment code needs count to be in register. */
23118 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23120 if (INTVAL (count_exp
) > desired_align
23121 && INTVAL (count_exp
) > size_needed
)
23124 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23125 if (align_bytes
<= 0)
23128 align_bytes
= desired_align
- align_bytes
;
23130 if (align_bytes
== 0)
23132 enum machine_mode mode
= SImode
;
23133 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23135 count_exp
= force_reg (mode
, count_exp
);
23138 /* Do the cheap promotion to allow better CSE across the
23139 main loop and epilogue (ie one load of the big constant in the
23140 front of all code. */
23141 if (CONST_INT_P (val_exp
))
23142 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23143 desired_align
, align
);
23144 /* Ensure that alignment prologue won't copy past end of block. */
23145 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23147 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23148 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23149 Make sure it is power of 2. */
23150 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23152 /* To improve performance of small blocks, we jump around the VAL
23153 promoting mode. This mean that if the promoted VAL is not constant,
23154 we might not use it in the epilogue and have to use byte
23156 if (epilogue_size_needed
> 2 && !promoted_val
)
23157 force_loopy_epilogue
= true;
23160 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23162 /* If main algorithm works on QImode, no epilogue is needed.
23163 For small sizes just don't align anything. */
23164 if (size_needed
== 1)
23165 desired_align
= align
;
23172 label
= gen_label_rtx ();
23173 emit_cmp_and_jump_insns (count_exp
,
23174 GEN_INT (epilogue_size_needed
),
23175 LTU
, 0, counter_mode (count_exp
), 1, label
);
23176 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23177 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23179 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23182 if (dynamic_check
!= -1)
23184 rtx hot_label
= gen_label_rtx ();
23185 jump_around_label
= gen_label_rtx ();
23186 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23187 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23188 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23189 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23190 emit_jump (jump_around_label
);
23191 emit_label (hot_label
);
23194 /* Step 2: Alignment prologue. */
23196 /* Do the expensive promotion once we branched off the small blocks. */
23198 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23199 desired_align
, align
);
23200 gcc_assert (desired_align
>= 1 && align
>= 1);
23202 if (desired_align
> align
)
23204 if (align_bytes
== 0)
23206 /* Except for the first move in epilogue, we no longer know
23207 constant offset in aliasing info. It don't seems to worth
23208 the pain to maintain it for the first move, so throw away
23210 dst
= change_address (dst
, BLKmode
, destreg
);
23211 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23216 /* If we know how many bytes need to be stored before dst is
23217 sufficiently aligned, maintain aliasing info accurately. */
23218 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23219 desired_align
, align_bytes
);
23220 count_exp
= plus_constant (counter_mode (count_exp
),
23221 count_exp
, -align_bytes
);
23222 count
-= align_bytes
;
23224 if (need_zero_guard
23225 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23226 || (align_bytes
== 0
23227 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23228 + desired_align
- align
))))
23230 /* It is possible that we copied enough so the main loop will not
23232 gcc_assert (size_needed
> 1);
23233 if (label
== NULL_RTX
)
23234 label
= gen_label_rtx ();
23235 emit_cmp_and_jump_insns (count_exp
,
23236 GEN_INT (size_needed
),
23237 LTU
, 0, counter_mode (count_exp
), 1, label
);
23238 if (expected_size
== -1
23239 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23240 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23242 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23245 if (label
&& size_needed
== 1)
23247 emit_label (label
);
23248 LABEL_NUSES (label
) = 1;
23250 promoted_val
= val_exp
;
23251 epilogue_size_needed
= 1;
23253 else if (label
== NULL_RTX
)
23254 epilogue_size_needed
= size_needed
;
23256 /* Step 3: Main loop. */
23262 gcc_unreachable ();
23264 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23265 count_exp
, QImode
, 1, expected_size
);
23268 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23269 count_exp
, word_mode
, 1, expected_size
);
23271 case unrolled_loop
:
23272 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23273 count_exp
, word_mode
, 4, expected_size
);
23275 case rep_prefix_8_byte
:
23276 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23279 case rep_prefix_4_byte
:
23280 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23283 case rep_prefix_1_byte
:
23284 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23288 /* Adjust properly the offset of src and dest memory for aliasing. */
23289 if (CONST_INT_P (count_exp
))
23290 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23291 (count
/ size_needed
) * size_needed
);
23293 dst
= change_address (dst
, BLKmode
, destreg
);
23295 /* Step 4: Epilogue to copy the remaining bytes. */
23299 /* When the main loop is done, COUNT_EXP might hold original count,
23300 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23301 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23302 bytes. Compensate if needed. */
23304 if (size_needed
< epilogue_size_needed
)
23307 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23308 GEN_INT (size_needed
- 1), count_exp
, 1,
23310 if (tmp
!= count_exp
)
23311 emit_move_insn (count_exp
, tmp
);
23313 emit_label (label
);
23314 LABEL_NUSES (label
) = 1;
23317 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23319 if (force_loopy_epilogue
)
23320 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23321 epilogue_size_needed
);
23323 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23324 epilogue_size_needed
);
23326 if (jump_around_label
)
23327 emit_label (jump_around_label
);
23331 /* Expand the appropriate insns for doing strlen if not just doing
23334 out = result, initialized with the start address
23335 align_rtx = alignment of the address.
23336 scratch = scratch register, initialized with the startaddress when
23337 not aligned, otherwise undefined
23339 This is just the body. It needs the initializations mentioned above and
23340 some address computing at the end. These things are done in i386.md. */
23343 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23347 rtx align_2_label
= NULL_RTX
;
23348 rtx align_3_label
= NULL_RTX
;
23349 rtx align_4_label
= gen_label_rtx ();
23350 rtx end_0_label
= gen_label_rtx ();
23352 rtx tmpreg
= gen_reg_rtx (SImode
);
23353 rtx scratch
= gen_reg_rtx (SImode
);
23357 if (CONST_INT_P (align_rtx
))
23358 align
= INTVAL (align_rtx
);
23360 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23362 /* Is there a known alignment and is it less than 4? */
23365 rtx scratch1
= gen_reg_rtx (Pmode
);
23366 emit_move_insn (scratch1
, out
);
23367 /* Is there a known alignment and is it not 2? */
23370 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23371 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23373 /* Leave just the 3 lower bits. */
23374 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23375 NULL_RTX
, 0, OPTAB_WIDEN
);
23377 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23378 Pmode
, 1, align_4_label
);
23379 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23380 Pmode
, 1, align_2_label
);
23381 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23382 Pmode
, 1, align_3_label
);
23386 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23387 check if is aligned to 4 - byte. */
23389 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23390 NULL_RTX
, 0, OPTAB_WIDEN
);
23392 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23393 Pmode
, 1, align_4_label
);
23396 mem
= change_address (src
, QImode
, out
);
23398 /* Now compare the bytes. */
23400 /* Compare the first n unaligned byte on a byte per byte basis. */
23401 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23402 QImode
, 1, end_0_label
);
23404 /* Increment the address. */
23405 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23407 /* Not needed with an alignment of 2 */
23410 emit_label (align_2_label
);
23412 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23415 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23417 emit_label (align_3_label
);
23420 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23423 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23426 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23427 align this loop. It gives only huge programs, but does not help to
23429 emit_label (align_4_label
);
23431 mem
= change_address (src
, SImode
, out
);
23432 emit_move_insn (scratch
, mem
);
23433 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23435 /* This formula yields a nonzero result iff one of the bytes is zero.
23436 This saves three branches inside loop and many cycles. */
23438 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23439 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23440 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23441 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23442 gen_int_mode (0x80808080, SImode
)));
23443 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23448 rtx reg
= gen_reg_rtx (SImode
);
23449 rtx reg2
= gen_reg_rtx (Pmode
);
23450 emit_move_insn (reg
, tmpreg
);
23451 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23453 /* If zero is not in the first two bytes, move two bytes forward. */
23454 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23455 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23456 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23457 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23458 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23461 /* Emit lea manually to avoid clobbering of flags. */
23462 emit_insn (gen_rtx_SET (SImode
, reg2
,
23463 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23465 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23466 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23467 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23468 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23474 rtx end_2_label
= gen_label_rtx ();
23475 /* Is zero in the first two bytes? */
23477 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23478 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23479 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23480 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23481 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23483 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23484 JUMP_LABEL (tmp
) = end_2_label
;
23486 /* Not in the first two. Move two bytes forward. */
23487 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23488 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23490 emit_label (end_2_label
);
23494 /* Avoid branch in fixing the byte. */
23495 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23496 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23497 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23498 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23499 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23501 emit_label (end_0_label
);
23504 /* Expand strlen. */
23507 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23509 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23511 /* The generic case of strlen expander is long. Avoid it's
23512 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23514 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23515 && !TARGET_INLINE_ALL_STRINGOPS
23516 && !optimize_insn_for_size_p ()
23517 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23520 addr
= force_reg (Pmode
, XEXP (src
, 0));
23521 scratch1
= gen_reg_rtx (Pmode
);
23523 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23524 && !optimize_insn_for_size_p ())
23526 /* Well it seems that some optimizer does not combine a call like
23527 foo(strlen(bar), strlen(bar));
23528 when the move and the subtraction is done here. It does calculate
23529 the length just once when these instructions are done inside of
23530 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23531 often used and I use one fewer register for the lifetime of
23532 output_strlen_unroll() this is better. */
23534 emit_move_insn (out
, addr
);
23536 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23538 /* strlensi_unroll_1 returns the address of the zero at the end of
23539 the string, like memchr(), so compute the length by subtracting
23540 the start address. */
23541 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23547 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23548 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23551 scratch2
= gen_reg_rtx (Pmode
);
23552 scratch3
= gen_reg_rtx (Pmode
);
23553 scratch4
= force_reg (Pmode
, constm1_rtx
);
23555 emit_move_insn (scratch3
, addr
);
23556 eoschar
= force_reg (QImode
, eoschar
);
23558 src
= replace_equiv_address_nv (src
, scratch3
);
23560 /* If .md starts supporting :P, this can be done in .md. */
23561 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23562 scratch4
), UNSPEC_SCAS
);
23563 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23564 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23565 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23570 /* For given symbol (function) construct code to compute address of it's PLT
23571 entry in large x86-64 PIC model. */
23573 construct_plt_address (rtx symbol
)
23577 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23578 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23579 gcc_assert (Pmode
== DImode
);
23581 tmp
= gen_reg_rtx (Pmode
);
23582 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23584 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23585 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23590 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23592 rtx pop
, bool sibcall
)
23594 /* We need to represent that SI and DI registers are clobbered
23596 static int clobbered_registers
[] = {
23597 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23598 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23599 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23600 XMM15_REG
, SI_REG
, DI_REG
23602 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23603 rtx use
= NULL
, call
;
23604 unsigned int vec_len
;
23606 if (pop
== const0_rtx
)
23608 gcc_assert (!TARGET_64BIT
|| !pop
);
23610 if (TARGET_MACHO
&& !TARGET_64BIT
)
23613 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23614 fnaddr
= machopic_indirect_call_target (fnaddr
);
23619 /* Static functions and indirect calls don't need the pic register. */
23620 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23621 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23622 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23623 use_reg (&use
, pic_offset_table_rtx
);
23626 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23628 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23629 emit_move_insn (al
, callarg2
);
23630 use_reg (&use
, al
);
23633 if (ix86_cmodel
== CM_LARGE_PIC
23635 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23636 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23637 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23639 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23640 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23642 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
23643 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23647 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23649 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23650 vec
[vec_len
++] = call
;
23654 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23655 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23656 vec
[vec_len
++] = pop
;
23659 if (TARGET_64BIT_MS_ABI
23660 && (!callarg2
|| INTVAL (callarg2
) != -2))
23664 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23665 UNSPEC_MS_TO_SYSV_CALL
);
23667 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23669 = gen_rtx_CLOBBER (VOIDmode
,
23670 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23672 clobbered_registers
[i
]));
23676 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23677 call
= emit_call_insn (call
);
23679 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23684 /* Output the assembly for a call instruction. */
23687 ix86_output_call_insn (rtx insn
, rtx call_op
)
23689 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23690 bool seh_nop_p
= false;
23693 if (SIBLING_CALL_P (insn
))
23697 /* SEH epilogue detection requires the indirect branch case
23698 to include REX.W. */
23699 else if (TARGET_SEH
)
23700 xasm
= "rex.W jmp %A0";
23704 output_asm_insn (xasm
, &call_op
);
23708 /* SEH unwinding can require an extra nop to be emitted in several
23709 circumstances. Determine if we have one of those. */
23714 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23716 /* If we get to another real insn, we don't need the nop. */
23720 /* If we get to the epilogue note, prevent a catch region from
23721 being adjacent to the standard epilogue sequence. If non-
23722 call-exceptions, we'll have done this during epilogue emission. */
23723 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23724 && !flag_non_call_exceptions
23725 && !can_throw_internal (insn
))
23732 /* If we didn't find a real insn following the call, prevent the
23733 unwinder from looking into the next function. */
23739 xasm
= "call\t%P0";
23741 xasm
= "call\t%A0";
23743 output_asm_insn (xasm
, &call_op
);
23751 /* Clear stack slot assignments remembered from previous functions.
23752 This is called from INIT_EXPANDERS once before RTL is emitted for each
23755 static struct machine_function
*
23756 ix86_init_machine_status (void)
23758 struct machine_function
*f
;
23760 f
= ggc_alloc_cleared_machine_function ();
23761 f
->use_fast_prologue_epilogue_nregs
= -1;
23762 f
->call_abi
= ix86_abi
;
23767 /* Return a MEM corresponding to a stack slot with mode MODE.
23768 Allocate a new slot if necessary.
23770 The RTL for a function can have several slots available: N is
23771 which slot to use. */
23774 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23776 struct stack_local_entry
*s
;
23778 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23780 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23781 if (s
->mode
== mode
&& s
->n
== n
)
23782 return validize_mem (copy_rtx (s
->rtl
));
23784 s
= ggc_alloc_stack_local_entry ();
23787 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23789 s
->next
= ix86_stack_locals
;
23790 ix86_stack_locals
= s
;
23791 return validize_mem (s
->rtl
);
23795 ix86_instantiate_decls (void)
23797 struct stack_local_entry
*s
;
23799 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23800 if (s
->rtl
!= NULL_RTX
)
23801 instantiate_decl_rtl (s
->rtl
);
23804 /* Calculate the length of the memory address in the instruction encoding.
23805 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23806 or other prefixes. We never generate addr32 prefix for LEA insn. */
23809 memory_address_length (rtx addr
, bool lea
)
23811 struct ix86_address parts
;
23812 rtx base
, index
, disp
;
23816 if (GET_CODE (addr
) == PRE_DEC
23817 || GET_CODE (addr
) == POST_INC
23818 || GET_CODE (addr
) == PRE_MODIFY
23819 || GET_CODE (addr
) == POST_MODIFY
)
23822 ok
= ix86_decompose_address (addr
, &parts
);
23825 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23827 /* If this is not LEA instruction, add the length of addr32 prefix. */
23828 if (TARGET_64BIT
&& !lea
23829 && (SImode_address_operand (addr
, VOIDmode
)
23830 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23831 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23835 index
= parts
.index
;
23838 if (base
&& GET_CODE (base
) == SUBREG
)
23839 base
= SUBREG_REG (base
);
23840 if (index
&& GET_CODE (index
) == SUBREG
)
23841 index
= SUBREG_REG (index
);
23843 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23844 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23847 - esp as the base always wants an index,
23848 - ebp as the base always wants a displacement,
23849 - r12 as the base always wants an index,
23850 - r13 as the base always wants a displacement. */
23852 /* Register Indirect. */
23853 if (base
&& !index
&& !disp
)
23855 /* esp (for its index) and ebp (for its displacement) need
23856 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23858 if (base
== arg_pointer_rtx
23859 || base
== frame_pointer_rtx
23860 || REGNO (base
) == SP_REG
23861 || REGNO (base
) == BP_REG
23862 || REGNO (base
) == R12_REG
23863 || REGNO (base
) == R13_REG
)
23867 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23868 is not disp32, but disp32(%rip), so for disp32
23869 SIB byte is needed, unless print_operand_address
23870 optimizes it into disp32(%rip) or (%rip) is implied
23872 else if (disp
&& !base
&& !index
)
23879 if (GET_CODE (disp
) == CONST
)
23880 symbol
= XEXP (disp
, 0);
23881 if (GET_CODE (symbol
) == PLUS
23882 && CONST_INT_P (XEXP (symbol
, 1)))
23883 symbol
= XEXP (symbol
, 0);
23885 if (GET_CODE (symbol
) != LABEL_REF
23886 && (GET_CODE (symbol
) != SYMBOL_REF
23887 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23888 && (GET_CODE (symbol
) != UNSPEC
23889 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23890 && XINT (symbol
, 1) != UNSPEC_PCREL
23891 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23897 /* Find the length of the displacement constant. */
23900 if (base
&& satisfies_constraint_K (disp
))
23905 /* ebp always wants a displacement. Similarly r13. */
23906 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23909 /* An index requires the two-byte modrm form.... */
23911 /* ...like esp (or r12), which always wants an index. */
23912 || base
== arg_pointer_rtx
23913 || base
== frame_pointer_rtx
23914 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23921 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23922 is set, expect that insn have 8bit immediate alternative. */
23924 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23928 extract_insn_cached (insn
);
23929 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23930 if (CONSTANT_P (recog_data
.operand
[i
]))
23932 enum attr_mode mode
= get_attr_mode (insn
);
23935 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23937 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23944 ival
= trunc_int_for_mode (ival
, HImode
);
23947 ival
= trunc_int_for_mode (ival
, SImode
);
23952 if (IN_RANGE (ival
, -128, 127))
23969 /* Immediates for DImode instructions are encoded
23970 as 32bit sign extended values. */
23975 fatal_insn ("unknown insn mode", insn
);
23981 /* Compute default value for "length_address" attribute. */
23983 ix86_attr_length_address_default (rtx insn
)
23987 if (get_attr_type (insn
) == TYPE_LEA
)
23989 rtx set
= PATTERN (insn
), addr
;
23991 if (GET_CODE (set
) == PARALLEL
)
23992 set
= XVECEXP (set
, 0, 0);
23994 gcc_assert (GET_CODE (set
) == SET
);
23996 addr
= SET_SRC (set
);
23998 return memory_address_length (addr
, true);
24001 extract_insn_cached (insn
);
24002 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24003 if (MEM_P (recog_data
.operand
[i
]))
24005 constrain_operands_cached (reload_completed
);
24006 if (which_alternative
!= -1)
24008 const char *constraints
= recog_data
.constraints
[i
];
24009 int alt
= which_alternative
;
24011 while (*constraints
== '=' || *constraints
== '+')
24014 while (*constraints
++ != ',')
24016 /* Skip ignored operands. */
24017 if (*constraints
== 'X')
24020 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24025 /* Compute default value for "length_vex" attribute. It includes
24026 2 or 3 byte VEX prefix and 1 opcode byte. */
24029 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24033 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24034 byte VEX prefix. */
24035 if (!has_0f_opcode
|| has_vex_w
)
24038 /* We can always use 2 byte VEX prefix in 32bit. */
24042 extract_insn_cached (insn
);
24044 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24045 if (REG_P (recog_data
.operand
[i
]))
24047 /* REX.W bit uses 3 byte VEX prefix. */
24048 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24049 && GENERAL_REG_P (recog_data
.operand
[i
]))
24054 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24055 if (MEM_P (recog_data
.operand
[i
])
24056 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24063 /* Return the maximum number of instructions a cpu can issue. */
24066 ix86_issue_rate (void)
24070 case PROCESSOR_PENTIUM
:
24071 case PROCESSOR_ATOM
:
24073 case PROCESSOR_BTVER2
:
24076 case PROCESSOR_PENTIUMPRO
:
24077 case PROCESSOR_PENTIUM4
:
24078 case PROCESSOR_CORE2
:
24079 case PROCESSOR_COREI7
:
24080 case PROCESSOR_HASWELL
:
24081 case PROCESSOR_ATHLON
:
24083 case PROCESSOR_AMDFAM10
:
24084 case PROCESSOR_NOCONA
:
24085 case PROCESSOR_GENERIC32
:
24086 case PROCESSOR_GENERIC64
:
24087 case PROCESSOR_BDVER1
:
24088 case PROCESSOR_BDVER2
:
24089 case PROCESSOR_BDVER3
:
24090 case PROCESSOR_BTVER1
:
24098 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24099 by DEP_INSN and nothing set by DEP_INSN. */
24102 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24106 /* Simplify the test for uninteresting insns. */
24107 if (insn_type
!= TYPE_SETCC
24108 && insn_type
!= TYPE_ICMOV
24109 && insn_type
!= TYPE_FCMOV
24110 && insn_type
!= TYPE_IBR
)
24113 if ((set
= single_set (dep_insn
)) != 0)
24115 set
= SET_DEST (set
);
24118 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24119 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24120 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24121 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24123 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24124 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24129 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24132 /* This test is true if the dependent insn reads the flags but
24133 not any other potentially set register. */
24134 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24137 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24143 /* Return true iff USE_INSN has a memory address with operands set by
24147 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24150 extract_insn_cached (use_insn
);
24151 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24152 if (MEM_P (recog_data
.operand
[i
]))
24154 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24155 return modified_in_p (addr
, set_insn
) != 0;
24161 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24163 enum attr_type insn_type
, dep_insn_type
;
24164 enum attr_memory memory
;
24166 int dep_insn_code_number
;
24168 /* Anti and output dependencies have zero cost on all CPUs. */
24169 if (REG_NOTE_KIND (link
) != 0)
24172 dep_insn_code_number
= recog_memoized (dep_insn
);
24174 /* If we can't recognize the insns, we can't really do anything. */
24175 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24178 insn_type
= get_attr_type (insn
);
24179 dep_insn_type
= get_attr_type (dep_insn
);
24183 case PROCESSOR_PENTIUM
:
24184 /* Address Generation Interlock adds a cycle of latency. */
24185 if (insn_type
== TYPE_LEA
)
24187 rtx addr
= PATTERN (insn
);
24189 if (GET_CODE (addr
) == PARALLEL
)
24190 addr
= XVECEXP (addr
, 0, 0);
24192 gcc_assert (GET_CODE (addr
) == SET
);
24194 addr
= SET_SRC (addr
);
24195 if (modified_in_p (addr
, dep_insn
))
24198 else if (ix86_agi_dependent (dep_insn
, insn
))
24201 /* ??? Compares pair with jump/setcc. */
24202 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24205 /* Floating point stores require value to be ready one cycle earlier. */
24206 if (insn_type
== TYPE_FMOV
24207 && get_attr_memory (insn
) == MEMORY_STORE
24208 && !ix86_agi_dependent (dep_insn
, insn
))
24212 case PROCESSOR_PENTIUMPRO
:
24213 memory
= get_attr_memory (insn
);
24215 /* INT->FP conversion is expensive. */
24216 if (get_attr_fp_int_src (dep_insn
))
24219 /* There is one cycle extra latency between an FP op and a store. */
24220 if (insn_type
== TYPE_FMOV
24221 && (set
= single_set (dep_insn
)) != NULL_RTX
24222 && (set2
= single_set (insn
)) != NULL_RTX
24223 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24224 && MEM_P (SET_DEST (set2
)))
24227 /* Show ability of reorder buffer to hide latency of load by executing
24228 in parallel with previous instruction in case
24229 previous instruction is not needed to compute the address. */
24230 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24231 && !ix86_agi_dependent (dep_insn
, insn
))
24233 /* Claim moves to take one cycle, as core can issue one load
24234 at time and the next load can start cycle later. */
24235 if (dep_insn_type
== TYPE_IMOV
24236 || dep_insn_type
== TYPE_FMOV
)
24244 memory
= get_attr_memory (insn
);
24246 /* The esp dependency is resolved before the instruction is really
24248 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24249 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24252 /* INT->FP conversion is expensive. */
24253 if (get_attr_fp_int_src (dep_insn
))
24256 /* Show ability of reorder buffer to hide latency of load by executing
24257 in parallel with previous instruction in case
24258 previous instruction is not needed to compute the address. */
24259 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24260 && !ix86_agi_dependent (dep_insn
, insn
))
24262 /* Claim moves to take one cycle, as core can issue one load
24263 at time and the next load can start cycle later. */
24264 if (dep_insn_type
== TYPE_IMOV
24265 || dep_insn_type
== TYPE_FMOV
)
24274 case PROCESSOR_ATHLON
:
24276 case PROCESSOR_AMDFAM10
:
24277 case PROCESSOR_BDVER1
:
24278 case PROCESSOR_BDVER2
:
24279 case PROCESSOR_BDVER3
:
24280 case PROCESSOR_BTVER1
:
24281 case PROCESSOR_BTVER2
:
24282 case PROCESSOR_ATOM
:
24283 case PROCESSOR_GENERIC32
:
24284 case PROCESSOR_GENERIC64
:
24285 memory
= get_attr_memory (insn
);
24287 /* Show ability of reorder buffer to hide latency of load by executing
24288 in parallel with previous instruction in case
24289 previous instruction is not needed to compute the address. */
24290 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24291 && !ix86_agi_dependent (dep_insn
, insn
))
24293 enum attr_unit unit
= get_attr_unit (insn
);
24296 /* Because of the difference between the length of integer and
24297 floating unit pipeline preparation stages, the memory operands
24298 for floating point are cheaper.
24300 ??? For Athlon it the difference is most probably 2. */
24301 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24304 loadcost
= TARGET_ATHLON
? 2 : 0;
24306 if (cost
>= loadcost
)
24319 /* How many alternative schedules to try. This should be as wide as the
24320 scheduling freedom in the DFA, but no wider. Making this value too
24321 large results extra work for the scheduler. */
24324 ia32_multipass_dfa_lookahead (void)
24328 case PROCESSOR_PENTIUM
:
24331 case PROCESSOR_PENTIUMPRO
:
24335 case PROCESSOR_CORE2
:
24336 case PROCESSOR_COREI7
:
24337 case PROCESSOR_HASWELL
:
24338 case PROCESSOR_ATOM
:
24339 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24340 as many instructions can be executed on a cycle, i.e.,
24341 issue_rate. I wonder why tuning for many CPUs does not do this. */
24342 if (reload_completed
)
24343 return ix86_issue_rate ();
24344 /* Don't use lookahead for pre-reload schedule to save compile time. */
24352 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24353 execution. It is applied if
24354 (1) IMUL instruction is on the top of list;
24355 (2) There exists the only producer of independent IMUL instruction in
24357 (3) Put found producer on the top of ready list.
24358 Returns issue rate. */
24361 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24362 int clock_var ATTRIBUTE_UNUSED
)
24364 static int issue_rate
= -1;
24365 int n_ready
= *pn_ready
;
24366 rtx insn
, insn1
, insn2
;
24368 sd_iterator_def sd_it
;
24372 /* Set up issue rate. */
24373 issue_rate
= ix86_issue_rate();
24375 /* Do reodering for Atom only. */
24376 if (ix86_tune
!= PROCESSOR_ATOM
)
24378 /* Do not perform ready list reodering for pre-reload schedule pass. */
24379 if (!reload_completed
)
24381 /* Nothing to do if ready list contains only 1 instruction. */
24385 /* Check that IMUL instruction is on the top of ready list. */
24386 insn
= ready
[n_ready
- 1];
24387 if (!NONDEBUG_INSN_P (insn
))
24389 insn
= PATTERN (insn
);
24390 if (GET_CODE (insn
) == PARALLEL
)
24391 insn
= XVECEXP (insn
, 0, 0);
24392 if (GET_CODE (insn
) != SET
)
24394 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24395 && GET_MODE (SET_SRC (insn
)) == SImode
))
24398 /* Search for producer of independent IMUL instruction. */
24399 for (i
= n_ready
- 2; i
>= 0; i
--)
24402 if (!NONDEBUG_INSN_P (insn
))
24404 /* Skip IMUL instruction. */
24405 insn2
= PATTERN (insn
);
24406 if (GET_CODE (insn2
) == PARALLEL
)
24407 insn2
= XVECEXP (insn2
, 0, 0);
24408 if (GET_CODE (insn2
) == SET
24409 && GET_CODE (SET_SRC (insn2
)) == MULT
24410 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24413 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24416 con
= DEP_CON (dep
);
24417 if (!NONDEBUG_INSN_P (con
))
24419 insn1
= PATTERN (con
);
24420 if (GET_CODE (insn1
) == PARALLEL
)
24421 insn1
= XVECEXP (insn1
, 0, 0);
24423 if (GET_CODE (insn1
) == SET
24424 && GET_CODE (SET_SRC (insn1
)) == MULT
24425 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24427 sd_iterator_def sd_it1
;
24429 /* Check if there is no other dependee for IMUL. */
24431 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24434 pro
= DEP_PRO (dep1
);
24435 if (!NONDEBUG_INSN_P (pro
))
24448 return issue_rate
; /* Didn't find IMUL producer. */
24450 if (sched_verbose
> 1)
24451 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24452 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24454 /* Put IMUL producer (ready[index]) at the top of ready list. */
24455 insn1
= ready
[index
];
24456 for (i
= index
; i
< n_ready
- 1; i
++)
24457 ready
[i
] = ready
[i
+ 1];
24458 ready
[n_ready
- 1] = insn1
;
24464 ix86_class_likely_spilled_p (reg_class_t
);
24466 /* Returns true if lhs of insn is HW function argument register and set up
24467 is_spilled to true if it is likely spilled HW register. */
24469 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24473 if (!NONDEBUG_INSN_P (insn
))
24475 /* Call instructions are not movable, ignore it. */
24478 insn
= PATTERN (insn
);
24479 if (GET_CODE (insn
) == PARALLEL
)
24480 insn
= XVECEXP (insn
, 0, 0);
24481 if (GET_CODE (insn
) != SET
)
24483 dst
= SET_DEST (insn
);
24484 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24485 && ix86_function_arg_regno_p (REGNO (dst
)))
24487 /* Is it likely spilled HW register? */
24488 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24489 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24490 *is_spilled
= true;
24496 /* Add output dependencies for chain of function adjacent arguments if only
24497 there is a move to likely spilled HW register. Return first argument
24498 if at least one dependence was added or NULL otherwise. */
24500 add_parameter_dependencies (rtx call
, rtx head
)
24504 rtx first_arg
= NULL
;
24505 bool is_spilled
= false;
24507 head
= PREV_INSN (head
);
24509 /* Find nearest to call argument passing instruction. */
24512 last
= PREV_INSN (last
);
24515 if (!NONDEBUG_INSN_P (last
))
24517 if (insn_is_function_arg (last
, &is_spilled
))
24525 insn
= PREV_INSN (last
);
24526 if (!INSN_P (insn
))
24530 if (!NONDEBUG_INSN_P (insn
))
24535 if (insn_is_function_arg (insn
, &is_spilled
))
24537 /* Add output depdendence between two function arguments if chain
24538 of output arguments contains likely spilled HW registers. */
24540 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24541 first_arg
= last
= insn
;
24551 /* Add output or anti dependency from insn to first_arg to restrict its code
24554 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24559 set
= single_set (insn
);
24562 tmp
= SET_DEST (set
);
24565 /* Add output dependency to the first function argument. */
24566 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24569 /* Add anti dependency. */
24570 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24573 /* Avoid cross block motion of function argument through adding dependency
24574 from the first non-jump instruction in bb. */
24576 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24578 rtx insn
= BB_END (bb
);
24582 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24584 rtx set
= single_set (insn
);
24587 avoid_func_arg_motion (arg
, insn
);
24591 if (insn
== BB_HEAD (bb
))
24593 insn
= PREV_INSN (insn
);
24597 /* Hook for pre-reload schedule - avoid motion of function arguments
24598 passed in likely spilled HW registers. */
24600 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24603 rtx first_arg
= NULL
;
24604 if (reload_completed
)
24606 while (head
!= tail
&& DEBUG_INSN_P (head
))
24607 head
= NEXT_INSN (head
);
24608 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24609 if (INSN_P (insn
) && CALL_P (insn
))
24611 first_arg
= add_parameter_dependencies (insn
, head
);
24614 /* Add dependee for first argument to predecessors if only
24615 region contains more than one block. */
24616 basic_block bb
= BLOCK_FOR_INSN (insn
);
24617 int rgn
= CONTAINING_RGN (bb
->index
);
24618 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24619 /* Skip trivial regions and region head blocks that can have
24620 predecessors outside of region. */
24621 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24625 /* Assume that region is SCC, i.e. all immediate predecessors
24626 of non-head block are in the same region. */
24627 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24629 /* Avoid creating of loop-carried dependencies through
24630 using topological odering in region. */
24631 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24632 add_dependee_for_func_arg (first_arg
, e
->src
);
24640 else if (first_arg
)
24641 avoid_func_arg_motion (first_arg
, insn
);
24644 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24645 HW registers to maximum, to schedule them at soon as possible. These are
24646 moves from function argument registers at the top of the function entry
24647 and moves from function return value registers after call. */
24649 ix86_adjust_priority (rtx insn
, int priority
)
24653 if (reload_completed
)
24656 if (!NONDEBUG_INSN_P (insn
))
24659 set
= single_set (insn
);
24662 rtx tmp
= SET_SRC (set
);
24664 && HARD_REGISTER_P (tmp
)
24665 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24666 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24667 return current_sched_info
->sched_max_insns_priority
;
24673 /* Model decoder of Core 2/i7.
24674 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24675 track the instruction fetch block boundaries and make sure that long
24676 (9+ bytes) instructions are assigned to D0. */
24678 /* Maximum length of an insn that can be handled by
24679 a secondary decoder unit. '8' for Core 2/i7. */
24680 static int core2i7_secondary_decoder_max_insn_size
;
24682 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24683 '16' for Core 2/i7. */
24684 static int core2i7_ifetch_block_size
;
24686 /* Maximum number of instructions decoder can handle per cycle.
24687 '6' for Core 2/i7. */
24688 static int core2i7_ifetch_block_max_insns
;
24690 typedef struct ix86_first_cycle_multipass_data_
*
24691 ix86_first_cycle_multipass_data_t
;
24692 typedef const struct ix86_first_cycle_multipass_data_
*
24693 const_ix86_first_cycle_multipass_data_t
;
24695 /* A variable to store target state across calls to max_issue within
24697 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24698 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24700 /* Initialize DATA. */
24702 core2i7_first_cycle_multipass_init (void *_data
)
24704 ix86_first_cycle_multipass_data_t data
24705 = (ix86_first_cycle_multipass_data_t
) _data
;
24707 data
->ifetch_block_len
= 0;
24708 data
->ifetch_block_n_insns
= 0;
24709 data
->ready_try_change
= NULL
;
24710 data
->ready_try_change_size
= 0;
24713 /* Advancing the cycle; reset ifetch block counts. */
24715 core2i7_dfa_post_advance_cycle (void)
24717 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24719 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24721 data
->ifetch_block_len
= 0;
24722 data
->ifetch_block_n_insns
= 0;
24725 static int min_insn_size (rtx
);
24727 /* Filter out insns from ready_try that the core will not be able to issue
24728 on current cycle due to decoder. */
24730 core2i7_first_cycle_multipass_filter_ready_try
24731 (const_ix86_first_cycle_multipass_data_t data
,
24732 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24739 if (ready_try
[n_ready
])
24742 insn
= get_ready_element (n_ready
);
24743 insn_size
= min_insn_size (insn
);
24745 if (/* If this is a too long an insn for a secondary decoder ... */
24746 (!first_cycle_insn_p
24747 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24748 /* ... or it would not fit into the ifetch block ... */
24749 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24750 /* ... or the decoder is full already ... */
24751 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24752 /* ... mask the insn out. */
24754 ready_try
[n_ready
] = 1;
24756 if (data
->ready_try_change
)
24757 bitmap_set_bit (data
->ready_try_change
, n_ready
);
24762 /* Prepare for a new round of multipass lookahead scheduling. */
24764 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24765 bool first_cycle_insn_p
)
24767 ix86_first_cycle_multipass_data_t data
24768 = (ix86_first_cycle_multipass_data_t
) _data
;
24769 const_ix86_first_cycle_multipass_data_t prev_data
24770 = ix86_first_cycle_multipass_data
;
24772 /* Restore the state from the end of the previous round. */
24773 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24774 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24776 /* Filter instructions that cannot be issued on current cycle due to
24777 decoder restrictions. */
24778 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24779 first_cycle_insn_p
);
24782 /* INSN is being issued in current solution. Account for its impact on
24783 the decoder model. */
24785 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24786 rtx insn
, const void *_prev_data
)
24788 ix86_first_cycle_multipass_data_t data
24789 = (ix86_first_cycle_multipass_data_t
) _data
;
24790 const_ix86_first_cycle_multipass_data_t prev_data
24791 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24793 int insn_size
= min_insn_size (insn
);
24795 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24796 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24797 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24798 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24800 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24801 if (!data
->ready_try_change
)
24803 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24804 data
->ready_try_change_size
= n_ready
;
24806 else if (data
->ready_try_change_size
< n_ready
)
24808 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24810 data
->ready_try_change_size
= n_ready
;
24812 bitmap_clear (data
->ready_try_change
);
24814 /* Filter out insns from ready_try that the core will not be able to issue
24815 on current cycle due to decoder. */
24816 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24820 /* Revert the effect on ready_try. */
24822 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24824 int n_ready ATTRIBUTE_UNUSED
)
24826 const_ix86_first_cycle_multipass_data_t data
24827 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24828 unsigned int i
= 0;
24829 sbitmap_iterator sbi
;
24831 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24832 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
24838 /* Save the result of multipass lookahead scheduling for the next round. */
24840 core2i7_first_cycle_multipass_end (const void *_data
)
24842 const_ix86_first_cycle_multipass_data_t data
24843 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24844 ix86_first_cycle_multipass_data_t next_data
24845 = ix86_first_cycle_multipass_data
;
24849 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24850 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24854 /* Deallocate target data. */
24856 core2i7_first_cycle_multipass_fini (void *_data
)
24858 ix86_first_cycle_multipass_data_t data
24859 = (ix86_first_cycle_multipass_data_t
) _data
;
24861 if (data
->ready_try_change
)
24863 sbitmap_free (data
->ready_try_change
);
24864 data
->ready_try_change
= NULL
;
24865 data
->ready_try_change_size
= 0;
24869 /* Prepare for scheduling pass. */
24871 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24872 int verbose ATTRIBUTE_UNUSED
,
24873 int max_uid ATTRIBUTE_UNUSED
)
24875 /* Install scheduling hooks for current CPU. Some of these hooks are used
24876 in time-critical parts of the scheduler, so we only set them up when
24877 they are actually used. */
24880 case PROCESSOR_CORE2
:
24881 case PROCESSOR_COREI7
:
24882 case PROCESSOR_HASWELL
:
24883 /* Do not perform multipass scheduling for pre-reload schedule
24884 to save compile time. */
24885 if (reload_completed
)
24887 targetm
.sched
.dfa_post_advance_cycle
24888 = core2i7_dfa_post_advance_cycle
;
24889 targetm
.sched
.first_cycle_multipass_init
24890 = core2i7_first_cycle_multipass_init
;
24891 targetm
.sched
.first_cycle_multipass_begin
24892 = core2i7_first_cycle_multipass_begin
;
24893 targetm
.sched
.first_cycle_multipass_issue
24894 = core2i7_first_cycle_multipass_issue
;
24895 targetm
.sched
.first_cycle_multipass_backtrack
24896 = core2i7_first_cycle_multipass_backtrack
;
24897 targetm
.sched
.first_cycle_multipass_end
24898 = core2i7_first_cycle_multipass_end
;
24899 targetm
.sched
.first_cycle_multipass_fini
24900 = core2i7_first_cycle_multipass_fini
;
24902 /* Set decoder parameters. */
24903 core2i7_secondary_decoder_max_insn_size
= 8;
24904 core2i7_ifetch_block_size
= 16;
24905 core2i7_ifetch_block_max_insns
= 6;
24908 /* ... Fall through ... */
24910 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24911 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24912 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24913 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24914 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24915 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24916 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24922 /* Compute the alignment given to a constant that is being placed in memory.
24923 EXP is the constant and ALIGN is the alignment that the object would
24925 The value of this function is used instead of that alignment to align
24929 ix86_constant_alignment (tree exp
, int align
)
24931 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24932 || TREE_CODE (exp
) == INTEGER_CST
)
24934 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24936 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24939 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24940 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24941 return BITS_PER_WORD
;
24946 /* Compute the alignment for a static variable.
24947 TYPE is the data type, and ALIGN is the alignment that
24948 the object would ordinarily have. The value of this function is used
24949 instead of that alignment to align the object. */
24952 ix86_data_alignment (tree type
, int align
)
24954 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24956 if (AGGREGATE_TYPE_P (type
)
24957 && TYPE_SIZE (type
)
24958 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24959 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24960 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24961 && align
< max_align
)
24964 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24965 to 16byte boundary. */
24968 if (AGGREGATE_TYPE_P (type
)
24969 && TYPE_SIZE (type
)
24970 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24971 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24972 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24976 if (TREE_CODE (type
) == ARRAY_TYPE
)
24978 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24980 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24983 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24986 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24988 if ((TYPE_MODE (type
) == XCmode
24989 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24992 else if ((TREE_CODE (type
) == RECORD_TYPE
24993 || TREE_CODE (type
) == UNION_TYPE
24994 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24995 && TYPE_FIELDS (type
))
24997 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24999 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25002 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25003 || TREE_CODE (type
) == INTEGER_TYPE
)
25005 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25007 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25014 /* Compute the alignment for a local variable or a stack slot. EXP is
25015 the data type or decl itself, MODE is the widest mode available and
25016 ALIGN is the alignment that the object would ordinarily have. The
25017 value of this macro is used instead of that alignment to align the
25021 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25022 unsigned int align
)
25026 if (exp
&& DECL_P (exp
))
25028 type
= TREE_TYPE (exp
);
25037 /* Don't do dynamic stack realignment for long long objects with
25038 -mpreferred-stack-boundary=2. */
25041 && ix86_preferred_stack_boundary
< 64
25042 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25043 && (!type
|| !TYPE_USER_ALIGN (type
))
25044 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25047 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25048 register in MODE. We will return the largest alignment of XF
25052 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25053 align
= GET_MODE_ALIGNMENT (DFmode
);
25057 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25058 to 16byte boundary. Exact wording is:
25060 An array uses the same alignment as its elements, except that a local or
25061 global array variable of length at least 16 bytes or
25062 a C99 variable-length array variable always has alignment of at least 16 bytes.
25064 This was added to allow use of aligned SSE instructions at arrays. This
25065 rule is meant for static storage (where compiler can not do the analysis
25066 by itself). We follow it for automatic variables only when convenient.
25067 We fully control everything in the function compiled and functions from
25068 other unit can not rely on the alignment.
25070 Exclude va_list type. It is the common case of local array where
25071 we can not benefit from the alignment. */
25072 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25075 if (AGGREGATE_TYPE_P (type
)
25076 && (va_list_type_node
== NULL_TREE
25077 || (TYPE_MAIN_VARIANT (type
)
25078 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25079 && TYPE_SIZE (type
)
25080 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25081 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25082 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25085 if (TREE_CODE (type
) == ARRAY_TYPE
)
25087 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25089 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25092 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25094 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25096 if ((TYPE_MODE (type
) == XCmode
25097 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25100 else if ((TREE_CODE (type
) == RECORD_TYPE
25101 || TREE_CODE (type
) == UNION_TYPE
25102 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25103 && TYPE_FIELDS (type
))
25105 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25107 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25110 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25111 || TREE_CODE (type
) == INTEGER_TYPE
)
25114 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25116 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25122 /* Compute the minimum required alignment for dynamic stack realignment
25123 purposes for a local variable, parameter or a stack slot. EXP is
25124 the data type or decl itself, MODE is its mode and ALIGN is the
25125 alignment that the object would ordinarily have. */
25128 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25129 unsigned int align
)
25133 if (exp
&& DECL_P (exp
))
25135 type
= TREE_TYPE (exp
);
25144 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25147 /* Don't do dynamic stack realignment for long long objects with
25148 -mpreferred-stack-boundary=2. */
25149 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25150 && (!type
|| !TYPE_USER_ALIGN (type
))
25151 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25157 /* Find a location for the static chain incoming to a nested function.
25158 This is a register, unless all free registers are used by arguments. */
25161 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25165 if (!DECL_STATIC_CHAIN (fndecl
))
25170 /* We always use R10 in 64-bit mode. */
25178 /* By default in 32-bit mode we use ECX to pass the static chain. */
25181 fntype
= TREE_TYPE (fndecl
);
25182 ccvt
= ix86_get_callcvt (fntype
);
25183 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25185 /* Fastcall functions use ecx/edx for arguments, which leaves
25186 us with EAX for the static chain.
25187 Thiscall functions use ecx for arguments, which also
25188 leaves us with EAX for the static chain. */
25191 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25193 /* Thiscall functions use ecx for arguments, which leaves
25194 us with EAX and EDX for the static chain.
25195 We are using for abi-compatibility EAX. */
25198 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25200 /* For regparm 3, we have no free call-clobbered registers in
25201 which to store the static chain. In order to implement this,
25202 we have the trampoline push the static chain to the stack.
25203 However, we can't push a value below the return address when
25204 we call the nested function directly, so we have to use an
25205 alternate entry point. For this we use ESI, and have the
25206 alternate entry point push ESI, so that things appear the
25207 same once we're executing the nested function. */
25210 if (fndecl
== current_function_decl
)
25211 ix86_static_chain_on_stack
= true;
25212 return gen_frame_mem (SImode
,
25213 plus_constant (Pmode
,
25214 arg_pointer_rtx
, -8));
25220 return gen_rtx_REG (Pmode
, regno
);
25223 /* Emit RTL insns to initialize the variable parts of a trampoline.
25224 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25225 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25226 to be passed to the target function. */
25229 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25235 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25241 /* Load the function address to r11. Try to load address using
25242 the shorter movl instead of movabs. We may want to support
25243 movq for kernel mode, but kernel does not use trampolines at
25244 the moment. FNADDR is a 32bit address and may not be in
25245 DImode when ptr_mode == SImode. Always use movl in this
25247 if (ptr_mode
== SImode
25248 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25250 fnaddr
= copy_addr_to_reg (fnaddr
);
25252 mem
= adjust_address (m_tramp
, HImode
, offset
);
25253 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25255 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25256 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25261 mem
= adjust_address (m_tramp
, HImode
, offset
);
25262 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25264 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25265 emit_move_insn (mem
, fnaddr
);
25269 /* Load static chain using movabs to r10. Use the shorter movl
25270 instead of movabs when ptr_mode == SImode. */
25271 if (ptr_mode
== SImode
)
25282 mem
= adjust_address (m_tramp
, HImode
, offset
);
25283 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25285 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25286 emit_move_insn (mem
, chain_value
);
25289 /* Jump to r11; the last (unused) byte is a nop, only there to
25290 pad the write out to a single 32-bit store. */
25291 mem
= adjust_address (m_tramp
, SImode
, offset
);
25292 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25299 /* Depending on the static chain location, either load a register
25300 with a constant, or push the constant to the stack. All of the
25301 instructions are the same size. */
25302 chain
= ix86_static_chain (fndecl
, true);
25305 switch (REGNO (chain
))
25308 opcode
= 0xb8; break;
25310 opcode
= 0xb9; break;
25312 gcc_unreachable ();
25318 mem
= adjust_address (m_tramp
, QImode
, offset
);
25319 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25321 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25322 emit_move_insn (mem
, chain_value
);
25325 mem
= adjust_address (m_tramp
, QImode
, offset
);
25326 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25328 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25330 /* Compute offset from the end of the jmp to the target function.
25331 In the case in which the trampoline stores the static chain on
25332 the stack, we need to skip the first insn which pushes the
25333 (call-saved) register static chain; this push is 1 byte. */
25335 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25336 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25337 offset
- (MEM_P (chain
) ? 1 : 0)),
25338 NULL_RTX
, 1, OPTAB_DIRECT
);
25339 emit_move_insn (mem
, disp
);
25342 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25344 #ifdef HAVE_ENABLE_EXECUTE_STACK
25345 #ifdef CHECK_EXECUTE_STACK_ENABLED
25346 if (CHECK_EXECUTE_STACK_ENABLED
)
25348 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25349 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25353 /* The following file contains several enumerations and data structures
25354 built from the definitions in i386-builtin-types.def. */
25356 #include "i386-builtin-types.inc"
25358 /* Table for the ix86 builtin non-function types. */
25359 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25361 /* Retrieve an element from the above table, building some of
25362 the types lazily. */
25365 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25367 unsigned int index
;
25370 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25372 type
= ix86_builtin_type_tab
[(int) tcode
];
25376 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25377 if (tcode
<= IX86_BT_LAST_VECT
)
25379 enum machine_mode mode
;
25381 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25382 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25383 mode
= ix86_builtin_type_vect_mode
[index
];
25385 type
= build_vector_type_for_mode (itype
, mode
);
25391 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25392 if (tcode
<= IX86_BT_LAST_PTR
)
25393 quals
= TYPE_UNQUALIFIED
;
25395 quals
= TYPE_QUAL_CONST
;
25397 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25398 if (quals
!= TYPE_UNQUALIFIED
)
25399 itype
= build_qualified_type (itype
, quals
);
25401 type
= build_pointer_type (itype
);
25404 ix86_builtin_type_tab
[(int) tcode
] = type
;
25408 /* Table for the ix86 builtin function types. */
25409 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25411 /* Retrieve an element from the above table, building some of
25412 the types lazily. */
25415 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25419 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25421 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25425 if (tcode
<= IX86_BT_LAST_FUNC
)
25427 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25428 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25429 tree rtype
, atype
, args
= void_list_node
;
25432 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25433 for (i
= after
- 1; i
> start
; --i
)
25435 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25436 args
= tree_cons (NULL
, atype
, args
);
25439 type
= build_function_type (rtype
, args
);
25443 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25444 enum ix86_builtin_func_type icode
;
25446 icode
= ix86_builtin_func_alias_base
[index
];
25447 type
= ix86_get_builtin_func_type (icode
);
25450 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25455 /* Codes for all the SSE/MMX builtins. */
25458 IX86_BUILTIN_ADDPS
,
25459 IX86_BUILTIN_ADDSS
,
25460 IX86_BUILTIN_DIVPS
,
25461 IX86_BUILTIN_DIVSS
,
25462 IX86_BUILTIN_MULPS
,
25463 IX86_BUILTIN_MULSS
,
25464 IX86_BUILTIN_SUBPS
,
25465 IX86_BUILTIN_SUBSS
,
25467 IX86_BUILTIN_CMPEQPS
,
25468 IX86_BUILTIN_CMPLTPS
,
25469 IX86_BUILTIN_CMPLEPS
,
25470 IX86_BUILTIN_CMPGTPS
,
25471 IX86_BUILTIN_CMPGEPS
,
25472 IX86_BUILTIN_CMPNEQPS
,
25473 IX86_BUILTIN_CMPNLTPS
,
25474 IX86_BUILTIN_CMPNLEPS
,
25475 IX86_BUILTIN_CMPNGTPS
,
25476 IX86_BUILTIN_CMPNGEPS
,
25477 IX86_BUILTIN_CMPORDPS
,
25478 IX86_BUILTIN_CMPUNORDPS
,
25479 IX86_BUILTIN_CMPEQSS
,
25480 IX86_BUILTIN_CMPLTSS
,
25481 IX86_BUILTIN_CMPLESS
,
25482 IX86_BUILTIN_CMPNEQSS
,
25483 IX86_BUILTIN_CMPNLTSS
,
25484 IX86_BUILTIN_CMPNLESS
,
25485 IX86_BUILTIN_CMPNGTSS
,
25486 IX86_BUILTIN_CMPNGESS
,
25487 IX86_BUILTIN_CMPORDSS
,
25488 IX86_BUILTIN_CMPUNORDSS
,
25490 IX86_BUILTIN_COMIEQSS
,
25491 IX86_BUILTIN_COMILTSS
,
25492 IX86_BUILTIN_COMILESS
,
25493 IX86_BUILTIN_COMIGTSS
,
25494 IX86_BUILTIN_COMIGESS
,
25495 IX86_BUILTIN_COMINEQSS
,
25496 IX86_BUILTIN_UCOMIEQSS
,
25497 IX86_BUILTIN_UCOMILTSS
,
25498 IX86_BUILTIN_UCOMILESS
,
25499 IX86_BUILTIN_UCOMIGTSS
,
25500 IX86_BUILTIN_UCOMIGESS
,
25501 IX86_BUILTIN_UCOMINEQSS
,
25503 IX86_BUILTIN_CVTPI2PS
,
25504 IX86_BUILTIN_CVTPS2PI
,
25505 IX86_BUILTIN_CVTSI2SS
,
25506 IX86_BUILTIN_CVTSI642SS
,
25507 IX86_BUILTIN_CVTSS2SI
,
25508 IX86_BUILTIN_CVTSS2SI64
,
25509 IX86_BUILTIN_CVTTPS2PI
,
25510 IX86_BUILTIN_CVTTSS2SI
,
25511 IX86_BUILTIN_CVTTSS2SI64
,
25513 IX86_BUILTIN_MAXPS
,
25514 IX86_BUILTIN_MAXSS
,
25515 IX86_BUILTIN_MINPS
,
25516 IX86_BUILTIN_MINSS
,
25518 IX86_BUILTIN_LOADUPS
,
25519 IX86_BUILTIN_STOREUPS
,
25520 IX86_BUILTIN_MOVSS
,
25522 IX86_BUILTIN_MOVHLPS
,
25523 IX86_BUILTIN_MOVLHPS
,
25524 IX86_BUILTIN_LOADHPS
,
25525 IX86_BUILTIN_LOADLPS
,
25526 IX86_BUILTIN_STOREHPS
,
25527 IX86_BUILTIN_STORELPS
,
25529 IX86_BUILTIN_MASKMOVQ
,
25530 IX86_BUILTIN_MOVMSKPS
,
25531 IX86_BUILTIN_PMOVMSKB
,
25533 IX86_BUILTIN_MOVNTPS
,
25534 IX86_BUILTIN_MOVNTQ
,
25536 IX86_BUILTIN_LOADDQU
,
25537 IX86_BUILTIN_STOREDQU
,
25539 IX86_BUILTIN_PACKSSWB
,
25540 IX86_BUILTIN_PACKSSDW
,
25541 IX86_BUILTIN_PACKUSWB
,
25543 IX86_BUILTIN_PADDB
,
25544 IX86_BUILTIN_PADDW
,
25545 IX86_BUILTIN_PADDD
,
25546 IX86_BUILTIN_PADDQ
,
25547 IX86_BUILTIN_PADDSB
,
25548 IX86_BUILTIN_PADDSW
,
25549 IX86_BUILTIN_PADDUSB
,
25550 IX86_BUILTIN_PADDUSW
,
25551 IX86_BUILTIN_PSUBB
,
25552 IX86_BUILTIN_PSUBW
,
25553 IX86_BUILTIN_PSUBD
,
25554 IX86_BUILTIN_PSUBQ
,
25555 IX86_BUILTIN_PSUBSB
,
25556 IX86_BUILTIN_PSUBSW
,
25557 IX86_BUILTIN_PSUBUSB
,
25558 IX86_BUILTIN_PSUBUSW
,
25561 IX86_BUILTIN_PANDN
,
25565 IX86_BUILTIN_PAVGB
,
25566 IX86_BUILTIN_PAVGW
,
25568 IX86_BUILTIN_PCMPEQB
,
25569 IX86_BUILTIN_PCMPEQW
,
25570 IX86_BUILTIN_PCMPEQD
,
25571 IX86_BUILTIN_PCMPGTB
,
25572 IX86_BUILTIN_PCMPGTW
,
25573 IX86_BUILTIN_PCMPGTD
,
25575 IX86_BUILTIN_PMADDWD
,
25577 IX86_BUILTIN_PMAXSW
,
25578 IX86_BUILTIN_PMAXUB
,
25579 IX86_BUILTIN_PMINSW
,
25580 IX86_BUILTIN_PMINUB
,
25582 IX86_BUILTIN_PMULHUW
,
25583 IX86_BUILTIN_PMULHW
,
25584 IX86_BUILTIN_PMULLW
,
25586 IX86_BUILTIN_PSADBW
,
25587 IX86_BUILTIN_PSHUFW
,
25589 IX86_BUILTIN_PSLLW
,
25590 IX86_BUILTIN_PSLLD
,
25591 IX86_BUILTIN_PSLLQ
,
25592 IX86_BUILTIN_PSRAW
,
25593 IX86_BUILTIN_PSRAD
,
25594 IX86_BUILTIN_PSRLW
,
25595 IX86_BUILTIN_PSRLD
,
25596 IX86_BUILTIN_PSRLQ
,
25597 IX86_BUILTIN_PSLLWI
,
25598 IX86_BUILTIN_PSLLDI
,
25599 IX86_BUILTIN_PSLLQI
,
25600 IX86_BUILTIN_PSRAWI
,
25601 IX86_BUILTIN_PSRADI
,
25602 IX86_BUILTIN_PSRLWI
,
25603 IX86_BUILTIN_PSRLDI
,
25604 IX86_BUILTIN_PSRLQI
,
25606 IX86_BUILTIN_PUNPCKHBW
,
25607 IX86_BUILTIN_PUNPCKHWD
,
25608 IX86_BUILTIN_PUNPCKHDQ
,
25609 IX86_BUILTIN_PUNPCKLBW
,
25610 IX86_BUILTIN_PUNPCKLWD
,
25611 IX86_BUILTIN_PUNPCKLDQ
,
25613 IX86_BUILTIN_SHUFPS
,
25615 IX86_BUILTIN_RCPPS
,
25616 IX86_BUILTIN_RCPSS
,
25617 IX86_BUILTIN_RSQRTPS
,
25618 IX86_BUILTIN_RSQRTPS_NR
,
25619 IX86_BUILTIN_RSQRTSS
,
25620 IX86_BUILTIN_RSQRTF
,
25621 IX86_BUILTIN_SQRTPS
,
25622 IX86_BUILTIN_SQRTPS_NR
,
25623 IX86_BUILTIN_SQRTSS
,
25625 IX86_BUILTIN_UNPCKHPS
,
25626 IX86_BUILTIN_UNPCKLPS
,
25628 IX86_BUILTIN_ANDPS
,
25629 IX86_BUILTIN_ANDNPS
,
25631 IX86_BUILTIN_XORPS
,
25634 IX86_BUILTIN_LDMXCSR
,
25635 IX86_BUILTIN_STMXCSR
,
25636 IX86_BUILTIN_SFENCE
,
25638 IX86_BUILTIN_FXSAVE
,
25639 IX86_BUILTIN_FXRSTOR
,
25640 IX86_BUILTIN_FXSAVE64
,
25641 IX86_BUILTIN_FXRSTOR64
,
25643 IX86_BUILTIN_XSAVE
,
25644 IX86_BUILTIN_XRSTOR
,
25645 IX86_BUILTIN_XSAVE64
,
25646 IX86_BUILTIN_XRSTOR64
,
25648 IX86_BUILTIN_XSAVEOPT
,
25649 IX86_BUILTIN_XSAVEOPT64
,
25651 /* 3DNow! Original */
25652 IX86_BUILTIN_FEMMS
,
25653 IX86_BUILTIN_PAVGUSB
,
25654 IX86_BUILTIN_PF2ID
,
25655 IX86_BUILTIN_PFACC
,
25656 IX86_BUILTIN_PFADD
,
25657 IX86_BUILTIN_PFCMPEQ
,
25658 IX86_BUILTIN_PFCMPGE
,
25659 IX86_BUILTIN_PFCMPGT
,
25660 IX86_BUILTIN_PFMAX
,
25661 IX86_BUILTIN_PFMIN
,
25662 IX86_BUILTIN_PFMUL
,
25663 IX86_BUILTIN_PFRCP
,
25664 IX86_BUILTIN_PFRCPIT1
,
25665 IX86_BUILTIN_PFRCPIT2
,
25666 IX86_BUILTIN_PFRSQIT1
,
25667 IX86_BUILTIN_PFRSQRT
,
25668 IX86_BUILTIN_PFSUB
,
25669 IX86_BUILTIN_PFSUBR
,
25670 IX86_BUILTIN_PI2FD
,
25671 IX86_BUILTIN_PMULHRW
,
25673 /* 3DNow! Athlon Extensions */
25674 IX86_BUILTIN_PF2IW
,
25675 IX86_BUILTIN_PFNACC
,
25676 IX86_BUILTIN_PFPNACC
,
25677 IX86_BUILTIN_PI2FW
,
25678 IX86_BUILTIN_PSWAPDSI
,
25679 IX86_BUILTIN_PSWAPDSF
,
25682 IX86_BUILTIN_ADDPD
,
25683 IX86_BUILTIN_ADDSD
,
25684 IX86_BUILTIN_DIVPD
,
25685 IX86_BUILTIN_DIVSD
,
25686 IX86_BUILTIN_MULPD
,
25687 IX86_BUILTIN_MULSD
,
25688 IX86_BUILTIN_SUBPD
,
25689 IX86_BUILTIN_SUBSD
,
25691 IX86_BUILTIN_CMPEQPD
,
25692 IX86_BUILTIN_CMPLTPD
,
25693 IX86_BUILTIN_CMPLEPD
,
25694 IX86_BUILTIN_CMPGTPD
,
25695 IX86_BUILTIN_CMPGEPD
,
25696 IX86_BUILTIN_CMPNEQPD
,
25697 IX86_BUILTIN_CMPNLTPD
,
25698 IX86_BUILTIN_CMPNLEPD
,
25699 IX86_BUILTIN_CMPNGTPD
,
25700 IX86_BUILTIN_CMPNGEPD
,
25701 IX86_BUILTIN_CMPORDPD
,
25702 IX86_BUILTIN_CMPUNORDPD
,
25703 IX86_BUILTIN_CMPEQSD
,
25704 IX86_BUILTIN_CMPLTSD
,
25705 IX86_BUILTIN_CMPLESD
,
25706 IX86_BUILTIN_CMPNEQSD
,
25707 IX86_BUILTIN_CMPNLTSD
,
25708 IX86_BUILTIN_CMPNLESD
,
25709 IX86_BUILTIN_CMPORDSD
,
25710 IX86_BUILTIN_CMPUNORDSD
,
25712 IX86_BUILTIN_COMIEQSD
,
25713 IX86_BUILTIN_COMILTSD
,
25714 IX86_BUILTIN_COMILESD
,
25715 IX86_BUILTIN_COMIGTSD
,
25716 IX86_BUILTIN_COMIGESD
,
25717 IX86_BUILTIN_COMINEQSD
,
25718 IX86_BUILTIN_UCOMIEQSD
,
25719 IX86_BUILTIN_UCOMILTSD
,
25720 IX86_BUILTIN_UCOMILESD
,
25721 IX86_BUILTIN_UCOMIGTSD
,
25722 IX86_BUILTIN_UCOMIGESD
,
25723 IX86_BUILTIN_UCOMINEQSD
,
25725 IX86_BUILTIN_MAXPD
,
25726 IX86_BUILTIN_MAXSD
,
25727 IX86_BUILTIN_MINPD
,
25728 IX86_BUILTIN_MINSD
,
25730 IX86_BUILTIN_ANDPD
,
25731 IX86_BUILTIN_ANDNPD
,
25733 IX86_BUILTIN_XORPD
,
25735 IX86_BUILTIN_SQRTPD
,
25736 IX86_BUILTIN_SQRTSD
,
25738 IX86_BUILTIN_UNPCKHPD
,
25739 IX86_BUILTIN_UNPCKLPD
,
25741 IX86_BUILTIN_SHUFPD
,
25743 IX86_BUILTIN_LOADUPD
,
25744 IX86_BUILTIN_STOREUPD
,
25745 IX86_BUILTIN_MOVSD
,
25747 IX86_BUILTIN_LOADHPD
,
25748 IX86_BUILTIN_LOADLPD
,
25750 IX86_BUILTIN_CVTDQ2PD
,
25751 IX86_BUILTIN_CVTDQ2PS
,
25753 IX86_BUILTIN_CVTPD2DQ
,
25754 IX86_BUILTIN_CVTPD2PI
,
25755 IX86_BUILTIN_CVTPD2PS
,
25756 IX86_BUILTIN_CVTTPD2DQ
,
25757 IX86_BUILTIN_CVTTPD2PI
,
25759 IX86_BUILTIN_CVTPI2PD
,
25760 IX86_BUILTIN_CVTSI2SD
,
25761 IX86_BUILTIN_CVTSI642SD
,
25763 IX86_BUILTIN_CVTSD2SI
,
25764 IX86_BUILTIN_CVTSD2SI64
,
25765 IX86_BUILTIN_CVTSD2SS
,
25766 IX86_BUILTIN_CVTSS2SD
,
25767 IX86_BUILTIN_CVTTSD2SI
,
25768 IX86_BUILTIN_CVTTSD2SI64
,
25770 IX86_BUILTIN_CVTPS2DQ
,
25771 IX86_BUILTIN_CVTPS2PD
,
25772 IX86_BUILTIN_CVTTPS2DQ
,
25774 IX86_BUILTIN_MOVNTI
,
25775 IX86_BUILTIN_MOVNTI64
,
25776 IX86_BUILTIN_MOVNTPD
,
25777 IX86_BUILTIN_MOVNTDQ
,
25779 IX86_BUILTIN_MOVQ128
,
25782 IX86_BUILTIN_MASKMOVDQU
,
25783 IX86_BUILTIN_MOVMSKPD
,
25784 IX86_BUILTIN_PMOVMSKB128
,
25786 IX86_BUILTIN_PACKSSWB128
,
25787 IX86_BUILTIN_PACKSSDW128
,
25788 IX86_BUILTIN_PACKUSWB128
,
25790 IX86_BUILTIN_PADDB128
,
25791 IX86_BUILTIN_PADDW128
,
25792 IX86_BUILTIN_PADDD128
,
25793 IX86_BUILTIN_PADDQ128
,
25794 IX86_BUILTIN_PADDSB128
,
25795 IX86_BUILTIN_PADDSW128
,
25796 IX86_BUILTIN_PADDUSB128
,
25797 IX86_BUILTIN_PADDUSW128
,
25798 IX86_BUILTIN_PSUBB128
,
25799 IX86_BUILTIN_PSUBW128
,
25800 IX86_BUILTIN_PSUBD128
,
25801 IX86_BUILTIN_PSUBQ128
,
25802 IX86_BUILTIN_PSUBSB128
,
25803 IX86_BUILTIN_PSUBSW128
,
25804 IX86_BUILTIN_PSUBUSB128
,
25805 IX86_BUILTIN_PSUBUSW128
,
25807 IX86_BUILTIN_PAND128
,
25808 IX86_BUILTIN_PANDN128
,
25809 IX86_BUILTIN_POR128
,
25810 IX86_BUILTIN_PXOR128
,
25812 IX86_BUILTIN_PAVGB128
,
25813 IX86_BUILTIN_PAVGW128
,
25815 IX86_BUILTIN_PCMPEQB128
,
25816 IX86_BUILTIN_PCMPEQW128
,
25817 IX86_BUILTIN_PCMPEQD128
,
25818 IX86_BUILTIN_PCMPGTB128
,
25819 IX86_BUILTIN_PCMPGTW128
,
25820 IX86_BUILTIN_PCMPGTD128
,
25822 IX86_BUILTIN_PMADDWD128
,
25824 IX86_BUILTIN_PMAXSW128
,
25825 IX86_BUILTIN_PMAXUB128
,
25826 IX86_BUILTIN_PMINSW128
,
25827 IX86_BUILTIN_PMINUB128
,
25829 IX86_BUILTIN_PMULUDQ
,
25830 IX86_BUILTIN_PMULUDQ128
,
25831 IX86_BUILTIN_PMULHUW128
,
25832 IX86_BUILTIN_PMULHW128
,
25833 IX86_BUILTIN_PMULLW128
,
25835 IX86_BUILTIN_PSADBW128
,
25836 IX86_BUILTIN_PSHUFHW
,
25837 IX86_BUILTIN_PSHUFLW
,
25838 IX86_BUILTIN_PSHUFD
,
25840 IX86_BUILTIN_PSLLDQI128
,
25841 IX86_BUILTIN_PSLLWI128
,
25842 IX86_BUILTIN_PSLLDI128
,
25843 IX86_BUILTIN_PSLLQI128
,
25844 IX86_BUILTIN_PSRAWI128
,
25845 IX86_BUILTIN_PSRADI128
,
25846 IX86_BUILTIN_PSRLDQI128
,
25847 IX86_BUILTIN_PSRLWI128
,
25848 IX86_BUILTIN_PSRLDI128
,
25849 IX86_BUILTIN_PSRLQI128
,
25851 IX86_BUILTIN_PSLLDQ128
,
25852 IX86_BUILTIN_PSLLW128
,
25853 IX86_BUILTIN_PSLLD128
,
25854 IX86_BUILTIN_PSLLQ128
,
25855 IX86_BUILTIN_PSRAW128
,
25856 IX86_BUILTIN_PSRAD128
,
25857 IX86_BUILTIN_PSRLW128
,
25858 IX86_BUILTIN_PSRLD128
,
25859 IX86_BUILTIN_PSRLQ128
,
25861 IX86_BUILTIN_PUNPCKHBW128
,
25862 IX86_BUILTIN_PUNPCKHWD128
,
25863 IX86_BUILTIN_PUNPCKHDQ128
,
25864 IX86_BUILTIN_PUNPCKHQDQ128
,
25865 IX86_BUILTIN_PUNPCKLBW128
,
25866 IX86_BUILTIN_PUNPCKLWD128
,
25867 IX86_BUILTIN_PUNPCKLDQ128
,
25868 IX86_BUILTIN_PUNPCKLQDQ128
,
25870 IX86_BUILTIN_CLFLUSH
,
25871 IX86_BUILTIN_MFENCE
,
25872 IX86_BUILTIN_LFENCE
,
25873 IX86_BUILTIN_PAUSE
,
25875 IX86_BUILTIN_BSRSI
,
25876 IX86_BUILTIN_BSRDI
,
25877 IX86_BUILTIN_RDPMC
,
25878 IX86_BUILTIN_RDTSC
,
25879 IX86_BUILTIN_RDTSCP
,
25880 IX86_BUILTIN_ROLQI
,
25881 IX86_BUILTIN_ROLHI
,
25882 IX86_BUILTIN_RORQI
,
25883 IX86_BUILTIN_RORHI
,
25886 IX86_BUILTIN_ADDSUBPS
,
25887 IX86_BUILTIN_HADDPS
,
25888 IX86_BUILTIN_HSUBPS
,
25889 IX86_BUILTIN_MOVSHDUP
,
25890 IX86_BUILTIN_MOVSLDUP
,
25891 IX86_BUILTIN_ADDSUBPD
,
25892 IX86_BUILTIN_HADDPD
,
25893 IX86_BUILTIN_HSUBPD
,
25894 IX86_BUILTIN_LDDQU
,
25896 IX86_BUILTIN_MONITOR
,
25897 IX86_BUILTIN_MWAIT
,
25900 IX86_BUILTIN_PHADDW
,
25901 IX86_BUILTIN_PHADDD
,
25902 IX86_BUILTIN_PHADDSW
,
25903 IX86_BUILTIN_PHSUBW
,
25904 IX86_BUILTIN_PHSUBD
,
25905 IX86_BUILTIN_PHSUBSW
,
25906 IX86_BUILTIN_PMADDUBSW
,
25907 IX86_BUILTIN_PMULHRSW
,
25908 IX86_BUILTIN_PSHUFB
,
25909 IX86_BUILTIN_PSIGNB
,
25910 IX86_BUILTIN_PSIGNW
,
25911 IX86_BUILTIN_PSIGND
,
25912 IX86_BUILTIN_PALIGNR
,
25913 IX86_BUILTIN_PABSB
,
25914 IX86_BUILTIN_PABSW
,
25915 IX86_BUILTIN_PABSD
,
25917 IX86_BUILTIN_PHADDW128
,
25918 IX86_BUILTIN_PHADDD128
,
25919 IX86_BUILTIN_PHADDSW128
,
25920 IX86_BUILTIN_PHSUBW128
,
25921 IX86_BUILTIN_PHSUBD128
,
25922 IX86_BUILTIN_PHSUBSW128
,
25923 IX86_BUILTIN_PMADDUBSW128
,
25924 IX86_BUILTIN_PMULHRSW128
,
25925 IX86_BUILTIN_PSHUFB128
,
25926 IX86_BUILTIN_PSIGNB128
,
25927 IX86_BUILTIN_PSIGNW128
,
25928 IX86_BUILTIN_PSIGND128
,
25929 IX86_BUILTIN_PALIGNR128
,
25930 IX86_BUILTIN_PABSB128
,
25931 IX86_BUILTIN_PABSW128
,
25932 IX86_BUILTIN_PABSD128
,
25934 /* AMDFAM10 - SSE4A New Instructions. */
25935 IX86_BUILTIN_MOVNTSD
,
25936 IX86_BUILTIN_MOVNTSS
,
25937 IX86_BUILTIN_EXTRQI
,
25938 IX86_BUILTIN_EXTRQ
,
25939 IX86_BUILTIN_INSERTQI
,
25940 IX86_BUILTIN_INSERTQ
,
25943 IX86_BUILTIN_BLENDPD
,
25944 IX86_BUILTIN_BLENDPS
,
25945 IX86_BUILTIN_BLENDVPD
,
25946 IX86_BUILTIN_BLENDVPS
,
25947 IX86_BUILTIN_PBLENDVB128
,
25948 IX86_BUILTIN_PBLENDW128
,
25953 IX86_BUILTIN_INSERTPS128
,
25955 IX86_BUILTIN_MOVNTDQA
,
25956 IX86_BUILTIN_MPSADBW128
,
25957 IX86_BUILTIN_PACKUSDW128
,
25958 IX86_BUILTIN_PCMPEQQ
,
25959 IX86_BUILTIN_PHMINPOSUW128
,
25961 IX86_BUILTIN_PMAXSB128
,
25962 IX86_BUILTIN_PMAXSD128
,
25963 IX86_BUILTIN_PMAXUD128
,
25964 IX86_BUILTIN_PMAXUW128
,
25966 IX86_BUILTIN_PMINSB128
,
25967 IX86_BUILTIN_PMINSD128
,
25968 IX86_BUILTIN_PMINUD128
,
25969 IX86_BUILTIN_PMINUW128
,
25971 IX86_BUILTIN_PMOVSXBW128
,
25972 IX86_BUILTIN_PMOVSXBD128
,
25973 IX86_BUILTIN_PMOVSXBQ128
,
25974 IX86_BUILTIN_PMOVSXWD128
,
25975 IX86_BUILTIN_PMOVSXWQ128
,
25976 IX86_BUILTIN_PMOVSXDQ128
,
25978 IX86_BUILTIN_PMOVZXBW128
,
25979 IX86_BUILTIN_PMOVZXBD128
,
25980 IX86_BUILTIN_PMOVZXBQ128
,
25981 IX86_BUILTIN_PMOVZXWD128
,
25982 IX86_BUILTIN_PMOVZXWQ128
,
25983 IX86_BUILTIN_PMOVZXDQ128
,
25985 IX86_BUILTIN_PMULDQ128
,
25986 IX86_BUILTIN_PMULLD128
,
25988 IX86_BUILTIN_ROUNDSD
,
25989 IX86_BUILTIN_ROUNDSS
,
25991 IX86_BUILTIN_ROUNDPD
,
25992 IX86_BUILTIN_ROUNDPS
,
25994 IX86_BUILTIN_FLOORPD
,
25995 IX86_BUILTIN_CEILPD
,
25996 IX86_BUILTIN_TRUNCPD
,
25997 IX86_BUILTIN_RINTPD
,
25998 IX86_BUILTIN_ROUNDPD_AZ
,
26000 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26001 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26002 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26004 IX86_BUILTIN_FLOORPS
,
26005 IX86_BUILTIN_CEILPS
,
26006 IX86_BUILTIN_TRUNCPS
,
26007 IX86_BUILTIN_RINTPS
,
26008 IX86_BUILTIN_ROUNDPS_AZ
,
26010 IX86_BUILTIN_FLOORPS_SFIX
,
26011 IX86_BUILTIN_CEILPS_SFIX
,
26012 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26014 IX86_BUILTIN_PTESTZ
,
26015 IX86_BUILTIN_PTESTC
,
26016 IX86_BUILTIN_PTESTNZC
,
26018 IX86_BUILTIN_VEC_INIT_V2SI
,
26019 IX86_BUILTIN_VEC_INIT_V4HI
,
26020 IX86_BUILTIN_VEC_INIT_V8QI
,
26021 IX86_BUILTIN_VEC_EXT_V2DF
,
26022 IX86_BUILTIN_VEC_EXT_V2DI
,
26023 IX86_BUILTIN_VEC_EXT_V4SF
,
26024 IX86_BUILTIN_VEC_EXT_V4SI
,
26025 IX86_BUILTIN_VEC_EXT_V8HI
,
26026 IX86_BUILTIN_VEC_EXT_V2SI
,
26027 IX86_BUILTIN_VEC_EXT_V4HI
,
26028 IX86_BUILTIN_VEC_EXT_V16QI
,
26029 IX86_BUILTIN_VEC_SET_V2DI
,
26030 IX86_BUILTIN_VEC_SET_V4SF
,
26031 IX86_BUILTIN_VEC_SET_V4SI
,
26032 IX86_BUILTIN_VEC_SET_V8HI
,
26033 IX86_BUILTIN_VEC_SET_V4HI
,
26034 IX86_BUILTIN_VEC_SET_V16QI
,
26036 IX86_BUILTIN_VEC_PACK_SFIX
,
26037 IX86_BUILTIN_VEC_PACK_SFIX256
,
26040 IX86_BUILTIN_CRC32QI
,
26041 IX86_BUILTIN_CRC32HI
,
26042 IX86_BUILTIN_CRC32SI
,
26043 IX86_BUILTIN_CRC32DI
,
26045 IX86_BUILTIN_PCMPESTRI128
,
26046 IX86_BUILTIN_PCMPESTRM128
,
26047 IX86_BUILTIN_PCMPESTRA128
,
26048 IX86_BUILTIN_PCMPESTRC128
,
26049 IX86_BUILTIN_PCMPESTRO128
,
26050 IX86_BUILTIN_PCMPESTRS128
,
26051 IX86_BUILTIN_PCMPESTRZ128
,
26052 IX86_BUILTIN_PCMPISTRI128
,
26053 IX86_BUILTIN_PCMPISTRM128
,
26054 IX86_BUILTIN_PCMPISTRA128
,
26055 IX86_BUILTIN_PCMPISTRC128
,
26056 IX86_BUILTIN_PCMPISTRO128
,
26057 IX86_BUILTIN_PCMPISTRS128
,
26058 IX86_BUILTIN_PCMPISTRZ128
,
26060 IX86_BUILTIN_PCMPGTQ
,
26062 /* AES instructions */
26063 IX86_BUILTIN_AESENC128
,
26064 IX86_BUILTIN_AESENCLAST128
,
26065 IX86_BUILTIN_AESDEC128
,
26066 IX86_BUILTIN_AESDECLAST128
,
26067 IX86_BUILTIN_AESIMC128
,
26068 IX86_BUILTIN_AESKEYGENASSIST128
,
26070 /* PCLMUL instruction */
26071 IX86_BUILTIN_PCLMULQDQ128
,
26074 IX86_BUILTIN_ADDPD256
,
26075 IX86_BUILTIN_ADDPS256
,
26076 IX86_BUILTIN_ADDSUBPD256
,
26077 IX86_BUILTIN_ADDSUBPS256
,
26078 IX86_BUILTIN_ANDPD256
,
26079 IX86_BUILTIN_ANDPS256
,
26080 IX86_BUILTIN_ANDNPD256
,
26081 IX86_BUILTIN_ANDNPS256
,
26082 IX86_BUILTIN_BLENDPD256
,
26083 IX86_BUILTIN_BLENDPS256
,
26084 IX86_BUILTIN_BLENDVPD256
,
26085 IX86_BUILTIN_BLENDVPS256
,
26086 IX86_BUILTIN_DIVPD256
,
26087 IX86_BUILTIN_DIVPS256
,
26088 IX86_BUILTIN_DPPS256
,
26089 IX86_BUILTIN_HADDPD256
,
26090 IX86_BUILTIN_HADDPS256
,
26091 IX86_BUILTIN_HSUBPD256
,
26092 IX86_BUILTIN_HSUBPS256
,
26093 IX86_BUILTIN_MAXPD256
,
26094 IX86_BUILTIN_MAXPS256
,
26095 IX86_BUILTIN_MINPD256
,
26096 IX86_BUILTIN_MINPS256
,
26097 IX86_BUILTIN_MULPD256
,
26098 IX86_BUILTIN_MULPS256
,
26099 IX86_BUILTIN_ORPD256
,
26100 IX86_BUILTIN_ORPS256
,
26101 IX86_BUILTIN_SHUFPD256
,
26102 IX86_BUILTIN_SHUFPS256
,
26103 IX86_BUILTIN_SUBPD256
,
26104 IX86_BUILTIN_SUBPS256
,
26105 IX86_BUILTIN_XORPD256
,
26106 IX86_BUILTIN_XORPS256
,
26107 IX86_BUILTIN_CMPSD
,
26108 IX86_BUILTIN_CMPSS
,
26109 IX86_BUILTIN_CMPPD
,
26110 IX86_BUILTIN_CMPPS
,
26111 IX86_BUILTIN_CMPPD256
,
26112 IX86_BUILTIN_CMPPS256
,
26113 IX86_BUILTIN_CVTDQ2PD256
,
26114 IX86_BUILTIN_CVTDQ2PS256
,
26115 IX86_BUILTIN_CVTPD2PS256
,
26116 IX86_BUILTIN_CVTPS2DQ256
,
26117 IX86_BUILTIN_CVTPS2PD256
,
26118 IX86_BUILTIN_CVTTPD2DQ256
,
26119 IX86_BUILTIN_CVTPD2DQ256
,
26120 IX86_BUILTIN_CVTTPS2DQ256
,
26121 IX86_BUILTIN_EXTRACTF128PD256
,
26122 IX86_BUILTIN_EXTRACTF128PS256
,
26123 IX86_BUILTIN_EXTRACTF128SI256
,
26124 IX86_BUILTIN_VZEROALL
,
26125 IX86_BUILTIN_VZEROUPPER
,
26126 IX86_BUILTIN_VPERMILVARPD
,
26127 IX86_BUILTIN_VPERMILVARPS
,
26128 IX86_BUILTIN_VPERMILVARPD256
,
26129 IX86_BUILTIN_VPERMILVARPS256
,
26130 IX86_BUILTIN_VPERMILPD
,
26131 IX86_BUILTIN_VPERMILPS
,
26132 IX86_BUILTIN_VPERMILPD256
,
26133 IX86_BUILTIN_VPERMILPS256
,
26134 IX86_BUILTIN_VPERMIL2PD
,
26135 IX86_BUILTIN_VPERMIL2PS
,
26136 IX86_BUILTIN_VPERMIL2PD256
,
26137 IX86_BUILTIN_VPERMIL2PS256
,
26138 IX86_BUILTIN_VPERM2F128PD256
,
26139 IX86_BUILTIN_VPERM2F128PS256
,
26140 IX86_BUILTIN_VPERM2F128SI256
,
26141 IX86_BUILTIN_VBROADCASTSS
,
26142 IX86_BUILTIN_VBROADCASTSD256
,
26143 IX86_BUILTIN_VBROADCASTSS256
,
26144 IX86_BUILTIN_VBROADCASTPD256
,
26145 IX86_BUILTIN_VBROADCASTPS256
,
26146 IX86_BUILTIN_VINSERTF128PD256
,
26147 IX86_BUILTIN_VINSERTF128PS256
,
26148 IX86_BUILTIN_VINSERTF128SI256
,
26149 IX86_BUILTIN_LOADUPD256
,
26150 IX86_BUILTIN_LOADUPS256
,
26151 IX86_BUILTIN_STOREUPD256
,
26152 IX86_BUILTIN_STOREUPS256
,
26153 IX86_BUILTIN_LDDQU256
,
26154 IX86_BUILTIN_MOVNTDQ256
,
26155 IX86_BUILTIN_MOVNTPD256
,
26156 IX86_BUILTIN_MOVNTPS256
,
26157 IX86_BUILTIN_LOADDQU256
,
26158 IX86_BUILTIN_STOREDQU256
,
26159 IX86_BUILTIN_MASKLOADPD
,
26160 IX86_BUILTIN_MASKLOADPS
,
26161 IX86_BUILTIN_MASKSTOREPD
,
26162 IX86_BUILTIN_MASKSTOREPS
,
26163 IX86_BUILTIN_MASKLOADPD256
,
26164 IX86_BUILTIN_MASKLOADPS256
,
26165 IX86_BUILTIN_MASKSTOREPD256
,
26166 IX86_BUILTIN_MASKSTOREPS256
,
26167 IX86_BUILTIN_MOVSHDUP256
,
26168 IX86_BUILTIN_MOVSLDUP256
,
26169 IX86_BUILTIN_MOVDDUP256
,
26171 IX86_BUILTIN_SQRTPD256
,
26172 IX86_BUILTIN_SQRTPS256
,
26173 IX86_BUILTIN_SQRTPS_NR256
,
26174 IX86_BUILTIN_RSQRTPS256
,
26175 IX86_BUILTIN_RSQRTPS_NR256
,
26177 IX86_BUILTIN_RCPPS256
,
26179 IX86_BUILTIN_ROUNDPD256
,
26180 IX86_BUILTIN_ROUNDPS256
,
26182 IX86_BUILTIN_FLOORPD256
,
26183 IX86_BUILTIN_CEILPD256
,
26184 IX86_BUILTIN_TRUNCPD256
,
26185 IX86_BUILTIN_RINTPD256
,
26186 IX86_BUILTIN_ROUNDPD_AZ256
,
26188 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26189 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26190 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26192 IX86_BUILTIN_FLOORPS256
,
26193 IX86_BUILTIN_CEILPS256
,
26194 IX86_BUILTIN_TRUNCPS256
,
26195 IX86_BUILTIN_RINTPS256
,
26196 IX86_BUILTIN_ROUNDPS_AZ256
,
26198 IX86_BUILTIN_FLOORPS_SFIX256
,
26199 IX86_BUILTIN_CEILPS_SFIX256
,
26200 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26202 IX86_BUILTIN_UNPCKHPD256
,
26203 IX86_BUILTIN_UNPCKLPD256
,
26204 IX86_BUILTIN_UNPCKHPS256
,
26205 IX86_BUILTIN_UNPCKLPS256
,
26207 IX86_BUILTIN_SI256_SI
,
26208 IX86_BUILTIN_PS256_PS
,
26209 IX86_BUILTIN_PD256_PD
,
26210 IX86_BUILTIN_SI_SI256
,
26211 IX86_BUILTIN_PS_PS256
,
26212 IX86_BUILTIN_PD_PD256
,
26214 IX86_BUILTIN_VTESTZPD
,
26215 IX86_BUILTIN_VTESTCPD
,
26216 IX86_BUILTIN_VTESTNZCPD
,
26217 IX86_BUILTIN_VTESTZPS
,
26218 IX86_BUILTIN_VTESTCPS
,
26219 IX86_BUILTIN_VTESTNZCPS
,
26220 IX86_BUILTIN_VTESTZPD256
,
26221 IX86_BUILTIN_VTESTCPD256
,
26222 IX86_BUILTIN_VTESTNZCPD256
,
26223 IX86_BUILTIN_VTESTZPS256
,
26224 IX86_BUILTIN_VTESTCPS256
,
26225 IX86_BUILTIN_VTESTNZCPS256
,
26226 IX86_BUILTIN_PTESTZ256
,
26227 IX86_BUILTIN_PTESTC256
,
26228 IX86_BUILTIN_PTESTNZC256
,
26230 IX86_BUILTIN_MOVMSKPD256
,
26231 IX86_BUILTIN_MOVMSKPS256
,
26234 IX86_BUILTIN_MPSADBW256
,
26235 IX86_BUILTIN_PABSB256
,
26236 IX86_BUILTIN_PABSW256
,
26237 IX86_BUILTIN_PABSD256
,
26238 IX86_BUILTIN_PACKSSDW256
,
26239 IX86_BUILTIN_PACKSSWB256
,
26240 IX86_BUILTIN_PACKUSDW256
,
26241 IX86_BUILTIN_PACKUSWB256
,
26242 IX86_BUILTIN_PADDB256
,
26243 IX86_BUILTIN_PADDW256
,
26244 IX86_BUILTIN_PADDD256
,
26245 IX86_BUILTIN_PADDQ256
,
26246 IX86_BUILTIN_PADDSB256
,
26247 IX86_BUILTIN_PADDSW256
,
26248 IX86_BUILTIN_PADDUSB256
,
26249 IX86_BUILTIN_PADDUSW256
,
26250 IX86_BUILTIN_PALIGNR256
,
26251 IX86_BUILTIN_AND256I
,
26252 IX86_BUILTIN_ANDNOT256I
,
26253 IX86_BUILTIN_PAVGB256
,
26254 IX86_BUILTIN_PAVGW256
,
26255 IX86_BUILTIN_PBLENDVB256
,
26256 IX86_BUILTIN_PBLENDVW256
,
26257 IX86_BUILTIN_PCMPEQB256
,
26258 IX86_BUILTIN_PCMPEQW256
,
26259 IX86_BUILTIN_PCMPEQD256
,
26260 IX86_BUILTIN_PCMPEQQ256
,
26261 IX86_BUILTIN_PCMPGTB256
,
26262 IX86_BUILTIN_PCMPGTW256
,
26263 IX86_BUILTIN_PCMPGTD256
,
26264 IX86_BUILTIN_PCMPGTQ256
,
26265 IX86_BUILTIN_PHADDW256
,
26266 IX86_BUILTIN_PHADDD256
,
26267 IX86_BUILTIN_PHADDSW256
,
26268 IX86_BUILTIN_PHSUBW256
,
26269 IX86_BUILTIN_PHSUBD256
,
26270 IX86_BUILTIN_PHSUBSW256
,
26271 IX86_BUILTIN_PMADDUBSW256
,
26272 IX86_BUILTIN_PMADDWD256
,
26273 IX86_BUILTIN_PMAXSB256
,
26274 IX86_BUILTIN_PMAXSW256
,
26275 IX86_BUILTIN_PMAXSD256
,
26276 IX86_BUILTIN_PMAXUB256
,
26277 IX86_BUILTIN_PMAXUW256
,
26278 IX86_BUILTIN_PMAXUD256
,
26279 IX86_BUILTIN_PMINSB256
,
26280 IX86_BUILTIN_PMINSW256
,
26281 IX86_BUILTIN_PMINSD256
,
26282 IX86_BUILTIN_PMINUB256
,
26283 IX86_BUILTIN_PMINUW256
,
26284 IX86_BUILTIN_PMINUD256
,
26285 IX86_BUILTIN_PMOVMSKB256
,
26286 IX86_BUILTIN_PMOVSXBW256
,
26287 IX86_BUILTIN_PMOVSXBD256
,
26288 IX86_BUILTIN_PMOVSXBQ256
,
26289 IX86_BUILTIN_PMOVSXWD256
,
26290 IX86_BUILTIN_PMOVSXWQ256
,
26291 IX86_BUILTIN_PMOVSXDQ256
,
26292 IX86_BUILTIN_PMOVZXBW256
,
26293 IX86_BUILTIN_PMOVZXBD256
,
26294 IX86_BUILTIN_PMOVZXBQ256
,
26295 IX86_BUILTIN_PMOVZXWD256
,
26296 IX86_BUILTIN_PMOVZXWQ256
,
26297 IX86_BUILTIN_PMOVZXDQ256
,
26298 IX86_BUILTIN_PMULDQ256
,
26299 IX86_BUILTIN_PMULHRSW256
,
26300 IX86_BUILTIN_PMULHUW256
,
26301 IX86_BUILTIN_PMULHW256
,
26302 IX86_BUILTIN_PMULLW256
,
26303 IX86_BUILTIN_PMULLD256
,
26304 IX86_BUILTIN_PMULUDQ256
,
26305 IX86_BUILTIN_POR256
,
26306 IX86_BUILTIN_PSADBW256
,
26307 IX86_BUILTIN_PSHUFB256
,
26308 IX86_BUILTIN_PSHUFD256
,
26309 IX86_BUILTIN_PSHUFHW256
,
26310 IX86_BUILTIN_PSHUFLW256
,
26311 IX86_BUILTIN_PSIGNB256
,
26312 IX86_BUILTIN_PSIGNW256
,
26313 IX86_BUILTIN_PSIGND256
,
26314 IX86_BUILTIN_PSLLDQI256
,
26315 IX86_BUILTIN_PSLLWI256
,
26316 IX86_BUILTIN_PSLLW256
,
26317 IX86_BUILTIN_PSLLDI256
,
26318 IX86_BUILTIN_PSLLD256
,
26319 IX86_BUILTIN_PSLLQI256
,
26320 IX86_BUILTIN_PSLLQ256
,
26321 IX86_BUILTIN_PSRAWI256
,
26322 IX86_BUILTIN_PSRAW256
,
26323 IX86_BUILTIN_PSRADI256
,
26324 IX86_BUILTIN_PSRAD256
,
26325 IX86_BUILTIN_PSRLDQI256
,
26326 IX86_BUILTIN_PSRLWI256
,
26327 IX86_BUILTIN_PSRLW256
,
26328 IX86_BUILTIN_PSRLDI256
,
26329 IX86_BUILTIN_PSRLD256
,
26330 IX86_BUILTIN_PSRLQI256
,
26331 IX86_BUILTIN_PSRLQ256
,
26332 IX86_BUILTIN_PSUBB256
,
26333 IX86_BUILTIN_PSUBW256
,
26334 IX86_BUILTIN_PSUBD256
,
26335 IX86_BUILTIN_PSUBQ256
,
26336 IX86_BUILTIN_PSUBSB256
,
26337 IX86_BUILTIN_PSUBSW256
,
26338 IX86_BUILTIN_PSUBUSB256
,
26339 IX86_BUILTIN_PSUBUSW256
,
26340 IX86_BUILTIN_PUNPCKHBW256
,
26341 IX86_BUILTIN_PUNPCKHWD256
,
26342 IX86_BUILTIN_PUNPCKHDQ256
,
26343 IX86_BUILTIN_PUNPCKHQDQ256
,
26344 IX86_BUILTIN_PUNPCKLBW256
,
26345 IX86_BUILTIN_PUNPCKLWD256
,
26346 IX86_BUILTIN_PUNPCKLDQ256
,
26347 IX86_BUILTIN_PUNPCKLQDQ256
,
26348 IX86_BUILTIN_PXOR256
,
26349 IX86_BUILTIN_MOVNTDQA256
,
26350 IX86_BUILTIN_VBROADCASTSS_PS
,
26351 IX86_BUILTIN_VBROADCASTSS_PS256
,
26352 IX86_BUILTIN_VBROADCASTSD_PD256
,
26353 IX86_BUILTIN_VBROADCASTSI256
,
26354 IX86_BUILTIN_PBLENDD256
,
26355 IX86_BUILTIN_PBLENDD128
,
26356 IX86_BUILTIN_PBROADCASTB256
,
26357 IX86_BUILTIN_PBROADCASTW256
,
26358 IX86_BUILTIN_PBROADCASTD256
,
26359 IX86_BUILTIN_PBROADCASTQ256
,
26360 IX86_BUILTIN_PBROADCASTB128
,
26361 IX86_BUILTIN_PBROADCASTW128
,
26362 IX86_BUILTIN_PBROADCASTD128
,
26363 IX86_BUILTIN_PBROADCASTQ128
,
26364 IX86_BUILTIN_VPERMVARSI256
,
26365 IX86_BUILTIN_VPERMDF256
,
26366 IX86_BUILTIN_VPERMVARSF256
,
26367 IX86_BUILTIN_VPERMDI256
,
26368 IX86_BUILTIN_VPERMTI256
,
26369 IX86_BUILTIN_VEXTRACT128I256
,
26370 IX86_BUILTIN_VINSERT128I256
,
26371 IX86_BUILTIN_MASKLOADD
,
26372 IX86_BUILTIN_MASKLOADQ
,
26373 IX86_BUILTIN_MASKLOADD256
,
26374 IX86_BUILTIN_MASKLOADQ256
,
26375 IX86_BUILTIN_MASKSTORED
,
26376 IX86_BUILTIN_MASKSTOREQ
,
26377 IX86_BUILTIN_MASKSTORED256
,
26378 IX86_BUILTIN_MASKSTOREQ256
,
26379 IX86_BUILTIN_PSLLVV4DI
,
26380 IX86_BUILTIN_PSLLVV2DI
,
26381 IX86_BUILTIN_PSLLVV8SI
,
26382 IX86_BUILTIN_PSLLVV4SI
,
26383 IX86_BUILTIN_PSRAVV8SI
,
26384 IX86_BUILTIN_PSRAVV4SI
,
26385 IX86_BUILTIN_PSRLVV4DI
,
26386 IX86_BUILTIN_PSRLVV2DI
,
26387 IX86_BUILTIN_PSRLVV8SI
,
26388 IX86_BUILTIN_PSRLVV4SI
,
26390 IX86_BUILTIN_GATHERSIV2DF
,
26391 IX86_BUILTIN_GATHERSIV4DF
,
26392 IX86_BUILTIN_GATHERDIV2DF
,
26393 IX86_BUILTIN_GATHERDIV4DF
,
26394 IX86_BUILTIN_GATHERSIV4SF
,
26395 IX86_BUILTIN_GATHERSIV8SF
,
26396 IX86_BUILTIN_GATHERDIV4SF
,
26397 IX86_BUILTIN_GATHERDIV8SF
,
26398 IX86_BUILTIN_GATHERSIV2DI
,
26399 IX86_BUILTIN_GATHERSIV4DI
,
26400 IX86_BUILTIN_GATHERDIV2DI
,
26401 IX86_BUILTIN_GATHERDIV4DI
,
26402 IX86_BUILTIN_GATHERSIV4SI
,
26403 IX86_BUILTIN_GATHERSIV8SI
,
26404 IX86_BUILTIN_GATHERDIV4SI
,
26405 IX86_BUILTIN_GATHERDIV8SI
,
26407 /* Alternate 4 element gather for the vectorizer where
26408 all operands are 32-byte wide. */
26409 IX86_BUILTIN_GATHERALTSIV4DF
,
26410 IX86_BUILTIN_GATHERALTDIV8SF
,
26411 IX86_BUILTIN_GATHERALTSIV4DI
,
26412 IX86_BUILTIN_GATHERALTDIV8SI
,
26414 /* TFmode support builtins. */
26416 IX86_BUILTIN_HUGE_VALQ
,
26417 IX86_BUILTIN_FABSQ
,
26418 IX86_BUILTIN_COPYSIGNQ
,
26420 /* Vectorizer support builtins. */
26421 IX86_BUILTIN_CPYSGNPS
,
26422 IX86_BUILTIN_CPYSGNPD
,
26423 IX86_BUILTIN_CPYSGNPS256
,
26424 IX86_BUILTIN_CPYSGNPD256
,
26426 /* FMA4 instructions. */
26427 IX86_BUILTIN_VFMADDSS
,
26428 IX86_BUILTIN_VFMADDSD
,
26429 IX86_BUILTIN_VFMADDPS
,
26430 IX86_BUILTIN_VFMADDPD
,
26431 IX86_BUILTIN_VFMADDPS256
,
26432 IX86_BUILTIN_VFMADDPD256
,
26433 IX86_BUILTIN_VFMADDSUBPS
,
26434 IX86_BUILTIN_VFMADDSUBPD
,
26435 IX86_BUILTIN_VFMADDSUBPS256
,
26436 IX86_BUILTIN_VFMADDSUBPD256
,
26438 /* FMA3 instructions. */
26439 IX86_BUILTIN_VFMADDSS3
,
26440 IX86_BUILTIN_VFMADDSD3
,
26442 /* XOP instructions. */
26443 IX86_BUILTIN_VPCMOV
,
26444 IX86_BUILTIN_VPCMOV_V2DI
,
26445 IX86_BUILTIN_VPCMOV_V4SI
,
26446 IX86_BUILTIN_VPCMOV_V8HI
,
26447 IX86_BUILTIN_VPCMOV_V16QI
,
26448 IX86_BUILTIN_VPCMOV_V4SF
,
26449 IX86_BUILTIN_VPCMOV_V2DF
,
26450 IX86_BUILTIN_VPCMOV256
,
26451 IX86_BUILTIN_VPCMOV_V4DI256
,
26452 IX86_BUILTIN_VPCMOV_V8SI256
,
26453 IX86_BUILTIN_VPCMOV_V16HI256
,
26454 IX86_BUILTIN_VPCMOV_V32QI256
,
26455 IX86_BUILTIN_VPCMOV_V8SF256
,
26456 IX86_BUILTIN_VPCMOV_V4DF256
,
26458 IX86_BUILTIN_VPPERM
,
26460 IX86_BUILTIN_VPMACSSWW
,
26461 IX86_BUILTIN_VPMACSWW
,
26462 IX86_BUILTIN_VPMACSSWD
,
26463 IX86_BUILTIN_VPMACSWD
,
26464 IX86_BUILTIN_VPMACSSDD
,
26465 IX86_BUILTIN_VPMACSDD
,
26466 IX86_BUILTIN_VPMACSSDQL
,
26467 IX86_BUILTIN_VPMACSSDQH
,
26468 IX86_BUILTIN_VPMACSDQL
,
26469 IX86_BUILTIN_VPMACSDQH
,
26470 IX86_BUILTIN_VPMADCSSWD
,
26471 IX86_BUILTIN_VPMADCSWD
,
26473 IX86_BUILTIN_VPHADDBW
,
26474 IX86_BUILTIN_VPHADDBD
,
26475 IX86_BUILTIN_VPHADDBQ
,
26476 IX86_BUILTIN_VPHADDWD
,
26477 IX86_BUILTIN_VPHADDWQ
,
26478 IX86_BUILTIN_VPHADDDQ
,
26479 IX86_BUILTIN_VPHADDUBW
,
26480 IX86_BUILTIN_VPHADDUBD
,
26481 IX86_BUILTIN_VPHADDUBQ
,
26482 IX86_BUILTIN_VPHADDUWD
,
26483 IX86_BUILTIN_VPHADDUWQ
,
26484 IX86_BUILTIN_VPHADDUDQ
,
26485 IX86_BUILTIN_VPHSUBBW
,
26486 IX86_BUILTIN_VPHSUBWD
,
26487 IX86_BUILTIN_VPHSUBDQ
,
26489 IX86_BUILTIN_VPROTB
,
26490 IX86_BUILTIN_VPROTW
,
26491 IX86_BUILTIN_VPROTD
,
26492 IX86_BUILTIN_VPROTQ
,
26493 IX86_BUILTIN_VPROTB_IMM
,
26494 IX86_BUILTIN_VPROTW_IMM
,
26495 IX86_BUILTIN_VPROTD_IMM
,
26496 IX86_BUILTIN_VPROTQ_IMM
,
26498 IX86_BUILTIN_VPSHLB
,
26499 IX86_BUILTIN_VPSHLW
,
26500 IX86_BUILTIN_VPSHLD
,
26501 IX86_BUILTIN_VPSHLQ
,
26502 IX86_BUILTIN_VPSHAB
,
26503 IX86_BUILTIN_VPSHAW
,
26504 IX86_BUILTIN_VPSHAD
,
26505 IX86_BUILTIN_VPSHAQ
,
26507 IX86_BUILTIN_VFRCZSS
,
26508 IX86_BUILTIN_VFRCZSD
,
26509 IX86_BUILTIN_VFRCZPS
,
26510 IX86_BUILTIN_VFRCZPD
,
26511 IX86_BUILTIN_VFRCZPS256
,
26512 IX86_BUILTIN_VFRCZPD256
,
26514 IX86_BUILTIN_VPCOMEQUB
,
26515 IX86_BUILTIN_VPCOMNEUB
,
26516 IX86_BUILTIN_VPCOMLTUB
,
26517 IX86_BUILTIN_VPCOMLEUB
,
26518 IX86_BUILTIN_VPCOMGTUB
,
26519 IX86_BUILTIN_VPCOMGEUB
,
26520 IX86_BUILTIN_VPCOMFALSEUB
,
26521 IX86_BUILTIN_VPCOMTRUEUB
,
26523 IX86_BUILTIN_VPCOMEQUW
,
26524 IX86_BUILTIN_VPCOMNEUW
,
26525 IX86_BUILTIN_VPCOMLTUW
,
26526 IX86_BUILTIN_VPCOMLEUW
,
26527 IX86_BUILTIN_VPCOMGTUW
,
26528 IX86_BUILTIN_VPCOMGEUW
,
26529 IX86_BUILTIN_VPCOMFALSEUW
,
26530 IX86_BUILTIN_VPCOMTRUEUW
,
26532 IX86_BUILTIN_VPCOMEQUD
,
26533 IX86_BUILTIN_VPCOMNEUD
,
26534 IX86_BUILTIN_VPCOMLTUD
,
26535 IX86_BUILTIN_VPCOMLEUD
,
26536 IX86_BUILTIN_VPCOMGTUD
,
26537 IX86_BUILTIN_VPCOMGEUD
,
26538 IX86_BUILTIN_VPCOMFALSEUD
,
26539 IX86_BUILTIN_VPCOMTRUEUD
,
26541 IX86_BUILTIN_VPCOMEQUQ
,
26542 IX86_BUILTIN_VPCOMNEUQ
,
26543 IX86_BUILTIN_VPCOMLTUQ
,
26544 IX86_BUILTIN_VPCOMLEUQ
,
26545 IX86_BUILTIN_VPCOMGTUQ
,
26546 IX86_BUILTIN_VPCOMGEUQ
,
26547 IX86_BUILTIN_VPCOMFALSEUQ
,
26548 IX86_BUILTIN_VPCOMTRUEUQ
,
26550 IX86_BUILTIN_VPCOMEQB
,
26551 IX86_BUILTIN_VPCOMNEB
,
26552 IX86_BUILTIN_VPCOMLTB
,
26553 IX86_BUILTIN_VPCOMLEB
,
26554 IX86_BUILTIN_VPCOMGTB
,
26555 IX86_BUILTIN_VPCOMGEB
,
26556 IX86_BUILTIN_VPCOMFALSEB
,
26557 IX86_BUILTIN_VPCOMTRUEB
,
26559 IX86_BUILTIN_VPCOMEQW
,
26560 IX86_BUILTIN_VPCOMNEW
,
26561 IX86_BUILTIN_VPCOMLTW
,
26562 IX86_BUILTIN_VPCOMLEW
,
26563 IX86_BUILTIN_VPCOMGTW
,
26564 IX86_BUILTIN_VPCOMGEW
,
26565 IX86_BUILTIN_VPCOMFALSEW
,
26566 IX86_BUILTIN_VPCOMTRUEW
,
26568 IX86_BUILTIN_VPCOMEQD
,
26569 IX86_BUILTIN_VPCOMNED
,
26570 IX86_BUILTIN_VPCOMLTD
,
26571 IX86_BUILTIN_VPCOMLED
,
26572 IX86_BUILTIN_VPCOMGTD
,
26573 IX86_BUILTIN_VPCOMGED
,
26574 IX86_BUILTIN_VPCOMFALSED
,
26575 IX86_BUILTIN_VPCOMTRUED
,
26577 IX86_BUILTIN_VPCOMEQQ
,
26578 IX86_BUILTIN_VPCOMNEQ
,
26579 IX86_BUILTIN_VPCOMLTQ
,
26580 IX86_BUILTIN_VPCOMLEQ
,
26581 IX86_BUILTIN_VPCOMGTQ
,
26582 IX86_BUILTIN_VPCOMGEQ
,
26583 IX86_BUILTIN_VPCOMFALSEQ
,
26584 IX86_BUILTIN_VPCOMTRUEQ
,
26586 /* LWP instructions. */
26587 IX86_BUILTIN_LLWPCB
,
26588 IX86_BUILTIN_SLWPCB
,
26589 IX86_BUILTIN_LWPVAL32
,
26590 IX86_BUILTIN_LWPVAL64
,
26591 IX86_BUILTIN_LWPINS32
,
26592 IX86_BUILTIN_LWPINS64
,
26597 IX86_BUILTIN_XBEGIN
,
26599 IX86_BUILTIN_XABORT
,
26600 IX86_BUILTIN_XTEST
,
26602 /* BMI instructions. */
26603 IX86_BUILTIN_BEXTR32
,
26604 IX86_BUILTIN_BEXTR64
,
26607 /* TBM instructions. */
26608 IX86_BUILTIN_BEXTRI32
,
26609 IX86_BUILTIN_BEXTRI64
,
26611 /* BMI2 instructions. */
26612 IX86_BUILTIN_BZHI32
,
26613 IX86_BUILTIN_BZHI64
,
26614 IX86_BUILTIN_PDEP32
,
26615 IX86_BUILTIN_PDEP64
,
26616 IX86_BUILTIN_PEXT32
,
26617 IX86_BUILTIN_PEXT64
,
26619 /* ADX instructions. */
26620 IX86_BUILTIN_ADDCARRYX32
,
26621 IX86_BUILTIN_ADDCARRYX64
,
26623 /* FSGSBASE instructions. */
26624 IX86_BUILTIN_RDFSBASE32
,
26625 IX86_BUILTIN_RDFSBASE64
,
26626 IX86_BUILTIN_RDGSBASE32
,
26627 IX86_BUILTIN_RDGSBASE64
,
26628 IX86_BUILTIN_WRFSBASE32
,
26629 IX86_BUILTIN_WRFSBASE64
,
26630 IX86_BUILTIN_WRGSBASE32
,
26631 IX86_BUILTIN_WRGSBASE64
,
26633 /* RDRND instructions. */
26634 IX86_BUILTIN_RDRAND16_STEP
,
26635 IX86_BUILTIN_RDRAND32_STEP
,
26636 IX86_BUILTIN_RDRAND64_STEP
,
26638 /* RDSEED instructions. */
26639 IX86_BUILTIN_RDSEED16_STEP
,
26640 IX86_BUILTIN_RDSEED32_STEP
,
26641 IX86_BUILTIN_RDSEED64_STEP
,
26643 /* F16C instructions. */
26644 IX86_BUILTIN_CVTPH2PS
,
26645 IX86_BUILTIN_CVTPH2PS256
,
26646 IX86_BUILTIN_CVTPS2PH
,
26647 IX86_BUILTIN_CVTPS2PH256
,
26649 /* CFString built-in for darwin */
26650 IX86_BUILTIN_CFSTRING
,
26652 /* Builtins to get CPU type and supported features. */
26653 IX86_BUILTIN_CPU_INIT
,
26654 IX86_BUILTIN_CPU_IS
,
26655 IX86_BUILTIN_CPU_SUPPORTS
,
/* Table for the ix86 builtin decls.  Indexed by the IX86_BUILTIN_*
   enumeration values; a slot holds the builtin's function decl, or
   NULL_TREE while the builtin is deferred / not declared.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
26663 /* Table of all of the builtin functions that are possible with different ISA's
26664 but are waiting to be built until a function is declared to use that
26666 struct builtin_isa
{
26667 const char *name
; /* function name */
26668 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26669 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26670 bool const_p
; /* true if the declaration is constant */
26671 bool set_and_not_built_p
;
26674 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26677 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26678 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26679 function decl in the ix86_builtins array. Returns the function decl or
26680 NULL_TREE, if the builtin was not added.
26682 If the front end has a special hook for builtin functions, delay adding
26683 builtin functions that aren't in the current ISA until the ISA is changed
26684 with function specific optimization. Doing so, can save about 300K for the
26685 default compiler. When the builtin is expanded, check at that time whether
26688 If the front end doesn't have a special hook, record all builtins, even if
26689 it isn't an instruction set in the current ISA in case the user uses
26690 function specific options for a different ISA, so that we don't get scope
26691 errors if a builtin is added in the middle of a function scope. */
26694 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26695 enum ix86_builtin_func_type tcode
,
26696 enum ix86_builtins code
)
26698 tree decl
= NULL_TREE
;
26700 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26702 ix86_builtins_isa
[(int) code
].isa
= mask
;
26704 mask
&= ~OPTION_MASK_ISA_64BIT
;
26706 || (mask
& ix86_isa_flags
) != 0
26707 || (lang_hooks
.builtin_function
26708 == lang_hooks
.builtin_function_ext_scope
))
26711 tree type
= ix86_get_builtin_func_type (tcode
);
26712 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26714 ix86_builtins
[(int) code
] = decl
;
26715 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26719 ix86_builtins
[(int) code
] = NULL_TREE
;
26720 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26721 ix86_builtins_isa
[(int) code
].name
= name
;
26722 ix86_builtins_isa
[(int) code
].const_p
= false;
26723 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26730 /* Like def_builtin, but also marks the function decl "const". */
26733 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26734 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26736 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26738 TREE_READONLY (decl
) = 1;
26740 ix86_builtins_isa
[(int) code
].const_p
= true;
26745 /* Add any new builtin functions for a given ISA that may not have been
26746 declared. This saves a bit of space compared to adding all of the
26747 declarations to the tree, even if we didn't use them. */
26750 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26754 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26756 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26757 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26761 /* Don't define the builtin again. */
26762 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26764 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26765 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26766 type
, i
, BUILT_IN_MD
, NULL
,
26769 ix86_builtins
[i
] = decl
;
26770 if (ix86_builtins_isa
[i
].const_p
)
26771 TREE_READONLY (decl
) = 1;
26776 /* Bits for builtin_description.flag. */
26778 /* Set when we don't support the comparison natively, and should
26779 swap_comparison in order to support it. */
26780 #define BUILTIN_DESC_SWAP_OPERANDS 1
26782 struct builtin_description
26784 const HOST_WIDE_INT mask
;
26785 const enum insn_code icode
;
26786 const char *const name
;
26787 const enum ix86_builtins code
;
26788 const enum rtx_code comparison
;
26792 static const struct builtin_description bdesc_comi
[] =
26794 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26795 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26796 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26797 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26798 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26799 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26800 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26801 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26802 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26803 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26804 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26805 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26806 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26807 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26808 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26809 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26810 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26811 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26812 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26813 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26814 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26815 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26816 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26817 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26820 static const struct builtin_description bdesc_pcmpestr
[] =
26823 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26824 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26825 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26826 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26827 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26828 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26829 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26832 static const struct builtin_description bdesc_pcmpistr
[] =
26835 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26836 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26837 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26838 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26839 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26840 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26841 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26844 /* Special builtins with variable number of arguments. */
26845 static const struct builtin_description bdesc_special_args
[] =
26847 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26848 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26849 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26852 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26855 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26857 /* FXSR, XSAVE and XSAVEOPT */
26858 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26859 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26860 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26861 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26862 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26864 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26865 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26866 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26867 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26868 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26871 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26872 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26873 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26875 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26876 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26877 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26878 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26880 /* SSE or 3DNow!A */
26881 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26882 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26885 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26886 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26887 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26888 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26889 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26890 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26891 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26892 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26893 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26894 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26896 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26897 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26900 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26903 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26906 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26907 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26910 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26911 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26913 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26914 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26915 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26916 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26917 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26919 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26920 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26921 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26922 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26923 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26924 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26925 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26927 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26928 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26929 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26931 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26932 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26933 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26934 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26935 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26936 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26937 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26938 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26941 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26942 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26943 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26944 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26945 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26946 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26947 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26948 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26949 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26951 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26952 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26953 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26954 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26955 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26956 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26959 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26960 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26961 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26962 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26963 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26964 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26965 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26966 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26969 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26970 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26971 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26974 /* Builtins with variable number of arguments. */
26975 static const struct builtin_description bdesc_args
[] =
26977 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26978 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26979 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26980 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26981 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26982 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26983 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26986 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26987 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26988 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26989 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26990 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26991 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26993 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26994 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26995 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26996 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26997 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26998 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26999 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27000 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27002 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27005 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27006 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27007 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27008 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27010 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27011 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27012 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27013 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27014 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27015 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27017 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27018 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27019 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27020 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27021 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27022 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27024 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27025 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27026 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27028 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27030 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27031 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27032 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27033 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27034 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27035 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27037 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27038 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27039 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27040 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27041 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27042 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27044 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27045 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27046 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27047 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27050 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27051 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27052 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27053 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27055 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27056 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27057 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27058 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27059 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27060 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27061 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27062 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27063 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27064 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27065 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27066 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27067 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27068 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27069 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27072 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27073 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27074 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27075 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27076 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27077 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27080 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27081 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27082 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27083 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27084 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27085 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27086 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27087 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27088 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27089 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27090 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27091 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27095 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27096 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27097 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27098 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27099 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27100 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27102 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27105 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27106 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27110 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27111 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27115 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27116 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27117 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27118 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27122 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27123 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27125 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27127 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27128 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27129 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27130 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27132 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27133 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27134 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27135 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27137 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27139 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27140 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27141 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27142 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27143 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27145 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27146 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27147 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27149 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27151 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27152 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27153 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27155 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27156 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
/* SSE MMX or 3Dnow!A */
27159 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27160 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27161 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27163 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27164 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27165 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27166 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27168 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27169 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27171 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27174 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27176 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27177 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27178 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27179 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27180 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27182 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27183 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27184 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27185 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27186 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27192 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27193 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27196 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27197 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27201 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27203 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27204 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27205 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27206 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27208 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27209 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27211 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27212 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27214 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27232 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27250 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27271 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27274 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27276 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27278 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27279 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27281 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27289 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27302 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27310 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27311 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27314 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27316 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27317 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27320 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27322 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27324 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27325 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27326 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27327 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27330 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27331 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27332 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27333 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27335 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27337 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27339 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27341 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
  /* MMX-register DImode add/sub: the 64-bit paddq/psubq forms require
     SSE2 even though they operate on MMX registers.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  /* SSE3 builtins: duplicate-shuffles and horizontal FP arithmetic.  */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  /* SSSE3 builtins.  Rows come in 128-bit (XMM) / 64-bit (MMX) pairs.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  /* Horizontal add/sub, plain and signed-saturating.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  /* palignr takes its shift count via the *_INT_CONVERT machinery.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
  /* SSE4.1 builtins: blends, dot products, sign/zero extension,
     packs, integer min/max and multiplies.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  /* pmovsx/pmovzx widen the low elements of the source vector.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  /* SSE4.1 fills in the min/max combinations SSE2 lacked.  */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  /* SSE4.1 rounding and ptest builtins.  The rtx-code field of the
     floor/ceil/trunc/rint rows carries a ROUND_* constant (cast to
     enum rtx_code) selecting the immediate rounding mode; the ptest
     rows carry the comparison used to read the flags result.  */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  /* ptest: EQ reads ZF (testz), LTU reads CF (testc), GTU reads
     ~ZF & ~CF (testnzc).  */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  /* SSE4.2 builtins: 64-bit compare and the CRC32 family (CRC32 has
     its own enable bit; the DImode form also needs a 64-bit target).  */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  /* SSE4A (AMD) builtins: bit-field extract/insert on XMM registers.  */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  /* AES and PCLMUL builtins.  The name field is 0 because these are
     registered separately (gated on their own -maes/-mpclmul flags);
     only the SSE2 baseline is recorded here.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
  /* AVX builtins: 256-bit FP arithmetic/logic, vpermilvar and blends.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  /* Variable permutes: the control operand is an integer vector.  */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
27530 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27531 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27532 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27533 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27535 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27536 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27537 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27538 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27540 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27541 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27543 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27546 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27549 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27552 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27557 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27560 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27563 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27564 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27566 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27572 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27574 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27575 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27576 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27577 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27579 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27580 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27582 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27583 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27585 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27586 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27587 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27588 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27590 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27591 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27593 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27594 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27596 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27597 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27598 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27599 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27601 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27602 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27603 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27604 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27605 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27606 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27608 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27609 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27610 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27611 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27612 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27613 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27614 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27615 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27616 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27617 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27618 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27619 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27620 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27621 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27622 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27624 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27625 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27627 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27628 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27630 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27633 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27634 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27635 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27636 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27637 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27638 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27639 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27640 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27641 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27642 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27643 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27644 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27645 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27646 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27711 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27712 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27713 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27714 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27715 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27747 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27748 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27749 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27750 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27751 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27752 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27753 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27754 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27755 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27756 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27757 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27758 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27759 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27760 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27761 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27762 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27763 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27764 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27765 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27766 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27767 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27768 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27769 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27770 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27771 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27772 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27773 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27774 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27775 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27776 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27777 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27778 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27780 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27783 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27784 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27785 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27788 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27789 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27792 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27793 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27794 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27795 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27798 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27799 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27800 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27801 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27802 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27803 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */
/* Shorthand aliases mapping the multi-argument builtin categories used in
   bdesc_multi_arg below onto the generic ix86 function-type enumerators.
   Naming scheme: MULTI_ARG_<nargs>_<element kind>[2 = 256-bit]
   [_<suffix>: _IMM = immediate operand, _CMP = comparison code operand,
   _TF = trivially true/false comparison, _I/_I1 = extra int immediate].  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27860 static const struct builtin_description bdesc_multi_arg
[] =
27862 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27863 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27864 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27865 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27866 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27867 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27869 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27870 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27871 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27872 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27873 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27874 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27876 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27877 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27878 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27879 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27880 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27881 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27882 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27883 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27884 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27885 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27886 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27887 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27889 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27890 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27891 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27892 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27893 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27894 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27895 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27896 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27897 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27898 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27899 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27900 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27902 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27903 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27904 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27905 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27906 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27907 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27908 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27910 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27911 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27912 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27913 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27914 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27915 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27916 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27918 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27920 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27921 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27922 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27923 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27924 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27925 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27926 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27927 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27928 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27929 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27930 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27931 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27933 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27934 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27935 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27936 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27937 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27938 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27939 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27940 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27941 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27942 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27943 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27944 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27945 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27946 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27947 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27948 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27950 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27951 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27952 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27953 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27954 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27955 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27957 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27958 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27959 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27960 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27961 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27962 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27963 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27964 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27965 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27966 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27967 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27968 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27969 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27970 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27971 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27973 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27974 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27975 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27976 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27977 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27978 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27979 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27984 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27986 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27987 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27992 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27994 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27995 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28000 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28002 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28003 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28008 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28010 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28011 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28013 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28016 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28018 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28019 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28022 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28024 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28026 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28027 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28029 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28030 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28031 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28032 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28033 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28034 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28035 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28037 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28038 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28039 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28040 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28041 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28042 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28043 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28044 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28046 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28047 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28048 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28049 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28050 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28051 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28052 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28053 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28055 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28056 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28057 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28058 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28062 /* TM vector builtins. */
28064 /* Reuse the existing x86-specific `struct builtin_description' cause
28065 we're lazy. Add casts to make them fit. */
28066 static const struct builtin_description bdesc_tm
[] =
28068 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28069 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28070 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28071 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28072 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28073 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28074 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28076 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28077 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28078 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28079 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28080 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28081 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28082 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28084 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28085 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28086 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28087 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28088 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28089 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28090 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28092 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28093 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28094 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28097 /* TM callbacks. */
28099 /* Return the builtin decl needed to load a vector of TYPE. */
28102 ix86_builtin_tm_load (tree type
)
28104 if (TREE_CODE (type
) == VECTOR_TYPE
)
28106 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28109 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28111 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28113 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28119 /* Return the builtin decl needed to store a vector of TYPE. */
28122 ix86_builtin_tm_store (tree type
)
28124 if (TREE_CODE (type
) == VECTOR_TYPE
)
28126 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28129 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28131 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28133 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28139 /* Initialize the transactional memory vector load/store builtins. */
28142 ix86_init_tm_builtins (void)
28144 enum ix86_builtin_func_type ftype
;
28145 const struct builtin_description
*d
;
28148 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28149 tree attrs_log
, attrs_type_log
;
28154 /* If there are no builtins defined, we must be compiling in a
28155 language without trans-mem support. */
28156 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28159 /* Use whatever attributes a normal TM load has. */
28160 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28161 attrs_load
= DECL_ATTRIBUTES (decl
);
28162 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28163 /* Use whatever attributes a normal TM store has. */
28164 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28165 attrs_store
= DECL_ATTRIBUTES (decl
);
28166 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28167 /* Use whatever attributes a normal TM log has. */
28168 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28169 attrs_log
= DECL_ATTRIBUTES (decl
);
28170 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28172 for (i
= 0, d
= bdesc_tm
;
28173 i
< ARRAY_SIZE (bdesc_tm
);
28176 if ((d
->mask
& ix86_isa_flags
) != 0
28177 || (lang_hooks
.builtin_function
28178 == lang_hooks
.builtin_function_ext_scope
))
28180 tree type
, attrs
, attrs_type
;
28181 enum built_in_function code
= (enum built_in_function
) d
->code
;
28183 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28184 type
= ix86_get_builtin_func_type (ftype
);
28186 if (BUILTIN_TM_LOAD_P (code
))
28188 attrs
= attrs_load
;
28189 attrs_type
= attrs_type_load
;
28191 else if (BUILTIN_TM_STORE_P (code
))
28193 attrs
= attrs_store
;
28194 attrs_type
= attrs_type_store
;
28199 attrs_type
= attrs_type_log
;
28201 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28202 /* The builtin without the prefix for
28203 calling it directly. */
28204 d
->name
+ strlen ("__builtin_"),
28206 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28207 set the TYPE_ATTRIBUTES. */
28208 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28210 set_builtin_decl (code
, decl
, false);
28215 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28216 in the current target ISA to allow the user to compile particular modules
28217 with different target specific options that differ from the command line
28220 ix86_init_mmx_sse_builtins (void)
28222 const struct builtin_description
* d
;
28223 enum ix86_builtin_func_type ftype
;
28226 /* Add all special builtins with variable number of operands. */
28227 for (i
= 0, d
= bdesc_special_args
;
28228 i
< ARRAY_SIZE (bdesc_special_args
);
28234 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28235 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28238 /* Add all builtins with variable number of operands. */
28239 for (i
= 0, d
= bdesc_args
;
28240 i
< ARRAY_SIZE (bdesc_args
);
28246 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28247 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28250 /* pcmpestr[im] insns. */
28251 for (i
= 0, d
= bdesc_pcmpestr
;
28252 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28255 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28256 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28258 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28259 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28262 /* pcmpistr[im] insns. */
28263 for (i
= 0, d
= bdesc_pcmpistr
;
28264 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28267 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28268 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28270 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28271 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28274 /* comi/ucomi insns. */
28275 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28277 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28278 ftype
= INT_FTYPE_V2DF_V2DF
;
28280 ftype
= INT_FTYPE_V4SF_V4SF
;
28281 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28285 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28286 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28287 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28288 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28290 /* SSE or 3DNow!A */
28291 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28292 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28293 IX86_BUILTIN_MASKMOVQ
);
28296 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28297 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28299 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28300 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28301 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28302 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28305 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28306 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28307 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28308 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28311 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28312 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28313 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28314 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28315 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28316 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28317 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28318 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28319 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28320 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28321 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28322 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28325 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28326 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28329 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28330 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28331 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28332 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28333 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28334 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28335 IX86_BUILTIN_RDRAND64_STEP
);
28338 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28339 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28340 IX86_BUILTIN_GATHERSIV2DF
);
28342 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28343 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28344 IX86_BUILTIN_GATHERSIV4DF
);
28346 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28347 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28348 IX86_BUILTIN_GATHERDIV2DF
);
28350 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28351 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28352 IX86_BUILTIN_GATHERDIV4DF
);
28354 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28355 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28356 IX86_BUILTIN_GATHERSIV4SF
);
28358 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28359 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28360 IX86_BUILTIN_GATHERSIV8SF
);
28362 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28363 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28364 IX86_BUILTIN_GATHERDIV4SF
);
28366 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28367 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28368 IX86_BUILTIN_GATHERDIV8SF
);
28370 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28371 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28372 IX86_BUILTIN_GATHERSIV2DI
);
28374 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28375 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28376 IX86_BUILTIN_GATHERSIV4DI
);
28378 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28379 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28380 IX86_BUILTIN_GATHERDIV2DI
);
28382 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28383 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28384 IX86_BUILTIN_GATHERDIV4DI
);
28386 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28387 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28388 IX86_BUILTIN_GATHERSIV4SI
);
28390 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28391 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28392 IX86_BUILTIN_GATHERSIV8SI
);
28394 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28395 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28396 IX86_BUILTIN_GATHERDIV4SI
);
28398 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28399 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28400 IX86_BUILTIN_GATHERDIV8SI
);
28402 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28403 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28404 IX86_BUILTIN_GATHERALTSIV4DF
);
28406 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28407 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28408 IX86_BUILTIN_GATHERALTDIV8SF
);
28410 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28411 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28412 IX86_BUILTIN_GATHERALTSIV4DI
);
28414 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28415 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28416 IX86_BUILTIN_GATHERALTDIV8SI
);
28419 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28420 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28422 /* MMX access to the vec_init patterns. */
28423 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28424 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28426 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28427 V4HI_FTYPE_HI_HI_HI_HI
,
28428 IX86_BUILTIN_VEC_INIT_V4HI
);
28430 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28431 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28432 IX86_BUILTIN_VEC_INIT_V8QI
);
28434 /* Access to the vec_extract patterns. */
28435 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28436 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28437 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28438 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28439 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28440 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28441 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28442 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28443 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28444 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28446 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28447 "__builtin_ia32_vec_ext_v4hi",
28448 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28450 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28451 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28453 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28454 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28456 /* Access to the vec_set patterns. */
28457 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28458 "__builtin_ia32_vec_set_v2di",
28459 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28461 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28462 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28464 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28465 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28467 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28468 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28470 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28471 "__builtin_ia32_vec_set_v4hi",
28472 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28474 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28475 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28478 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28479 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28480 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28481 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28482 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28483 "__builtin_ia32_rdseed_di_step",
28484 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28487 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28488 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28489 def_builtin (OPTION_MASK_ISA_64BIT
,
28490 "__builtin_ia32_addcarryx_u64",
28491 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28492 IX86_BUILTIN_ADDCARRYX64
);
28494 /* Add FMA4 multi-arg argument instructions */
28495 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28500 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28501 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28505 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28506 to return a pointer to VERSION_DECL if the outcome of the expression
28507 formed by PREDICATE_CHAIN is true. This function will be called during
28508 version dispatch to decide which function version to execute. It returns
28509 the basic block at the end, to which more conditions can be added. */
28512 add_condition_to_bb (tree function_decl
, tree version_decl
,
28513 tree predicate_chain
, basic_block new_bb
)
28515 gimple return_stmt
;
28516 tree convert_expr
, result_var
;
28517 gimple convert_stmt
;
28518 gimple call_cond_stmt
;
28519 gimple if_else_stmt
;
28521 basic_block bb1
, bb2
, bb3
;
28524 tree cond_var
, and_expr_var
= NULL_TREE
;
28527 tree predicate_decl
, predicate_arg
;
28529 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
28531 gcc_assert (new_bb
!= NULL
);
28532 gseq
= bb_seq (new_bb
);
28535 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
28536 build_fold_addr_expr (version_decl
));
28537 result_var
= create_tmp_var (ptr_type_node
, NULL
);
28538 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
28539 return_stmt
= gimple_build_return (result_var
);
28541 if (predicate_chain
== NULL_TREE
)
28543 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28544 gimple_seq_add_stmt (&gseq
, return_stmt
);
28545 set_bb_seq (new_bb
, gseq
);
28546 gimple_set_bb (convert_stmt
, new_bb
);
28547 gimple_set_bb (return_stmt
, new_bb
);
28552 while (predicate_chain
!= NULL
)
28554 cond_var
= create_tmp_var (integer_type_node
, NULL
);
28555 predicate_decl
= TREE_PURPOSE (predicate_chain
);
28556 predicate_arg
= TREE_VALUE (predicate_chain
);
28557 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
28558 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
28560 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
28561 gimple_set_bb (call_cond_stmt
, new_bb
);
28562 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
28564 predicate_chain
= TREE_CHAIN (predicate_chain
);
28566 if (and_expr_var
== NULL
)
28567 and_expr_var
= cond_var
;
28570 gimple assign_stmt
;
28571 /* Use MIN_EXPR to check if any integer is zero?.
28572 and_expr_var = min_expr <cond_var, and_expr_var> */
28573 assign_stmt
= gimple_build_assign (and_expr_var
,
28574 build2 (MIN_EXPR
, integer_type_node
,
28575 cond_var
, and_expr_var
));
28577 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
28578 gimple_set_bb (assign_stmt
, new_bb
);
28579 gimple_seq_add_stmt (&gseq
, assign_stmt
);
28583 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
28585 NULL_TREE
, NULL_TREE
);
28586 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
28587 gimple_set_bb (if_else_stmt
, new_bb
);
28588 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
28590 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28591 gimple_seq_add_stmt (&gseq
, return_stmt
);
28592 set_bb_seq (new_bb
, gseq
);
28595 e12
= split_block (bb1
, if_else_stmt
);
28597 e12
->flags
&= ~EDGE_FALLTHRU
;
28598 e12
->flags
|= EDGE_TRUE_VALUE
;
28600 e23
= split_block (bb2
, return_stmt
);
28602 gimple_set_bb (convert_stmt
, bb2
);
28603 gimple_set_bb (return_stmt
, bb2
);
28606 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
28609 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
28616 /* This parses the attribute arguments to target in DECL and determines
28617 the right builtin to use to match the platform specification.
28618 It returns the priority value for this version decl. If PREDICATE_LIST
28619 is not NULL, it stores the list of cpu features that need to be checked
28620 before dispatching this function. */
28622 static unsigned int
28623 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
28626 struct cl_target_option cur_target
;
28628 struct cl_target_option
*new_target
;
28629 const char *arg_str
= NULL
;
28630 const char *attrs_str
= NULL
;
28631 char *tok_str
= NULL
;
28634 /* Priority of i386 features, greater value is higher priority. This is
28635 used to decide the order in which function dispatch must happen. For
28636 instance, a version specialized for SSE4.2 should be checked for dispatch
28637 before a version for SSE3, as SSE4.2 implies SSE3. */
28638 enum feature_priority
28659 enum feature_priority priority
= P_ZERO
;
28661 /* These are the target attribute strings for which a dispatcher is
28662 available, from fold_builtin_cpu. */
28664 static struct _feature_list
28666 const char *const name
;
28667 const enum feature_priority priority
;
28669 const feature_list
[] =
28675 {"ssse3", P_SSSE3
},
28676 {"sse4.1", P_SSE4_1
},
28677 {"sse4.2", P_SSE4_2
},
28678 {"popcnt", P_POPCNT
},
28684 static unsigned int NUM_FEATURES
28685 = sizeof (feature_list
) / sizeof (struct _feature_list
);
28689 tree predicate_chain
= NULL_TREE
;
28690 tree predicate_decl
, predicate_arg
;
28692 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28693 gcc_assert (attrs
!= NULL
);
28695 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
28697 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
28698 attrs_str
= TREE_STRING_POINTER (attrs
);
28700 /* Return priority zero for default function. */
28701 if (strcmp (attrs_str
, "default") == 0)
28704 /* Handle arch= if specified. For priority, set it to be 1 more than
28705 the best instruction set the processor can handle. For instance, if
28706 there is a version for atom and a version for ssse3 (the highest ISA
28707 priority for atom), the atom version must be checked for dispatch
28708 before the ssse3 version. */
28709 if (strstr (attrs_str
, "arch=") != NULL
)
28711 cl_target_option_save (&cur_target
, &global_options
);
28712 target_node
= ix86_valid_target_attribute_tree (attrs
);
28714 gcc_assert (target_node
);
28715 new_target
= TREE_TARGET_OPTION (target_node
);
28716 gcc_assert (new_target
);
28718 if (new_target
->arch_specified
&& new_target
->arch
> 0)
28720 switch (new_target
->arch
)
28722 case PROCESSOR_CORE2
:
28724 priority
= P_PROC_SSSE3
;
28726 case PROCESSOR_COREI7
:
28727 arg_str
= "corei7";
28728 priority
= P_PROC_SSE4_2
;
28730 case PROCESSOR_ATOM
:
28732 priority
= P_PROC_SSSE3
;
28734 case PROCESSOR_AMDFAM10
:
28735 arg_str
= "amdfam10h";
28736 priority
= P_PROC_SSE4_a
;
28738 case PROCESSOR_BDVER1
:
28739 arg_str
= "bdver1";
28740 priority
= P_PROC_FMA
;
28742 case PROCESSOR_BDVER2
:
28743 arg_str
= "bdver2";
28744 priority
= P_PROC_FMA
;
28749 cl_target_option_restore (&global_options
, &cur_target
);
28751 if (predicate_list
&& arg_str
== NULL
)
28753 error_at (DECL_SOURCE_LOCATION (decl
),
28754 "No dispatcher found for the versioning attributes");
28758 if (predicate_list
)
28760 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
28761 /* For a C string literal the length includes the trailing NULL. */
28762 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
28763 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28768 /* Process feature name. */
28769 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
28770 strcpy (tok_str
, attrs_str
);
28771 token
= strtok (tok_str
, ",");
28772 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
28774 while (token
!= NULL
)
28776 /* Do not process "arch=" */
28777 if (strncmp (token
, "arch=", 5) == 0)
28779 token
= strtok (NULL
, ",");
28782 for (i
= 0; i
< NUM_FEATURES
; ++i
)
28784 if (strcmp (token
, feature_list
[i
].name
) == 0)
28786 if (predicate_list
)
28788 predicate_arg
= build_string_literal (
28789 strlen (feature_list
[i
].name
) + 1,
28790 feature_list
[i
].name
);
28791 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28794 /* Find the maximum priority feature. */
28795 if (feature_list
[i
].priority
> priority
)
28796 priority
= feature_list
[i
].priority
;
28801 if (predicate_list
&& i
== NUM_FEATURES
)
28803 error_at (DECL_SOURCE_LOCATION (decl
),
28804 "No dispatcher found for %s", token
);
28807 token
= strtok (NULL
, ",");
28811 if (predicate_list
&& predicate_chain
== NULL_TREE
)
28813 error_at (DECL_SOURCE_LOCATION (decl
),
28814 "No dispatcher found for the versioning attributes : %s",
28818 else if (predicate_list
)
28820 predicate_chain
= nreverse (predicate_chain
);
28821 *predicate_list
= predicate_chain
;
28827 /* This compares the priority of target features in function DECL1
28828 and DECL2. It returns positive value if DECL1 is higher priority,
28829 negative value if DECL2 is higher priority and 0 if they are the
28833 ix86_compare_version_priority (tree decl1
, tree decl2
)
28835 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
28836 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
28838 return (int)priority1
- (int)priority2
;
28841 /* V1 and V2 point to function versions with different priorities
28842 based on the target ISA. This function compares their priorities. */
28845 feature_compare (const void *v1
, const void *v2
)
28847 typedef struct _function_version_info
28850 tree predicate_chain
;
28851 unsigned int dispatch_priority
;
28852 } function_version_info
;
28854 const function_version_info c1
= *(const function_version_info
*)v1
;
28855 const function_version_info c2
= *(const function_version_info
*)v2
;
28856 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
28859 /* This function generates the dispatch function for
28860 multi-versioned functions. DISPATCH_DECL is the function which will
28861 contain the dispatch logic. FNDECLS are the function choices for
28862 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
28863 in DISPATCH_DECL in which the dispatch code is generated. */
28866 dispatch_function_versions (tree dispatch_decl
,
28868 basic_block
*empty_bb
)
28871 gimple ifunc_cpu_init_stmt
;
28875 vec
<tree
> *fndecls
;
28876 unsigned int num_versions
= 0;
28877 unsigned int actual_versions
= 0;
28880 struct _function_version_info
28883 tree predicate_chain
;
28884 unsigned int dispatch_priority
;
28885 }*function_version_info
;
28887 gcc_assert (dispatch_decl
!= NULL
28888 && fndecls_p
!= NULL
28889 && empty_bb
!= NULL
);
28891 /*fndecls_p is actually a vector. */
28892 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
28894 /* At least one more version other than the default. */
28895 num_versions
= fndecls
->length ();
28896 gcc_assert (num_versions
>= 2);
28898 function_version_info
= (struct _function_version_info
*)
28899 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
28901 /* The first version in the vector is the default decl. */
28902 default_decl
= (*fndecls
)[0];
28904 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
28906 gseq
= bb_seq (*empty_bb
);
28907 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
28908 constructors, so explicity call __builtin_cpu_init here. */
28909 ifunc_cpu_init_stmt
= gimple_build_call_vec (
28910 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
28911 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
28912 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
28913 set_bb_seq (*empty_bb
, gseq
);
28918 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
28920 tree version_decl
= ele
;
28921 tree predicate_chain
= NULL_TREE
;
28922 unsigned int priority
;
28923 /* Get attribute string, parse it and find the right predicate decl.
28924 The predicate function could be a lengthy combination of many
28925 features, like arch-type and various isa-variants. */
28926 priority
= get_builtin_code_for_version (version_decl
,
28929 if (predicate_chain
== NULL_TREE
)
28933 function_version_info
[ix
- 1].version_decl
= version_decl
;
28934 function_version_info
[ix
- 1].predicate_chain
= predicate_chain
;
28935 function_version_info
[ix
- 1].dispatch_priority
= priority
;
28938 /* Sort the versions according to descending order of dispatch priority. The
28939 priority is based on the ISA. This is not a perfect solution. There
28940 could still be ambiguity. If more than one function version is suitable
28941 to execute, which one should be dispatched? In future, allow the user
28942 to specify a dispatch priority next to the version. */
28943 qsort (function_version_info
, actual_versions
,
28944 sizeof (struct _function_version_info
), feature_compare
);
28946 for (i
= 0; i
< actual_versions
; ++i
)
28947 *empty_bb
= add_condition_to_bb (dispatch_decl
,
28948 function_version_info
[i
].version_decl
,
28949 function_version_info
[i
].predicate_chain
,
28952 /* dispatch default version at the end. */
28953 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
28956 free (function_version_info
);
28960 /* Comparator function to be used in qsort routine to sort attribute
28961 specification strings to "target". */
/* qsort () comparator used when sorting "target" attribute argument
   strings: each element is a char *, so compare the pointed-to strings
   lexicographically with strcmp.  */
static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *s1 = *(char *const *) v1;
  const char *s2 = *(char *const *) v2;
  return strcmp (s1, s2);
}
28971 /* ARGLIST is the argument to target attribute. This function tokenizes
28972 the comma separated arguments, sorts them and returns a string which
28973 is a unique identifier for the comma separated arguments. It also
28974 replaces non-identifier characters "=,-" with "_". */
/* sorted_attr_string (extraction-damaged fragment; several original
   lines -- loop bodies, the "argnum++" counting, and the final
   "return ret_str;" -- are missing from this chunk):
   Builds a canonical identifier for the comma-separated "target"
   attribute arguments in ARGLIST: measure and concatenate all the
   strings, replace '=' ',' '-' with '_', split on ',' with strtok,
   sort the pieces with qsort/attr_strcmp, and rejoin them with '_'
   separators into a freshly XNEWVEC'd buffer.
   NOTE(review): strtok is not reentrant; strtok_r would be safer if
   this can ever run concurrently -- confirm against upstream.  */
28977 sorted_attr_string (tree arglist
)
28980 size_t str_len_sum
= 0;
28981 char **args
= NULL
;
28982 char *attr_str
, *ret_str
;
28984 unsigned int argnum
= 1;
/* First pass: total up the lengths (plus separators) of all argument
   strings so one buffer can hold the concatenation.  */
28987 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
28989 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
28990 size_t len
= strlen (str
);
28991 str_len_sum
+= len
+ 1;
28992 if (arg
!= arglist
)
28994 for (i
= 0; i
< strlen (str
); i
++)
28999 attr_str
= XNEWVEC (char, str_len_sum
);
/* Second pass: copy each argument into attr_str, comma-separated.
   NOTE(review): str_len_sum is presumably reset to 0 in a line lost to
   extraction before being reused as the write offset -- TODO confirm.  */
29001 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29003 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29004 size_t len
= strlen (str
);
29005 memcpy (attr_str
+ str_len_sum
, str
, len
);
29006 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
29007 str_len_sum
+= len
+ 1;
29010 /* Replace "=,-" with "_". */
29011 for (i
= 0; i
< strlen (attr_str
); i
++)
29012 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
29018 args
= XNEWVEC (char *, argnum
);
/* Tokenize on ',' and collect the pieces (collection lines missing).  */
29021 attr
= strtok (attr_str
, ",");
29022 while (attr
!= NULL
)
29026 attr
= strtok (NULL
, ",");
/* Sort the tokens so equivalent attribute lists compare equal.  */
29029 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
29031 ret_str
= XNEWVEC (char, str_len_sum
);
29033 for (i
= 0; i
< argnum
; i
++)
29035 size_t len
= strlen (args
[i
]);
29036 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
29037 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
29038 str_len_sum
+= len
+ 1;
29042 XDELETEVEC (attr_str
);
29046 /* This function changes the assembler name for functions that are
29047 versions. If DECL is a function version and has a "target"
29048 attribute, it appends the attribute string to its assembler name. */
/* ix86_mangle_function_version_assembler_name (extraction-damaged
   fragment): for a versioned function DECL, append the sorted "target"
   attribute string to assembler name ID, producing "<name>.<attrs>" in
   a freshly allocated buffer.  gnu_inline and virtual functions are
   diagnosed; the "default" version keeps its name (early-exit lines
   missing here).  The trailing "return ret;" is not visible in this
   chunk.  */
29051 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29054 const char *orig_name
, *version_string
;
29055 char *attr_str
, *assembler_name
;
/* Versions must have generated bodies, so gnu_inline is rejected.  */
29057 if (DECL_DECLARED_INLINE_P (decl
)
29058 && lookup_attribute ("gnu_inline",
29059 DECL_ATTRIBUTES (decl
)))
29060 error_at (DECL_SOURCE_LOCATION (decl
),
29061 "Function versions cannot be marked as gnu_inline,"
29062 " bodies have to be generated");
29064 if (DECL_VIRTUAL_P (decl
)
29065 || DECL_VINDEX (decl
))
29066 sorry ("Virtual function multiversioning not supported");
29068 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29070 /* A versioned decl must carry a "target" attribute string.  */
29071 gcc_assert (version_attr
!= NULL_TREE
);
29073 orig_name
= IDENTIFIER_POINTER (id
);
29075 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29077 if (strcmp (version_string
, "default") == 0)
29080 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
/* "<orig>.<attrs>" plus '.' and NUL.  */
29081 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
29083 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29085 /* Allow assembler name to be modified if already set. */
29086 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29087 SET_DECL_RTL (decl
, NULL
);
29089 tree ret
= get_identifier (assembler_name
);
29090 XDELETEVEC (attr_str
);
29091 XDELETEVEC (assembler_name
);
29095 /* This function returns true if FN1 and FN2 are versions of the same function,
29096 that is, the target strings of the function decls are different. This assumes
29097 that FN1 and FN2 have the same signature. */
/* ix86_function_versions (extraction-damaged fragment): target hook
   deciding whether FN1 and FN2 are distinct versions of the same
   function.  Both must be FUNCTION_DECLs and their sorted "target"
   attribute strings must differ.  When exactly one decl lacks the
   attribute but its counterpart is already versioned, a missing-
   attribute error is emitted and the attribute is copied over to
   suppress repeated diagnostics.  Early "return false;" lines and the
   final result lines are not visible in this chunk.  */
29100 ix86_function_versions (tree fn1
, tree fn2
)
29103 char *target1
, *target2
;
29106 if (TREE_CODE (fn1
) != FUNCTION_DECL
29107 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29110 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29111 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29113 /* At least one function decl should have the target attribute specified. */
29114 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29117 /* Diagnose missing target attribute if one of the decls is already
29118 multi-versioned. */
29119 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
29121 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
29123 if (attr2
!= NULL_TREE
)
29130 error_at (DECL_SOURCE_LOCATION (fn2
),
29131 "missing %<target%> attribute for multi-versioned %D",
29133 error_at (DECL_SOURCE_LOCATION (fn1
),
29134 "previous declaration of %D", fn1
);
29135 /* Prevent diagnosing of the same error multiple times. */
29136 DECL_ATTRIBUTES (fn2
)
29137 = tree_cons (get_identifier ("target"),
29138 copy_node (TREE_VALUE (attr1
)),
29139 DECL_ATTRIBUTES (fn2
));
/* Compare canonicalized attribute strings; equal strings mean the two
   decls are NOT distinct versions.  */
29144 target1
= sorted_attr_string (TREE_VALUE (attr1
));
29145 target2
= sorted_attr_string (TREE_VALUE (attr2
));
29147 /* The sorted target strings must be different for fn1 and fn2
   to be considered distinct versions.  */
29149 if (strcmp (target1
, target2
) == 0)
29154 XDELETEVEC (target1
);
29155 XDELETEVEC (target2
);
/* ix86_mangle_decl_assembler_name (extraction-damaged fragment):
   assembler-name mangling hook.  Versioned FUNCTION_DECLs get their
   "target" suffix via ix86_mangle_function_version_assembler_name,
   then the subtarget may further adjust ID.  The closing "#endif" and
   "return id;" lines are not visible in this chunk.  */
29161 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29163 /* For function version, add the target suffix to the assembler name. */
29164 if (TREE_CODE (decl
) == FUNCTION_DECL
29165 && DECL_FUNCTION_VERSIONED (decl
))
29166 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29167 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29168 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29174 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29175 is true, append the full path name of the source file. */
/* make_name (extraction-damaged fragment): return a heap-allocated
   name "<DECL assembler name>.<SUFFIX>", or
   "<name>.<unique file id>.<SUFFIX>" when MAKE_UNIQUE, joined with '.'
   because that is demangler friendly.  Caller owns (XDELETEVECs) the
   returned buffer.  */
29178 make_name (tree decl
, const char *suffix
, bool make_unique
)
29180 char *global_var_name
;
29183 const char *unique_name
= NULL
;
29185 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29187 /* Get a unique name that can be used globally without any chances
29188 of collision at link time. */
29190 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
/* Base length: name + '.' + suffix + NUL.  */
29192 name_len
= strlen (name
) + strlen (suffix
) + 2;
29195 name_len
+= strlen (unique_name
) + 1;
29196 global_var_name
= XNEWVEC (char, name_len
);
29198 /* Use '.' to concatenate names as it is demangler friendly. */
29200 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
29203 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29205 return global_var_name
;
29208 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29210 /* Make a dispatcher declaration for the multi-versioned function DECL.
29211 Calls to DECL function will be replaced with calls to the dispatcher
29212 by the front-end. Return the decl created. */
/* make_dispatcher_decl (extraction-damaged fragment): create the ifunc
   dispatcher FUNCTION_DECL named "<decl>.ifunc" with DECL's function
   type.  It is artificial, public (IFUNCs must be externally visible)
   and initially external -- the resolver flips DECL_EXTERNAL when the
   body is generated.  The final "return func_decl;" is not visible in
   this chunk.  */
29215 make_dispatcher_decl (const tree decl
)
29219 tree fn_type
, func_type
;
29220 bool is_uniq
= false;
29222 if (TREE_PUBLIC (decl
) == 0)
29225 func_name
= make_name (decl
, "ifunc", is_uniq
);
29227 fn_type
= TREE_TYPE (decl
);
29228 func_type
= build_function_type (TREE_TYPE (fn_type
),
29229 TYPE_ARG_TYPES (fn_type
));
29231 func_decl
= build_fn_decl (func_name
, func_type
);
29232 XDELETEVEC (func_name
);
29233 TREE_USED (func_decl
) = 1;
29234 DECL_CONTEXT (func_decl
) = NULL_TREE
;
29235 DECL_INITIAL (func_decl
) = error_mark_node
;
29236 DECL_ARTIFICIAL (func_decl
) = 1;
29237 /* Mark this func as external, the resolver will flip it again if
29238 it gets generated. */
29239 DECL_EXTERNAL (func_decl
) = 1;
29240 /* IFUNCs have to be externally visible.  */
29241 TREE_PUBLIC (func_decl
) = 1;
29248 /* Returns true if decl is multi-versioned and DECL is the default function,
29249 that is it is not tagged with target specific optimization. */
/* is_function_default_version (extraction-damaged fragment): true iff
   DECL is a versioned FUNCTION_DECL whose "target" attribute string is
   exactly "default".  The early "return false;" lines are not visible
   in this chunk.  */
29252 is_function_default_version (const tree decl
)
29254 if (TREE_CODE (decl
) != FUNCTION_DECL
29255 || !DECL_FUNCTION_VERSIONED (decl
))
29257 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29259 attr
= TREE_VALUE (TREE_VALUE (attr
));
29260 return (TREE_CODE (attr
) == STRING_CST
29261 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
29264 /* Make a dispatcher declaration for the multi-versioned function DECL.
29265 Calls to DECL function will be replaced with calls to the dispatcher
29266 by the front-end. Returns the decl of the dispatcher function. */
/* ix86_get_function_versions_dispatcher (extraction-damaged fragment):
   find or create the ifunc dispatcher decl for versioned function
   DECL.  Walks the cgraph version chain for the default version and
   moves it to the front of the chain; when ifunc support is compiled
   in, creates the dispatcher cgraph node and records it as
   dispatcher_resolver on every version, otherwise reports an error.
   Some lines (e.g. the advance of it_v inside the final loop, the
   initial assignment of first_v) were lost in extraction.  */
29269 ix86_get_function_versions_dispatcher (void *decl
)
29271 tree fn
= (tree
) decl
;
29272 struct cgraph_node
*node
= NULL
;
29273 struct cgraph_node
*default_node
= NULL
;
29274 struct cgraph_function_version_info
*node_v
= NULL
;
29275 struct cgraph_function_version_info
*first_v
= NULL
;
29277 tree dispatch_decl
= NULL
;
29279 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29280 struct cgraph_function_version_info
*it_v
= NULL
;
29281 struct cgraph_node
*dispatcher_node
= NULL
;
29282 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29285 struct cgraph_function_version_info
*default_version_info
= NULL
;
29287 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29289 node
= cgraph_get_node (fn
);
29290 gcc_assert (node
!= NULL
);
29292 node_v
= get_cgraph_node_version (node
);
29293 gcc_assert (node_v
!= NULL
);
/* Reuse a dispatcher already created for this version set.  */
29295 if (node_v
->dispatcher_resolver
!= NULL
)
29296 return node_v
->dispatcher_resolver
;
29298 /* Find the default version and make it the first node. */
29300 /* Go to the beginning of the chain. */
29301 while (first_v
->prev
!= NULL
)
29302 first_v
= first_v
->prev
;
29303 default_version_info
= first_v
;
29304 while (default_version_info
!= NULL
)
29306 if (is_function_default_version
29307 (default_version_info
->this_node
->symbol
.decl
))
29309 default_version_info
= default_version_info
->next
;
29312 /* If there is no default node, just return NULL. */
29313 if (default_version_info
== NULL
)
29316 /* Make default info the first node. */
29317 if (first_v
!= default_version_info
)
/* Unlink the default node and splice it in at the head.  */
29319 default_version_info
->prev
->next
= default_version_info
->next
;
29320 if (default_version_info
->next
)
29321 default_version_info
->next
->prev
= default_version_info
->prev
;
29322 first_v
->prev
= default_version_info
;
29323 default_version_info
->next
= first_v
;
29324 default_version_info
->prev
= NULL
;
29327 default_node
= default_version_info
->this_node
;
29329 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29330 /* Right now, the dispatching is done via ifunc. */
29331 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29333 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29334 gcc_assert (dispatcher_node
!= NULL
);
29335 dispatcher_node
->dispatcher_function
= 1;
29336 dispatcher_version_info
29337 = insert_new_cgraph_node_version (dispatcher_node
);
29338 dispatcher_version_info
->next
= default_version_info
;
29339 dispatcher_node
->local
.finalized
= 1;
29341 /* Set the dispatcher for all the versions. */
29342 it_v
= default_version_info
;
29343 while (it_v
!= NULL
)
29345 it_v
->dispatcher_resolver
= dispatch_decl
;
/* Without ifunc support multiversioning cannot be dispatched.  */
29349 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29350 "multiversioning needs ifunc which is not supported "
29351 "in this configuration");
29353 return dispatch_decl
;
29356 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
   it to CHAIN.  */
/* make_attribute (extraction-damaged fragment): build the attribute
   tree NAME (ARG_NAME) -- identifier NAME with a single string
   argument -- chained onto CHAIN.  The local declarations of
   attr_name/attr/attr_args and the final "return attr;" are not
   visible in this chunk.  */
29360 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29363 tree attr_arg_name
;
29367 attr_name
= get_identifier (name
);
29368 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
29369 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
29370 attr
= tree_cons (attr_name
, attr_args
, chain
);
29374 /* Make the resolver function decl to dispatch the versions of
29375 a multi-versioned function, DEFAULT_DECL. Create an
29376 empty basic block in the resolver and store the pointer in
29377 EMPTY_BB. Return the decl of the resolver function. */
/* make_resolver_func (extraction-damaged fragment): build the ifunc
   resolver decl "<default_decl>.resolver" returning (void *).  The
   resolver is public and non-external (its body is generated), made
   comdat/one-only when the default is public or comdat, given a
   RESULT_DECL and an empty lowered body (stored into *EMPTY_BB), and
   registered with the cgraph.  DISPATCH_DECL gets the "ifunc"
   attribute naming this resolver and is aliased to it.  The final
   "return decl;" is not visible in this chunk.  */
29380 make_resolver_func (const tree default_decl
,
29381 const tree dispatch_decl
,
29382 basic_block
*empty_bb
)
29384 char *resolver_name
;
29385 tree decl
, type
, decl_name
, t
;
29386 bool is_uniq
= false;
29388 /* IFUNC's have to be globally visible. So, if the default_decl is
29389 not, then the name of the IFUNC should be made unique. */
29390 if (TREE_PUBLIC (default_decl
) == 0)
29393 /* Append the filename to the resolver function if the versions are
29394 not externally visible. This is because the resolver function has
29395 to be externally visible for the loader to find it. So, appending
29396 the filename will prevent conflicts with a resolver function from
29397 another module which is based on the same version name. */
29398 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29400 /* The resolver function should return a (void *). */
29401 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29403 decl
= build_fn_decl (resolver_name
, type
);
29404 decl_name
= get_identifier (resolver_name
);
29405 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29407 DECL_NAME (decl
) = decl_name
;
29408 TREE_USED (decl
) = 1;
29409 DECL_ARTIFICIAL (decl
) = 1;
29410 DECL_IGNORED_P (decl
) = 0;
29411 /* IFUNC resolvers have to be externally visible. */
29412 TREE_PUBLIC (decl
) = 1;
29413 DECL_UNINLINABLE (decl
) = 0;
29415 /* Resolver is not external, body is generated. */
29416 DECL_EXTERNAL (decl
) = 0;
29417 DECL_EXTERNAL (dispatch_decl
) = 0;
29419 DECL_CONTEXT (decl
) = NULL_TREE
;
29420 DECL_INITIAL (decl
) = make_node (BLOCK
);
29421 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29423 if (DECL_COMDAT_GROUP (default_decl
)
29424 || TREE_PUBLIC (default_decl
))
29426 /* In this case, each translation unit with a call to this
29427 versioned function will put out a resolver. Ensure it
29428 is comdat to keep just one copy. */
29429 DECL_COMDAT (decl
) = 1;
29430 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29432 /* Build result decl and add to function_decl. */
29433 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29434 DECL_ARTIFICIAL (t
) = 1;
29435 DECL_IGNORED_P (t
) = 1;
29436 DECL_RESULT (decl
) = t
;
29438 gimplify_function_tree (decl
);
29439 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29440 *empty_bb
= init_lowered_empty_function (decl
, false);
29442 cgraph_add_new_function (decl
, true);
29443 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29447 gcc_assert (dispatch_decl
!= NULL
);
29448 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29449 DECL_ATTRIBUTES (dispatch_decl
)
29450 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29452 /* Create the alias for dispatch to resolver here. */
29453 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29454 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29455 XDELETEVEC (resolver_name
);
29459 /* Generate the dispatching code body to dispatch multi-versioned function
29460 DECL. The target hook is called to process the "target" attributes and
29461 provide the code to dispatch the right function at run-time. NODE points
29462 to the dispatcher decl whose body will be created. */
/* ix86_generate_version_dispatcher_body (extraction-damaged fragment):
   target hook that materializes the dispatcher body for cgraph node
   NODE_P.  Creates the resolver via make_resolver_func (the node
   itself becomes an alias, so its finalized bit is cleared), collects
   every version decl into fn_ver_vec -- rejecting virtual functions --
   and hands them to dispatch_function_versions.  Returns the resolver
   decl, caching it in the node's version info.  */
29465 ix86_generate_version_dispatcher_body (void *node_p
)
29467 tree resolver_decl
;
29468 basic_block empty_bb
;
29469 vec
<tree
> fn_ver_vec
= vNULL
;
29470 tree default_ver_decl
;
29471 struct cgraph_node
*versn
;
29472 struct cgraph_node
*node
;
29474 struct cgraph_function_version_info
*node_version_info
= NULL
;
29475 struct cgraph_function_version_info
*versn_info
= NULL
;
29477 node
= (cgraph_node
*)node_p
;
29479 node_version_info
= get_cgraph_node_version (node
);
29480 gcc_assert (node
->dispatcher_function
29481 && node_version_info
!= NULL
);
/* Reuse an already-generated resolver.  */
29483 if (node_version_info
->dispatcher_resolver
)
29484 return node_version_info
->dispatcher_resolver
;
29486 /* The first version in the chain corresponds to the default version. */
29487 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
29489 /* node is going to be an alias, so remove the finalized bit. */
29490 node
->local
.finalized
= false;
29492 resolver_decl
= make_resolver_func (default_ver_decl
,
29493 node
->symbol
.decl
, &empty_bb
);
29495 node_version_info
->dispatcher_resolver
= resolver_decl
;
29497 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
29499 fn_ver_vec
.create (2);
29501 for (versn_info
= node_version_info
->next
; versn_info
;
29502 versn_info
= versn_info
->next
)
29504 versn
= versn_info
->this_node
;
29505 /* Check for virtual functions here again, as by this time it should
29506 have been determined if this function needs a vtable index or
29507 not. This happens for methods in derived classes that override
29508 virtual methods in base classes but are not explicitly marked as
29510 if (DECL_VINDEX (versn
->symbol
.decl
))
29511 sorry ("Virtual function multiversioning not supported");
29513 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
29516 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
29517 fn_ver_vec
.release ();
29518 rebuild_cgraph_edges ();
29520 return resolver_decl
;
29522 /* This builds the processor_model struct type defined in
29523 libgcc/config/i386/cpuinfo.c */
/* build_processor_model_struct (extraction-damaged fragment): build
   the RECORD_TYPE mirroring struct __processor_model defined in
   libgcc/config/i386/cpuinfo.c: three unsigned int fields followed by
   a one-element unsigned int array (the features word).  The fourth
   field_name initializer entry and the final "return type;" are not
   visible in this chunk.  Note fields are chained newest-first into
   field_chain.  */
29526 build_processor_model_struct (void)
29528 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29530 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29532 tree type
= make_node (RECORD_TYPE
);
29534 /* The first 3 fields are unsigned int. */
29535 for (i
= 0; i
< 3; ++i
)
29537 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29538 get_identifier (field_name
[i
]), unsigned_type_node
);
29539 if (field_chain
!= NULL_TREE
)
29540 DECL_CHAIN (field
) = field_chain
;
29541 field_chain
= field
;
29544 /* The last field is an array of unsigned integers of size one. */
29545 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29546 get_identifier (field_name
[3]),
29547 build_array_type (unsigned_type_node
,
29548 build_index_type (size_one_node
)));
29549 if (field_chain
!= NULL_TREE
)
29550 DECL_CHAIN (field
) = field_chain
;
29551 field_chain
= field
;
29553 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29557 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
/* make_var_decl (extraction-damaged fragment): build an extern,
   public, preserved, one-only VAR_DECL of TYPE named NAME -- used to
   reference libgcc's __cpu_model variable.  The final
   "return new_decl;" is not visible in this chunk.  */
29560 make_var_decl (tree type
, const char *name
)
29564 new_decl
= build_decl (UNKNOWN_LOCATION
,
29566 get_identifier(name
),
29569 DECL_EXTERNAL (new_decl
) = 1;
29570 TREE_STATIC (new_decl
) = 1;
29571 TREE_PUBLIC (new_decl
) = 1;
29572 DECL_INITIAL (new_decl
) = 0;
29573 DECL_ARTIFICIAL (new_decl
) = 0;
29574 DECL_PRESERVE_P (new_decl
) = 1;
29576 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29577 assemble_variable (new_decl
, 0, 0, 0);
29582 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29583 into an integer defined in libgcc/config/i386/cpuinfo.c */
/* fold_builtin_cpu (extraction-damaged fragment): fold a
   __builtin_cpu_is or __builtin_cpu_supports call (FNDECL with ARGS)
   into a tree expression reading libgcc's __cpu_model variable.  The
   string argument is matched against arch_names_table (cpu_is) or
   isa_names_table (cpu_supports).  For cpu_is, the matching field of
   __cpu_model is compared for equality with the model value -- cpu
   type/subtype values select later fields and are rebased by their
   *_START markers.  For cpu_supports, bit (1 << feature) of
   __cpu_features[0] is tested.  An unrecognized name yields an error
   and integer_zero_node.  Many enum members and table rows were
   dropped by the extraction.  */
29586 fold_builtin_cpu (tree fndecl
, tree
*args
)
29589 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29590 DECL_FUNCTION_CODE (fndecl
);
29591 tree param_string_cst
= NULL
;
29593 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29594 enum processor_features
29610 /* These are the values for vendor types and cpu types and subtypes
29611 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29612 the corresponding start value. */
29613 enum processor_model
29623 M_CPU_SUBTYPE_START
,
29624 M_INTEL_COREI7_NEHALEM
,
29625 M_INTEL_COREI7_WESTMERE
,
29626 M_INTEL_COREI7_SANDYBRIDGE
,
29627 M_AMDFAM10H_BARCELONA
,
29628 M_AMDFAM10H_SHANGHAI
,
29629 M_AMDFAM10H_ISTANBUL
,
29630 M_AMDFAM15H_BDVER1
,
29631 M_AMDFAM15H_BDVER2
,
29635 static struct _arch_names_table
29637 const char *const name
;
29638 const enum processor_model model
;
29640 const arch_names_table
[] =
29643 {"intel", M_INTEL
},
29644 {"atom", M_INTEL_ATOM
},
29645 {"core2", M_INTEL_CORE2
},
29646 {"corei7", M_INTEL_COREI7
},
29647 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29648 {"westmere", M_INTEL_COREI7_WESTMERE
},
29649 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29650 {"amdfam10h", M_AMDFAM10H
},
29651 {"barcelona", M_AMDFAM10H_BARCELONA
},
29652 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29653 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29654 {"amdfam15h", M_AMDFAM15H
},
29655 {"bdver1", M_AMDFAM15H_BDVER1
},
29656 {"bdver2", M_AMDFAM15H_BDVER2
},
29657 {"bdver3", M_AMDFAM15H_BDVER3
},
29660 static struct _isa_names_table
29662 const char *const name
;
29663 const enum processor_features feature
;
29665 const isa_names_table
[] =
29669 {"popcnt", F_POPCNT
},
29673 {"ssse3", F_SSSE3
},
29674 {"sse4.1", F_SSE4_1
},
29675 {"sse4.2", F_SSE4_2
},
29680 tree __processor_model_type
= build_processor_model_struct ();
29681 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
29684 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29686 param_string_cst
= *args
;
/* Strip wrapping expressions until the STRING_CST argument appears.  */
29687 while (param_string_cst
29688 && TREE_CODE (param_string_cst
) != STRING_CST
)
29690 /* *args must be an expr that can contain other EXPRS leading to a
29692 if (!EXPR_P (param_string_cst
))
29694 error ("Parameter to builtin must be a string constant or literal");
29695 return integer_zero_node
;
29697 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29700 gcc_assert (param_string_cst
);
29702 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29708 unsigned int field_val
= 0;
29709 unsigned int NUM_ARCH_NAMES
29710 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
29712 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29713 if (strcmp (arch_names_table
[i
].name
,
29714 TREE_STRING_POINTER (param_string_cst
)) == 0)
29717 if (i
== NUM_ARCH_NAMES
)
29719 error ("Parameter to builtin not valid: %s",
29720 TREE_STRING_POINTER (param_string_cst
));
29721 return integer_zero_node
;
29724 field
= TYPE_FIELDS (__processor_model_type
);
29725 field_val
= arch_names_table
[i
].model
;
29727 /* CPU types are stored in the next field. */
29728 if (field_val
> M_CPU_TYPE_START
29729 && field_val
< M_CPU_SUBTYPE_START
)
29731 field
= DECL_CHAIN (field
);
29732 field_val
-= M_CPU_TYPE_START
;
29735 /* CPU subtypes are stored in the next field. */
29736 if (field_val
> M_CPU_SUBTYPE_START
)
29738 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29739 field_val
-= M_CPU_SUBTYPE_START
;
29742 /* Get the appropriate field in __cpu_model. */
29743 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29746 /* Check the value. */
29747 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29748 build_int_cstu (unsigned_type_node
, field_val
));
29749 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29751 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29758 unsigned int field_val
= 0;
29759 unsigned int NUM_ISA_NAMES
29760 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29762 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29763 if (strcmp (isa_names_table
[i
].name
,
29764 TREE_STRING_POINTER (param_string_cst
)) == 0)
29767 if (i
== NUM_ISA_NAMES
)
29769 error ("Parameter to builtin not valid: %s",
29770 TREE_STRING_POINTER (param_string_cst
));
29771 return integer_zero_node
;
29774 field
= TYPE_FIELDS (__processor_model_type
);
29775 /* Get the last field, which is __cpu_features. */
29776 while (DECL_CHAIN (field
))
29777 field
= DECL_CHAIN (field
);
29779 /* Get the appropriate field: __cpu_model.__cpu_features */
29780 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29783 /* Access the 0th element of __cpu_features array. */
29784 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29785 integer_zero_node
, NULL_TREE
, NULL_TREE
);
29787 field_val
= (1 << isa_names_table
[i
].feature
);
29788 /* Return __cpu_model.__cpu_features[0] & field_val */
29789 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29790 build_int_cstu (unsigned_type_node
, field_val
));
29791 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29793 gcc_unreachable ();
/* ix86_fold_builtin (extraction-damaged fragment): target fold hook.
   Folds the single-argument IX86_BUILTIN_CPU_IS /
   IX86_BUILTIN_CPU_SUPPORTS machine builtins through fold_builtin_cpu,
   otherwise defers to SUBTARGET_FOLD_BUILTIN when defined.  The
   fallback "return NULL_TREE;" is not visible in this chunk.  */
29797 ix86_fold_builtin (tree fndecl
, int n_args
,
29798 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29800 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29802 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29803 DECL_FUNCTION_CODE (fndecl
);
29804 if (fn_code
== IX86_BUILTIN_CPU_IS
29805 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29807 gcc_assert (n_args
== 1);
29808 return fold_builtin_cpu (fndecl
, args
);
29812 #ifdef SUBTARGET_FOLD_BUILTIN
29813 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29819 /* Make builtins to detect cpu type and features supported. NAME is
29820 the builtin name, CODE is the builtin code, and FTYPE is the function
29821 type of the builtin. */
/* make_cpu_type_builtin (extraction-damaged fragment): register
   machine builtin NAME with function type FTYPE under builtin CODE,
   record its decl in ix86_builtins, and mark it TREE_READONLY (const)
   when IS_CONST.  */
29824 make_cpu_type_builtin (const char* name
, int code
,
29825 enum ix86_builtin_func_type ftype
, bool is_const
)
29830 type
= ix86_get_builtin_func_type (ftype
);
29831 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29833 gcc_assert (decl
!= NULL_TREE
);
29834 ix86_builtins
[(int) code
] = decl
;
29835 TREE_READONLY (decl
) = is_const
;
29838 /* Make builtins to get CPU type and features supported. The created
29841 __builtin_cpu_init (), to detect cpu type and features,
29842 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29843 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
/* ix86_init_platform_type_builtins (extraction-damaged fragment):
   register the CPU-detection builtins -- __builtin_cpu_init (void),
   plus the const, string-taking __builtin_cpu_is and
   __builtin_cpu_supports.  */
29847 ix86_init_platform_type_builtins (void)
29849 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29850 INT_FTYPE_VOID
, false);
29851 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29852 INT_FTYPE_PCCHAR
, true);
29853 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29854 INT_FTYPE_PCCHAR
, true);
29857 /* Internal method for ix86_init_builtins. */
/* ix86_init_builtins_va_builtins_abi (extraction-damaged fragment):
   build and register ms_abi and sysv_abi flavors of the
   va_start/va_end/va_copy builtins, using a reference to
   ms_va_list_type_node and a pointer to the sysv va_list element type
   respectively, and tagging each decl with the matching ABI
   attribute.  */
29860 ix86_init_builtins_va_builtins_abi (void)
29862 tree ms_va_ref
, sysv_va_ref
;
29863 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29864 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29865 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29866 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
29870 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29871 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29872 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29874 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29877 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29878 fnvoid_va_start_ms
=
29879 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29880 fnvoid_va_end_sysv
=
29881 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29882 fnvoid_va_start_sysv
=
29883 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29885 fnvoid_va_copy_ms
=
29886 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29888 fnvoid_va_copy_sysv
=
29889 build_function_type_list (void_type_node
, sysv_va_ref
,
29890 sysv_va_ref
, NULL_TREE
);
29892 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29893 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29894 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29895 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29896 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29897 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29898 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29899 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29900 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29901 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29902 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29903 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
/* ix86_init_builtin_types (extraction-damaged fragment): register the
   x86-specific floating types -- __float80 (long double when that is
   XFmode, otherwise a fresh 80-bit REAL_TYPE) and a 128-bit
   __float128 -- then instantiate the primitive builtin types generated
   by i386-builtin-types.awk.  */
29907 ix86_init_builtin_types (void)
29909 tree float128_type_node
, float80_type_node
;
29911 /* The __float80 type. */
29912 float80_type_node
= long_double_type_node
;
29913 if (TYPE_MODE (float80_type_node
) != XFmode
)
29915 /* long double is not XFmode: build a separate 80-bit type.  */
29916 float80_type_node
= make_node (REAL_TYPE
);
29918 TYPE_PRECISION (float80_type_node
) = 80;
29919 layout_type (float80_type_node
);
29921 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29923 /* The __float128 type. */
29924 float128_type_node
= make_node (REAL_TYPE
);
29925 TYPE_PRECISION (float128_type_node
) = 128;
29926 layout_type (float128_type_node
);
29927 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29929 /* This macro is built by i386-builtin-types.awk. */
29930 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
/* ix86_init_builtins (extraction-damaged fragment): entry point that
   registers all ix86 builtins: builtin types, the CPU-detection
   builtins, the TFmode helpers (__builtin_infq, __builtin_huge_valq,
   and the const __builtin_fabsq/__builtin_copysignq backed by libgcc's
   __fabstf2/__copysigntf3), the TM and MMX/SSE builtin sets, the
   MS/SYSV va builtins, and any SUBTARGET_INIT_BUILTINS.  */
29934 ix86_init_builtins (void)
29938 ix86_init_builtin_types ();
29940 /* Builtins to get CPU type and features. */
29941 ix86_init_platform_type_builtins ();
29943 /* TFmode support builtins. */
29944 def_builtin_const (0, "__builtin_infq",
29945 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29946 def_builtin_const (0, "__builtin_huge_valq",
29947 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29949 /* We will expand them to normal call if SSE isn't available since
29950 they are used by libgcc. */
29951 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29952 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29953 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29954 TREE_READONLY (t
) = 1;
29955 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29957 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29958 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29959 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29960 TREE_READONLY (t
) = 1;
29961 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29963 ix86_init_tm_builtins ();
29964 ix86_init_mmx_sse_builtins ();
29967 ix86_init_builtins_va_builtins_abi ();
29969 #ifdef SUBTARGET_INIT_BUILTINS
29970 SUBTARGET_INIT_BUILTINS
;
29974 /* Return the ix86 builtin for CODE. */
29977 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29979 if (code
>= IX86_BUILTIN_MAX
)
29980 return error_mark_node
;
29982 return ix86_builtins
[code
];
29985 /* Errors in the source file can cause expand_expr to return const0_rtx
29986 where we expect a vector. To avoid crashing, use one of the vector
29987 clear instructions. */
/* safe_vector_operand (extraction-damaged fragment): when expand_expr
   produced const0_rtx where a vector was expected (source errors),
   substitute CONST0_RTX of MODE to avoid crashing.  The final
   "return x;" is not visible in this chunk.  */
29989 safe_vector_operand (rtx x
, enum machine_mode mode
)
29991 if (x
== const0_rtx
)
29992 x
= CONST0_RTX (mode
);
29996 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* ix86_expand_binop_builtin (extraction-damaged fragment): expand a
   two-argument builtin call EXP through insn pattern ICODE into
   TARGET.  Expands both arguments, sanitizes vector operands through
   safe_vector_operand, widens an SImode second operand to TImode via
   sse2_loadd when the insn expects TImode, forces operands through the
   insn's predicates, and generates the pattern.  The emit/return tail
   of the original is not visible in this chunk.  */
29999 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30002 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30003 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30004 rtx op0
= expand_normal (arg0
);
30005 rtx op1
= expand_normal (arg1
);
30006 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30007 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30008 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30010 if (VECTOR_MODE_P (mode0
))
30011 op0
= safe_vector_operand (op0
, mode0
);
30012 if (VECTOR_MODE_P (mode1
))
30013 op1
= safe_vector_operand (op1
, mode1
);
/* Use a fresh pseudo when TARGET is absent or unsuitable.  */
30015 if (optimize
|| !target
30016 || GET_MODE (target
) != tmode
30017 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30018 target
= gen_reg_rtx (tmode
);
30020 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
/* Load the SImode value into a V4SI register and view it as TImode.  */
30022 rtx x
= gen_reg_rtx (V4SImode
);
30023 emit_insn (gen_sse2_loadd (x
, op1
));
30024 op1
= gen_lowpart (TImode
, x
);
30027 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30028 op0
= copy_to_mode_reg (mode0
, op0
);
30029 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30030 op1
= copy_to_mode_reg (mode1
, op1
);
30032 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30041 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30044 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30045 enum ix86_builtin_func_type m_type
,
30046 enum rtx_code sub_code
)
30051 bool comparison_p
= false;
30053 bool last_arg_constant
= false;
30054 int num_memory
= 0;
30057 enum machine_mode mode
;
30060 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30064 case MULTI_ARG_4_DF2_DI_I
:
30065 case MULTI_ARG_4_DF2_DI_I1
:
30066 case MULTI_ARG_4_SF2_SI_I
:
30067 case MULTI_ARG_4_SF2_SI_I1
:
30069 last_arg_constant
= true;
30072 case MULTI_ARG_3_SF
:
30073 case MULTI_ARG_3_DF
:
30074 case MULTI_ARG_3_SF2
:
30075 case MULTI_ARG_3_DF2
:
30076 case MULTI_ARG_3_DI
:
30077 case MULTI_ARG_3_SI
:
30078 case MULTI_ARG_3_SI_DI
:
30079 case MULTI_ARG_3_HI
:
30080 case MULTI_ARG_3_HI_SI
:
30081 case MULTI_ARG_3_QI
:
30082 case MULTI_ARG_3_DI2
:
30083 case MULTI_ARG_3_SI2
:
30084 case MULTI_ARG_3_HI2
:
30085 case MULTI_ARG_3_QI2
:
30089 case MULTI_ARG_2_SF
:
30090 case MULTI_ARG_2_DF
:
30091 case MULTI_ARG_2_DI
:
30092 case MULTI_ARG_2_SI
:
30093 case MULTI_ARG_2_HI
:
30094 case MULTI_ARG_2_QI
:
30098 case MULTI_ARG_2_DI_IMM
:
30099 case MULTI_ARG_2_SI_IMM
:
30100 case MULTI_ARG_2_HI_IMM
:
30101 case MULTI_ARG_2_QI_IMM
:
30103 last_arg_constant
= true;
30106 case MULTI_ARG_1_SF
:
30107 case MULTI_ARG_1_DF
:
30108 case MULTI_ARG_1_SF2
:
30109 case MULTI_ARG_1_DF2
:
30110 case MULTI_ARG_1_DI
:
30111 case MULTI_ARG_1_SI
:
30112 case MULTI_ARG_1_HI
:
30113 case MULTI_ARG_1_QI
:
30114 case MULTI_ARG_1_SI_DI
:
30115 case MULTI_ARG_1_HI_DI
:
30116 case MULTI_ARG_1_HI_SI
:
30117 case MULTI_ARG_1_QI_DI
:
30118 case MULTI_ARG_1_QI_SI
:
30119 case MULTI_ARG_1_QI_HI
:
30123 case MULTI_ARG_2_DI_CMP
:
30124 case MULTI_ARG_2_SI_CMP
:
30125 case MULTI_ARG_2_HI_CMP
:
30126 case MULTI_ARG_2_QI_CMP
:
30128 comparison_p
= true;
30131 case MULTI_ARG_2_SF_TF
:
30132 case MULTI_ARG_2_DF_TF
:
30133 case MULTI_ARG_2_DI_TF
:
30134 case MULTI_ARG_2_SI_TF
:
30135 case MULTI_ARG_2_HI_TF
:
30136 case MULTI_ARG_2_QI_TF
:
30142 gcc_unreachable ();
30145 if (optimize
|| !target
30146 || GET_MODE (target
) != tmode
30147 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30148 target
= gen_reg_rtx (tmode
);
30150 gcc_assert (nargs
<= 4);
30152 for (i
= 0; i
< nargs
; i
++)
30154 tree arg
= CALL_EXPR_ARG (exp
, i
);
30155 rtx op
= expand_normal (arg
);
30156 int adjust
= (comparison_p
) ? 1 : 0;
30157 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30159 if (last_arg_constant
&& i
== nargs
- 1)
30161 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30163 enum insn_code new_icode
= icode
;
30166 case CODE_FOR_xop_vpermil2v2df3
:
30167 case CODE_FOR_xop_vpermil2v4sf3
:
30168 case CODE_FOR_xop_vpermil2v4df3
:
30169 case CODE_FOR_xop_vpermil2v8sf3
:
30170 error ("the last argument must be a 2-bit immediate");
30171 return gen_reg_rtx (tmode
);
30172 case CODE_FOR_xop_rotlv2di3
:
30173 new_icode
= CODE_FOR_rotlv2di3
;
30175 case CODE_FOR_xop_rotlv4si3
:
30176 new_icode
= CODE_FOR_rotlv4si3
;
30178 case CODE_FOR_xop_rotlv8hi3
:
30179 new_icode
= CODE_FOR_rotlv8hi3
;
30181 case CODE_FOR_xop_rotlv16qi3
:
30182 new_icode
= CODE_FOR_rotlv16qi3
;
30184 if (CONST_INT_P (op
))
30186 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30187 op
= GEN_INT (INTVAL (op
) & mask
);
30188 gcc_checking_assert
30189 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30193 gcc_checking_assert
30195 && insn_data
[new_icode
].operand
[0].mode
== tmode
30196 && insn_data
[new_icode
].operand
[1].mode
== tmode
30197 && insn_data
[new_icode
].operand
[2].mode
== mode
30198 && insn_data
[new_icode
].operand
[0].predicate
30199 == insn_data
[icode
].operand
[0].predicate
30200 && insn_data
[new_icode
].operand
[1].predicate
30201 == insn_data
[icode
].operand
[1].predicate
);
30207 gcc_unreachable ();
30214 if (VECTOR_MODE_P (mode
))
30215 op
= safe_vector_operand (op
, mode
);
30217 /* If we aren't optimizing, only allow one memory operand to be
30219 if (memory_operand (op
, mode
))
30222 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30225 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30227 op
= force_reg (mode
, op
);
30231 args
[i
].mode
= mode
;
30237 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30242 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30243 GEN_INT ((int)sub_code
));
30244 else if (! comparison_p
)
30245 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30248 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30252 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30257 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30261 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30265 gcc_unreachable ();
30275 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30276 insns with vec_merge. */
30279 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30283 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30284 rtx op1
, op0
= expand_normal (arg0
);
30285 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30286 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30288 if (optimize
|| !target
30289 || GET_MODE (target
) != tmode
30290 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30291 target
= gen_reg_rtx (tmode
);
30293 if (VECTOR_MODE_P (mode0
))
30294 op0
= safe_vector_operand (op0
, mode0
);
30296 if ((optimize
&& !register_operand (op0
, mode0
))
30297 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30298 op0
= copy_to_mode_reg (mode0
, op0
);
30301 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30302 op1
= copy_to_mode_reg (mode0
, op1
);
30304 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30311 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30314 ix86_expand_sse_compare (const struct builtin_description
*d
,
30315 tree exp
, rtx target
, bool swap
)
30318 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30319 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30320 rtx op0
= expand_normal (arg0
);
30321 rtx op1
= expand_normal (arg1
);
30323 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30324 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30325 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30326 enum rtx_code comparison
= d
->comparison
;
30328 if (VECTOR_MODE_P (mode0
))
30329 op0
= safe_vector_operand (op0
, mode0
);
30330 if (VECTOR_MODE_P (mode1
))
30331 op1
= safe_vector_operand (op1
, mode1
);
30333 /* Swap operands if we have a comparison that isn't available in
30337 rtx tmp
= gen_reg_rtx (mode1
);
30338 emit_move_insn (tmp
, op1
);
30343 if (optimize
|| !target
30344 || GET_MODE (target
) != tmode
30345 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30346 target
= gen_reg_rtx (tmode
);
30348 if ((optimize
&& !register_operand (op0
, mode0
))
30349 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30350 op0
= copy_to_mode_reg (mode0
, op0
);
30351 if ((optimize
&& !register_operand (op1
, mode1
))
30352 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30353 op1
= copy_to_mode_reg (mode1
, op1
);
30355 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30356 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30363 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30366 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30370 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30371 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30372 rtx op0
= expand_normal (arg0
);
30373 rtx op1
= expand_normal (arg1
);
30374 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30375 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30376 enum rtx_code comparison
= d
->comparison
;
30378 if (VECTOR_MODE_P (mode0
))
30379 op0
= safe_vector_operand (op0
, mode0
);
30380 if (VECTOR_MODE_P (mode1
))
30381 op1
= safe_vector_operand (op1
, mode1
);
30383 /* Swap operands if we have a comparison that isn't available in
30385 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30392 target
= gen_reg_rtx (SImode
);
30393 emit_move_insn (target
, const0_rtx
);
30394 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30396 if ((optimize
&& !register_operand (op0
, mode0
))
30397 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30398 op0
= copy_to_mode_reg (mode0
, op0
);
30399 if ((optimize
&& !register_operand (op1
, mode1
))
30400 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30401 op1
= copy_to_mode_reg (mode1
, op1
);
30403 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30407 emit_insn (gen_rtx_SET (VOIDmode
,
30408 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30409 gen_rtx_fmt_ee (comparison
, QImode
,
30413 return SUBREG_REG (target
);
30416 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30419 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30423 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30424 rtx op1
, op0
= expand_normal (arg0
);
30425 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30426 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30428 if (optimize
|| target
== 0
30429 || GET_MODE (target
) != tmode
30430 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30431 target
= gen_reg_rtx (tmode
);
30433 if (VECTOR_MODE_P (mode0
))
30434 op0
= safe_vector_operand (op0
, mode0
);
30436 if ((optimize
&& !register_operand (op0
, mode0
))
30437 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30438 op0
= copy_to_mode_reg (mode0
, op0
);
30440 op1
= GEN_INT (d
->comparison
);
30442 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30450 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30451 tree exp
, rtx target
)
30454 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30455 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30456 rtx op0
= expand_normal (arg0
);
30457 rtx op1
= expand_normal (arg1
);
30459 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30460 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30461 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30463 if (optimize
|| target
== 0
30464 || GET_MODE (target
) != tmode
30465 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30466 target
= gen_reg_rtx (tmode
);
30468 op0
= safe_vector_operand (op0
, mode0
);
30469 op1
= safe_vector_operand (op1
, mode1
);
30471 if ((optimize
&& !register_operand (op0
, mode0
))
30472 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30473 op0
= copy_to_mode_reg (mode0
, op0
);
30474 if ((optimize
&& !register_operand (op1
, mode1
))
30475 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30476 op1
= copy_to_mode_reg (mode1
, op1
);
30478 op2
= GEN_INT (d
->comparison
);
30480 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30487 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30490 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30494 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30495 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30496 rtx op0
= expand_normal (arg0
);
30497 rtx op1
= expand_normal (arg1
);
30498 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30499 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30500 enum rtx_code comparison
= d
->comparison
;
30502 if (VECTOR_MODE_P (mode0
))
30503 op0
= safe_vector_operand (op0
, mode0
);
30504 if (VECTOR_MODE_P (mode1
))
30505 op1
= safe_vector_operand (op1
, mode1
);
30507 target
= gen_reg_rtx (SImode
);
30508 emit_move_insn (target
, const0_rtx
);
30509 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30511 if ((optimize
&& !register_operand (op0
, mode0
))
30512 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30513 op0
= copy_to_mode_reg (mode0
, op0
);
30514 if ((optimize
&& !register_operand (op1
, mode1
))
30515 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30516 op1
= copy_to_mode_reg (mode1
, op1
);
30518 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30522 emit_insn (gen_rtx_SET (VOIDmode
,
30523 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30524 gen_rtx_fmt_ee (comparison
, QImode
,
30528 return SUBREG_REG (target
);
30531 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30534 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30535 tree exp
, rtx target
)
30538 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30539 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30540 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30541 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30542 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30543 rtx scratch0
, scratch1
;
30544 rtx op0
= expand_normal (arg0
);
30545 rtx op1
= expand_normal (arg1
);
30546 rtx op2
= expand_normal (arg2
);
30547 rtx op3
= expand_normal (arg3
);
30548 rtx op4
= expand_normal (arg4
);
30549 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30551 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30552 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30553 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30554 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30555 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30556 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30557 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30559 if (VECTOR_MODE_P (modev2
))
30560 op0
= safe_vector_operand (op0
, modev2
);
30561 if (VECTOR_MODE_P (modev4
))
30562 op2
= safe_vector_operand (op2
, modev4
);
30564 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30565 op0
= copy_to_mode_reg (modev2
, op0
);
30566 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30567 op1
= copy_to_mode_reg (modei3
, op1
);
30568 if ((optimize
&& !register_operand (op2
, modev4
))
30569 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30570 op2
= copy_to_mode_reg (modev4
, op2
);
30571 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30572 op3
= copy_to_mode_reg (modei5
, op3
);
30574 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30576 error ("the fifth argument must be an 8-bit immediate");
30580 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30582 if (optimize
|| !target
30583 || GET_MODE (target
) != tmode0
30584 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30585 target
= gen_reg_rtx (tmode0
);
30587 scratch1
= gen_reg_rtx (tmode1
);
30589 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30591 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30593 if (optimize
|| !target
30594 || GET_MODE (target
) != tmode1
30595 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30596 target
= gen_reg_rtx (tmode1
);
30598 scratch0
= gen_reg_rtx (tmode0
);
30600 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30604 gcc_assert (d
->flag
);
30606 scratch0
= gen_reg_rtx (tmode0
);
30607 scratch1
= gen_reg_rtx (tmode1
);
30609 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30619 target
= gen_reg_rtx (SImode
);
30620 emit_move_insn (target
, const0_rtx
);
30621 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30624 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30625 gen_rtx_fmt_ee (EQ
, QImode
,
30626 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30629 return SUBREG_REG (target
);
30636 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30639 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30640 tree exp
, rtx target
)
30643 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30644 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30645 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30646 rtx scratch0
, scratch1
;
30647 rtx op0
= expand_normal (arg0
);
30648 rtx op1
= expand_normal (arg1
);
30649 rtx op2
= expand_normal (arg2
);
30650 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30652 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30653 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30654 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30655 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30656 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30658 if (VECTOR_MODE_P (modev2
))
30659 op0
= safe_vector_operand (op0
, modev2
);
30660 if (VECTOR_MODE_P (modev3
))
30661 op1
= safe_vector_operand (op1
, modev3
);
30663 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30664 op0
= copy_to_mode_reg (modev2
, op0
);
30665 if ((optimize
&& !register_operand (op1
, modev3
))
30666 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30667 op1
= copy_to_mode_reg (modev3
, op1
);
30669 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30671 error ("the third argument must be an 8-bit immediate");
30675 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30677 if (optimize
|| !target
30678 || GET_MODE (target
) != tmode0
30679 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30680 target
= gen_reg_rtx (tmode0
);
30682 scratch1
= gen_reg_rtx (tmode1
);
30684 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30686 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30688 if (optimize
|| !target
30689 || GET_MODE (target
) != tmode1
30690 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30691 target
= gen_reg_rtx (tmode1
);
30693 scratch0
= gen_reg_rtx (tmode0
);
30695 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30699 gcc_assert (d
->flag
);
30701 scratch0
= gen_reg_rtx (tmode0
);
30702 scratch1
= gen_reg_rtx (tmode1
);
30704 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30714 target
= gen_reg_rtx (SImode
);
30715 emit_move_insn (target
, const0_rtx
);
30716 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30719 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30720 gen_rtx_fmt_ee (EQ
, QImode
,
30721 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30724 return SUBREG_REG (target
);
30730 /* Subroutine of ix86_expand_builtin to take care of insns with
30731 variable number of operands. */
30734 ix86_expand_args_builtin (const struct builtin_description
*d
,
30735 tree exp
, rtx target
)
30737 rtx pat
, real_target
;
30738 unsigned int i
, nargs
;
30739 unsigned int nargs_constant
= 0;
30740 int num_memory
= 0;
30744 enum machine_mode mode
;
30746 bool last_arg_count
= false;
30747 enum insn_code icode
= d
->icode
;
30748 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30749 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30750 enum machine_mode rmode
= VOIDmode
;
30752 enum rtx_code comparison
= d
->comparison
;
30754 switch ((enum ix86_builtin_func_type
) d
->flag
)
30756 case V2DF_FTYPE_V2DF_ROUND
:
30757 case V4DF_FTYPE_V4DF_ROUND
:
30758 case V4SF_FTYPE_V4SF_ROUND
:
30759 case V8SF_FTYPE_V8SF_ROUND
:
30760 case V4SI_FTYPE_V4SF_ROUND
:
30761 case V8SI_FTYPE_V8SF_ROUND
:
30762 return ix86_expand_sse_round (d
, exp
, target
);
30763 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30764 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30765 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30766 case INT_FTYPE_V8SF_V8SF_PTEST
:
30767 case INT_FTYPE_V4DI_V4DI_PTEST
:
30768 case INT_FTYPE_V4DF_V4DF_PTEST
:
30769 case INT_FTYPE_V4SF_V4SF_PTEST
:
30770 case INT_FTYPE_V2DI_V2DI_PTEST
:
30771 case INT_FTYPE_V2DF_V2DF_PTEST
:
30772 return ix86_expand_sse_ptest (d
, exp
, target
);
30773 case FLOAT128_FTYPE_FLOAT128
:
30774 case FLOAT_FTYPE_FLOAT
:
30775 case INT_FTYPE_INT
:
30776 case UINT64_FTYPE_INT
:
30777 case UINT16_FTYPE_UINT16
:
30778 case INT64_FTYPE_INT64
:
30779 case INT64_FTYPE_V4SF
:
30780 case INT64_FTYPE_V2DF
:
30781 case INT_FTYPE_V16QI
:
30782 case INT_FTYPE_V8QI
:
30783 case INT_FTYPE_V8SF
:
30784 case INT_FTYPE_V4DF
:
30785 case INT_FTYPE_V4SF
:
30786 case INT_FTYPE_V2DF
:
30787 case INT_FTYPE_V32QI
:
30788 case V16QI_FTYPE_V16QI
:
30789 case V8SI_FTYPE_V8SF
:
30790 case V8SI_FTYPE_V4SI
:
30791 case V8HI_FTYPE_V8HI
:
30792 case V8HI_FTYPE_V16QI
:
30793 case V8QI_FTYPE_V8QI
:
30794 case V8SF_FTYPE_V8SF
:
30795 case V8SF_FTYPE_V8SI
:
30796 case V8SF_FTYPE_V4SF
:
30797 case V8SF_FTYPE_V8HI
:
30798 case V4SI_FTYPE_V4SI
:
30799 case V4SI_FTYPE_V16QI
:
30800 case V4SI_FTYPE_V4SF
:
30801 case V4SI_FTYPE_V8SI
:
30802 case V4SI_FTYPE_V8HI
:
30803 case V4SI_FTYPE_V4DF
:
30804 case V4SI_FTYPE_V2DF
:
30805 case V4HI_FTYPE_V4HI
:
30806 case V4DF_FTYPE_V4DF
:
30807 case V4DF_FTYPE_V4SI
:
30808 case V4DF_FTYPE_V4SF
:
30809 case V4DF_FTYPE_V2DF
:
30810 case V4SF_FTYPE_V4SF
:
30811 case V4SF_FTYPE_V4SI
:
30812 case V4SF_FTYPE_V8SF
:
30813 case V4SF_FTYPE_V4DF
:
30814 case V4SF_FTYPE_V8HI
:
30815 case V4SF_FTYPE_V2DF
:
30816 case V2DI_FTYPE_V2DI
:
30817 case V2DI_FTYPE_V16QI
:
30818 case V2DI_FTYPE_V8HI
:
30819 case V2DI_FTYPE_V4SI
:
30820 case V2DF_FTYPE_V2DF
:
30821 case V2DF_FTYPE_V4SI
:
30822 case V2DF_FTYPE_V4DF
:
30823 case V2DF_FTYPE_V4SF
:
30824 case V2DF_FTYPE_V2SI
:
30825 case V2SI_FTYPE_V2SI
:
30826 case V2SI_FTYPE_V4SF
:
30827 case V2SI_FTYPE_V2SF
:
30828 case V2SI_FTYPE_V2DF
:
30829 case V2SF_FTYPE_V2SF
:
30830 case V2SF_FTYPE_V2SI
:
30831 case V32QI_FTYPE_V32QI
:
30832 case V32QI_FTYPE_V16QI
:
30833 case V16HI_FTYPE_V16HI
:
30834 case V16HI_FTYPE_V8HI
:
30835 case V8SI_FTYPE_V8SI
:
30836 case V16HI_FTYPE_V16QI
:
30837 case V8SI_FTYPE_V16QI
:
30838 case V4DI_FTYPE_V16QI
:
30839 case V8SI_FTYPE_V8HI
:
30840 case V4DI_FTYPE_V8HI
:
30841 case V4DI_FTYPE_V4SI
:
30842 case V4DI_FTYPE_V2DI
:
30845 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30846 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30847 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30848 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30849 case V16QI_FTYPE_V16QI_V16QI
:
30850 case V16QI_FTYPE_V8HI_V8HI
:
30851 case V8QI_FTYPE_V8QI_V8QI
:
30852 case V8QI_FTYPE_V4HI_V4HI
:
30853 case V8HI_FTYPE_V8HI_V8HI
:
30854 case V8HI_FTYPE_V16QI_V16QI
:
30855 case V8HI_FTYPE_V4SI_V4SI
:
30856 case V8SF_FTYPE_V8SF_V8SF
:
30857 case V8SF_FTYPE_V8SF_V8SI
:
30858 case V4SI_FTYPE_V4SI_V4SI
:
30859 case V4SI_FTYPE_V8HI_V8HI
:
30860 case V4SI_FTYPE_V4SF_V4SF
:
30861 case V4SI_FTYPE_V2DF_V2DF
:
30862 case V4HI_FTYPE_V4HI_V4HI
:
30863 case V4HI_FTYPE_V8QI_V8QI
:
30864 case V4HI_FTYPE_V2SI_V2SI
:
30865 case V4DF_FTYPE_V4DF_V4DF
:
30866 case V4DF_FTYPE_V4DF_V4DI
:
30867 case V4SF_FTYPE_V4SF_V4SF
:
30868 case V4SF_FTYPE_V4SF_V4SI
:
30869 case V4SF_FTYPE_V4SF_V2SI
:
30870 case V4SF_FTYPE_V4SF_V2DF
:
30871 case V4SF_FTYPE_V4SF_DI
:
30872 case V4SF_FTYPE_V4SF_SI
:
30873 case V2DI_FTYPE_V2DI_V2DI
:
30874 case V2DI_FTYPE_V16QI_V16QI
:
30875 case V2DI_FTYPE_V4SI_V4SI
:
30876 case V2UDI_FTYPE_V4USI_V4USI
:
30877 case V2DI_FTYPE_V2DI_V16QI
:
30878 case V2DI_FTYPE_V2DF_V2DF
:
30879 case V2SI_FTYPE_V2SI_V2SI
:
30880 case V2SI_FTYPE_V4HI_V4HI
:
30881 case V2SI_FTYPE_V2SF_V2SF
:
30882 case V2DF_FTYPE_V2DF_V2DF
:
30883 case V2DF_FTYPE_V2DF_V4SF
:
30884 case V2DF_FTYPE_V2DF_V2DI
:
30885 case V2DF_FTYPE_V2DF_DI
:
30886 case V2DF_FTYPE_V2DF_SI
:
30887 case V2SF_FTYPE_V2SF_V2SF
:
30888 case V1DI_FTYPE_V1DI_V1DI
:
30889 case V1DI_FTYPE_V8QI_V8QI
:
30890 case V1DI_FTYPE_V2SI_V2SI
:
30891 case V32QI_FTYPE_V16HI_V16HI
:
30892 case V16HI_FTYPE_V8SI_V8SI
:
30893 case V32QI_FTYPE_V32QI_V32QI
:
30894 case V16HI_FTYPE_V32QI_V32QI
:
30895 case V16HI_FTYPE_V16HI_V16HI
:
30896 case V8SI_FTYPE_V4DF_V4DF
:
30897 case V8SI_FTYPE_V8SI_V8SI
:
30898 case V8SI_FTYPE_V16HI_V16HI
:
30899 case V4DI_FTYPE_V4DI_V4DI
:
30900 case V4DI_FTYPE_V8SI_V8SI
:
30901 case V4UDI_FTYPE_V8USI_V8USI
:
30902 if (comparison
== UNKNOWN
)
30903 return ix86_expand_binop_builtin (icode
, exp
, target
);
30906 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30907 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30908 gcc_assert (comparison
!= UNKNOWN
);
30912 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30913 case V16HI_FTYPE_V16HI_SI_COUNT
:
30914 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30915 case V8SI_FTYPE_V8SI_SI_COUNT
:
30916 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30917 case V4DI_FTYPE_V4DI_INT_COUNT
:
30918 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30919 case V8HI_FTYPE_V8HI_SI_COUNT
:
30920 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30921 case V4SI_FTYPE_V4SI_SI_COUNT
:
30922 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30923 case V4HI_FTYPE_V4HI_SI_COUNT
:
30924 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30925 case V2DI_FTYPE_V2DI_SI_COUNT
:
30926 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30927 case V2SI_FTYPE_V2SI_SI_COUNT
:
30928 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30929 case V1DI_FTYPE_V1DI_SI_COUNT
:
30931 last_arg_count
= true;
30933 case UINT64_FTYPE_UINT64_UINT64
:
30934 case UINT_FTYPE_UINT_UINT
:
30935 case UINT_FTYPE_UINT_USHORT
:
30936 case UINT_FTYPE_UINT_UCHAR
:
30937 case UINT16_FTYPE_UINT16_INT
:
30938 case UINT8_FTYPE_UINT8_INT
:
30941 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30944 nargs_constant
= 1;
30946 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30949 nargs_constant
= 1;
30951 case V8HI_FTYPE_V8HI_INT
:
30952 case V8HI_FTYPE_V8SF_INT
:
30953 case V8HI_FTYPE_V4SF_INT
:
30954 case V8SF_FTYPE_V8SF_INT
:
30955 case V4SI_FTYPE_V4SI_INT
:
30956 case V4SI_FTYPE_V8SI_INT
:
30957 case V4HI_FTYPE_V4HI_INT
:
30958 case V4DF_FTYPE_V4DF_INT
:
30959 case V4SF_FTYPE_V4SF_INT
:
30960 case V4SF_FTYPE_V8SF_INT
:
30961 case V2DI_FTYPE_V2DI_INT
:
30962 case V2DF_FTYPE_V2DF_INT
:
30963 case V2DF_FTYPE_V4DF_INT
:
30964 case V16HI_FTYPE_V16HI_INT
:
30965 case V8SI_FTYPE_V8SI_INT
:
30966 case V4DI_FTYPE_V4DI_INT
:
30967 case V2DI_FTYPE_V4DI_INT
:
30969 nargs_constant
= 1;
30971 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30972 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30973 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30974 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30975 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30976 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30979 case V32QI_FTYPE_V32QI_V32QI_INT
:
30980 case V16HI_FTYPE_V16HI_V16HI_INT
:
30981 case V16QI_FTYPE_V16QI_V16QI_INT
:
30982 case V4DI_FTYPE_V4DI_V4DI_INT
:
30983 case V8HI_FTYPE_V8HI_V8HI_INT
:
30984 case V8SI_FTYPE_V8SI_V8SI_INT
:
30985 case V8SI_FTYPE_V8SI_V4SI_INT
:
30986 case V8SF_FTYPE_V8SF_V8SF_INT
:
30987 case V8SF_FTYPE_V8SF_V4SF_INT
:
30988 case V4SI_FTYPE_V4SI_V4SI_INT
:
30989 case V4DF_FTYPE_V4DF_V4DF_INT
:
30990 case V4DF_FTYPE_V4DF_V2DF_INT
:
30991 case V4SF_FTYPE_V4SF_V4SF_INT
:
30992 case V2DI_FTYPE_V2DI_V2DI_INT
:
30993 case V4DI_FTYPE_V4DI_V2DI_INT
:
30994 case V2DF_FTYPE_V2DF_V2DF_INT
:
30996 nargs_constant
= 1;
30998 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31001 nargs_constant
= 1;
31003 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31006 nargs_constant
= 1;
31008 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31011 nargs_constant
= 1;
31013 case V2DI_FTYPE_V2DI_UINT_UINT
:
31015 nargs_constant
= 2;
31017 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31018 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31019 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31020 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31022 nargs_constant
= 1;
31024 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31026 nargs_constant
= 2;
31028 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31029 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31033 gcc_unreachable ();
31036 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31038 if (comparison
!= UNKNOWN
)
31040 gcc_assert (nargs
== 2);
31041 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31044 if (rmode
== VOIDmode
|| rmode
== tmode
)
31048 || GET_MODE (target
) != tmode
31049 || !insn_p
->operand
[0].predicate (target
, tmode
))
31050 target
= gen_reg_rtx (tmode
);
31051 real_target
= target
;
31055 target
= gen_reg_rtx (rmode
);
31056 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31059 for (i
= 0; i
< nargs
; i
++)
31061 tree arg
= CALL_EXPR_ARG (exp
, i
);
31062 rtx op
= expand_normal (arg
);
31063 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31064 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31066 if (last_arg_count
&& (i
+ 1) == nargs
)
31068 /* SIMD shift insns take either an 8-bit immediate or
31069 register as count. But builtin functions take int as
31070 count. If count doesn't match, we put it in register. */
31073 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31074 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31075 op
= copy_to_reg (op
);
31078 else if ((nargs
- i
) <= nargs_constant
)
31083 case CODE_FOR_avx2_inserti128
:
31084 case CODE_FOR_avx2_extracti128
:
31085 error ("the last argument must be an 1-bit immediate");
31088 case CODE_FOR_sse4_1_roundsd
:
31089 case CODE_FOR_sse4_1_roundss
:
31091 case CODE_FOR_sse4_1_roundpd
:
31092 case CODE_FOR_sse4_1_roundps
:
31093 case CODE_FOR_avx_roundpd256
:
31094 case CODE_FOR_avx_roundps256
:
31096 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31097 case CODE_FOR_sse4_1_roundps_sfix
:
31098 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31099 case CODE_FOR_avx_roundps_sfix256
:
31101 case CODE_FOR_sse4_1_blendps
:
31102 case CODE_FOR_avx_blendpd256
:
31103 case CODE_FOR_avx_vpermilv4df
:
31104 error ("the last argument must be a 4-bit immediate");
31107 case CODE_FOR_sse4_1_blendpd
:
31108 case CODE_FOR_avx_vpermilv2df
:
31109 case CODE_FOR_xop_vpermil2v2df3
:
31110 case CODE_FOR_xop_vpermil2v4sf3
:
31111 case CODE_FOR_xop_vpermil2v4df3
:
31112 case CODE_FOR_xop_vpermil2v8sf3
:
31113 error ("the last argument must be a 2-bit immediate");
31116 case CODE_FOR_avx_vextractf128v4df
:
31117 case CODE_FOR_avx_vextractf128v8sf
:
31118 case CODE_FOR_avx_vextractf128v8si
:
31119 case CODE_FOR_avx_vinsertf128v4df
:
31120 case CODE_FOR_avx_vinsertf128v8sf
:
31121 case CODE_FOR_avx_vinsertf128v8si
:
31122 error ("the last argument must be a 1-bit immediate");
31125 case CODE_FOR_avx_vmcmpv2df3
:
31126 case CODE_FOR_avx_vmcmpv4sf3
:
31127 case CODE_FOR_avx_cmpv2df3
:
31128 case CODE_FOR_avx_cmpv4sf3
:
31129 case CODE_FOR_avx_cmpv4df3
:
31130 case CODE_FOR_avx_cmpv8sf3
:
31131 error ("the last argument must be a 5-bit immediate");
31135 switch (nargs_constant
)
31138 if ((nargs
- i
) == nargs_constant
)
31140 error ("the next to last argument must be an 8-bit immediate");
31144 error ("the last argument must be an 8-bit immediate");
31147 gcc_unreachable ();
31154 if (VECTOR_MODE_P (mode
))
31155 op
= safe_vector_operand (op
, mode
);
31157 /* If we aren't optimizing, only allow one memory operand to
31159 if (memory_operand (op
, mode
))
31162 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31164 if (optimize
|| !match
|| num_memory
> 1)
31165 op
= copy_to_mode_reg (mode
, op
);
31169 op
= copy_to_reg (op
);
31170 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31175 args
[i
].mode
= mode
;
31181 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31184 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31187 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31191 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31192 args
[2].op
, args
[3].op
);
31195 gcc_unreachable ();
31205 /* Subroutine of ix86_expand_builtin to take care of special insns
31206 with variable number of operands. */
31209 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31210 tree exp
, rtx target
)
31214 unsigned int i
, nargs
, arg_adjust
, memory
;
31218 enum machine_mode mode
;
31220 enum insn_code icode
= d
->icode
;
31221 bool last_arg_constant
= false;
31222 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31223 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31224 enum { load
, store
} klass
;
31226 switch ((enum ix86_builtin_func_type
) d
->flag
)
31228 case VOID_FTYPE_VOID
:
31229 emit_insn (GEN_FCN (icode
) (target
));
31231 case VOID_FTYPE_UINT64
:
31232 case VOID_FTYPE_UNSIGNED
:
31238 case INT_FTYPE_VOID
:
31239 case UINT64_FTYPE_VOID
:
31240 case UNSIGNED_FTYPE_VOID
:
31245 case UINT64_FTYPE_PUNSIGNED
:
31246 case V2DI_FTYPE_PV2DI
:
31247 case V4DI_FTYPE_PV4DI
:
31248 case V32QI_FTYPE_PCCHAR
:
31249 case V16QI_FTYPE_PCCHAR
:
31250 case V8SF_FTYPE_PCV4SF
:
31251 case V8SF_FTYPE_PCFLOAT
:
31252 case V4SF_FTYPE_PCFLOAT
:
31253 case V4DF_FTYPE_PCV2DF
:
31254 case V4DF_FTYPE_PCDOUBLE
:
31255 case V2DF_FTYPE_PCDOUBLE
:
31256 case VOID_FTYPE_PVOID
:
31261 case VOID_FTYPE_PV2SF_V4SF
:
31262 case VOID_FTYPE_PV4DI_V4DI
:
31263 case VOID_FTYPE_PV2DI_V2DI
:
31264 case VOID_FTYPE_PCHAR_V32QI
:
31265 case VOID_FTYPE_PCHAR_V16QI
:
31266 case VOID_FTYPE_PFLOAT_V8SF
:
31267 case VOID_FTYPE_PFLOAT_V4SF
:
31268 case VOID_FTYPE_PDOUBLE_V4DF
:
31269 case VOID_FTYPE_PDOUBLE_V2DF
:
31270 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31271 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31272 case VOID_FTYPE_PINT_INT
:
31275 /* Reserve memory operand for target. */
31276 memory
= ARRAY_SIZE (args
);
31278 case V4SF_FTYPE_V4SF_PCV2SF
:
31279 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31284 case V8SF_FTYPE_PCV8SF_V8SI
:
31285 case V4DF_FTYPE_PCV4DF_V4DI
:
31286 case V4SF_FTYPE_PCV4SF_V4SI
:
31287 case V2DF_FTYPE_PCV2DF_V2DI
:
31288 case V8SI_FTYPE_PCV8SI_V8SI
:
31289 case V4DI_FTYPE_PCV4DI_V4DI
:
31290 case V4SI_FTYPE_PCV4SI_V4SI
:
31291 case V2DI_FTYPE_PCV2DI_V2DI
:
31296 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31297 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31298 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31299 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31300 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31301 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31302 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31303 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31306 /* Reserve memory operand for target. */
31307 memory
= ARRAY_SIZE (args
);
31309 case VOID_FTYPE_UINT_UINT_UINT
:
31310 case VOID_FTYPE_UINT64_UINT_UINT
:
31311 case UCHAR_FTYPE_UINT_UINT_UINT
:
31312 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31315 memory
= ARRAY_SIZE (args
);
31316 last_arg_constant
= true;
31319 gcc_unreachable ();
31322 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31324 if (klass
== store
)
31326 arg
= CALL_EXPR_ARG (exp
, 0);
31327 op
= expand_normal (arg
);
31328 gcc_assert (target
== 0);
31331 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31332 target
= gen_rtx_MEM (tmode
, op
);
31335 target
= force_reg (tmode
, op
);
31343 || !register_operand (target
, tmode
)
31344 || GET_MODE (target
) != tmode
)
31345 target
= gen_reg_rtx (tmode
);
31348 for (i
= 0; i
< nargs
; i
++)
31350 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31353 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31354 op
= expand_normal (arg
);
31355 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31357 if (last_arg_constant
&& (i
+ 1) == nargs
)
31361 if (icode
== CODE_FOR_lwp_lwpvalsi3
31362 || icode
== CODE_FOR_lwp_lwpinssi3
31363 || icode
== CODE_FOR_lwp_lwpvaldi3
31364 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31365 error ("the last argument must be a 32-bit immediate");
31367 error ("the last argument must be an 8-bit immediate");
31375 /* This must be the memory operand. */
31376 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31377 op
= gen_rtx_MEM (mode
, op
);
31378 gcc_assert (GET_MODE (op
) == mode
31379 || GET_MODE (op
) == VOIDmode
);
31383 /* This must be register. */
31384 if (VECTOR_MODE_P (mode
))
31385 op
= safe_vector_operand (op
, mode
);
31387 gcc_assert (GET_MODE (op
) == mode
31388 || GET_MODE (op
) == VOIDmode
);
31389 op
= copy_to_mode_reg (mode
, op
);
31394 args
[i
].mode
= mode
;
31400 pat
= GEN_FCN (icode
) (target
);
31403 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31406 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31409 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31412 gcc_unreachable ();
31418 return klass
== store
? 0 : target
;
31421 /* Return the integer constant in ARG. Constrain it to be in the range
31422 of the subparts of VEC_TYPE; issue an error if not. */
31425 get_element_number (tree vec_type
, tree arg
)
31427 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31429 if (!host_integerp (arg
, 1)
31430 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31432 error ("selector must be an integer constant in the range 0..%wi", max
);
31439 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31440 ix86_expand_vector_init. We DO have language-level syntax for this, in
31441 the form of (type){ init-list }. Except that since we can't place emms
31442 instructions from inside the compiler, we can't allow the use of MMX
31443 registers unless the user explicitly asks for it. So we do *not* define
31444 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31445 we have builtins invoked by mmintrin.h that gives us license to emit
31446 these sorts of instructions. */
31449 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31451 enum machine_mode tmode
= TYPE_MODE (type
);
31452 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31453 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31454 rtvec v
= rtvec_alloc (n_elt
);
31456 gcc_assert (VECTOR_MODE_P (tmode
));
31457 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31459 for (i
= 0; i
< n_elt
; ++i
)
31461 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31462 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31465 if (!target
|| !register_operand (target
, tmode
))
31466 target
= gen_reg_rtx (tmode
);
31468 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31472 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31473 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31474 had a language-level syntax for referencing vector elements. */
31477 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31479 enum machine_mode tmode
, mode0
;
31484 arg0
= CALL_EXPR_ARG (exp
, 0);
31485 arg1
= CALL_EXPR_ARG (exp
, 1);
31487 op0
= expand_normal (arg0
);
31488 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
31490 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31491 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31492 gcc_assert (VECTOR_MODE_P (mode0
));
31494 op0
= force_reg (mode0
, op0
);
31496 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31497 target
= gen_reg_rtx (tmode
);
31499 ix86_expand_vector_extract (true, target
, op0
, elt
);
31504 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31505 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31506 a language-level syntax for referencing vector elements. */
31509 ix86_expand_vec_set_builtin (tree exp
)
31511 enum machine_mode tmode
, mode1
;
31512 tree arg0
, arg1
, arg2
;
31514 rtx op0
, op1
, target
;
31516 arg0
= CALL_EXPR_ARG (exp
, 0);
31517 arg1
= CALL_EXPR_ARG (exp
, 1);
31518 arg2
= CALL_EXPR_ARG (exp
, 2);
31520 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31521 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31522 gcc_assert (VECTOR_MODE_P (tmode
));
31524 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31525 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
31526 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
31528 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31529 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31531 op0
= force_reg (tmode
, op0
);
31532 op1
= force_reg (mode1
, op1
);
31534 /* OP0 is the source of these builtin functions and shouldn't be
31535 modified. Create a copy, use it and return it as target. */
31536 target
= gen_reg_rtx (tmode
);
31537 emit_move_insn (target
, op0
);
31538 ix86_expand_vector_set (true, target
, op1
, elt
);
31543 /* Expand an expression EXP that calls a built-in function,
31544 with result going to TARGET if that's convenient
31545 (and in mode MODE if that's convenient).
31546 SUBTARGET may be used as the target for computing one of EXP's operands.
31547 IGNORE is nonzero if the value is to be ignored. */
31550 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
31551 enum machine_mode mode ATTRIBUTE_UNUSED
,
31552 int ignore ATTRIBUTE_UNUSED
)
31554 const struct builtin_description
*d
;
31556 enum insn_code icode
;
31557 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
31558 tree arg0
, arg1
, arg2
, arg3
, arg4
;
31559 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
31560 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
31561 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
31563 /* For CPU builtins that can be folded, fold first and expand the fold. */
31566 case IX86_BUILTIN_CPU_INIT
:
31568 /* Make it call __cpu_indicator_init in libgcc. */
31569 tree call_expr
, fndecl
, type
;
31570 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
31571 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
31572 call_expr
= build_call_expr (fndecl
, 0);
31573 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
31575 case IX86_BUILTIN_CPU_IS
:
31576 case IX86_BUILTIN_CPU_SUPPORTS
:
31578 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31579 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
31580 gcc_assert (fold_expr
!= NULL_TREE
);
31581 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
31585 /* Determine whether the builtin function is available under the current ISA.
31586 Originally the builtin was not created if it wasn't applicable to the
31587 current ISA based on the command line switches. With function specific
31588 options, we need to check in the context of the function making the call
31589 whether it is supported. */
31590 if (ix86_builtins_isa
[fcode
].isa
31591 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
31593 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
31594 NULL
, (enum fpmath_unit
) 0, false);
31597 error ("%qE needs unknown isa option", fndecl
);
31600 gcc_assert (opts
!= NULL
);
31601 error ("%qE needs isa option %s", fndecl
, opts
);
31609 case IX86_BUILTIN_MASKMOVQ
:
31610 case IX86_BUILTIN_MASKMOVDQU
:
31611 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
31612 ? CODE_FOR_mmx_maskmovq
31613 : CODE_FOR_sse2_maskmovdqu
);
31614 /* Note the arg order is different from the operand order. */
31615 arg1
= CALL_EXPR_ARG (exp
, 0);
31616 arg2
= CALL_EXPR_ARG (exp
, 1);
31617 arg0
= CALL_EXPR_ARG (exp
, 2);
31618 op0
= expand_normal (arg0
);
31619 op1
= expand_normal (arg1
);
31620 op2
= expand_normal (arg2
);
31621 mode0
= insn_data
[icode
].operand
[0].mode
;
31622 mode1
= insn_data
[icode
].operand
[1].mode
;
31623 mode2
= insn_data
[icode
].operand
[2].mode
;
31625 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31626 op0
= gen_rtx_MEM (mode1
, op0
);
31628 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31629 op0
= copy_to_mode_reg (mode0
, op0
);
31630 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
31631 op1
= copy_to_mode_reg (mode1
, op1
);
31632 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
31633 op2
= copy_to_mode_reg (mode2
, op2
);
31634 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31640 case IX86_BUILTIN_LDMXCSR
:
31641 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
31642 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31643 emit_move_insn (target
, op0
);
31644 emit_insn (gen_sse_ldmxcsr (target
));
31647 case IX86_BUILTIN_STMXCSR
:
31648 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31649 emit_insn (gen_sse_stmxcsr (target
));
31650 return copy_to_mode_reg (SImode
, target
);
31652 case IX86_BUILTIN_CLFLUSH
:
31653 arg0
= CALL_EXPR_ARG (exp
, 0);
31654 op0
= expand_normal (arg0
);
31655 icode
= CODE_FOR_sse2_clflush
;
31656 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31657 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31659 emit_insn (gen_sse2_clflush (op0
));
31662 case IX86_BUILTIN_MONITOR
:
31663 arg0
= CALL_EXPR_ARG (exp
, 0);
31664 arg1
= CALL_EXPR_ARG (exp
, 1);
31665 arg2
= CALL_EXPR_ARG (exp
, 2);
31666 op0
= expand_normal (arg0
);
31667 op1
= expand_normal (arg1
);
31668 op2
= expand_normal (arg2
);
31670 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31672 op1
= copy_to_mode_reg (SImode
, op1
);
31674 op2
= copy_to_mode_reg (SImode
, op2
);
31675 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
31678 case IX86_BUILTIN_MWAIT
:
31679 arg0
= CALL_EXPR_ARG (exp
, 0);
31680 arg1
= CALL_EXPR_ARG (exp
, 1);
31681 op0
= expand_normal (arg0
);
31682 op1
= expand_normal (arg1
);
31684 op0
= copy_to_mode_reg (SImode
, op0
);
31686 op1
= copy_to_mode_reg (SImode
, op1
);
31687 emit_insn (gen_sse3_mwait (op0
, op1
));
31690 case IX86_BUILTIN_VEC_INIT_V2SI
:
31691 case IX86_BUILTIN_VEC_INIT_V4HI
:
31692 case IX86_BUILTIN_VEC_INIT_V8QI
:
31693 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
31695 case IX86_BUILTIN_VEC_EXT_V2DF
:
31696 case IX86_BUILTIN_VEC_EXT_V2DI
:
31697 case IX86_BUILTIN_VEC_EXT_V4SF
:
31698 case IX86_BUILTIN_VEC_EXT_V4SI
:
31699 case IX86_BUILTIN_VEC_EXT_V8HI
:
31700 case IX86_BUILTIN_VEC_EXT_V2SI
:
31701 case IX86_BUILTIN_VEC_EXT_V4HI
:
31702 case IX86_BUILTIN_VEC_EXT_V16QI
:
31703 return ix86_expand_vec_ext_builtin (exp
, target
);
31705 case IX86_BUILTIN_VEC_SET_V2DI
:
31706 case IX86_BUILTIN_VEC_SET_V4SF
:
31707 case IX86_BUILTIN_VEC_SET_V4SI
:
31708 case IX86_BUILTIN_VEC_SET_V8HI
:
31709 case IX86_BUILTIN_VEC_SET_V4HI
:
31710 case IX86_BUILTIN_VEC_SET_V16QI
:
31711 return ix86_expand_vec_set_builtin (exp
);
31713 case IX86_BUILTIN_INFQ
:
31714 case IX86_BUILTIN_HUGE_VALQ
:
31716 REAL_VALUE_TYPE inf
;
31720 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
31722 tmp
= validize_mem (force_const_mem (mode
, tmp
));
31725 target
= gen_reg_rtx (mode
);
31727 emit_move_insn (target
, tmp
);
31731 case IX86_BUILTIN_RDPMC
:
31732 case IX86_BUILTIN_RDTSC
:
31733 case IX86_BUILTIN_RDTSCP
:
31735 op0
= gen_reg_rtx (DImode
);
31736 op1
= gen_reg_rtx (DImode
);
31738 if (fcode
== IX86_BUILTIN_RDPMC
)
31740 arg0
= CALL_EXPR_ARG (exp
, 0);
31741 op2
= expand_normal (arg0
);
31742 if (!register_operand (op2
, SImode
))
31743 op2
= copy_to_mode_reg (SImode
, op2
);
31745 insn
= (TARGET_64BIT
31746 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
31747 : gen_rdpmc (op0
, op2
));
31750 else if (fcode
== IX86_BUILTIN_RDTSC
)
31752 insn
= (TARGET_64BIT
31753 ? gen_rdtsc_rex64 (op0
, op1
)
31754 : gen_rdtsc (op0
));
31759 op2
= gen_reg_rtx (SImode
);
31761 insn
= (TARGET_64BIT
31762 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
31763 : gen_rdtscp (op0
, op2
));
31766 arg0
= CALL_EXPR_ARG (exp
, 0);
31767 op4
= expand_normal (arg0
);
31768 if (!address_operand (op4
, VOIDmode
))
31770 op4
= convert_memory_address (Pmode
, op4
);
31771 op4
= copy_addr_to_reg (op4
);
31773 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
31777 target
= gen_reg_rtx (mode
);
31781 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
31782 op1
, 1, OPTAB_DIRECT
);
31783 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
31784 op0
, 1, OPTAB_DIRECT
);
31787 emit_move_insn (target
, op0
);
31790 case IX86_BUILTIN_FXSAVE
:
31791 case IX86_BUILTIN_FXRSTOR
:
31792 case IX86_BUILTIN_FXSAVE64
:
31793 case IX86_BUILTIN_FXRSTOR64
:
31796 case IX86_BUILTIN_FXSAVE
:
31797 icode
= CODE_FOR_fxsave
;
31799 case IX86_BUILTIN_FXRSTOR
:
31800 icode
= CODE_FOR_fxrstor
;
31802 case IX86_BUILTIN_FXSAVE64
:
31803 icode
= CODE_FOR_fxsave64
;
31805 case IX86_BUILTIN_FXRSTOR64
:
31806 icode
= CODE_FOR_fxrstor64
;
31809 gcc_unreachable ();
31812 arg0
= CALL_EXPR_ARG (exp
, 0);
31813 op0
= expand_normal (arg0
);
31815 if (!address_operand (op0
, VOIDmode
))
31817 op0
= convert_memory_address (Pmode
, op0
);
31818 op0
= copy_addr_to_reg (op0
);
31820 op0
= gen_rtx_MEM (BLKmode
, op0
);
31822 pat
= GEN_FCN (icode
) (op0
);
31827 case IX86_BUILTIN_XSAVE
:
31828 case IX86_BUILTIN_XRSTOR
:
31829 case IX86_BUILTIN_XSAVE64
:
31830 case IX86_BUILTIN_XRSTOR64
:
31831 case IX86_BUILTIN_XSAVEOPT
:
31832 case IX86_BUILTIN_XSAVEOPT64
:
31833 arg0
= CALL_EXPR_ARG (exp
, 0);
31834 arg1
= CALL_EXPR_ARG (exp
, 1);
31835 op0
= expand_normal (arg0
);
31836 op1
= expand_normal (arg1
);
31838 if (!address_operand (op0
, VOIDmode
))
31840 op0
= convert_memory_address (Pmode
, op0
);
31841 op0
= copy_addr_to_reg (op0
);
31843 op0
= gen_rtx_MEM (BLKmode
, op0
);
31845 op1
= force_reg (DImode
, op1
);
31849 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
31850 NULL
, 1, OPTAB_DIRECT
);
31853 case IX86_BUILTIN_XSAVE
:
31854 icode
= CODE_FOR_xsave_rex64
;
31856 case IX86_BUILTIN_XRSTOR
:
31857 icode
= CODE_FOR_xrstor_rex64
;
31859 case IX86_BUILTIN_XSAVE64
:
31860 icode
= CODE_FOR_xsave64
;
31862 case IX86_BUILTIN_XRSTOR64
:
31863 icode
= CODE_FOR_xrstor64
;
31865 case IX86_BUILTIN_XSAVEOPT
:
31866 icode
= CODE_FOR_xsaveopt_rex64
;
31868 case IX86_BUILTIN_XSAVEOPT64
:
31869 icode
= CODE_FOR_xsaveopt64
;
31872 gcc_unreachable ();
31875 op2
= gen_lowpart (SImode
, op2
);
31876 op1
= gen_lowpart (SImode
, op1
);
31877 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31883 case IX86_BUILTIN_XSAVE
:
31884 icode
= CODE_FOR_xsave
;
31886 case IX86_BUILTIN_XRSTOR
:
31887 icode
= CODE_FOR_xrstor
;
31889 case IX86_BUILTIN_XSAVEOPT
:
31890 icode
= CODE_FOR_xsaveopt
;
31893 gcc_unreachable ();
31895 pat
= GEN_FCN (icode
) (op0
, op1
);
31902 case IX86_BUILTIN_LLWPCB
:
31903 arg0
= CALL_EXPR_ARG (exp
, 0);
31904 op0
= expand_normal (arg0
);
31905 icode
= CODE_FOR_lwp_llwpcb
;
31906 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31907 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31908 emit_insn (gen_lwp_llwpcb (op0
));
31911 case IX86_BUILTIN_SLWPCB
:
31912 icode
= CODE_FOR_lwp_slwpcb
;
31914 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
31915 target
= gen_reg_rtx (Pmode
);
31916 emit_insn (gen_lwp_slwpcb (target
));
31919 case IX86_BUILTIN_BEXTRI32
:
31920 case IX86_BUILTIN_BEXTRI64
:
31921 arg0
= CALL_EXPR_ARG (exp
, 0);
31922 arg1
= CALL_EXPR_ARG (exp
, 1);
31923 op0
= expand_normal (arg0
);
31924 op1
= expand_normal (arg1
);
31925 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
31926 ? CODE_FOR_tbm_bextri_si
31927 : CODE_FOR_tbm_bextri_di
);
31928 if (!CONST_INT_P (op1
))
31930 error ("last argument must be an immediate");
31935 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
31936 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
31937 op1
= GEN_INT (length
);
31938 op2
= GEN_INT (lsb_index
);
31939 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
31945 case IX86_BUILTIN_RDRAND16_STEP
:
31946 icode
= CODE_FOR_rdrandhi_1
;
31950 case IX86_BUILTIN_RDRAND32_STEP
:
31951 icode
= CODE_FOR_rdrandsi_1
;
31955 case IX86_BUILTIN_RDRAND64_STEP
:
31956 icode
= CODE_FOR_rdranddi_1
;
31960 op0
= gen_reg_rtx (mode0
);
31961 emit_insn (GEN_FCN (icode
) (op0
));
31963 arg0
= CALL_EXPR_ARG (exp
, 0);
31964 op1
= expand_normal (arg0
);
31965 if (!address_operand (op1
, VOIDmode
))
31967 op1
= convert_memory_address (Pmode
, op1
);
31968 op1
= copy_addr_to_reg (op1
);
31970 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31972 op1
= gen_reg_rtx (SImode
);
31973 emit_move_insn (op1
, CONST1_RTX (SImode
));
31975 /* Emit SImode conditional move. */
31976 if (mode0
== HImode
)
31978 op2
= gen_reg_rtx (SImode
);
31979 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
31981 else if (mode0
== SImode
)
31984 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
31987 target
= gen_reg_rtx (SImode
);
31989 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
31991 emit_insn (gen_rtx_SET (VOIDmode
, target
,
31992 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
31995 case IX86_BUILTIN_RDSEED16_STEP
:
31996 icode
= CODE_FOR_rdseedhi_1
;
32000 case IX86_BUILTIN_RDSEED32_STEP
:
32001 icode
= CODE_FOR_rdseedsi_1
;
32005 case IX86_BUILTIN_RDSEED64_STEP
:
32006 icode
= CODE_FOR_rdseeddi_1
;
32010 op0
= gen_reg_rtx (mode0
);
32011 emit_insn (GEN_FCN (icode
) (op0
));
32013 arg0
= CALL_EXPR_ARG (exp
, 0);
32014 op1
= expand_normal (arg0
);
32015 if (!address_operand (op1
, VOIDmode
))
32017 op1
= convert_memory_address (Pmode
, op1
);
32018 op1
= copy_addr_to_reg (op1
);
32020 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32022 op2
= gen_reg_rtx (QImode
);
32024 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32026 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32029 target
= gen_reg_rtx (SImode
);
32031 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32034 case IX86_BUILTIN_ADDCARRYX32
:
32035 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32039 case IX86_BUILTIN_ADDCARRYX64
:
32040 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32044 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32045 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32046 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32047 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32049 op0
= gen_reg_rtx (QImode
);
32051 /* Generate CF from input operand. */
32052 op1
= expand_normal (arg0
);
32053 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32054 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32056 /* Gen ADCX instruction to compute X+Y+CF. */
32057 op2
= expand_normal (arg1
);
32058 op3
= expand_normal (arg2
);
32061 op2
= copy_to_mode_reg (mode0
, op2
);
32063 op3
= copy_to_mode_reg (mode0
, op3
);
32065 op0
= gen_reg_rtx (mode0
);
32067 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32068 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32069 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32071 /* Store the result. */
32072 op4
= expand_normal (arg3
);
32073 if (!address_operand (op4
, VOIDmode
))
32075 op4
= convert_memory_address (Pmode
, op4
);
32076 op4
= copy_addr_to_reg (op4
);
32078 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32080 /* Return current CF value. */
32082 target
= gen_reg_rtx (QImode
);
32084 PUT_MODE (pat
, QImode
);
32085 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32088 case IX86_BUILTIN_GATHERSIV2DF
:
32089 icode
= CODE_FOR_avx2_gathersiv2df
;
32091 case IX86_BUILTIN_GATHERSIV4DF
:
32092 icode
= CODE_FOR_avx2_gathersiv4df
;
32094 case IX86_BUILTIN_GATHERDIV2DF
:
32095 icode
= CODE_FOR_avx2_gatherdiv2df
;
32097 case IX86_BUILTIN_GATHERDIV4DF
:
32098 icode
= CODE_FOR_avx2_gatherdiv4df
;
32100 case IX86_BUILTIN_GATHERSIV4SF
:
32101 icode
= CODE_FOR_avx2_gathersiv4sf
;
32103 case IX86_BUILTIN_GATHERSIV8SF
:
32104 icode
= CODE_FOR_avx2_gathersiv8sf
;
32106 case IX86_BUILTIN_GATHERDIV4SF
:
32107 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32109 case IX86_BUILTIN_GATHERDIV8SF
:
32110 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32112 case IX86_BUILTIN_GATHERSIV2DI
:
32113 icode
= CODE_FOR_avx2_gathersiv2di
;
32115 case IX86_BUILTIN_GATHERSIV4DI
:
32116 icode
= CODE_FOR_avx2_gathersiv4di
;
32118 case IX86_BUILTIN_GATHERDIV2DI
:
32119 icode
= CODE_FOR_avx2_gatherdiv2di
;
32121 case IX86_BUILTIN_GATHERDIV4DI
:
32122 icode
= CODE_FOR_avx2_gatherdiv4di
;
32124 case IX86_BUILTIN_GATHERSIV4SI
:
32125 icode
= CODE_FOR_avx2_gathersiv4si
;
32127 case IX86_BUILTIN_GATHERSIV8SI
:
32128 icode
= CODE_FOR_avx2_gathersiv8si
;
32130 case IX86_BUILTIN_GATHERDIV4SI
:
32131 icode
= CODE_FOR_avx2_gatherdiv4si
;
32133 case IX86_BUILTIN_GATHERDIV8SI
:
32134 icode
= CODE_FOR_avx2_gatherdiv8si
;
32136 case IX86_BUILTIN_GATHERALTSIV4DF
:
32137 icode
= CODE_FOR_avx2_gathersiv4df
;
32139 case IX86_BUILTIN_GATHERALTDIV8SF
:
32140 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32142 case IX86_BUILTIN_GATHERALTSIV4DI
:
32143 icode
= CODE_FOR_avx2_gathersiv4di
;
32145 case IX86_BUILTIN_GATHERALTDIV8SI
:
32146 icode
= CODE_FOR_avx2_gatherdiv8si
;
32150 arg0
= CALL_EXPR_ARG (exp
, 0);
32151 arg1
= CALL_EXPR_ARG (exp
, 1);
32152 arg2
= CALL_EXPR_ARG (exp
, 2);
32153 arg3
= CALL_EXPR_ARG (exp
, 3);
32154 arg4
= CALL_EXPR_ARG (exp
, 4);
32155 op0
= expand_normal (arg0
);
32156 op1
= expand_normal (arg1
);
32157 op2
= expand_normal (arg2
);
32158 op3
= expand_normal (arg3
);
32159 op4
= expand_normal (arg4
);
32160 /* Note the arg order is different from the operand order. */
32161 mode0
= insn_data
[icode
].operand
[1].mode
;
32162 mode2
= insn_data
[icode
].operand
[3].mode
;
32163 mode3
= insn_data
[icode
].operand
[4].mode
;
32164 mode4
= insn_data
[icode
].operand
[5].mode
;
32166 if (target
== NULL_RTX
32167 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32168 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32170 subtarget
= target
;
32172 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32173 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32175 rtx half
= gen_reg_rtx (V4SImode
);
32176 if (!nonimmediate_operand (op2
, V8SImode
))
32177 op2
= copy_to_mode_reg (V8SImode
, op2
);
32178 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32181 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32182 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32184 rtx (*gen
) (rtx
, rtx
);
32185 rtx half
= gen_reg_rtx (mode0
);
32186 if (mode0
== V4SFmode
)
32187 gen
= gen_vec_extract_lo_v8sf
;
32189 gen
= gen_vec_extract_lo_v8si
;
32190 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32191 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32192 emit_insn (gen (half
, op0
));
32194 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32195 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32196 emit_insn (gen (half
, op3
));
32200 /* Force memory operand only with base register here. But we
32201 don't want to do it on memory operand for other builtin
32203 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32205 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32206 op0
= copy_to_mode_reg (mode0
, op0
);
32207 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32208 op1
= copy_to_mode_reg (Pmode
, op1
);
32209 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32210 op2
= copy_to_mode_reg (mode2
, op2
);
32211 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32212 op3
= copy_to_mode_reg (mode3
, op3
);
32213 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32215 error ("last argument must be scale 1, 2, 4, 8");
32219 /* Optimize. If mask is known to have all high bits set,
32220 replace op0 with pc_rtx to signal that the instruction
32221 overwrites the whole destination and doesn't use its
32222 previous contents. */
32225 if (TREE_CODE (arg3
) == VECTOR_CST
)
32227 unsigned int negative
= 0;
32228 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32230 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32231 if (TREE_CODE (cst
) == INTEGER_CST
32232 && tree_int_cst_sign_bit (cst
))
32234 else if (TREE_CODE (cst
) == REAL_CST
32235 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32238 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32241 else if (TREE_CODE (arg3
) == SSA_NAME
)
32243 /* Recognize also when mask is like:
32244 __v2df src = _mm_setzero_pd ();
32245 __v2df mask = _mm_cmpeq_pd (src, src);
32247 __v8sf src = _mm256_setzero_ps ();
32248 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32249 as that is a cheaper way to load all ones into
32250 a register than having to load a constant from
32252 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32253 if (is_gimple_call (def_stmt
))
32255 tree fndecl
= gimple_call_fndecl (def_stmt
);
32257 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32258 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32260 case IX86_BUILTIN_CMPPD
:
32261 case IX86_BUILTIN_CMPPS
:
32262 case IX86_BUILTIN_CMPPD256
:
32263 case IX86_BUILTIN_CMPPS256
:
32264 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32267 case IX86_BUILTIN_CMPEQPD
:
32268 case IX86_BUILTIN_CMPEQPS
:
32269 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32270 && initializer_zerop (gimple_call_arg (def_stmt
,
32281 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32286 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32287 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32289 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32290 ? V4SFmode
: V4SImode
;
32291 if (target
== NULL_RTX
)
32292 target
= gen_reg_rtx (tmode
);
32293 if (tmode
== V4SFmode
)
32294 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32296 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32299 target
= subtarget
;
32303 case IX86_BUILTIN_XABORT
:
32304 icode
= CODE_FOR_xabort
;
32305 arg0
= CALL_EXPR_ARG (exp
, 0);
32306 op0
= expand_normal (arg0
);
32307 mode0
= insn_data
[icode
].operand
[0].mode
;
32308 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32310 error ("the xabort's argument must be an 8-bit immediate");
32313 emit_insn (gen_xabort (op0
));
32320 for (i
= 0, d
= bdesc_special_args
;
32321 i
< ARRAY_SIZE (bdesc_special_args
);
32323 if (d
->code
== fcode
)
32324 return ix86_expand_special_args_builtin (d
, exp
, target
);
32326 for (i
= 0, d
= bdesc_args
;
32327 i
< ARRAY_SIZE (bdesc_args
);
32329 if (d
->code
== fcode
)
32332 case IX86_BUILTIN_FABSQ
:
32333 case IX86_BUILTIN_COPYSIGNQ
:
32335 /* Emit a normal call if SSE isn't available. */
32336 return expand_call (exp
, target
, ignore
);
32338 return ix86_expand_args_builtin (d
, exp
, target
);
32341 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32342 if (d
->code
== fcode
)
32343 return ix86_expand_sse_comi (d
, exp
, target
);
32345 for (i
= 0, d
= bdesc_pcmpestr
;
32346 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32348 if (d
->code
== fcode
)
32349 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32351 for (i
= 0, d
= bdesc_pcmpistr
;
32352 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32354 if (d
->code
== fcode
)
32355 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32357 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32358 if (d
->code
== fcode
)
32359 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32360 (enum ix86_builtin_func_type
)
32361 d
->flag
, d
->comparison
);
32363 gcc_unreachable ();
32366 /* Returns a function decl for a vectorized version of the builtin function
32367 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32368 if it is not available. */
32371 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32374 enum machine_mode in_mode
, out_mode
;
32376 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32378 if (TREE_CODE (type_out
) != VECTOR_TYPE
32379 || TREE_CODE (type_in
) != VECTOR_TYPE
32380 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32383 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32384 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32385 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32386 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32390 case BUILT_IN_SQRT
:
32391 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32393 if (out_n
== 2 && in_n
== 2)
32394 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32395 else if (out_n
== 4 && in_n
== 4)
32396 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32400 case BUILT_IN_SQRTF
:
32401 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32403 if (out_n
== 4 && in_n
== 4)
32404 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32405 else if (out_n
== 8 && in_n
== 8)
32406 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32410 case BUILT_IN_IFLOOR
:
32411 case BUILT_IN_LFLOOR
:
32412 case BUILT_IN_LLFLOOR
:
32413 /* The round insn does not trap on denormals. */
32414 if (flag_trapping_math
|| !TARGET_ROUND
)
32417 if (out_mode
== SImode
&& in_mode
== DFmode
)
32419 if (out_n
== 4 && in_n
== 2)
32420 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32421 else if (out_n
== 8 && in_n
== 4)
32422 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32426 case BUILT_IN_IFLOORF
:
32427 case BUILT_IN_LFLOORF
:
32428 case BUILT_IN_LLFLOORF
:
32429 /* The round insn does not trap on denormals. */
32430 if (flag_trapping_math
|| !TARGET_ROUND
)
32433 if (out_mode
== SImode
&& in_mode
== SFmode
)
32435 if (out_n
== 4 && in_n
== 4)
32436 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32437 else if (out_n
== 8 && in_n
== 8)
32438 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32442 case BUILT_IN_ICEIL
:
32443 case BUILT_IN_LCEIL
:
32444 case BUILT_IN_LLCEIL
:
32445 /* The round insn does not trap on denormals. */
32446 if (flag_trapping_math
|| !TARGET_ROUND
)
32449 if (out_mode
== SImode
&& in_mode
== DFmode
)
32451 if (out_n
== 4 && in_n
== 2)
32452 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32453 else if (out_n
== 8 && in_n
== 4)
32454 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32458 case BUILT_IN_ICEILF
:
32459 case BUILT_IN_LCEILF
:
32460 case BUILT_IN_LLCEILF
:
32461 /* The round insn does not trap on denormals. */
32462 if (flag_trapping_math
|| !TARGET_ROUND
)
32465 if (out_mode
== SImode
&& in_mode
== SFmode
)
32467 if (out_n
== 4 && in_n
== 4)
32468 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32469 else if (out_n
== 8 && in_n
== 8)
32470 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
32474 case BUILT_IN_IRINT
:
32475 case BUILT_IN_LRINT
:
32476 case BUILT_IN_LLRINT
:
32477 if (out_mode
== SImode
&& in_mode
== DFmode
)
32479 if (out_n
== 4 && in_n
== 2)
32480 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
32481 else if (out_n
== 8 && in_n
== 4)
32482 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
32486 case BUILT_IN_IRINTF
:
32487 case BUILT_IN_LRINTF
:
32488 case BUILT_IN_LLRINTF
:
32489 if (out_mode
== SImode
&& in_mode
== SFmode
)
32491 if (out_n
== 4 && in_n
== 4)
32492 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
32493 else if (out_n
== 8 && in_n
== 8)
32494 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
32498 case BUILT_IN_IROUND
:
32499 case BUILT_IN_LROUND
:
32500 case BUILT_IN_LLROUND
:
32501 /* The round insn does not trap on denormals. */
32502 if (flag_trapping_math
|| !TARGET_ROUND
)
32505 if (out_mode
== SImode
&& in_mode
== DFmode
)
32507 if (out_n
== 4 && in_n
== 2)
32508 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
32509 else if (out_n
== 8 && in_n
== 4)
32510 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
32514 case BUILT_IN_IROUNDF
:
32515 case BUILT_IN_LROUNDF
:
32516 case BUILT_IN_LLROUNDF
:
32517 /* The round insn does not trap on denormals. */
32518 if (flag_trapping_math
|| !TARGET_ROUND
)
32521 if (out_mode
== SImode
&& in_mode
== SFmode
)
32523 if (out_n
== 4 && in_n
== 4)
32524 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
32525 else if (out_n
== 8 && in_n
== 8)
32526 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
32530 case BUILT_IN_COPYSIGN
:
32531 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32533 if (out_n
== 2 && in_n
== 2)
32534 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
32535 else if (out_n
== 4 && in_n
== 4)
32536 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
32540 case BUILT_IN_COPYSIGNF
:
32541 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32543 if (out_n
== 4 && in_n
== 4)
32544 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
32545 else if (out_n
== 8 && in_n
== 8)
32546 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
32550 case BUILT_IN_FLOOR
:
32551 /* The round insn does not trap on denormals. */
32552 if (flag_trapping_math
|| !TARGET_ROUND
)
32555 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32557 if (out_n
== 2 && in_n
== 2)
32558 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
32559 else if (out_n
== 4 && in_n
== 4)
32560 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
32564 case BUILT_IN_FLOORF
:
32565 /* The round insn does not trap on denormals. */
32566 if (flag_trapping_math
|| !TARGET_ROUND
)
32569 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32571 if (out_n
== 4 && in_n
== 4)
32572 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
32573 else if (out_n
== 8 && in_n
== 8)
32574 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
32578 case BUILT_IN_CEIL
:
32579 /* The round insn does not trap on denormals. */
32580 if (flag_trapping_math
|| !TARGET_ROUND
)
32583 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32585 if (out_n
== 2 && in_n
== 2)
32586 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
32587 else if (out_n
== 4 && in_n
== 4)
32588 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
32592 case BUILT_IN_CEILF
:
32593 /* The round insn does not trap on denormals. */
32594 if (flag_trapping_math
|| !TARGET_ROUND
)
32597 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32599 if (out_n
== 4 && in_n
== 4)
32600 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
32601 else if (out_n
== 8 && in_n
== 8)
32602 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
32606 case BUILT_IN_TRUNC
:
32607 /* The round insn does not trap on denormals. */
32608 if (flag_trapping_math
|| !TARGET_ROUND
)
32611 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32613 if (out_n
== 2 && in_n
== 2)
32614 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
32615 else if (out_n
== 4 && in_n
== 4)
32616 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
32620 case BUILT_IN_TRUNCF
:
32621 /* The round insn does not trap on denormals. */
32622 if (flag_trapping_math
|| !TARGET_ROUND
)
32625 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32627 if (out_n
== 4 && in_n
== 4)
32628 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
32629 else if (out_n
== 8 && in_n
== 8)
32630 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
32634 case BUILT_IN_RINT
:
32635 /* The round insn does not trap on denormals. */
32636 if (flag_trapping_math
|| !TARGET_ROUND
)
32639 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32641 if (out_n
== 2 && in_n
== 2)
32642 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
32643 else if (out_n
== 4 && in_n
== 4)
32644 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
32648 case BUILT_IN_RINTF
:
32649 /* The round insn does not trap on denormals. */
32650 if (flag_trapping_math
|| !TARGET_ROUND
)
32653 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32655 if (out_n
== 4 && in_n
== 4)
32656 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
32657 else if (out_n
== 8 && in_n
== 8)
32658 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
32662 case BUILT_IN_ROUND
:
32663 /* The round insn does not trap on denormals. */
32664 if (flag_trapping_math
|| !TARGET_ROUND
)
32667 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32669 if (out_n
== 2 && in_n
== 2)
32670 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
32671 else if (out_n
== 4 && in_n
== 4)
32672 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
32676 case BUILT_IN_ROUNDF
:
32677 /* The round insn does not trap on denormals. */
32678 if (flag_trapping_math
|| !TARGET_ROUND
)
32681 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32683 if (out_n
== 4 && in_n
== 4)
32684 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
32685 else if (out_n
== 8 && in_n
== 8)
32686 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
32691 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32693 if (out_n
== 2 && in_n
== 2)
32694 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
32695 if (out_n
== 4 && in_n
== 4)
32696 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
32700 case BUILT_IN_FMAF
:
32701 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32703 if (out_n
== 4 && in_n
== 4)
32704 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
32705 if (out_n
== 8 && in_n
== 8)
32706 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
32714 /* Dispatch to a handler for a vectorization library. */
32715 if (ix86_veclib_handler
)
32716 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
32722 /* Handler for an SVML-style interface to
32723 a library with vectorized intrinsics. */
32726 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
32729 tree fntype
, new_fndecl
, args
;
32732 enum machine_mode el_mode
, in_mode
;
32735 /* The SVML is suitable for unsafe math only. */
32736 if (!flag_unsafe_math_optimizations
)
32739 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32740 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32741 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32742 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32743 if (el_mode
!= in_mode
32751 case BUILT_IN_LOG10
:
32753 case BUILT_IN_TANH
:
32755 case BUILT_IN_ATAN
:
32756 case BUILT_IN_ATAN2
:
32757 case BUILT_IN_ATANH
:
32758 case BUILT_IN_CBRT
:
32759 case BUILT_IN_SINH
:
32761 case BUILT_IN_ASINH
:
32762 case BUILT_IN_ASIN
:
32763 case BUILT_IN_COSH
:
32765 case BUILT_IN_ACOSH
:
32766 case BUILT_IN_ACOS
:
32767 if (el_mode
!= DFmode
|| n
!= 2)
32771 case BUILT_IN_EXPF
:
32772 case BUILT_IN_LOGF
:
32773 case BUILT_IN_LOG10F
:
32774 case BUILT_IN_POWF
:
32775 case BUILT_IN_TANHF
:
32776 case BUILT_IN_TANF
:
32777 case BUILT_IN_ATANF
:
32778 case BUILT_IN_ATAN2F
:
32779 case BUILT_IN_ATANHF
:
32780 case BUILT_IN_CBRTF
:
32781 case BUILT_IN_SINHF
:
32782 case BUILT_IN_SINF
:
32783 case BUILT_IN_ASINHF
:
32784 case BUILT_IN_ASINF
:
32785 case BUILT_IN_COSHF
:
32786 case BUILT_IN_COSF
:
32787 case BUILT_IN_ACOSHF
:
32788 case BUILT_IN_ACOSF
:
32789 if (el_mode
!= SFmode
|| n
!= 4)
32797 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32799 if (fn
== BUILT_IN_LOGF
)
32800 strcpy (name
, "vmlsLn4");
32801 else if (fn
== BUILT_IN_LOG
)
32802 strcpy (name
, "vmldLn2");
32805 sprintf (name
, "vmls%s", bname
+10);
32806 name
[strlen (name
)-1] = '4';
32809 sprintf (name
, "vmld%s2", bname
+10);
32811 /* Convert to uppercase. */
32815 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32817 args
= TREE_CHAIN (args
))
32821 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32823 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32825 /* Build a function declaration for the vectorized function. */
32826 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32827 FUNCTION_DECL
, get_identifier (name
), fntype
);
32828 TREE_PUBLIC (new_fndecl
) = 1;
32829 DECL_EXTERNAL (new_fndecl
) = 1;
32830 DECL_IS_NOVOPS (new_fndecl
) = 1;
32831 TREE_READONLY (new_fndecl
) = 1;
32836 /* Handler for an ACML-style interface to
32837 a library with vectorized intrinsics. */
32840 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
32842 char name
[20] = "__vr.._";
32843 tree fntype
, new_fndecl
, args
;
32846 enum machine_mode el_mode
, in_mode
;
32849 /* The ACML is 64bits only and suitable for unsafe math only as
32850 it does not correctly support parts of IEEE with the required
32851 precision such as denormals. */
32853 || !flag_unsafe_math_optimizations
)
32856 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32857 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32858 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32859 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32860 if (el_mode
!= in_mode
32870 case BUILT_IN_LOG2
:
32871 case BUILT_IN_LOG10
:
32874 if (el_mode
!= DFmode
32879 case BUILT_IN_SINF
:
32880 case BUILT_IN_COSF
:
32881 case BUILT_IN_EXPF
:
32882 case BUILT_IN_POWF
:
32883 case BUILT_IN_LOGF
:
32884 case BUILT_IN_LOG2F
:
32885 case BUILT_IN_LOG10F
:
32888 if (el_mode
!= SFmode
32897 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32898 sprintf (name
+ 7, "%s", bname
+10);
32901 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32903 args
= TREE_CHAIN (args
))
32907 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32909 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32911 /* Build a function declaration for the vectorized function. */
32912 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32913 FUNCTION_DECL
, get_identifier (name
), fntype
);
32914 TREE_PUBLIC (new_fndecl
) = 1;
32915 DECL_EXTERNAL (new_fndecl
) = 1;
32916 DECL_IS_NOVOPS (new_fndecl
) = 1;
32917 TREE_READONLY (new_fndecl
) = 1;
32922 /* Returns a decl of a function that implements gather load with
32923 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
32924 Return NULL_TREE if it is not available. */
32927 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
32928 const_tree index_type
, int scale
)
32931 enum ix86_builtins code
;
32936 if ((TREE_CODE (index_type
) != INTEGER_TYPE
32937 && !POINTER_TYPE_P (index_type
))
32938 || (TYPE_MODE (index_type
) != SImode
32939 && TYPE_MODE (index_type
) != DImode
))
32942 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
32945 /* v*gather* insn sign extends index to pointer mode. */
32946 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
32947 && TYPE_UNSIGNED (index_type
))
32952 || (scale
& (scale
- 1)) != 0)
32955 si
= TYPE_MODE (index_type
) == SImode
;
32956 switch (TYPE_MODE (mem_vectype
))
32959 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
32962 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
32965 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
32968 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
32971 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
32974 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
32977 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
32980 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
32986 return ix86_builtins
[code
];
32989 /* Returns a code for a target-specific builtin that implements
32990 reciprocal of the function, or NULL_TREE if not available. */
32993 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
32994 bool sqrt ATTRIBUTE_UNUSED
)
32996 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
32997 && flag_finite_math_only
&& !flag_trapping_math
32998 && flag_unsafe_math_optimizations
))
33002 /* Machine dependent builtins. */
33005 /* Vectorized version of sqrt to rsqrt conversion. */
33006 case IX86_BUILTIN_SQRTPS_NR
:
33007 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33009 case IX86_BUILTIN_SQRTPS_NR256
:
33010 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33016 /* Normal builtins. */
33019 /* Sqrt to rsqrt conversion. */
33020 case BUILT_IN_SQRTF
:
33021 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33028 /* Helper for avx_vpermilps256_operand et al. This is also used by
33029 the expansion functions to turn the parallel back into a mask.
33030 The return value is 0 for no match and the imm8+1 for a match. */
33033 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33035 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33037 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33039 if (XVECLEN (par
, 0) != (int) nelt
)
33042 /* Validate that all of the elements are constants, and not totally
33043 out of range. Copy the data into an integral array to make the
33044 subsequent checks easier. */
33045 for (i
= 0; i
< nelt
; ++i
)
33047 rtx er
= XVECEXP (par
, 0, i
);
33048 unsigned HOST_WIDE_INT ei
;
33050 if (!CONST_INT_P (er
))
33061 /* In the 256-bit DFmode case, we can only move elements within
33063 for (i
= 0; i
< 2; ++i
)
33067 mask
|= ipar
[i
] << i
;
33069 for (i
= 2; i
< 4; ++i
)
33073 mask
|= (ipar
[i
] - 2) << i
;
33078 /* In the 256-bit SFmode case, we have full freedom of movement
33079 within the low 128-bit lane, but the high 128-bit lane must
33080 mirror the exact same pattern. */
33081 for (i
= 0; i
< 4; ++i
)
33082 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33089 /* In the 128-bit case, we've full freedom in the placement of
33090 the elements from the source operand. */
33091 for (i
= 0; i
< nelt
; ++i
)
33092 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33096 gcc_unreachable ();
33099 /* Make sure success has a non-zero value by adding one. */
33103 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33104 the expansion functions to turn the parallel back into a mask.
33105 The return value is 0 for no match and the imm8+1 for a match. */
33108 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33110 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33112 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33114 if (XVECLEN (par
, 0) != (int) nelt
)
33117 /* Validate that all of the elements are constants, and not totally
33118 out of range. Copy the data into an integral array to make the
33119 subsequent checks easier. */
33120 for (i
= 0; i
< nelt
; ++i
)
33122 rtx er
= XVECEXP (par
, 0, i
);
33123 unsigned HOST_WIDE_INT ei
;
33125 if (!CONST_INT_P (er
))
33128 if (ei
>= 2 * nelt
)
33133 /* Validate that the halves of the permute are halves. */
33134 for (i
= 0; i
< nelt2
- 1; ++i
)
33135 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33137 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33138 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33141 /* Reconstruct the mask. */
33142 for (i
= 0; i
< 2; ++i
)
33144 unsigned e
= ipar
[i
* nelt2
];
33148 mask
|= e
<< (i
* 4);
33151 /* Make sure success has a non-zero value by adding one. */
33155 /* Store OPERAND to the memory after reload is completed. This means
33156 that we can't easily use assign_stack_local. */
33158 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33162 gcc_assert (reload_completed
);
33163 if (ix86_using_red_zone ())
33165 result
= gen_rtx_MEM (mode
,
33166 gen_rtx_PLUS (Pmode
,
33168 GEN_INT (-RED_ZONE_SIZE
)));
33169 emit_move_insn (result
, operand
);
33171 else if (TARGET_64BIT
)
33177 operand
= gen_lowpart (DImode
, operand
);
33181 gen_rtx_SET (VOIDmode
,
33182 gen_rtx_MEM (DImode
,
33183 gen_rtx_PRE_DEC (DImode
,
33184 stack_pointer_rtx
)),
33188 gcc_unreachable ();
33190 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33199 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33201 gen_rtx_SET (VOIDmode
,
33202 gen_rtx_MEM (SImode
,
33203 gen_rtx_PRE_DEC (Pmode
,
33204 stack_pointer_rtx
)),
33207 gen_rtx_SET (VOIDmode
,
33208 gen_rtx_MEM (SImode
,
33209 gen_rtx_PRE_DEC (Pmode
,
33210 stack_pointer_rtx
)),
33215 /* Store HImodes as SImodes. */
33216 operand
= gen_lowpart (SImode
, operand
);
33220 gen_rtx_SET (VOIDmode
,
33221 gen_rtx_MEM (GET_MODE (operand
),
33222 gen_rtx_PRE_DEC (SImode
,
33223 stack_pointer_rtx
)),
33227 gcc_unreachable ();
33229 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33234 /* Free operand from the memory. */
33236 ix86_free_from_memory (enum machine_mode mode
)
33238 if (!ix86_using_red_zone ())
33242 if (mode
== DImode
|| TARGET_64BIT
)
33246 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33247 to pop or add instruction if registers are available. */
33248 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33249 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33254 /* Return a register priority for hard reg REGNO. */
33256 ix86_register_priority (int hard_regno
)
33258 /* ebp and r13 as the base always wants a displacement, r12 as the
33259 base always wants an index. So discourage their usage in an
33261 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33263 if (hard_regno
== BP_REG
)
33265 /* New x86-64 int registers result in bigger code size. Discourage
33267 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33269 /* New x86-64 SSE registers result in bigger code size. Discourage
33271 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33273 /* Usage of AX register results in smaller code. Prefer it. */
33274 if (hard_regno
== 0)
33279 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33281 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33282 QImode must go into class Q_REGS.
33283 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33284 movdf to do mem-to-mem moves through integer regs. */
33287 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33289 enum machine_mode mode
= GET_MODE (x
);
33291 /* We're only allowed to return a subclass of CLASS. Many of the
33292 following checks fail for NO_REGS, so eliminate that early. */
33293 if (regclass
== NO_REGS
)
33296 /* All classes can load zeros. */
33297 if (x
== CONST0_RTX (mode
))
33300 /* Force constants into memory if we are loading a (nonzero) constant into
33301 an MMX or SSE register. This is because there are no MMX/SSE instructions
33302 to load from a constant. */
33304 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
33307 /* Prefer SSE regs only, if we can use them for math. */
33308 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33309 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33311 /* Floating-point constants need more complex checks. */
33312 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33314 /* General regs can load everything. */
33315 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33318 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33319 zero above. We only want to wind up preferring 80387 registers if
33320 we plan on doing computation with them. */
33322 && standard_80387_constant_p (x
) > 0)
33324 /* Limit class to non-sse. */
33325 if (regclass
== FLOAT_SSE_REGS
)
33327 if (regclass
== FP_TOP_SSE_REGS
)
33329 if (regclass
== FP_SECOND_SSE_REGS
)
33330 return FP_SECOND_REG
;
33331 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33338 /* Generally when we see PLUS here, it's the function invariant
33339 (plus soft-fp const_int). Which can only be computed into general
33341 if (GET_CODE (x
) == PLUS
)
33342 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33344 /* QImode constants are easy to load, but non-constant QImode data
33345 must go into Q_REGS. */
33346 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33348 if (reg_class_subset_p (regclass
, Q_REGS
))
33350 if (reg_class_subset_p (Q_REGS
, regclass
))
33358 /* Discourage putting floating-point values in SSE registers unless
33359 SSE math is being used, and likewise for the 387 registers. */
33361 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33363 enum machine_mode mode
= GET_MODE (x
);
33365 /* Restrict the output reload class to the register bank that we are doing
33366 math on. If we would like not to return a subset of CLASS, reject this
33367 alternative: if reload cannot do this, it will still use its choice. */
33368 mode
= GET_MODE (x
);
33369 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33370 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
33372 if (X87_FLOAT_MODE_P (mode
))
33374 if (regclass
== FP_TOP_SSE_REGS
)
33376 else if (regclass
== FP_SECOND_SSE_REGS
)
33377 return FP_SECOND_REG
;
33379 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33386 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33387 enum machine_mode mode
, secondary_reload_info
*sri
)
33389 /* Double-word spills from general registers to non-offsettable memory
33390 references (zero-extended addresses) require special handling. */
33393 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33394 && rclass
== GENERAL_REGS
33395 && !offsettable_memref_p (x
))
33398 ? CODE_FOR_reload_noff_load
33399 : CODE_FOR_reload_noff_store
);
33400 /* Add the cost of moving address to a temporary. */
33401 sri
->extra_cost
= 1;
33406 /* QImode spills from non-QI registers require
33407 intermediate register on 32bit targets. */
33409 && !in_p
&& mode
== QImode
33410 && (rclass
== GENERAL_REGS
33411 || rclass
== LEGACY_REGS
33412 || rclass
== NON_Q_REGS
33415 || rclass
== INDEX_REGS
))
33424 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33425 regno
= true_regnum (x
);
33427 /* Return Q_REGS if the operand is in memory. */
33432 /* This condition handles corner case where an expression involving
33433 pointers gets vectorized. We're trying to use the address of a
33434 stack slot as a vector initializer.
33436 (set (reg:V2DI 74 [ vect_cst_.2 ])
33437 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33439 Eventually frame gets turned into sp+offset like this:
33441 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33442 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33443 (const_int 392 [0x188]))))
33445 That later gets turned into:
33447 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33448 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33449 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33451 We'll have the following reload recorded:
33453 Reload 0: reload_in (DI) =
33454 (plus:DI (reg/f:DI 7 sp)
33455 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33456 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33457 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33458 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33459 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33460 reload_reg_rtx: (reg:V2DI 22 xmm1)
33462 Which isn't going to work since SSE instructions can't handle scalar
33463 additions. Returning GENERAL_REGS forces the addition into integer
33464 register and reload can handle subsequent reloads without problems. */
33466 if (in_p
&& GET_CODE (x
) == PLUS
33467 && SSE_CLASS_P (rclass
)
33468 && SCALAR_INT_MODE_P (mode
))
33469 return GENERAL_REGS
;
33474 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33477 ix86_class_likely_spilled_p (reg_class_t rclass
)
33488 case SSE_FIRST_REG
:
33490 case FP_SECOND_REG
:
33500 /* If we are copying between general and FP registers, we need a memory
33501 location. The same is true for SSE and MMX registers.
33503 To optimize register_move_cost performance, allow inline variant.
33505 The macro can't work reliably when one of the CLASSES is class containing
33506 registers from multiple units (SSE, MMX, integer). We avoid this by never
33507 combining those units in single alternative in the machine description.
33508 Ensure that this constraint holds to avoid unexpected surprises.
33510 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33511 enforce these sanity checks. */
33514 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33515 enum machine_mode mode
, int strict
)
33517 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
33518 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
33519 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
33520 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
33521 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
33522 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
33524 gcc_assert (!strict
|| lra_in_progress
);
33528 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
33531 /* ??? This is a lie. We do have moves between mmx/general, and for
33532 mmx/sse2. But by saying we need secondary memory we discourage the
33533 register allocator from using the mmx registers unless needed. */
33534 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
33537 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33539 /* SSE1 doesn't have any direct moves from other classes. */
33543 /* If the target says that inter-unit moves are more expensive
33544 than moving through memory, then don't generate them. */
33545 if (!TARGET_INTER_UNIT_MOVES
)
33548 /* Between SSE and general, we have moves no larger than word size. */
33549 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33557 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33558 enum machine_mode mode
, int strict
)
33560 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
33563 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33565 On the 80386, this is the size of MODE in words,
33566 except in the FP regs, where a single reg is always enough. */
33568 static unsigned char
33569 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
33571 if (MAYBE_INTEGER_CLASS_P (rclass
))
33573 if (mode
== XFmode
)
33574 return (TARGET_64BIT
? 2 : 3);
33575 else if (mode
== XCmode
)
33576 return (TARGET_64BIT
? 4 : 6);
33578 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
33582 if (COMPLEX_MODE_P (mode
))
33589 /* Return true if the registers in CLASS cannot represent the change from
33590 modes FROM to TO. */
33593 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
33594 enum reg_class regclass
)
33599 /* x87 registers can't do subreg at all, as all values are reformatted
33600 to extended precision. */
33601 if (MAYBE_FLOAT_CLASS_P (regclass
))
33604 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
33606 /* Vector registers do not support QI or HImode loads. If we don't
33607 disallow a change to these modes, reload will assume it's ok to
33608 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33609 the vec_dupv4hi pattern. */
33610 if (GET_MODE_SIZE (from
) < 4)
33613 /* Vector registers do not support subreg with nonzero offsets, which
33614 are otherwise valid for integer registers. Since we can't see
33615 whether we have a nonzero offset from here, prohibit all
33616 nonparadoxical subregs changing size. */
33617 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
33624 /* Return the cost of moving data of mode M between a
33625 register and memory. A value of 2 is the default; this cost is
33626 relative to those in `REGISTER_MOVE_COST'.
33628 This function is used extensively by register_move_cost that is used to
33629 build tables at startup. Make it inline in this case.
33630 When IN is 2, return maximum of in and out move cost.
33632 If moving between registers and memory is more expensive than
33633 between two registers, you should define this macro to express the
33636 Model also increased moving costs of QImode registers in non
33640 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
33644 if (FLOAT_CLASS_P (regclass
))
33662 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
33663 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
33665 if (SSE_CLASS_P (regclass
))
33668 switch (GET_MODE_SIZE (mode
))
33683 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
33684 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
33686 if (MMX_CLASS_P (regclass
))
33689 switch (GET_MODE_SIZE (mode
))
33701 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
33702 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
33704 switch (GET_MODE_SIZE (mode
))
33707 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
33710 return ix86_cost
->int_store
[0];
33711 if (TARGET_PARTIAL_REG_DEPENDENCY
33712 && optimize_function_for_speed_p (cfun
))
33713 cost
= ix86_cost
->movzbl_load
;
33715 cost
= ix86_cost
->int_load
[0];
33717 return MAX (cost
, ix86_cost
->int_store
[0]);
33723 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
33725 return ix86_cost
->movzbl_load
;
33727 return ix86_cost
->int_store
[0] + 4;
33732 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
33733 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
33735 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
33736 if (mode
== TFmode
)
33739 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
33741 cost
= ix86_cost
->int_load
[2];
33743 cost
= ix86_cost
->int_store
[2];
33744 return (cost
* (((int) GET_MODE_SIZE (mode
)
33745 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
33750 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
33753 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
33757 /* Return the cost of moving data from a register in class CLASS1 to
33758 one in class CLASS2.
33760 It is not required that the cost always equal 2 when FROM is the same as TO;
33761 on some machines it is expensive to move between registers if they are not
33762 general registers. */
33765 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
33766 reg_class_t class2_i
)
33768 enum reg_class class1
= (enum reg_class
) class1_i
;
33769 enum reg_class class2
= (enum reg_class
) class2_i
;
33771 /* In case we require secondary memory, compute cost of the store followed
33772 by load. In order to avoid bad register allocation choices, we need
33773 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33775 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
33779 cost
+= inline_memory_move_cost (mode
, class1
, 2);
33780 cost
+= inline_memory_move_cost (mode
, class2
, 2);
33782 /* In case of copying from general_purpose_register we may emit multiple
33783 stores followed by single load causing memory size mismatch stall.
33784 Count this as arbitrarily high cost of 20. */
33785 if (targetm
.class_max_nregs (class1
, mode
)
33786 > targetm
.class_max_nregs (class2
, mode
))
33789 /* In the case of FP/MMX moves, the registers actually overlap, and we
33790 have to switch modes in order to treat them differently. */
33791 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
33792 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
33798 /* Moves between SSE/MMX and integer unit are expensive. */
33799 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
33800 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33802 /* ??? By keeping returned value relatively high, we limit the number
33803 of moves between integer and MMX/SSE registers for all targets.
33804 Additionally, high value prevents problem with x86_modes_tieable_p(),
33805 where integer modes in MMX/SSE registers are not tieable
33806 because of missing QImode and HImode moves to, from or between
33807 MMX/SSE registers. */
33808 return MAX (8, ix86_cost
->mmxsse_to_integer
);
33810 if (MAYBE_FLOAT_CLASS_P (class1
))
33811 return ix86_cost
->fp_move
;
33812 if (MAYBE_SSE_CLASS_P (class1
))
33813 return ix86_cost
->sse_move
;
33814 if (MAYBE_MMX_CLASS_P (class1
))
33815 return ix86_cost
->mmx_move
;
33819 /* Return TRUE if hard register REGNO can hold a value of machine-mode
33823 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
33825 /* Flags and only flags can only hold CCmode values. */
33826 if (CC_REGNO_P (regno
))
33827 return GET_MODE_CLASS (mode
) == MODE_CC
;
33828 if (GET_MODE_CLASS (mode
) == MODE_CC
33829 || GET_MODE_CLASS (mode
) == MODE_RANDOM
33830 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
33832 if (STACK_REGNO_P (regno
))
33833 return VALID_FP_MODE_P (mode
);
33834 if (SSE_REGNO_P (regno
))
33836 /* We implement the move patterns for all vector modes into and
33837 out of SSE registers, even when no operation instructions
33838 are available. OImode move is available only when AVX is
33840 return ((TARGET_AVX
&& mode
== OImode
)
33841 || VALID_AVX256_REG_MODE (mode
)
33842 || VALID_SSE_REG_MODE (mode
)
33843 || VALID_SSE2_REG_MODE (mode
)
33844 || VALID_MMX_REG_MODE (mode
)
33845 || VALID_MMX_REG_MODE_3DNOW (mode
));
33847 if (MMX_REGNO_P (regno
))
33849 /* We implement the move patterns for 3DNOW modes even in MMX mode,
33850 so if the register is available at all, then we can move data of
33851 the given mode into or out of it. */
33852 return (VALID_MMX_REG_MODE (mode
)
33853 || VALID_MMX_REG_MODE_3DNOW (mode
));
33856 if (mode
== QImode
)
33858 /* Take care for QImode values - they can be in non-QI regs,
33859 but then they do cause partial register stalls. */
33860 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
33862 if (!TARGET_PARTIAL_REG_STALL
)
33864 return !can_create_pseudo_p ();
33866 /* We handle both integer and floats in the general purpose registers. */
33867 else if (VALID_INT_MODE_P (mode
))
33869 else if (VALID_FP_MODE_P (mode
))
33871 else if (VALID_DFP_MODE_P (mode
))
33873 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
33874 on to use that value in smaller contexts, this can easily force a
33875 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
33876 supporting DImode, allow it. */
33877 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
33883 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
33884 tieable integer mode. */
33887 ix86_tieable_integer_mode_p (enum machine_mode mode
)
33896 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
33899 return TARGET_64BIT
;
33906 /* Return true if MODE1 is accessible in a register that can hold MODE2
33907 without copying. That is, all register classes that can hold MODE2
33908 can also hold MODE1. */
33911 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
33913 if (mode1
== mode2
)
33916 if (ix86_tieable_integer_mode_p (mode1
)
33917 && ix86_tieable_integer_mode_p (mode2
))
33920 /* MODE2 being XFmode implies fp stack or general regs, which means we
33921 can tie any smaller floating point modes to it. Note that we do not
33922 tie this with TFmode. */
33923 if (mode2
== XFmode
)
33924 return mode1
== SFmode
|| mode1
== DFmode
;
33926 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
33927 that we can tie it with SFmode. */
33928 if (mode2
== DFmode
)
33929 return mode1
== SFmode
;
33931 /* If MODE2 is only appropriate for an SSE register, then tie with
33932 any other mode acceptable to SSE registers. */
33933 if (GET_MODE_SIZE (mode2
) == 32
33934 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33935 return (GET_MODE_SIZE (mode1
) == 32
33936 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33937 if (GET_MODE_SIZE (mode2
) == 16
33938 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33939 return (GET_MODE_SIZE (mode1
) == 16
33940 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33942 /* If MODE2 is appropriate for an MMX register, then tie
33943 with any other mode acceptable to MMX registers. */
33944 if (GET_MODE_SIZE (mode2
) == 8
33945 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
33946 return (GET_MODE_SIZE (mode1
) == 8
33947 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
33952 /* Return the cost of moving between two registers of mode MODE. */
33955 ix86_set_reg_reg_cost (enum machine_mode mode
)
33957 unsigned int units
= UNITS_PER_WORD
;
33959 switch (GET_MODE_CLASS (mode
))
33965 units
= GET_MODE_SIZE (CCmode
);
33969 if ((TARGET_SSE
&& mode
== TFmode
)
33970 || (TARGET_80387
&& mode
== XFmode
)
33971 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
33972 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
33973 units
= GET_MODE_SIZE (mode
);
33976 case MODE_COMPLEX_FLOAT
:
33977 if ((TARGET_SSE
&& mode
== TCmode
)
33978 || (TARGET_80387
&& mode
== XCmode
)
33979 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
33980 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
33981 units
= GET_MODE_SIZE (mode
);
33984 case MODE_VECTOR_INT
:
33985 case MODE_VECTOR_FLOAT
:
33986 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
33987 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
33988 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
33989 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
33990 units
= GET_MODE_SIZE (mode
);
33993 /* Return the cost of moving between two registers of mode MODE,
33994 assuming that the move will be in pieces of at most UNITS bytes. */
33995 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
33998 /* Compute a (partial) cost for rtx X. Return true if the complete
33999 cost has been computed, and false if subexpressions should be
34000 scanned. In either case, *TOTAL contains the cost result. */
34003 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
34006 enum rtx_code code
= (enum rtx_code
) code_i
;
34007 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34008 enum machine_mode mode
= GET_MODE (x
);
34009 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
34014 if (register_operand (SET_DEST (x
), VOIDmode
)
34015 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34017 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
34026 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34028 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34030 else if (flag_pic
&& SYMBOLIC_CONST (x
)
34032 || (!GET_CODE (x
) != LABEL_REF
34033 && (GET_CODE (x
) != SYMBOL_REF
34034 || !SYMBOL_REF_LOCAL_P (x
)))))
34041 if (mode
== VOIDmode
)
34046 switch (standard_80387_constant_p (x
))
34051 default: /* Other constants */
34058 if (SSE_FLOAT_MODE_P (mode
))
34061 switch (standard_sse_constant_p (x
))
34065 case 1: /* 0: xor eliminates false dependency */
34068 default: /* -1: cmp contains false dependency */
34073 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34074 it'll probably end up. Add a penalty for size. */
34075 *total
= (COSTS_N_INSNS (1)
34076 + (flag_pic
!= 0 && !TARGET_64BIT
)
34077 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34081 /* The zero extensions is often completely free on x86_64, so make
34082 it as cheap as possible. */
34083 if (TARGET_64BIT
&& mode
== DImode
34084 && GET_MODE (XEXP (x
, 0)) == SImode
)
34086 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34087 *total
= cost
->add
;
34089 *total
= cost
->movzx
;
34093 *total
= cost
->movsx
;
34097 if (SCALAR_INT_MODE_P (mode
)
34098 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34099 && CONST_INT_P (XEXP (x
, 1)))
34101 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34104 *total
= cost
->add
;
34107 if ((value
== 2 || value
== 3)
34108 && cost
->lea
<= cost
->shift_const
)
34110 *total
= cost
->lea
;
34120 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34122 /* ??? Should be SSE vector operation cost. */
34123 /* At least for published AMD latencies, this really is the same
34124 as the latency for a simple fpu operation like fabs. */
34125 /* V*QImode is emulated with 1-11 insns. */
34126 if (mode
== V16QImode
|| mode
== V32QImode
)
34129 if (TARGET_XOP
&& mode
== V16QImode
)
34131 /* For XOP we use vpshab, which requires a broadcast of the
34132 value to the variable shift insn. For constants this
34133 means a V16Q const in mem; even when we can perform the
34134 shift with one insn set the cost to prefer paddb. */
34135 if (CONSTANT_P (XEXP (x
, 1)))
34137 *total
= (cost
->fabs
34138 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34139 + (speed
? 2 : COSTS_N_BYTES (16)));
34144 else if (TARGET_SSSE3
)
34146 *total
= cost
->fabs
* count
;
34149 *total
= cost
->fabs
;
34151 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34153 if (CONST_INT_P (XEXP (x
, 1)))
34155 if (INTVAL (XEXP (x
, 1)) > 32)
34156 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34158 *total
= cost
->shift_const
* 2;
34162 if (GET_CODE (XEXP (x
, 1)) == AND
)
34163 *total
= cost
->shift_var
* 2;
34165 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34170 if (CONST_INT_P (XEXP (x
, 1)))
34171 *total
= cost
->shift_const
;
34173 *total
= cost
->shift_var
;
34181 gcc_assert (FLOAT_MODE_P (mode
));
34182 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34184 /* ??? SSE scalar/vector cost should be used here. */
34185 /* ??? Bald assumption that fma has the same cost as fmul. */
34186 *total
= cost
->fmul
;
34187 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34189 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34191 if (GET_CODE (sub
) == NEG
)
34192 sub
= XEXP (sub
, 0);
34193 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34196 if (GET_CODE (sub
) == NEG
)
34197 sub
= XEXP (sub
, 0);
34198 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34203 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34205 /* ??? SSE scalar cost should be used here. */
34206 *total
= cost
->fmul
;
34209 else if (X87_FLOAT_MODE_P (mode
))
34211 *total
= cost
->fmul
;
34214 else if (FLOAT_MODE_P (mode
))
34216 /* ??? SSE vector cost should be used here. */
34217 *total
= cost
->fmul
;
34220 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34222 /* V*QImode is emulated with 7-13 insns. */
34223 if (mode
== V16QImode
|| mode
== V32QImode
)
34226 if (TARGET_XOP
&& mode
== V16QImode
)
34228 else if (TARGET_SSSE3
)
34230 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34232 /* V*DImode is emulated with 5-8 insns. */
34233 else if (mode
== V2DImode
|| mode
== V4DImode
)
34235 if (TARGET_XOP
&& mode
== V2DImode
)
34236 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34238 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34240 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34241 insns, including two PMULUDQ. */
34242 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34243 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34245 *total
= cost
->fmul
;
34250 rtx op0
= XEXP (x
, 0);
34251 rtx op1
= XEXP (x
, 1);
34253 if (CONST_INT_P (XEXP (x
, 1)))
34255 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34256 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34260 /* This is arbitrary. */
34263 /* Compute costs correctly for widening multiplication. */
34264 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34265 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34266 == GET_MODE_SIZE (mode
))
34268 int is_mulwiden
= 0;
34269 enum machine_mode inner_mode
= GET_MODE (op0
);
34271 if (GET_CODE (op0
) == GET_CODE (op1
))
34272 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34273 else if (CONST_INT_P (op1
))
34275 if (GET_CODE (op0
) == SIGN_EXTEND
)
34276 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34279 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34283 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34286 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34287 + nbits
* cost
->mult_bit
34288 + rtx_cost (op0
, outer_code
, opno
, speed
)
34289 + rtx_cost (op1
, outer_code
, opno
, speed
));
34298 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34299 /* ??? SSE cost should be used here. */
34300 *total
= cost
->fdiv
;
34301 else if (X87_FLOAT_MODE_P (mode
))
34302 *total
= cost
->fdiv
;
34303 else if (FLOAT_MODE_P (mode
))
34304 /* ??? SSE vector cost should be used here. */
34305 *total
= cost
->fdiv
;
34307 *total
= cost
->divide
[MODE_INDEX (mode
)];
34311 if (GET_MODE_CLASS (mode
) == MODE_INT
34312 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34314 if (GET_CODE (XEXP (x
, 0)) == PLUS
34315 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34316 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34317 && CONSTANT_P (XEXP (x
, 1)))
34319 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34320 if (val
== 2 || val
== 4 || val
== 8)
34322 *total
= cost
->lea
;
34323 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34324 outer_code
, opno
, speed
);
34325 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34326 outer_code
, opno
, speed
);
34327 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34331 else if (GET_CODE (XEXP (x
, 0)) == MULT
34332 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34334 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34335 if (val
== 2 || val
== 4 || val
== 8)
34337 *total
= cost
->lea
;
34338 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34339 outer_code
, opno
, speed
);
34340 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34344 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34346 *total
= cost
->lea
;
34347 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34348 outer_code
, opno
, speed
);
34349 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34350 outer_code
, opno
, speed
);
34351 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34358 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34360 /* ??? SSE cost should be used here. */
34361 *total
= cost
->fadd
;
34364 else if (X87_FLOAT_MODE_P (mode
))
34366 *total
= cost
->fadd
;
34369 else if (FLOAT_MODE_P (mode
))
34371 /* ??? SSE vector cost should be used here. */
34372 *total
= cost
->fadd
;
34380 if (GET_MODE_CLASS (mode
) == MODE_INT
34381 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34383 *total
= (cost
->add
* 2
34384 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34385 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34386 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34387 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
34393 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34395 /* ??? SSE cost should be used here. */
34396 *total
= cost
->fchs
;
34399 else if (X87_FLOAT_MODE_P (mode
))
34401 *total
= cost
->fchs
;
34404 else if (FLOAT_MODE_P (mode
))
34406 /* ??? SSE vector cost should be used here. */
34407 *total
= cost
->fchs
;
34413 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34415 /* ??? Should be SSE vector operation cost. */
34416 /* At least for published AMD latencies, this really is the same
34417 as the latency for a simple fpu operation like fabs. */
34418 *total
= cost
->fabs
;
34420 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34421 *total
= cost
->add
* 2;
34423 *total
= cost
->add
;
34427 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34428 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34429 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34430 && XEXP (x
, 1) == const0_rtx
)
34432 /* This kind of construct is implemented using test[bwl].
34433 Treat it as if we had an AND. */
34434 *total
= (cost
->add
34435 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34436 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
34442 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
34447 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34448 /* ??? SSE cost should be used here. */
34449 *total
= cost
->fabs
;
34450 else if (X87_FLOAT_MODE_P (mode
))
34451 *total
= cost
->fabs
;
34452 else if (FLOAT_MODE_P (mode
))
34453 /* ??? SSE vector cost should be used here. */
34454 *total
= cost
->fabs
;
34458 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34459 /* ??? SSE cost should be used here. */
34460 *total
= cost
->fsqrt
;
34461 else if (X87_FLOAT_MODE_P (mode
))
34462 *total
= cost
->fsqrt
;
34463 else if (FLOAT_MODE_P (mode
))
34464 /* ??? SSE vector cost should be used here. */
34465 *total
= cost
->fsqrt
;
34469 if (XINT (x
, 1) == UNSPEC_TP
)
34476 case VEC_DUPLICATE
:
34477 /* ??? Assume all of these vector manipulation patterns are
34478 recognizable. In which case they all pretty much have the
34480 *total
= cost
->fabs
;
#if TARGET_MACHO
/* Counter used to generate unique lazy-pointer labels per stub.  */
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    ;
  else
    {
      fprintf (file, "%s:\n", binder_name);

      if (MACHOPIC_PURE)
	{
	  fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
	  fprintf (file, "\tpushl\t%%ecx\n");
	}
      else
	fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

      fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

      /* N.B. Keep the correspondence of these
	 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
	 old-pic/new-pic/non-pic stubs; altering this will break
	 compatibility with existing dylibs.  */
      if (MACHOPIC_PURE)
	{
	  /* 25-byte PIC stub using "CALL get_pc_thunk".  */
	  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
	}
      else
	/* 16-byte -mdynamic-no-pic stub.  */
	switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

      fprintf (file, "%s:\n", lazy_ptr_name);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
      fprintf (file, ASM_LONG "%s\n", binder_name);
    }
}
#endif /* TARGET_MACHO */
34581 /* Order the registers for register allocator. */
34584 x86_order_regs_for_local_alloc (void)
34589 /* First allocate the local general purpose registers. */
34590 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34591 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
34592 reg_alloc_order
[pos
++] = i
;
34594 /* Global general purpose registers. */
34595 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34596 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
34597 reg_alloc_order
[pos
++] = i
;
34599 /* x87 registers come first in case we are doing FP math
34601 if (!TARGET_SSE_MATH
)
34602 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34603 reg_alloc_order
[pos
++] = i
;
34605 /* SSE registers. */
34606 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
34607 reg_alloc_order
[pos
++] = i
;
34608 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
34609 reg_alloc_order
[pos
++] = i
;
34611 /* x87 registers. */
34612 if (TARGET_SSE_MATH
)
34613 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34614 reg_alloc_order
[pos
++] = i
;
34616 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
34617 reg_alloc_order
[pos
++] = i
;
34619 /* Initialize the rest of array as we do not allocate some registers
34621 while (pos
< FIRST_PSEUDO_REGISTER
)
34622 reg_alloc_order
[pos
++] = 0;
34625 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34626 in struct attribute_spec handler. */
34628 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
34630 int flags ATTRIBUTE_UNUSED
,
34631 bool *no_add_attrs
)
34633 if (TREE_CODE (*node
) != FUNCTION_TYPE
34634 && TREE_CODE (*node
) != METHOD_TYPE
34635 && TREE_CODE (*node
) != FIELD_DECL
34636 && TREE_CODE (*node
) != TYPE_DECL
)
34638 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34640 *no_add_attrs
= true;
34645 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
34647 *no_add_attrs
= true;
34650 if (is_attribute_p ("callee_pop_aggregate_return", name
))
34654 cst
= TREE_VALUE (args
);
34655 if (TREE_CODE (cst
) != INTEGER_CST
)
34657 warning (OPT_Wattributes
,
34658 "%qE attribute requires an integer constant argument",
34660 *no_add_attrs
= true;
34662 else if (compare_tree_int (cst
, 0) != 0
34663 && compare_tree_int (cst
, 1) != 0)
34665 warning (OPT_Wattributes
,
34666 "argument to %qE attribute is neither zero, nor one",
34668 *no_add_attrs
= true;
34677 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34678 struct attribute_spec.handler. */
34680 ix86_handle_abi_attribute (tree
*node
, tree name
,
34681 tree args ATTRIBUTE_UNUSED
,
34682 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34684 if (TREE_CODE (*node
) != FUNCTION_TYPE
34685 && TREE_CODE (*node
) != METHOD_TYPE
34686 && TREE_CODE (*node
) != FIELD_DECL
34687 && TREE_CODE (*node
) != TYPE_DECL
)
34689 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34691 *no_add_attrs
= true;
34695 /* Can combine regparm with all attributes but fastcall. */
34696 if (is_attribute_p ("ms_abi", name
))
34698 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
34700 error ("ms_abi and sysv_abi attributes are not compatible");
34705 else if (is_attribute_p ("sysv_abi", name
))
34707 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
34709 error ("ms_abi and sysv_abi attributes are not compatible");
34718 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34719 struct attribute_spec.handler. */
34721 ix86_handle_struct_attribute (tree
*node
, tree name
,
34722 tree args ATTRIBUTE_UNUSED
,
34723 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34726 if (DECL_P (*node
))
34728 if (TREE_CODE (*node
) == TYPE_DECL
)
34729 type
= &TREE_TYPE (*node
);
34734 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
34736 warning (OPT_Wattributes
, "%qE attribute ignored",
34738 *no_add_attrs
= true;
34741 else if ((is_attribute_p ("ms_struct", name
)
34742 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
34743 || ((is_attribute_p ("gcc_struct", name
)
34744 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
34746 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
34748 *no_add_attrs
= true;
34755 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
34756 tree args ATTRIBUTE_UNUSED
,
34757 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34759 if (TREE_CODE (*node
) != FUNCTION_DECL
)
34761 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34763 *no_add_attrs
= true;
34769 ix86_ms_bitfield_layout_p (const_tree record_type
)
34771 return ((TARGET_MS_BITFIELD_LAYOUT
34772 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
34773 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
34776 /* Returns an expression indicating where the this parameter is
34777 located on entry to the FUNCTION. */
34780 x86_this_parameter (tree function
)
34782 tree type
= TREE_TYPE (function
);
34783 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
34788 const int *parm_regs
;
34790 if (ix86_function_type_abi (type
) == MS_ABI
)
34791 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
34793 parm_regs
= x86_64_int_parameter_registers
;
34794 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
34797 nregs
= ix86_function_regparm (type
, function
);
34799 if (nregs
> 0 && !stdarg_p (type
))
34802 unsigned int ccvt
= ix86_get_callcvt (type
);
34804 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34805 regno
= aggr
? DX_REG
: CX_REG
;
34806 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34810 return gen_rtx_MEM (SImode
,
34811 plus_constant (Pmode
, stack_pointer_rtx
, 4));
34820 return gen_rtx_MEM (SImode
,
34821 plus_constant (Pmode
,
34822 stack_pointer_rtx
, 4));
34825 return gen_rtx_REG (SImode
, regno
);
34828 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
34832 /* Determine whether x86_output_mi_thunk can succeed. */
34835 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
34836 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
34837 HOST_WIDE_INT vcall_offset
, const_tree function
)
34839 /* 64-bit can handle anything. */
34843 /* For 32-bit, everything's fine if we have one free register. */
34844 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
34847 /* Need a free register for vcall_offset. */
34851 /* Need a free register for GOT references. */
34852 if (flag_pic
&& !targetm
.binds_local_p (function
))
34855 /* Otherwise ok. */
34859 /* Output the assembler code for a thunk function. THUNK_DECL is the
34860 declaration for the thunk function itself, FUNCTION is the decl for
34861 the target function. DELTA is an immediate constant offset to be
34862 added to THIS. If VCALL_OFFSET is nonzero, the word at
34863 *(*this + vcall_offset) should be added to THIS. */
34866 x86_output_mi_thunk (FILE *file
,
34867 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
34868 HOST_WIDE_INT vcall_offset
, tree function
)
34870 rtx this_param
= x86_this_parameter (function
);
34871 rtx this_reg
, tmp
, fnaddr
;
34872 unsigned int tmp_regno
;
34875 tmp_regno
= R10_REG
;
34878 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
34879 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34880 tmp_regno
= AX_REG
;
34881 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34882 tmp_regno
= DX_REG
;
34884 tmp_regno
= CX_REG
;
34887 emit_note (NOTE_INSN_PROLOGUE_END
);
34889 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
34890 pull it in now and let DELTA benefit. */
34891 if (REG_P (this_param
))
34892 this_reg
= this_param
;
34893 else if (vcall_offset
)
34895 /* Put the this parameter into %eax. */
34896 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
34897 emit_move_insn (this_reg
, this_param
);
34900 this_reg
= NULL_RTX
;
34902 /* Adjust the this parameter by a fixed constant. */
34905 rtx delta_rtx
= GEN_INT (delta
);
34906 rtx delta_dst
= this_reg
? this_reg
: this_param
;
34910 if (!x86_64_general_operand (delta_rtx
, Pmode
))
34912 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34913 emit_move_insn (tmp
, delta_rtx
);
34918 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
34921 /* Adjust the this parameter by a value stored in the vtable. */
34924 rtx vcall_addr
, vcall_mem
, this_mem
;
34926 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34928 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
34929 if (Pmode
!= ptr_mode
)
34930 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
34931 emit_move_insn (tmp
, this_mem
);
34933 /* Adjust the this parameter. */
34934 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
34936 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
34938 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
34939 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
34940 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
34943 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
34944 if (Pmode
!= ptr_mode
)
34945 emit_insn (gen_addsi_1_zext (this_reg
,
34946 gen_rtx_REG (ptr_mode
,
34950 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
34953 /* If necessary, drop THIS back to its stack slot. */
34954 if (this_reg
&& this_reg
!= this_param
)
34955 emit_move_insn (this_param
, this_reg
);
34957 fnaddr
= XEXP (DECL_RTL (function
), 0);
34960 if (!flag_pic
|| targetm
.binds_local_p (function
)
34961 || cfun
->machine
->call_abi
== MS_ABI
)
34965 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
34966 tmp
= gen_rtx_CONST (Pmode
, tmp
);
34967 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
34972 if (!flag_pic
|| targetm
.binds_local_p (function
))
34975 else if (TARGET_MACHO
)
34977 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
34978 fnaddr
= XEXP (fnaddr
, 0);
34980 #endif /* TARGET_MACHO */
34983 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
34984 output_set_got (tmp
, NULL_RTX
);
34986 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
34987 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
34988 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
34992 /* Our sibling call patterns do not allow memories, because we have no
34993 predicate that can distinguish between frame and non-frame memory.
34994 For our purposes here, we can get away with (ab)using a jump pattern,
34995 because we're going to do no optimization. */
34996 if (MEM_P (fnaddr
))
34997 emit_jump_insn (gen_indirect_jump (fnaddr
));
35000 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35001 fnaddr
= legitimize_pic_address (fnaddr
,
35002 gen_rtx_REG (Pmode
, tmp_regno
));
35004 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35006 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35007 if (GET_MODE (fnaddr
) != word_mode
)
35008 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35009 emit_move_insn (tmp
, fnaddr
);
35013 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35014 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35015 tmp
= emit_call_insn (tmp
);
35016 SIBLING_CALL_P (tmp
) = 1;
35020 /* Emit just enough of rest_of_compilation to get the insns emitted.
35021 Note that use_thunk calls assemble_start_function et al. */
35022 tmp
= get_insns ();
35023 shorten_branches (tmp
);
35024 final_start_function (tmp
, file
, 1);
35025 final (tmp
, file
, 1);
35026 final_end_function ();
35030 x86_file_start (void)
35032 default_file_start ();
35034 darwin_file_start ();
35036 if (X86_FILE_START_VERSION_DIRECTIVE
)
35037 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35038 if (X86_FILE_START_FLTUSED
)
35039 fputs ("\t.global\t__fltused\n", asm_out_file
);
35040 if (ix86_asm_dialect
== ASM_INTEL
)
35041 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
35045 x86_field_alignment (tree field
, int computed
)
35047 enum machine_mode mode
;
35048 tree type
= TREE_TYPE (field
);
35050 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
35052 mode
= TYPE_MODE (strip_array_types (type
));
35053 if (mode
== DFmode
|| mode
== DCmode
35054 || GET_MODE_CLASS (mode
) == MODE_INT
35055 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35056 return MIN (32, computed
);
35060 /* Output assembler code to FILE to increment profiler label # LABELNO
35061 for profiling a function entry. */
35063 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35065 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
35070 #ifndef NO_PROFILE_COUNTERS
35071 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35074 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
35075 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35077 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35081 #ifndef NO_PROFILE_COUNTERS
35082 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35085 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
35089 #ifndef NO_PROFILE_COUNTERS
35090 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35093 fprintf (file
, "\tcall\t%s\n", mcount_name
);
35097 /* We don't have exact information about the insn sizes, but we may assume
35098 quite safely that we are informed about all 1 byte insns and memory
35099 address sizes. This is enough to eliminate unnecessary padding in
35103 min_insn_size (rtx insn
)
35107 if (!INSN_P (insn
) || !active_insn_p (insn
))
35110 /* Discard alignments we've emit and jump instructions. */
35111 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35112 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35114 if (JUMP_TABLE_DATA_P (insn
))
35117 /* Important case - calls are always 5 bytes.
35118 It is common to have many calls in the row. */
35120 && symbolic_reference_mentioned_p (PATTERN (insn
))
35121 && !SIBLING_CALL_P (insn
))
35123 len
= get_attr_length (insn
);
35127 /* For normal instructions we rely on get_attr_length being exact,
35128 with a few exceptions. */
35129 if (!JUMP_P (insn
))
35131 enum attr_type type
= get_attr_type (insn
);
35136 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35137 || asm_noperands (PATTERN (insn
)) >= 0)
35144 /* Otherwise trust get_attr_length. */
35148 l
= get_attr_length_address (insn
);
35149 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.
   NOTE(review): the extraction dropped many interior lines (the max_skip
   clamping, CALL_P arms, the continue/else structure and the closing
   #endif); restored to match GCC 4.8 upstream — verify.  */
static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
35262 /* AMD Athlon works faster
35263 when RET is not destination of conditional jump or directly preceded
35264 by other jump instruction. We avoid the penalty by inserting NOP just
35265 before the RET instructions in such cases. */
35267 ix86_pad_returns (void)
35272 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35274 basic_block bb
= e
->src
;
35275 rtx ret
= BB_END (bb
);
35277 bool replace
= false;
35279 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35280 || optimize_bb_for_size_p (bb
))
35282 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35283 if (active_insn_p (prev
) || LABEL_P (prev
))
35285 if (prev
&& LABEL_P (prev
))
35290 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35291 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35292 && !(e
->flags
& EDGE_FALLTHRU
))
35297 prev
= prev_active_insn (ret
);
35299 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35302 /* Empty functions get branch mispredict even when
35303 the jump destination is not visible to us. */
35304 if (!prev
&& !optimize_function_for_size_p (cfun
))
35309 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
35315 /* Count the minimum number of instructions in BB. Return 4 if the
35316 number of instructions >= 4. */
35319 ix86_count_insn_bb (basic_block bb
)
35322 int insn_count
= 0;
35324 /* Count number of instructions in this block. Return 4 if the number
35325 of instructions >= 4. */
35326 FOR_BB_INSNS (bb
, insn
)
35328 /* Only happen in exit blocks. */
35330 && ANY_RETURN_P (PATTERN (insn
)))
35333 if (NONDEBUG_INSN_P (insn
)
35334 && GET_CODE (PATTERN (insn
)) != USE
35335 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35338 if (insn_count
>= 4)
35347 /* Count the minimum number of instructions in code path in BB.
35348 Return 4 if the number of instructions >= 4. */
35351 ix86_count_insn (basic_block bb
)
35355 int min_prev_count
;
35357 /* Only bother counting instructions along paths with no
35358 more than 2 basic blocks between entry and exit. Given
35359 that BB has an edge to exit, determine if a predecessor
35360 of BB has an edge from entry. If so, compute the number
35361 of instructions in the predecessor block. If there
35362 happen to be multiple such blocks, compute the minimum. */
35363 min_prev_count
= 4;
35364 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35367 edge_iterator prev_ei
;
35369 if (e
->src
== ENTRY_BLOCK_PTR
)
35371 min_prev_count
= 0;
35374 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35376 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35378 int count
= ix86_count_insn_bb (e
->src
);
35379 if (count
< min_prev_count
)
35380 min_prev_count
= count
;
35386 if (min_prev_count
< 4)
35387 min_prev_count
+= ix86_count_insn_bb (bb
);
35389 return min_prev_count
;
35392 /* Pad short function to 4 instructions. */
35395 ix86_pad_short_function (void)
35400 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35402 rtx ret
= BB_END (e
->src
);
35403 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35405 int insn_count
= ix86_count_insn (e
->src
);
35407 /* Pad short function. */
35408 if (insn_count
< 4)
35412 /* Find epilogue. */
35415 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35416 insn
= PREV_INSN (insn
);
35421 /* Two NOPs count as one instruction. */
35422 insn_count
= 2 * (4 - insn_count
);
35423 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
35429 /* Implement machine specific optimizations. We implement padding of returns
35430 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
35434 /* We are freeing block_for_insn in the toplev to keep compatibility
35435 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35436 compute_bb_for_insn ();
35438 if (optimize
&& optimize_function_for_speed_p (cfun
))
35440 if (TARGET_PAD_SHORT_FUNCTION
)
35441 ix86_pad_short_function ();
35442 else if (TARGET_PAD_RETURNS
)
35443 ix86_pad_returns ();
35444 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35445 if (TARGET_FOUR_JUMP_LIMIT
)
35446 ix86_avoid_jump_mispredicts ();
35451 /* Return nonzero when QImode register that must be represented via REX prefix
35454 x86_extended_QIreg_mentioned_p (rtx insn
)
35457 extract_insn_cached (insn
);
35458 for (i
= 0; i
< recog_data
.n_operands
; i
++)
35459 if (GENERAL_REG_P (recog_data
.operand
[i
])
35460 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
35465 /* Return nonzero when P points to register encoded via REX prefix.
35466 Called via for_each_rtx. */
35468 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
35470 unsigned int regno
;
35473 regno
= REGNO (*p
);
35474 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
35477 /* Return true when INSN mentions register that must be encoded using REX
35480 x86_extended_reg_mentioned_p (rtx insn
)
35482 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
35483 extended_reg_mentioned_1
, NULL
);
35486 /* If profitable, negate (without causing overflow) integer constant
35487 of mode MODE at location LOC. Return true in this case. */
35489 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
35493 if (!CONST_INT_P (*loc
))
35499 /* DImode x86_64 constants must fit in 32 bits. */
35500 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
35511 gcc_unreachable ();
35514 /* Avoid overflows. */
35515 if (mode_signbit_p (mode
, *loc
))
35518 val
= INTVAL (*loc
);
35520 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35521 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35522 if ((val
< 0 && val
!= -128)
35525 *loc
= GEN_INT (-val
);
35532 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35533 optabs would emit if we didn't have TFmode patterns. */
35536 x86_emit_floatuns (rtx operands
[2])
35538 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
35539 enum machine_mode mode
, inmode
;
35541 inmode
= GET_MODE (operands
[1]);
35542 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
35545 in
= force_reg (inmode
, operands
[1]);
35546 mode
= GET_MODE (out
);
35547 neglab
= gen_label_rtx ();
35548 donelab
= gen_label_rtx ();
35549 f0
= gen_reg_rtx (mode
);
35551 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
35553 expand_float (out
, in
, 0);
35555 emit_jump_insn (gen_jump (donelab
));
35558 emit_label (neglab
);
35560 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
35562 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
35564 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
35566 expand_float (f0
, i0
, 0);
35568 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
35570 emit_label (donelab
);
35573 /* AVX2 does support 32-byte integer vector operations,
35574 thus the longest vector we are faced with is V32QImode. */
35575 #define MAX_VECT_LEN 32
35577 struct expand_vec_perm_d
35579 rtx target
, op0
, op1
;
35580 unsigned char perm
[MAX_VECT_LEN
];
35581 enum machine_mode vmode
;
35582 unsigned char nelt
;
35583 bool one_operand_p
;
35587 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
35588 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
35589 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
35591 /* Get a vector mode of the same size as the original but with elements
35592 twice as wide. This is only guaranteed to apply to integral vectors. */
35594 static inline enum machine_mode
35595 get_mode_wider_vector (enum machine_mode o
)
35597 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35598 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
35599 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
35600 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
35604 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35605 with all elements equal to VAR. Return true if successful. */
35608 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
35609 rtx target
, rtx val
)
35632 /* First attempt to recognize VAL as-is. */
35633 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35634 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
35635 if (recog_memoized (insn
) < 0)
35638 /* If that fails, force VAL into a register. */
35641 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
35642 seq
= get_insns ();
35645 emit_insn_before (seq
, insn
);
35647 ok
= recog_memoized (insn
) >= 0;
35656 if (TARGET_SSE
|| TARGET_3DNOW_A
)
35660 val
= gen_lowpart (SImode
, val
);
35661 x
= gen_rtx_TRUNCATE (HImode
, val
);
35662 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
35663 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35676 struct expand_vec_perm_d dperm
;
35680 memset (&dperm
, 0, sizeof (dperm
));
35681 dperm
.target
= target
;
35682 dperm
.vmode
= mode
;
35683 dperm
.nelt
= GET_MODE_NUNITS (mode
);
35684 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
35685 dperm
.one_operand_p
= true;
35687 /* Extend to SImode using a paradoxical SUBREG. */
35688 tmp1
= gen_reg_rtx (SImode
);
35689 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
35691 /* Insert the SImode value as low element of a V4SImode vector. */
35692 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
35693 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
35695 ok
= (expand_vec_perm_1 (&dperm
)
35696 || expand_vec_perm_broadcast_1 (&dperm
));
35708 /* Replicate the value once into the next wider mode and recurse. */
35710 enum machine_mode smode
, wsmode
, wvmode
;
35713 smode
= GET_MODE_INNER (mode
);
35714 wvmode
= get_mode_wider_vector (mode
);
35715 wsmode
= GET_MODE_INNER (wvmode
);
35717 val
= convert_modes (wsmode
, smode
, val
, true);
35718 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
35719 GEN_INT (GET_MODE_BITSIZE (smode
)),
35720 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35721 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
35723 x
= gen_lowpart (wvmode
, target
);
35724 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
35732 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
35733 rtx x
= gen_reg_rtx (hvmode
);
35735 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
35738 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
35739 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35748 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35749 whose ONE_VAR element is VAR, and other elements are zero. Return true
35753 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
35754 rtx target
, rtx var
, int one_var
)
35756 enum machine_mode vsimode
;
35759 bool use_vector_set
= false;
35764 /* For SSE4.1, we normally use vector set. But if the second
35765 element is zero and inter-unit moves are OK, we use movq
35767 use_vector_set
= (TARGET_64BIT
35769 && !(TARGET_INTER_UNIT_MOVES
35775 use_vector_set
= TARGET_SSE4_1
;
35778 use_vector_set
= TARGET_SSE2
;
35781 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
35788 use_vector_set
= TARGET_AVX
;
35791 /* Use ix86_expand_vector_set in 64bit mode only. */
35792 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
35798 if (use_vector_set
)
35800 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
35801 var
= force_reg (GET_MODE_INNER (mode
), var
);
35802 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35818 var
= force_reg (GET_MODE_INNER (mode
), var
);
35819 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
35820 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
35825 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
35826 new_target
= gen_reg_rtx (mode
);
35828 new_target
= target
;
35829 var
= force_reg (GET_MODE_INNER (mode
), var
);
35830 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
35831 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
35832 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
35835 /* We need to shuffle the value to the correct position, so
35836 create a new pseudo to store the intermediate result. */
35838 /* With SSE2, we can use the integer shuffle insns. */
35839 if (mode
!= V4SFmode
&& TARGET_SSE2
)
35841 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
35843 GEN_INT (one_var
== 1 ? 0 : 1),
35844 GEN_INT (one_var
== 2 ? 0 : 1),
35845 GEN_INT (one_var
== 3 ? 0 : 1)));
35846 if (target
!= new_target
)
35847 emit_move_insn (target
, new_target
);
35851 /* Otherwise convert the intermediate result to V4SFmode and
35852 use the SSE1 shuffle instructions. */
35853 if (mode
!= V4SFmode
)
35855 tmp
= gen_reg_rtx (V4SFmode
);
35856 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
35861 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
35863 GEN_INT (one_var
== 1 ? 0 : 1),
35864 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
35865 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
35867 if (mode
!= V4SFmode
)
35868 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
35869 else if (tmp
!= target
)
35870 emit_move_insn (target
, tmp
);
35872 else if (target
!= new_target
)
35873 emit_move_insn (target
, new_target
);
35878 vsimode
= V4SImode
;
35884 vsimode
= V2SImode
;
35890 /* Zero extend the variable element to SImode and recurse. */
35891 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
35893 x
= gen_reg_rtx (vsimode
);
35894 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
35896 gcc_unreachable ();
35898 emit_move_insn (target
, gen_lowpart (mode
, x
));
35906 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35907 consisting of the values in VALS. It is known that all elements
35908 except ONE_VAR are constants. Return true if successful. */
35911 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
35912 rtx target
, rtx vals
, int one_var
)
35914 rtx var
= XVECEXP (vals
, 0, one_var
);
35915 enum machine_mode wmode
;
35918 const_vec
= copy_rtx (vals
);
35919 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
35920 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
35928 /* For the two element vectors, it's just as easy to use
35929 the general case. */
35933 /* Use ix86_expand_vector_set in 64bit mode only. */
35956 /* There's no way to set one QImode entry easily. Combine
35957 the variable value with its adjacent constant value, and
35958 promote to an HImode set. */
35959 x
= XVECEXP (vals
, 0, one_var
^ 1);
35962 var
= convert_modes (HImode
, QImode
, var
, true);
35963 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
35964 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35965 x
= GEN_INT (INTVAL (x
) & 0xff);
35969 var
= convert_modes (HImode
, QImode
, var
, true);
35970 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
35972 if (x
!= const0_rtx
)
35973 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
35974 1, OPTAB_LIB_WIDEN
);
35976 x
= gen_reg_rtx (wmode
);
35977 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
35978 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
35980 emit_move_insn (target
, gen_lowpart (mode
, x
));
35987 emit_move_insn (target
, const_vec
);
35988 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
35992 /* A subroutine of ix86_expand_vector_init_general. Use vector
35993 concatenate to handle the most general case: all values variable,
35994 and none identical. */
35997 ix86_expand_vector_init_concat (enum machine_mode mode
,
35998 rtx target
, rtx
*ops
, int n
)
36000 enum machine_mode cmode
, hmode
= VOIDmode
;
36001 rtx first
[8], second
[4];
36041 gcc_unreachable ();
36044 if (!register_operand (ops
[1], cmode
))
36045 ops
[1] = force_reg (cmode
, ops
[1]);
36046 if (!register_operand (ops
[0], cmode
))
36047 ops
[0] = force_reg (cmode
, ops
[0]);
36048 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36049 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36069 gcc_unreachable ();
36085 gcc_unreachable ();
36090 /* FIXME: We process inputs backward to help RA. PR 36222. */
36093 for (; i
> 0; i
-= 2, j
--)
36095 first
[j
] = gen_reg_rtx (cmode
);
36096 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36097 ix86_expand_vector_init (false, first
[j
],
36098 gen_rtx_PARALLEL (cmode
, v
));
36104 gcc_assert (hmode
!= VOIDmode
);
36105 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36107 second
[j
] = gen_reg_rtx (hmode
);
36108 ix86_expand_vector_init_concat (hmode
, second
[j
],
36112 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36115 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36119 gcc_unreachable ();
36123 /* A subroutine of ix86_expand_vector_init_general. Use vector
36124 interleave to handle the most general case: all values variable,
36125 and none identical. */
36128 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36129 rtx target
, rtx
*ops
, int n
)
36131 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36134 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36135 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36136 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
36141 gen_load_even
= gen_vec_setv8hi
;
36142 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36143 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36144 inner_mode
= HImode
;
36145 first_imode
= V4SImode
;
36146 second_imode
= V2DImode
;
36147 third_imode
= VOIDmode
;
36150 gen_load_even
= gen_vec_setv16qi
;
36151 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36152 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36153 inner_mode
= QImode
;
36154 first_imode
= V8HImode
;
36155 second_imode
= V4SImode
;
36156 third_imode
= V2DImode
;
36159 gcc_unreachable ();
36162 for (i
= 0; i
< n
; i
++)
36164 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36165 op0
= gen_reg_rtx (SImode
);
36166 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36168 /* Insert the SImode value as low element of V4SImode vector. */
36169 op1
= gen_reg_rtx (V4SImode
);
36170 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36171 gen_rtx_VEC_DUPLICATE (V4SImode
,
36173 CONST0_RTX (V4SImode
),
36175 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36177 /* Cast the V4SImode vector back to a vector in orignal mode. */
36178 op0
= gen_reg_rtx (mode
);
36179 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36181 /* Load even elements into the second positon. */
36182 emit_insn (gen_load_even (op0
,
36183 force_reg (inner_mode
,
36187 /* Cast vector to FIRST_IMODE vector. */
36188 ops
[i
] = gen_reg_rtx (first_imode
);
36189 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36192 /* Interleave low FIRST_IMODE vectors. */
36193 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36195 op0
= gen_reg_rtx (first_imode
);
36196 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36198 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36199 ops
[j
] = gen_reg_rtx (second_imode
);
36200 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36203 /* Interleave low SECOND_IMODE vectors. */
36204 switch (second_imode
)
36207 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36209 op0
= gen_reg_rtx (second_imode
);
36210 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36213 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36215 ops
[j
] = gen_reg_rtx (third_imode
);
36216 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
36218 second_imode
= V2DImode
;
36219 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36223 op0
= gen_reg_rtx (second_imode
);
36224 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36227 /* Cast the SECOND_IMODE vector back to a vector on original
36229 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36230 gen_lowpart (mode
, op0
)));
36234 gcc_unreachable ();
36238 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36239 all values variable, and none identical. */
36242 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36243 rtx target
, rtx vals
)
36245 rtx ops
[32], op0
, op1
;
36246 enum machine_mode half_mode
= VOIDmode
;
36253 if (!mmx_ok
&& !TARGET_SSE
)
36265 n
= GET_MODE_NUNITS (mode
);
36266 for (i
= 0; i
< n
; i
++)
36267 ops
[i
] = XVECEXP (vals
, 0, i
);
36268 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
36272 half_mode
= V16QImode
;
36276 half_mode
= V8HImode
;
36280 n
= GET_MODE_NUNITS (mode
);
36281 for (i
= 0; i
< n
; i
++)
36282 ops
[i
] = XVECEXP (vals
, 0, i
);
36283 op0
= gen_reg_rtx (half_mode
);
36284 op1
= gen_reg_rtx (half_mode
);
36285 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36287 ix86_expand_vector_init_interleave (half_mode
, op1
,
36288 &ops
[n
>> 1], n
>> 2);
36289 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36290 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36294 if (!TARGET_SSE4_1
)
36302 /* Don't use ix86_expand_vector_init_interleave if we can't
36303 move from GPR to SSE register directly. */
36304 if (!TARGET_INTER_UNIT_MOVES
)
36307 n
= GET_MODE_NUNITS (mode
);
36308 for (i
= 0; i
< n
; i
++)
36309 ops
[i
] = XVECEXP (vals
, 0, i
);
36310 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36318 gcc_unreachable ();
36322 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36323 enum machine_mode inner_mode
;
36324 rtx words
[4], shift
;
36326 inner_mode
= GET_MODE_INNER (mode
);
36327 n_elts
= GET_MODE_NUNITS (mode
);
36328 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36329 n_elt_per_word
= n_elts
/ n_words
;
36330 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36332 for (i
= 0; i
< n_words
; ++i
)
36334 rtx word
= NULL_RTX
;
36336 for (j
= 0; j
< n_elt_per_word
; ++j
)
36338 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36339 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36345 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36346 word
, 1, OPTAB_LIB_WIDEN
);
36347 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36348 word
, 1, OPTAB_LIB_WIDEN
);
36356 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36357 else if (n_words
== 2)
36359 rtx tmp
= gen_reg_rtx (mode
);
36360 emit_clobber (tmp
);
36361 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36362 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36363 emit_move_insn (target
, tmp
);
36365 else if (n_words
== 4)
36367 rtx tmp
= gen_reg_rtx (V4SImode
);
36368 gcc_assert (word_mode
== SImode
);
36369 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36370 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36371 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36374 gcc_unreachable ();
36378 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36379 instructions unless MMX_OK is true. */
36382 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36384 enum machine_mode mode
= GET_MODE (target
);
36385 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36386 int n_elts
= GET_MODE_NUNITS (mode
);
36387 int n_var
= 0, one_var
= -1;
36388 bool all_same
= true, all_const_zero
= true;
36392 for (i
= 0; i
< n_elts
; ++i
)
36394 x
= XVECEXP (vals
, 0, i
);
36395 if (!(CONST_INT_P (x
)
36396 || GET_CODE (x
) == CONST_DOUBLE
36397 || GET_CODE (x
) == CONST_FIXED
))
36398 n_var
++, one_var
= i
;
36399 else if (x
!= CONST0_RTX (inner_mode
))
36400 all_const_zero
= false;
36401 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
36405 /* Constants are best loaded from the constant pool. */
36408 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
36412 /* If all values are identical, broadcast the value. */
36414 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
36415 XVECEXP (vals
, 0, 0)))
36418 /* Values where only one field is non-constant are best loaded from
36419 the pool and overwritten via move later. */
36423 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
36424 XVECEXP (vals
, 0, one_var
),
36428 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
36432 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
36436 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
36438 enum machine_mode mode
= GET_MODE (target
);
36439 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36440 enum machine_mode half_mode
;
36441 bool use_vec_merge
= false;
36443 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
36445 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
36446 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
36447 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
36448 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
36449 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
36450 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
36452 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
36454 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
36455 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
36456 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
36457 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
36458 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
36459 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
36469 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36470 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
36472 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36474 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36475 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36481 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
36485 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36486 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
36488 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36490 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36491 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36498 /* For the two element vectors, we implement a VEC_CONCAT with
36499 the extraction of the other element. */
36501 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
36502 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
36505 op0
= val
, op1
= tmp
;
36507 op0
= tmp
, op1
= val
;
36509 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
36510 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36515 use_vec_merge
= TARGET_SSE4_1
;
36522 use_vec_merge
= true;
36526 /* tmp = target = A B C D */
36527 tmp
= copy_to_reg (target
);
36528 /* target = A A B B */
36529 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
36530 /* target = X A B B */
36531 ix86_expand_vector_set (false, target
, val
, 0);
36532 /* target = A X C D */
36533 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36534 const1_rtx
, const0_rtx
,
36535 GEN_INT (2+4), GEN_INT (3+4)));
36539 /* tmp = target = A B C D */
36540 tmp
= copy_to_reg (target
);
36541 /* tmp = X B C D */
36542 ix86_expand_vector_set (false, tmp
, val
, 0);
36543 /* target = A B X D */
36544 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36545 const0_rtx
, const1_rtx
,
36546 GEN_INT (0+4), GEN_INT (3+4)));
36550 /* tmp = target = A B C D */
36551 tmp
= copy_to_reg (target
);
36552 /* tmp = X B C D */
36553 ix86_expand_vector_set (false, tmp
, val
, 0);
36554 /* target = A B X D */
36555 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36556 const0_rtx
, const1_rtx
,
36557 GEN_INT (2+4), GEN_INT (0+4)));
36561 gcc_unreachable ();
36566 use_vec_merge
= TARGET_SSE4_1
;
36570 /* Element 0 handled by vec_merge below. */
36573 use_vec_merge
= true;
36579 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36580 store into element 0, then shuffle them back. */
36584 order
[0] = GEN_INT (elt
);
36585 order
[1] = const1_rtx
;
36586 order
[2] = const2_rtx
;
36587 order
[3] = GEN_INT (3);
36588 order
[elt
] = const0_rtx
;
36590 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36591 order
[1], order
[2], order
[3]));
36593 ix86_expand_vector_set (false, target
, val
, 0);
36595 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36596 order
[1], order
[2], order
[3]));
36600 /* For SSE1, we have to reuse the V4SF code. */
36601 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
36602 gen_lowpart (SFmode
, val
), elt
);
36607 use_vec_merge
= TARGET_SSE2
;
36610 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36614 use_vec_merge
= TARGET_SSE4_1
;
36621 half_mode
= V16QImode
;
36627 half_mode
= V8HImode
;
36633 half_mode
= V4SImode
;
36639 half_mode
= V2DImode
;
36645 half_mode
= V4SFmode
;
36651 half_mode
= V2DFmode
;
36657 /* Compute offset. */
36661 gcc_assert (i
<= 1);
36663 /* Extract the half. */
36664 tmp
= gen_reg_rtx (half_mode
);
36665 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
36667 /* Put val in tmp at elt. */
36668 ix86_expand_vector_set (false, tmp
, val
, elt
);
36671 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
36680 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36681 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
36682 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36686 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36688 emit_move_insn (mem
, target
);
36690 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36691 emit_move_insn (tmp
, val
);
36693 emit_move_insn (target
, mem
);
36698 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
36700 enum machine_mode mode
= GET_MODE (vec
);
36701 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36702 bool use_vec_extr
= false;
36715 use_vec_extr
= true;
36719 use_vec_extr
= TARGET_SSE4_1
;
36731 tmp
= gen_reg_rtx (mode
);
36732 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
36733 GEN_INT (elt
), GEN_INT (elt
),
36734 GEN_INT (elt
+4), GEN_INT (elt
+4)));
36738 tmp
= gen_reg_rtx (mode
);
36739 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
36743 gcc_unreachable ();
36746 use_vec_extr
= true;
36751 use_vec_extr
= TARGET_SSE4_1
;
36765 tmp
= gen_reg_rtx (mode
);
36766 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
36767 GEN_INT (elt
), GEN_INT (elt
),
36768 GEN_INT (elt
), GEN_INT (elt
)));
36772 tmp
= gen_reg_rtx (mode
);
36773 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
36777 gcc_unreachable ();
36780 use_vec_extr
= true;
36785 /* For SSE1, we have to reuse the V4SF code. */
36786 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
36787 gen_lowpart (V4SFmode
, vec
), elt
);
36793 use_vec_extr
= TARGET_SSE2
;
36796 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36800 use_vec_extr
= TARGET_SSE4_1
;
36806 tmp
= gen_reg_rtx (V4SFmode
);
36808 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
36810 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
36811 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36819 tmp
= gen_reg_rtx (V2DFmode
);
36821 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
36823 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
36824 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36832 tmp
= gen_reg_rtx (V16QImode
);
36834 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
36836 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
36837 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
36845 tmp
= gen_reg_rtx (V8HImode
);
36847 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
36849 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
36850 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
36858 tmp
= gen_reg_rtx (V4SImode
);
36860 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
36862 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
36863 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36871 tmp
= gen_reg_rtx (V2DImode
);
36873 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
36875 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
36876 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36882 /* ??? Could extract the appropriate HImode element and shift. */
36889 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
36890 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
36892 /* Let the rtl optimizers know about the zero extension performed. */
36893 if (inner_mode
== QImode
|| inner_mode
== HImode
)
36895 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
36896 target
= gen_lowpart (SImode
, target
);
36899 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36903 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36905 emit_move_insn (mem
, vec
);
36907 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36908 emit_move_insn (target
, tmp
);
36912 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
36913 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
36914 The upper bits of DEST are undefined, though they shouldn't cause
36915 exceptions (some bits from src or all zeros are ok). */
36918 emit_reduc_half (rtx dest
, rtx src
, int i
)
36921 switch (GET_MODE (src
))
36925 tem
= gen_sse_movhlps (dest
, src
, src
);
36927 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
36928 GEN_INT (1 + 4), GEN_INT (1 + 4));
36931 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
36937 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
36938 gen_lowpart (V1TImode
, src
),
36943 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
36945 tem
= gen_avx_shufps256 (dest
, src
, src
,
36946 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
36950 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
36952 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
36959 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
36960 gen_lowpart (V4DImode
, src
),
36961 gen_lowpart (V4DImode
, src
),
36964 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
36965 gen_lowpart (V2TImode
, src
),
36969 gcc_unreachable ();
36974 /* Expand a vector reduction. FN is the binary pattern to reduce;
36975 DEST is the destination; IN is the input vector. */
36978 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
36980 rtx half
, dst
, vec
= in
;
36981 enum machine_mode mode
= GET_MODE (in
);
36984 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
36986 && mode
== V8HImode
36987 && fn
== gen_uminv8hi3
)
36989 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
36993 for (i
= GET_MODE_BITSIZE (mode
);
36994 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
36997 half
= gen_reg_rtx (mode
);
36998 emit_reduc_half (half
, vec
, i
);
36999 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37002 dst
= gen_reg_rtx (mode
);
37003 emit_insn (fn (dst
, half
, vec
));
37008 /* Target hook for scalar_mode_supported_p. */
37010 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37012 if (DECIMAL_FLOAT_MODE_P (mode
))
37013 return default_decimal_float_supported_p ();
37014 else if (mode
== TFmode
)
37017 return default_scalar_mode_supported_p (mode
);
37020 /* Implements target hook vector_mode_supported_p. */
37022 ix86_vector_mode_supported_p (enum machine_mode mode
)
37024 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37026 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37028 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37030 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37032 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37037 /* Target hook for c_mode_for_suffix. */
37038 static enum machine_mode
37039 ix86_c_mode_for_suffix (char suffix
)
37049 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37051 We do this in the new i386 backend to maintain source compatibility
37052 with the old cc0-based compiler. */
37055 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37056 tree inputs ATTRIBUTE_UNUSED
,
37059 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37061 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37066 /* Implements target vector targetm.asm.encode_section_info. */
37068 static void ATTRIBUTE_UNUSED
37069 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37071 default_encode_section_info (decl
, rtl
, first
);
37073 if (TREE_CODE (decl
) == VAR_DECL
37074 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37075 && ix86_in_large_data_p (decl
))
37076 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37079 /* Worker function for REVERSE_CONDITION. */
37082 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37084 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37085 ? reverse_condition (code
)
37086 : reverse_condition_maybe_unordered (code
));
37089 /* Output code to perform an x87 FP register move, from OPERANDS[1]
37093 output_387_reg_move (rtx insn
, rtx
*operands
)
37095 if (REG_P (operands
[0]))
37097 if (REG_P (operands
[1])
37098 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37100 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37101 return output_387_ffreep (operands
, 0);
37102 return "fstp\t%y0";
37104 if (STACK_TOP_P (operands
[0]))
37105 return "fld%Z1\t%y1";
37108 else if (MEM_P (operands
[0]))
37110 gcc_assert (REG_P (operands
[1]));
37111 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37112 return "fstp%Z0\t%y0";
37115 /* There is no non-popping store to memory for XFmode.
37116 So if we need one, follow the store with a load. */
37117 if (GET_MODE (operands
[0]) == XFmode
)
37118 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37120 return "fst%Z0\t%y0";
37127 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37128 FP status register is set. */
37131 ix86_emit_fp_unordered_jump (rtx label
)
37133 rtx reg
= gen_reg_rtx (HImode
);
37136 emit_insn (gen_x86_fnstsw_1 (reg
));
37138 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37140 emit_insn (gen_x86_sahf_1 (reg
));
37142 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37143 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37147 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37149 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37150 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37153 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37154 gen_rtx_LABEL_REF (VOIDmode
, label
),
37156 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37158 emit_jump_insn (temp
);
37159 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37162 /* Output code to perform a log1p XFmode calculation. */
37164 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37166 rtx label1
= gen_label_rtx ();
37167 rtx label2
= gen_label_rtx ();
37169 rtx tmp
= gen_reg_rtx (XFmode
);
37170 rtx tmp2
= gen_reg_rtx (XFmode
);
37173 emit_insn (gen_absxf2 (tmp
, op1
));
37174 test
= gen_rtx_GE (VOIDmode
, tmp
,
37175 CONST_DOUBLE_FROM_REAL_VALUE (
37176 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37178 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37180 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37181 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37182 emit_jump (label2
);
37184 emit_label (label1
);
37185 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37186 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37187 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37188 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37190 emit_label (label2
);
37193 /* Emit code for round calculation. */
37194 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37196 enum machine_mode inmode
= GET_MODE (op1
);
37197 enum machine_mode outmode
= GET_MODE (op0
);
37198 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37199 rtx scratch
= gen_reg_rtx (HImode
);
37200 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37201 rtx jump_label
= gen_label_rtx ();
37203 rtx (*gen_abs
) (rtx
, rtx
);
37204 rtx (*gen_neg
) (rtx
, rtx
);
37209 gen_abs
= gen_abssf2
;
37212 gen_abs
= gen_absdf2
;
37215 gen_abs
= gen_absxf2
;
37218 gcc_unreachable ();
37224 gen_neg
= gen_negsf2
;
37227 gen_neg
= gen_negdf2
;
37230 gen_neg
= gen_negxf2
;
37233 gen_neg
= gen_neghi2
;
37236 gen_neg
= gen_negsi2
;
37239 gen_neg
= gen_negdi2
;
37242 gcc_unreachable ();
37245 e1
= gen_reg_rtx (inmode
);
37246 e2
= gen_reg_rtx (inmode
);
37247 res
= gen_reg_rtx (outmode
);
37249 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37251 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37253 /* scratch = fxam(op1) */
37254 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37255 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37257 /* e1 = fabs(op1) */
37258 emit_insn (gen_abs (e1
, op1
));
37260 /* e2 = e1 + 0.5 */
37261 half
= force_reg (inmode
, half
);
37262 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37263 gen_rtx_PLUS (inmode
, e1
, half
)));
37265 /* res = floor(e2) */
37266 if (inmode
!= XFmode
)
37268 tmp1
= gen_reg_rtx (XFmode
);
37270 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37271 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
37281 rtx tmp0
= gen_reg_rtx (XFmode
);
37283 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37285 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37286 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37287 UNSPEC_TRUNC_NOOP
)));
37291 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
37294 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37297 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37300 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37303 gcc_unreachable ();
37306 /* flags = signbit(a) */
37307 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37309 /* if (flags) then res = -res */
37310 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37311 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37312 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37314 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37315 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37316 JUMP_LABEL (insn
) = jump_label
;
37318 emit_insn (gen_neg (res
, res
));
37320 emit_label (jump_label
);
37321 LABEL_NUSES (jump_label
) = 1;
37323 emit_move_insn (op0
, res
);
37326 /* Output code to perform a Newton-Rhapson approximation of a single precision
37327 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37329 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37331 rtx x0
, x1
, e0
, e1
;
37333 x0
= gen_reg_rtx (mode
);
37334 e0
= gen_reg_rtx (mode
);
37335 e1
= gen_reg_rtx (mode
);
37336 x1
= gen_reg_rtx (mode
);
37338 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37340 b
= force_reg (mode
, b
);
37342 /* x0 = rcp(b) estimate */
37343 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37344 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
37347 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37348 gen_rtx_MULT (mode
, x0
, b
)));
37351 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37352 gen_rtx_MULT (mode
, x0
, e0
)));
37355 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37356 gen_rtx_PLUS (mode
, x0
, x0
)));
37359 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37360 gen_rtx_MINUS (mode
, e1
, e0
)));
37363 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37364 gen_rtx_MULT (mode
, a
, x1
)));
37367 /* Output code to perform a Newton-Rhapson approximation of a
37368 single precision floating point [reciprocal] square root. */
37370 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37373 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37376 x0
= gen_reg_rtx (mode
);
37377 e0
= gen_reg_rtx (mode
);
37378 e1
= gen_reg_rtx (mode
);
37379 e2
= gen_reg_rtx (mode
);
37380 e3
= gen_reg_rtx (mode
);
37382 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37383 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37385 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37386 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37388 if (VECTOR_MODE_P (mode
))
37390 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37391 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37394 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37395 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37397 a
= force_reg (mode
, a
);
37399 /* x0 = rsqrt(a) estimate */
37400 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37401 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
37404 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
37409 zero
= gen_reg_rtx (mode
);
37410 mask
= gen_reg_rtx (mode
);
37412 zero
= force_reg (mode
, CONST0_RTX(mode
));
37413 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37414 gen_rtx_NE (mode
, zero
, a
)));
37416 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37417 gen_rtx_AND (mode
, x0
, mask
)));
37421 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37422 gen_rtx_MULT (mode
, x0
, a
)));
37424 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37425 gen_rtx_MULT (mode
, e0
, x0
)));
37428 mthree
= force_reg (mode
, mthree
);
37429 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37430 gen_rtx_PLUS (mode
, e1
, mthree
)));
37432 mhalf
= force_reg (mode
, mhalf
);
37434 /* e3 = -.5 * x0 */
37435 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37436 gen_rtx_MULT (mode
, x0
, mhalf
)));
37438 /* e3 = -.5 * e0 */
37439 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37440 gen_rtx_MULT (mode
, e0
, mhalf
)));
37441 /* ret = e2 * e3 */
37442 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37443 gen_rtx_MULT (mode
, e2
, e3
)));
37446 #ifdef TARGET_SOLARIS
37447 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
37450 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
37453 /* With Binutils 2.15, the "@unwind" marker must be specified on
37454 every occurrence of the ".eh_frame" section, not just the first
37457 && strcmp (name
, ".eh_frame") == 0)
37459 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
37460 flags
& SECTION_WRITE
? "aw" : "a");
37465 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
37467 solaris_elf_asm_comdat_section (name
, flags
, decl
);
37472 default_elf_asm_named_section (name
, flags
, decl
);
37474 #endif /* TARGET_SOLARIS */
37476 /* Return the mangling of TYPE if it is an extended fundamental type. */
37478 static const char *
37479 ix86_mangle_type (const_tree type
)
37481 type
= TYPE_MAIN_VARIANT (type
);
37483 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
37484 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
37487 switch (TYPE_MODE (type
))
37490 /* __float128 is "g". */
37493 /* "long double" or __float80 is "e". */
37500 /* For 32-bit code we can save PIC register setup by using
37501 __stack_chk_fail_local hidden function instead of calling
37502 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
37503 register, so it is better to call __stack_chk_fail directly. */
37505 static tree ATTRIBUTE_UNUSED
37506 ix86_stack_protect_fail (void)
37508 return TARGET_64BIT
37509 ? default_external_stack_protect_fail ()
37510 : default_hidden_stack_protect_fail ();
37513 /* Select a format to encode pointers in exception handling data. CODE
37514 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37515 true if the symbol may be affected by dynamic relocations.
37517 ??? All x86 object file formats are capable of representing this.
37518 After all, the relocation needed is the same as for the call insn.
37519 Whether or not a particular assembler allows us to enter such, I
37520 guess we'll have to see. */
37522 asm_preferred_eh_data_format (int code
, int global
)
37526 int type
= DW_EH_PE_sdata8
;
37528 || ix86_cmodel
== CM_SMALL_PIC
37529 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
37530 type
= DW_EH_PE_sdata4
;
37531 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
37533 if (ix86_cmodel
== CM_SMALL
37534 || (ix86_cmodel
== CM_MEDIUM
&& code
))
37535 return DW_EH_PE_udata4
;
37536 return DW_EH_PE_absptr
;
37539 /* Expand copysign from SIGN to the positive value ABS_VALUE
37540 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
37543 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
37545 enum machine_mode mode
= GET_MODE (sign
);
37546 rtx sgn
= gen_reg_rtx (mode
);
37547 if (mask
== NULL_RTX
)
37549 enum machine_mode vmode
;
37551 if (mode
== SFmode
)
37553 else if (mode
== DFmode
)
37558 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
37559 if (!VECTOR_MODE_P (mode
))
37561 /* We need to generate a scalar mode mask in this case. */
37562 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37563 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37564 mask
= gen_reg_rtx (mode
);
37565 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37569 mask
= gen_rtx_NOT (mode
, mask
);
37570 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
37571 gen_rtx_AND (mode
, mask
, sign
)));
37572 emit_insn (gen_rtx_SET (VOIDmode
, result
,
37573 gen_rtx_IOR (mode
, abs_value
, sgn
)));
37576 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37577 mask for masking out the sign-bit is stored in *SMASK, if that is
37580 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
37582 enum machine_mode vmode
, mode
= GET_MODE (op0
);
37585 xa
= gen_reg_rtx (mode
);
37586 if (mode
== SFmode
)
37588 else if (mode
== DFmode
)
37592 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
37593 if (!VECTOR_MODE_P (mode
))
37595 /* We need to generate a scalar mode mask in this case. */
37596 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37597 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37598 mask
= gen_reg_rtx (mode
);
37599 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37601 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
37602 gen_rtx_AND (mode
, op0
, mask
)));
37610 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37611 swapping the operands if SWAP_OPERANDS is true. The expanded
37612 code is a forward jump to a newly created label in case the
37613 comparison is true. The generated label rtx is returned. */
37615 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
37616 bool swap_operands
)
37627 label
= gen_label_rtx ();
37628 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
37629 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37630 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
37631 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
37632 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
37633 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
37634 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37635 JUMP_LABEL (tmp
) = label
;
37640 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37641 using comparison code CODE. Operands are swapped for the comparison if
37642 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37644 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
37645 bool swap_operands
)
37647 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
37648 enum machine_mode mode
= GET_MODE (op0
);
37649 rtx mask
= gen_reg_rtx (mode
);
37658 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
37660 emit_insn (insn (mask
, op0
, op1
,
37661 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
37665 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37666 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
37668 ix86_gen_TWO52 (enum machine_mode mode
)
37670 REAL_VALUE_TYPE TWO52r
;
37673 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
37674 TWO52
= const_double_from_real_value (TWO52r
, mode
);
37675 TWO52
= force_reg (mode
, TWO52
);
37680 /* Expand SSE sequence for computing lround from OP1 storing
37683 ix86_expand_lround (rtx op0
, rtx op1
)
37685 /* C code for the stuff we're doing below:
37686 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37689 enum machine_mode mode
= GET_MODE (op1
);
37690 const struct real_format
*fmt
;
37691 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37694 /* load nextafter (0.5, 0.0) */
37695 fmt
= REAL_MODE_FORMAT (mode
);
37696 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37697 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37699 /* adj = copysign (0.5, op1) */
37700 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37701 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
37703 /* adj = op1 + adj */
37704 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37706 /* op0 = (imode)adj */
37707 expand_fix (op0
, adj
, 0);
37710 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
37713 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
37715 /* C code for the stuff we're doing below (for do_floor):
37717 xi -= (double)xi > op1 ? 1 : 0;
37720 enum machine_mode fmode
= GET_MODE (op1
);
37721 enum machine_mode imode
= GET_MODE (op0
);
37722 rtx ireg
, freg
, label
, tmp
;
37724 /* reg = (long)op1 */
37725 ireg
= gen_reg_rtx (imode
);
37726 expand_fix (ireg
, op1
, 0);
37728 /* freg = (double)reg */
37729 freg
= gen_reg_rtx (fmode
);
37730 expand_float (freg
, ireg
, 0);
37732 /* ireg = (freg > op1) ? ireg - 1 : ireg */
37733 label
= ix86_expand_sse_compare_and_jump (UNLE
,
37734 freg
, op1
, !do_floor
);
37735 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
37736 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
37737 emit_move_insn (ireg
, tmp
);
37739 emit_label (label
);
37740 LABEL_NUSES (label
) = 1;
37742 emit_move_insn (op0
, ireg
);
37745 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
37746 result in OPERAND0. */
37748 ix86_expand_rint (rtx operand0
, rtx operand1
)
37750 /* C code for the stuff we're doing below:
37751 xa = fabs (operand1);
37752 if (!isless (xa, 2**52))
37754 xa = xa + 2**52 - 2**52;
37755 return copysign (xa, operand1);
37757 enum machine_mode mode
= GET_MODE (operand0
);
37758 rtx res
, xa
, label
, TWO52
, mask
;
37760 res
= gen_reg_rtx (mode
);
37761 emit_move_insn (res
, operand1
);
37763 /* xa = abs (operand1) */
37764 xa
= ix86_expand_sse_fabs (res
, &mask
);
37766 /* if (!isless (xa, TWO52)) goto label; */
37767 TWO52
= ix86_gen_TWO52 (mode
);
37768 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37770 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37771 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37773 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
37775 emit_label (label
);
37776 LABEL_NUSES (label
) = 1;
37778 emit_move_insn (operand0
, res
);
37781 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37784 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
37786 /* C code for the stuff we expand below.
37787 double xa = fabs (x), x2;
37788 if (!isless (xa, TWO52))
37790 xa = xa + TWO52 - TWO52;
37791 x2 = copysign (xa, x);
37800 enum machine_mode mode
= GET_MODE (operand0
);
37801 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
37803 TWO52
= ix86_gen_TWO52 (mode
);
37805 /* Temporary for holding the result, initialized to the input
37806 operand to ease control flow. */
37807 res
= gen_reg_rtx (mode
);
37808 emit_move_insn (res
, operand1
);
37810 /* xa = abs (operand1) */
37811 xa
= ix86_expand_sse_fabs (res
, &mask
);
37813 /* if (!isless (xa, TWO52)) goto label; */
37814 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37816 /* xa = xa + TWO52 - TWO52; */
37817 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37818 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37820 /* xa = copysign (xa, operand1) */
37821 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
37823 /* generate 1.0 or -1.0 */
37824 one
= force_reg (mode
,
37825 const_double_from_real_value (do_floor
37826 ? dconst1
: dconstm1
, mode
));
37828 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37829 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37830 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37831 gen_rtx_AND (mode
, one
, tmp
)));
37832 /* We always need to subtract here to preserve signed zero. */
37833 tmp
= expand_simple_binop (mode
, MINUS
,
37834 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37835 emit_move_insn (res
, tmp
);
37837 emit_label (label
);
37838 LABEL_NUSES (label
) = 1;
37840 emit_move_insn (operand0
, res
);
37843 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37846 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
37848 /* C code for the stuff we expand below.
37849 double xa = fabs (x), x2;
37850 if (!isless (xa, TWO52))
37852 x2 = (double)(long)x;
37859 if (HONOR_SIGNED_ZEROS (mode))
37860 return copysign (x2, x);
37863 enum machine_mode mode
= GET_MODE (operand0
);
37864 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
37866 TWO52
= ix86_gen_TWO52 (mode
);
37868 /* Temporary for holding the result, initialized to the input
37869 operand to ease control flow. */
37870 res
= gen_reg_rtx (mode
);
37871 emit_move_insn (res
, operand1
);
37873 /* xa = abs (operand1) */
37874 xa
= ix86_expand_sse_fabs (res
, &mask
);
37876 /* if (!isless (xa, TWO52)) goto label; */
37877 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37879 /* xa = (double)(long)x */
37880 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37881 expand_fix (xi
, res
, 0);
37882 expand_float (xa
, xi
, 0);
37885 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37887 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37888 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37889 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37890 gen_rtx_AND (mode
, one
, tmp
)));
37891 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
37892 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37893 emit_move_insn (res
, tmp
);
37895 if (HONOR_SIGNED_ZEROS (mode
))
37896 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37898 emit_label (label
);
37899 LABEL_NUSES (label
) = 1;
37901 emit_move_insn (operand0
, res
);
37904 /* Expand SSE sequence for computing round from OPERAND1 storing
37905 into OPERAND0. Sequence that works without relying on DImode truncation
37906 via cvttsd2siq that is only available on 64bit targets. */
37908 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
37910 /* C code for the stuff we expand below.
37911 double xa = fabs (x), xa2, x2;
37912 if (!isless (xa, TWO52))
37914 Using the absolute value and copying back sign makes
37915 -0.0 -> -0.0 correct.
37916 xa2 = xa + TWO52 - TWO52;
37921 else if (dxa > 0.5)
37923 x2 = copysign (xa2, x);
37926 enum machine_mode mode
= GET_MODE (operand0
);
37927 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
37929 TWO52
= ix86_gen_TWO52 (mode
);
37931 /* Temporary for holding the result, initialized to the input
37932 operand to ease control flow. */
37933 res
= gen_reg_rtx (mode
);
37934 emit_move_insn (res
, operand1
);
37936 /* xa = abs (operand1) */
37937 xa
= ix86_expand_sse_fabs (res
, &mask
);
37939 /* if (!isless (xa, TWO52)) goto label; */
37940 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37942 /* xa2 = xa + TWO52 - TWO52; */
37943 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37944 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
37946 /* dxa = xa2 - xa; */
37947 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
37949 /* generate 0.5, 1.0 and -0.5 */
37950 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
37951 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37952 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
37956 tmp
= gen_reg_rtx (mode
);
37957 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
37958 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
37959 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37960 gen_rtx_AND (mode
, one
, tmp
)));
37961 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37962 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
37963 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
37964 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37965 gen_rtx_AND (mode
, one
, tmp
)));
37966 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37968 /* res = copysign (xa2, operand1) */
37969 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
37971 emit_label (label
);
37972 LABEL_NUSES (label
) = 1;
37974 emit_move_insn (operand0
, res
);
37977 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37980 ix86_expand_trunc (rtx operand0
, rtx operand1
)
37982 /* C code for SSE variant we expand below.
37983 double xa = fabs (x), x2;
37984 if (!isless (xa, TWO52))
37986 x2 = (double)(long)x;
37987 if (HONOR_SIGNED_ZEROS (mode))
37988 return copysign (x2, x);
37991 enum machine_mode mode
= GET_MODE (operand0
);
37992 rtx xa
, xi
, TWO52
, label
, res
, mask
;
37994 TWO52
= ix86_gen_TWO52 (mode
);
37996 /* Temporary for holding the result, initialized to the input
37997 operand to ease control flow. */
37998 res
= gen_reg_rtx (mode
);
37999 emit_move_insn (res
, operand1
);
38001 /* xa = abs (operand1) */
38002 xa
= ix86_expand_sse_fabs (res
, &mask
);
38004 /* if (!isless (xa, TWO52)) goto label; */
38005 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38007 /* x = (double)(long)x */
38008 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38009 expand_fix (xi
, res
, 0);
38010 expand_float (res
, xi
, 0);
38012 if (HONOR_SIGNED_ZEROS (mode
))
38013 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38015 emit_label (label
);
38016 LABEL_NUSES (label
) = 1;
38018 emit_move_insn (operand0
, res
);
38021 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38024 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38026 enum machine_mode mode
= GET_MODE (operand0
);
38027 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38029 /* C code for SSE variant we expand below.
38030 double xa = fabs (x), x2;
38031 if (!isless (xa, TWO52))
38033 xa2 = xa + TWO52 - TWO52;
38037 x2 = copysign (xa2, x);
38041 TWO52
= ix86_gen_TWO52 (mode
);
38043 /* Temporary for holding the result, initialized to the input
38044 operand to ease control flow. */
38045 res
= gen_reg_rtx (mode
);
38046 emit_move_insn (res
, operand1
);
38048 /* xa = abs (operand1) */
38049 xa
= ix86_expand_sse_fabs (res
, &smask
);
38051 /* if (!isless (xa, TWO52)) goto label; */
38052 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38054 /* res = xa + TWO52 - TWO52; */
38055 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38056 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38057 emit_move_insn (res
, tmp
);
38060 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38062 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38063 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38064 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38065 gen_rtx_AND (mode
, mask
, one
)));
38066 tmp
= expand_simple_binop (mode
, MINUS
,
38067 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38068 emit_move_insn (res
, tmp
);
38070 /* res = copysign (res, operand1) */
38071 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38073 emit_label (label
);
38074 LABEL_NUSES (label
) = 1;
38076 emit_move_insn (operand0
, res
);
38079 /* Expand SSE sequence for computing round from OPERAND1 storing
38082 ix86_expand_round (rtx operand0
, rtx operand1
)
38084 /* C code for the stuff we're doing below:
38085 double xa = fabs (x);
38086 if (!isless (xa, TWO52))
38088 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38089 return copysign (xa, x);
38091 enum machine_mode mode
= GET_MODE (operand0
);
38092 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38093 const struct real_format
*fmt
;
38094 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38096 /* Temporary for holding the result, initialized to the input
38097 operand to ease control flow. */
38098 res
= gen_reg_rtx (mode
);
38099 emit_move_insn (res
, operand1
);
38101 TWO52
= ix86_gen_TWO52 (mode
);
38102 xa
= ix86_expand_sse_fabs (res
, &mask
);
38103 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38105 /* load nextafter (0.5, 0.0) */
38106 fmt
= REAL_MODE_FORMAT (mode
);
38107 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38108 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38110 /* xa = xa + 0.5 */
38111 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38112 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38114 /* xa = (double)(int64_t)xa */
38115 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38116 expand_fix (xi
, xa
, 0);
38117 expand_float (xa
, xi
, 0);
38119 /* res = copysign (xa, operand1) */
38120 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38122 emit_label (label
);
38123 LABEL_NUSES (label
) = 1;
38125 emit_move_insn (operand0
, res
);
38128 /* Expand SSE sequence for computing round
38129 from OP1 storing into OP0 using sse4 round insn. */
38131 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38133 enum machine_mode mode
= GET_MODE (op0
);
38134 rtx e1
, e2
, res
, half
;
38135 const struct real_format
*fmt
;
38136 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38137 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38138 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38143 gen_copysign
= gen_copysignsf3
;
38144 gen_round
= gen_sse4_1_roundsf2
;
38147 gen_copysign
= gen_copysigndf3
;
38148 gen_round
= gen_sse4_1_rounddf2
;
38151 gcc_unreachable ();
38154 /* round (a) = trunc (a + copysign (0.5, a)) */
38156 /* load nextafter (0.5, 0.0) */
38157 fmt
= REAL_MODE_FORMAT (mode
);
38158 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38159 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38160 half
= const_double_from_real_value (pred_half
, mode
);
38162 /* e1 = copysign (0.5, op1) */
38163 e1
= gen_reg_rtx (mode
);
38164 emit_insn (gen_copysign (e1
, half
, op1
));
38166 /* e2 = op1 + e1 */
38167 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38169 /* res = trunc (e2) */
38170 res
= gen_reg_rtx (mode
);
38171 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38173 emit_move_insn (op0
, res
);
38177 /* Table of valid machine attributes. */
38178 static const struct attribute_spec ix86_attribute_table
[] =
38180 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38181 affects_type_identity } */
38182 /* Stdcall attribute says callee is responsible for popping arguments
38183 if they are not variable. */
38184 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38186 /* Fastcall attribute says callee is responsible for popping arguments
38187 if they are not variable. */
38188 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38190 /* Thiscall attribute says callee is responsible for popping arguments
38191 if they are not variable. */
38192 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38194 /* Cdecl attribute says the callee is a normal C declaration */
38195 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38197 /* Regparm attribute specifies how many integer arguments are to be
38198 passed in registers. */
38199 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38201 /* Sseregparm attribute says we are using x86_64 calling conventions
38202 for FP arguments. */
38203 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38205 /* The transactional memory builtins are implicitly regparm or fastcall
38206 depending on the ABI. Override the generic do-nothing attribute that
38207 these builtins were declared with. */
38208 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38210 /* force_align_arg_pointer says this function realigns the stack at entry. */
38211 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38212 false, true, true, ix86_handle_cconv_attribute
, false },
38213 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38214 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38215 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38216 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38219 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38221 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38223 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38224 SUBTARGET_ATTRIBUTE_TABLE
,
38226 /* ms_abi and sysv_abi calling convention function attributes. */
38227 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38228 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38229 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38231 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38232 ix86_handle_callee_pop_aggregate_return
, true },
38234 { NULL
, 0, 0, false, false, false, NULL
, false }
38237 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38239 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38241 int misalign ATTRIBUTE_UNUSED
)
38245 switch (type_of_cost
)
38248 return ix86_cost
->scalar_stmt_cost
;
38251 return ix86_cost
->scalar_load_cost
;
38254 return ix86_cost
->scalar_store_cost
;
38257 return ix86_cost
->vec_stmt_cost
;
38260 return ix86_cost
->vec_align_load_cost
;
38263 return ix86_cost
->vec_store_cost
;
38265 case vec_to_scalar
:
38266 return ix86_cost
->vec_to_scalar_cost
;
38268 case scalar_to_vec
:
38269 return ix86_cost
->scalar_to_vec_cost
;
38271 case unaligned_load
:
38272 case unaligned_store
:
38273 return ix86_cost
->vec_unalign_load_cost
;
38275 case cond_branch_taken
:
38276 return ix86_cost
->cond_taken_branch_cost
;
38278 case cond_branch_not_taken
:
38279 return ix86_cost
->cond_not_taken_branch_cost
;
38282 case vec_promote_demote
:
38283 return ix86_cost
->vec_stmt_cost
;
38285 case vec_construct
:
38286 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38287 return elements
/ 2 + 1;
38290 gcc_unreachable ();
38294 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38295 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38296 insn every time. */
38298 static GTY(()) rtx vselect_insn
;
38300 /* Initialize vselect_insn. */
38303 init_vselect_insn (void)
38308 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38309 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38310 XVECEXP (x
, 0, i
) = const0_rtx
;
38311 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38313 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38315 vselect_insn
= emit_insn (x
);
38319 /* Construct (set target (vec_select op0 (parallel perm))) and
38320 return true if that's a valid instruction in the active ISA. */
38323 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38324 unsigned nelt
, bool testing_p
)
38327 rtx x
, save_vconcat
;
38330 if (vselect_insn
== NULL_RTX
)
38331 init_vselect_insn ();
38333 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38334 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38335 for (i
= 0; i
< nelt
; ++i
)
38336 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38337 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38338 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38339 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38340 SET_DEST (PATTERN (vselect_insn
)) = target
;
38341 icode
= recog_memoized (vselect_insn
);
38343 if (icode
>= 0 && !testing_p
)
38344 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38346 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38347 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38348 INSN_CODE (vselect_insn
) = -1;
38353 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38356 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38357 const unsigned char *perm
, unsigned nelt
,
38360 enum machine_mode v2mode
;
38364 if (vselect_insn
== NULL_RTX
)
38365 init_vselect_insn ();
38367 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38368 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38369 PUT_MODE (x
, v2mode
);
38372 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38373 XEXP (x
, 0) = const0_rtx
;
38374 XEXP (x
, 1) = const0_rtx
;
38378 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38379 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38382 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38384 enum machine_mode vmode
= d
->vmode
;
38385 unsigned i
, mask
, nelt
= d
->nelt
;
38386 rtx target
, op0
, op1
, x
;
38387 rtx rperm
[32], vperm
;
38389 if (d
->one_operand_p
)
38391 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38393 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38395 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
38400 /* This is a blend, not a permute. Elements must stay in their
38401 respective lanes. */
38402 for (i
= 0; i
< nelt
; ++i
)
38404 unsigned e
= d
->perm
[i
];
38405 if (!(e
== i
|| e
== i
+ nelt
))
38412 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38413 decision should be extracted elsewhere, so that we only try that
38414 sequence once all budget==3 options have been tried. */
38415 target
= d
->target
;
38428 for (i
= 0; i
< nelt
; ++i
)
38429 mask
|= (d
->perm
[i
] >= nelt
) << i
;
38433 for (i
= 0; i
< 2; ++i
)
38434 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
38439 for (i
= 0; i
< 4; ++i
)
38440 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38445 /* See if bytes move in pairs so we can use pblendw with
38446 an immediate argument, rather than pblendvb with a vector
38448 for (i
= 0; i
< 16; i
+= 2)
38449 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38452 for (i
= 0; i
< nelt
; ++i
)
38453 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
38456 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
38457 vperm
= force_reg (vmode
, vperm
);
38459 if (GET_MODE_SIZE (vmode
) == 16)
38460 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
38462 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
38466 for (i
= 0; i
< 8; ++i
)
38467 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38472 target
= gen_lowpart (vmode
, target
);
38473 op0
= gen_lowpart (vmode
, op0
);
38474 op1
= gen_lowpart (vmode
, op1
);
38478 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38479 for (i
= 0; i
< 32; i
+= 2)
38480 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38482 /* See if bytes move in quadruplets. If yes, vpblendd
38483 with immediate can be used. */
38484 for (i
= 0; i
< 32; i
+= 4)
38485 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
38489 /* See if bytes move the same in both lanes. If yes,
38490 vpblendw with immediate can be used. */
38491 for (i
= 0; i
< 16; i
+= 2)
38492 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
38495 /* Use vpblendw. */
38496 for (i
= 0; i
< 16; ++i
)
38497 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
38502 /* Use vpblendd. */
38503 for (i
= 0; i
< 8; ++i
)
38504 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
38509 /* See if words move in pairs. If yes, vpblendd can be used. */
38510 for (i
= 0; i
< 16; i
+= 2)
38511 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38515 /* See if words move the same in both lanes. If not,
38516 vpblendvb must be used. */
38517 for (i
= 0; i
< 8; i
++)
38518 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
38520 /* Use vpblendvb. */
38521 for (i
= 0; i
< 32; ++i
)
38522 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
38526 target
= gen_lowpart (vmode
, target
);
38527 op0
= gen_lowpart (vmode
, op0
);
38528 op1
= gen_lowpart (vmode
, op1
);
38529 goto finish_pblendvb
;
38532 /* Use vpblendw. */
38533 for (i
= 0; i
< 16; ++i
)
38534 mask
|= (d
->perm
[i
] >= 16) << i
;
38538 /* Use vpblendd. */
38539 for (i
= 0; i
< 8; ++i
)
38540 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38545 /* Use vpblendd. */
38546 for (i
= 0; i
< 4; ++i
)
38547 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38552 gcc_unreachable ();
38555 /* This matches five different patterns with the different modes. */
38556 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
38557 x
= gen_rtx_SET (VOIDmode
, target
, x
);
38563 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38564 in terms of the variable form of vpermilps.
38566 Note that we will have already failed the immediate input vpermilps,
38567 which requires that the high and low part shuffle be identical; the
38568 variable form doesn't require that. */
38571 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
38573 rtx rperm
[8], vperm
;
38576 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
38579 /* We can only permute within the 128-bit lane. */
38580 for (i
= 0; i
< 8; ++i
)
38582 unsigned e
= d
->perm
[i
];
38583 if (i
< 4 ? e
>= 4 : e
< 4)
38590 for (i
= 0; i
< 8; ++i
)
38592 unsigned e
= d
->perm
[i
];
38594 /* Within each 128-bit lane, the elements of op0 are numbered
38595 from 0 and the elements of op1 are numbered from 4. */
38601 rperm
[i
] = GEN_INT (e
);
38604 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
38605 vperm
= force_reg (V8SImode
, vperm
);
38606 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
38611 /* Return true if permutation D can be performed as VMODE permutation
38615 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
38617 unsigned int i
, j
, chunk
;
38619 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
38620 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
38621 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
38624 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
38627 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
38628 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
38629 if (d
->perm
[i
] & (chunk
- 1))
38632 for (j
= 1; j
< chunk
; ++j
)
38633 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
38639 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38640 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
38643 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
38645 unsigned i
, nelt
, eltsz
, mask
;
38646 unsigned char perm
[32];
38647 enum machine_mode vmode
= V16QImode
;
38648 rtx rperm
[32], vperm
, target
, op0
, op1
;
38652 if (!d
->one_operand_p
)
38654 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
38657 && valid_perm_using_mode_p (V2TImode
, d
))
38662 /* Use vperm2i128 insn. The pattern uses
38663 V4DImode instead of V2TImode. */
38664 target
= gen_lowpart (V4DImode
, d
->target
);
38665 op0
= gen_lowpart (V4DImode
, d
->op0
);
38666 op1
= gen_lowpart (V4DImode
, d
->op1
);
38668 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
38669 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
38670 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
38678 if (GET_MODE_SIZE (d
->vmode
) == 16)
38683 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38688 /* V4DImode should be already handled through
38689 expand_vselect by vpermq instruction. */
38690 gcc_assert (d
->vmode
!= V4DImode
);
38693 if (d
->vmode
== V8SImode
38694 || d
->vmode
== V16HImode
38695 || d
->vmode
== V32QImode
)
38697 /* First see if vpermq can be used for
38698 V8SImode/V16HImode/V32QImode. */
38699 if (valid_perm_using_mode_p (V4DImode
, d
))
38701 for (i
= 0; i
< 4; i
++)
38702 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
38705 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
38706 gen_lowpart (V4DImode
, d
->op0
),
38710 /* Next see if vpermd can be used. */
38711 if (valid_perm_using_mode_p (V8SImode
, d
))
38714 /* Or if vpermps can be used. */
38715 else if (d
->vmode
== V8SFmode
)
38718 if (vmode
== V32QImode
)
38720 /* vpshufb only works intra lanes, it is not
38721 possible to shuffle bytes in between the lanes. */
38722 for (i
= 0; i
< nelt
; ++i
)
38723 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
38734 if (vmode
== V8SImode
)
38735 for (i
= 0; i
< 8; ++i
)
38736 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
38739 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38740 if (!d
->one_operand_p
)
38741 mask
= 2 * nelt
- 1;
38742 else if (vmode
== V16QImode
)
38745 mask
= nelt
/ 2 - 1;
38747 for (i
= 0; i
< nelt
; ++i
)
38749 unsigned j
, e
= d
->perm
[i
] & mask
;
38750 for (j
= 0; j
< eltsz
; ++j
)
38751 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
38755 vperm
= gen_rtx_CONST_VECTOR (vmode
,
38756 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
38757 vperm
= force_reg (vmode
, vperm
);
38759 target
= gen_lowpart (vmode
, d
->target
);
38760 op0
= gen_lowpart (vmode
, d
->op0
);
38761 if (d
->one_operand_p
)
38763 if (vmode
== V16QImode
)
38764 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
38765 else if (vmode
== V32QImode
)
38766 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
38767 else if (vmode
== V8SFmode
)
38768 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
38770 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
38774 op1
= gen_lowpart (vmode
, d
->op1
);
38775 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
38781 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
38782 in a single instruction. */
38785 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
38787 unsigned i
, nelt
= d
->nelt
;
38788 unsigned char perm2
[MAX_VECT_LEN
];
38790 /* Check plain VEC_SELECT first, because AVX has instructions that could
38791 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
38792 input where SEL+CONCAT may not. */
38793 if (d
->one_operand_p
)
38795 int mask
= nelt
- 1;
38796 bool identity_perm
= true;
38797 bool broadcast_perm
= true;
38799 for (i
= 0; i
< nelt
; i
++)
38801 perm2
[i
] = d
->perm
[i
] & mask
;
38803 identity_perm
= false;
38805 broadcast_perm
= false;
38811 emit_move_insn (d
->target
, d
->op0
);
38814 else if (broadcast_perm
&& TARGET_AVX2
)
38816 /* Use vpbroadcast{b,w,d}. */
38817 rtx (*gen
) (rtx
, rtx
) = NULL
;
38821 gen
= gen_avx2_pbroadcastv32qi_1
;
38824 gen
= gen_avx2_pbroadcastv16hi_1
;
38827 gen
= gen_avx2_pbroadcastv8si_1
;
38830 gen
= gen_avx2_pbroadcastv16qi
;
38833 gen
= gen_avx2_pbroadcastv8hi
;
38836 gen
= gen_avx2_vec_dupv8sf_1
;
38838 /* For other modes prefer other shuffles this function creates. */
38844 emit_insn (gen (d
->target
, d
->op0
));
38849 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
38852 /* There are plenty of patterns in sse.md that are written for
38853 SEL+CONCAT and are not replicated for a single op. Perhaps
38854 that should be changed, to avoid the nastiness here. */
38856 /* Recognize interleave style patterns, which means incrementing
38857 every other permutation operand. */
38858 for (i
= 0; i
< nelt
; i
+= 2)
38860 perm2
[i
] = d
->perm
[i
] & mask
;
38861 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
38863 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38867 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
38870 for (i
= 0; i
< nelt
; i
+= 4)
38872 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
38873 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
38874 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
38875 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
38878 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38884 /* Finally, try the fully general two operand permute. */
38885 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
38889 /* Recognize interleave style patterns with reversed operands. */
38890 if (!d
->one_operand_p
)
38892 for (i
= 0; i
< nelt
; ++i
)
38894 unsigned e
= d
->perm
[i
];
38902 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
38907 /* Try the SSE4.1 blend variable merge instructions. */
38908 if (expand_vec_perm_blend (d
))
38911 /* Try one of the AVX vpermil variable permutations. */
38912 if (expand_vec_perm_vpermil (d
))
38915 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
38916 vpshufb, vpermd, vpermps or vpermq variable permutation. */
38917 if (expand_vec_perm_pshufb (d
))
38923 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38924 in terms of a pair of pshuflw + pshufhw instructions. */
38927 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
38929 unsigned char perm2
[MAX_VECT_LEN
];
38933 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
38936 /* The two permutations only operate in 64-bit lanes. */
38937 for (i
= 0; i
< 4; ++i
)
38938 if (d
->perm
[i
] >= 4)
38940 for (i
= 4; i
< 8; ++i
)
38941 if (d
->perm
[i
] < 4)
38947 /* Emit the pshuflw. */
38948 memcpy (perm2
, d
->perm
, 4);
38949 for (i
= 4; i
< 8; ++i
)
38951 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
38954 /* Emit the pshufhw. */
38955 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
38956 for (i
= 0; i
< 4; ++i
)
38958 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
38964 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38965 the permutation using the SSSE3 palignr instruction. This succeeds
38966 when all of the elements in PERM fit within one vector and we merely
38967 need to shift them down so that a single vector permutation has a
38968 chance to succeed. */
38971 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
38973 unsigned i
, nelt
= d
->nelt
;
38978 /* Even with AVX, palignr only operates on 128-bit vectors. */
38979 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38982 min
= nelt
, max
= 0;
38983 for (i
= 0; i
< nelt
; ++i
)
38985 unsigned e
= d
->perm
[i
];
38991 if (min
== 0 || max
- min
>= nelt
)
38994 /* Given that we have SSSE3, we know we'll be able to implement the
38995 single operand permutation after the palignr with pshufb. */
38999 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39000 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39001 gen_lowpart (TImode
, d
->op1
),
39002 gen_lowpart (TImode
, d
->op0
), shift
));
39004 d
->op0
= d
->op1
= d
->target
;
39005 d
->one_operand_p
= true;
39008 for (i
= 0; i
< nelt
; ++i
)
39010 unsigned e
= d
->perm
[i
] - min
;
39016 /* Test for the degenerate case where the alignment by itself
39017 produces the desired permutation. */
39021 ok
= expand_vec_perm_1 (d
);
39027 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39029 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39030 a two vector permutation into a single vector permutation by using
39031 an interleave operation to merge the vectors. */
39034 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39036 struct expand_vec_perm_d dremap
, dfinal
;
39037 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39038 unsigned HOST_WIDE_INT contents
;
39039 unsigned char remap
[2 * MAX_VECT_LEN
];
39041 bool ok
, same_halves
= false;
39043 if (GET_MODE_SIZE (d
->vmode
) == 16)
39045 if (d
->one_operand_p
)
39048 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39052 /* For 32-byte modes allow even d->one_operand_p.
39053 The lack of cross-lane shuffling in some instructions
39054 might prevent a single insn shuffle. */
39056 dfinal
.testing_p
= true;
39057 /* If expand_vec_perm_interleave3 can expand this into
39058 a 3 insn sequence, give up and let it be expanded as
39059 3 insn sequence. While that is one insn longer,
39060 it doesn't need a memory operand and in the common
39061 case that both interleave low and high permutations
39062 with the same operands are adjacent needs 4 insns
39063 for both after CSE. */
39064 if (expand_vec_perm_interleave3 (&dfinal
))
39070 /* Examine from whence the elements come. */
39072 for (i
= 0; i
< nelt
; ++i
)
39073 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39075 memset (remap
, 0xff, sizeof (remap
));
39078 if (GET_MODE_SIZE (d
->vmode
) == 16)
39080 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39082 /* Split the two input vectors into 4 halves. */
39083 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39088 /* If the elements from the low halves use interleave low, and similarly
39089 for interleave high. If the elements are from mis-matched halves, we
39090 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39091 if ((contents
& (h1
| h3
)) == contents
)
39094 for (i
= 0; i
< nelt2
; ++i
)
39097 remap
[i
+ nelt
] = i
* 2 + 1;
39098 dremap
.perm
[i
* 2] = i
;
39099 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39101 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39102 dremap
.vmode
= V4SFmode
;
39104 else if ((contents
& (h2
| h4
)) == contents
)
39107 for (i
= 0; i
< nelt2
; ++i
)
39109 remap
[i
+ nelt2
] = i
* 2;
39110 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39111 dremap
.perm
[i
* 2] = i
+ nelt2
;
39112 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39114 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39115 dremap
.vmode
= V4SFmode
;
39117 else if ((contents
& (h1
| h4
)) == contents
)
39120 for (i
= 0; i
< nelt2
; ++i
)
39123 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39124 dremap
.perm
[i
] = i
;
39125 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39130 dremap
.vmode
= V2DImode
;
39132 dremap
.perm
[0] = 0;
39133 dremap
.perm
[1] = 3;
39136 else if ((contents
& (h2
| h3
)) == contents
)
39139 for (i
= 0; i
< nelt2
; ++i
)
39141 remap
[i
+ nelt2
] = i
;
39142 remap
[i
+ nelt
] = i
+ nelt2
;
39143 dremap
.perm
[i
] = i
+ nelt2
;
39144 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39149 dremap
.vmode
= V2DImode
;
39151 dremap
.perm
[0] = 1;
39152 dremap
.perm
[1] = 2;
39160 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39161 unsigned HOST_WIDE_INT q
[8];
39162 unsigned int nonzero_halves
[4];
39164 /* Split the two input vectors into 8 quarters. */
39165 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39166 for (i
= 1; i
< 8; ++i
)
39167 q
[i
] = q
[0] << (nelt4
* i
);
39168 for (i
= 0; i
< 4; ++i
)
39169 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39171 nonzero_halves
[nzcnt
] = i
;
39177 gcc_assert (d
->one_operand_p
);
39178 nonzero_halves
[1] = nonzero_halves
[0];
39179 same_halves
= true;
39181 else if (d
->one_operand_p
)
39183 gcc_assert (nonzero_halves
[0] == 0);
39184 gcc_assert (nonzero_halves
[1] == 1);
39189 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39191 /* Attempt to increase the likelihood that dfinal
39192 shuffle will be intra-lane. */
39193 char tmph
= nonzero_halves
[0];
39194 nonzero_halves
[0] = nonzero_halves
[1];
39195 nonzero_halves
[1] = tmph
;
39198 /* vperm2f128 or vperm2i128. */
39199 for (i
= 0; i
< nelt2
; ++i
)
39201 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39202 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39203 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39204 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39207 if (d
->vmode
!= V8SFmode
39208 && d
->vmode
!= V4DFmode
39209 && d
->vmode
!= V8SImode
)
39211 dremap
.vmode
= V8SImode
;
39213 for (i
= 0; i
< 4; ++i
)
39215 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39216 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39220 else if (d
->one_operand_p
)
39222 else if (TARGET_AVX2
39223 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39226 for (i
= 0; i
< nelt4
; ++i
)
39229 remap
[i
+ nelt
] = i
* 2 + 1;
39230 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39231 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39232 dremap
.perm
[i
* 2] = i
;
39233 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39234 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39235 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39238 else if (TARGET_AVX2
39239 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39242 for (i
= 0; i
< nelt4
; ++i
)
39244 remap
[i
+ nelt4
] = i
* 2;
39245 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39246 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39247 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39248 dremap
.perm
[i
* 2] = i
+ nelt4
;
39249 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39250 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39251 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39258 /* Use the remapping array set up above to move the elements from their
39259 swizzled locations into their final destinations. */
39261 for (i
= 0; i
< nelt
; ++i
)
39263 unsigned e
= remap
[d
->perm
[i
]];
39264 gcc_assert (e
< nelt
);
39265 /* If same_halves is true, both halves of the remapped vector are the
39266 same. Avoid cross-lane accesses if possible. */
39267 if (same_halves
&& i
>= nelt2
)
39269 gcc_assert (e
< nelt2
);
39270 dfinal
.perm
[i
] = e
+ nelt2
;
39273 dfinal
.perm
[i
] = e
;
39275 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39276 dfinal
.op1
= dfinal
.op0
;
39277 dfinal
.one_operand_p
= true;
39278 dremap
.target
= dfinal
.op0
;
39280 /* Test if the final remap can be done with a single insn. For V4SFmode or
39281 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39283 ok
= expand_vec_perm_1 (&dfinal
);
39284 seq
= get_insns ();
39293 if (dremap
.vmode
!= dfinal
.vmode
)
39295 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39296 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39297 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39300 ok
= expand_vec_perm_1 (&dremap
);
39307 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39308 a single vector cross-lane permutation into vpermq followed
39309 by any of the single insn permutations. */
39312 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39314 struct expand_vec_perm_d dremap
, dfinal
;
39315 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39316 unsigned contents
[2];
39320 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39321 && d
->one_operand_p
))
39326 for (i
= 0; i
< nelt2
; ++i
)
39328 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39329 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39332 for (i
= 0; i
< 2; ++i
)
39334 unsigned int cnt
= 0;
39335 for (j
= 0; j
< 4; ++j
)
39336 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39344 dremap
.vmode
= V4DImode
;
39346 dremap
.target
= gen_reg_rtx (V4DImode
);
39347 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39348 dremap
.op1
= dremap
.op0
;
39349 dremap
.one_operand_p
= true;
39350 for (i
= 0; i
< 2; ++i
)
39352 unsigned int cnt
= 0;
39353 for (j
= 0; j
< 4; ++j
)
39354 if ((contents
[i
] & (1u << j
)) != 0)
39355 dremap
.perm
[2 * i
+ cnt
++] = j
;
39356 for (; cnt
< 2; ++cnt
)
39357 dremap
.perm
[2 * i
+ cnt
] = 0;
39361 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39362 dfinal
.op1
= dfinal
.op0
;
39363 dfinal
.one_operand_p
= true;
39364 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39368 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39369 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39371 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39372 dfinal
.perm
[i
] |= nelt4
;
39374 gcc_unreachable ();
39377 ok
= expand_vec_perm_1 (&dremap
);
39380 ok
= expand_vec_perm_1 (&dfinal
);
39386 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39387 a vector permutation using two instructions, vperm2f128 resp.
39388 vperm2i128 followed by any single in-lane permutation. */
39391 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39393 struct expand_vec_perm_d dfirst
, dsecond
;
39394 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39398 || GET_MODE_SIZE (d
->vmode
) != 32
39399 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
39403 dsecond
.one_operand_p
= false;
39404 dsecond
.testing_p
= true;
39406 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39407 immediate. For perm < 16 the second permutation uses
39408 d->op0 as first operand, for perm >= 16 it uses d->op1
39409 as first operand. The second operand is the result of
39411 for (perm
= 0; perm
< 32; perm
++)
39413 /* Ignore permutations which do not move anything cross-lane. */
39416 /* The second shuffle for e.g. V4DFmode has
39417 0123 and ABCD operands.
39418 Ignore AB23, as 23 is already in the second lane
39419 of the first operand. */
39420 if ((perm
& 0xc) == (1 << 2)) continue;
39421 /* And 01CD, as 01 is in the first lane of the first
39423 if ((perm
& 3) == 0) continue;
39424 /* And 4567, as then the vperm2[fi]128 doesn't change
39425 anything on the original 4567 second operand. */
39426 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
39430 /* The second shuffle for e.g. V4DFmode has
39431 4567 and ABCD operands.
39432 Ignore AB67, as 67 is already in the second lane
39433 of the first operand. */
39434 if ((perm
& 0xc) == (3 << 2)) continue;
39435 /* And 45CD, as 45 is in the first lane of the first
39437 if ((perm
& 3) == 2) continue;
39438 /* And 0123, as then the vperm2[fi]128 doesn't change
39439 anything on the original 0123 first operand. */
39440 if ((perm
& 0xf) == (1 << 2)) continue;
39443 for (i
= 0; i
< nelt
; i
++)
39445 j
= d
->perm
[i
] / nelt2
;
39446 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
39447 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
39448 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
39449 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
39457 ok
= expand_vec_perm_1 (&dsecond
);
39468 /* Found a usable second shuffle. dfirst will be
39469 vperm2f128 on d->op0 and d->op1. */
39470 dsecond
.testing_p
= false;
39472 dfirst
.target
= gen_reg_rtx (d
->vmode
);
39473 for (i
= 0; i
< nelt
; i
++)
39474 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
39475 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
39477 ok
= expand_vec_perm_1 (&dfirst
);
39480 /* And dsecond is some single insn shuffle, taking
39481 d->op0 and result of vperm2f128 (if perm < 16) or
39482 d->op1 and result of vperm2f128 (otherwise). */
39483 dsecond
.op1
= dfirst
.target
;
39485 dsecond
.op0
= dfirst
.op1
;
39487 ok
= expand_vec_perm_1 (&dsecond
);
39493 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39494 if (d
->one_operand_p
)
39501 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39502 a two vector permutation using 2 intra-lane interleave insns
39503 and cross-lane shuffle for 32-byte vectors. */
39506 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
39509 rtx (*gen
) (rtx
, rtx
, rtx
);
39511 if (d
->one_operand_p
)
39513 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
39515 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
39521 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
39523 for (i
= 0; i
< nelt
; i
+= 2)
39524 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
39525 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
39535 gen
= gen_vec_interleave_highv32qi
;
39537 gen
= gen_vec_interleave_lowv32qi
;
39541 gen
= gen_vec_interleave_highv16hi
;
39543 gen
= gen_vec_interleave_lowv16hi
;
39547 gen
= gen_vec_interleave_highv8si
;
39549 gen
= gen_vec_interleave_lowv8si
;
39553 gen
= gen_vec_interleave_highv4di
;
39555 gen
= gen_vec_interleave_lowv4di
;
39559 gen
= gen_vec_interleave_highv8sf
;
39561 gen
= gen_vec_interleave_lowv8sf
;
39565 gen
= gen_vec_interleave_highv4df
;
39567 gen
= gen_vec_interleave_lowv4df
;
39570 gcc_unreachable ();
39573 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
39577 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39578 a single vector permutation using a single intra-lane vector
39579 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39580 the non-swapped and swapped vectors together. */
39583 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
39585 struct expand_vec_perm_d dfirst
, dsecond
;
39586 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39589 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
39593 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
39594 || !d
->one_operand_p
)
39598 for (i
= 0; i
< nelt
; i
++)
39599 dfirst
.perm
[i
] = 0xff;
39600 for (i
= 0, msk
= 0; i
< nelt
; i
++)
39602 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
39603 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
39605 dfirst
.perm
[j
] = d
->perm
[i
];
39609 for (i
= 0; i
< nelt
; i
++)
39610 if (dfirst
.perm
[i
] == 0xff)
39611 dfirst
.perm
[i
] = i
;
39614 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39617 ok
= expand_vec_perm_1 (&dfirst
);
39618 seq
= get_insns ();
39630 dsecond
.op0
= dfirst
.target
;
39631 dsecond
.op1
= dfirst
.target
;
39632 dsecond
.one_operand_p
= true;
39633 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39634 for (i
= 0; i
< nelt
; i
++)
39635 dsecond
.perm
[i
] = i
^ nelt2
;
39637 ok
= expand_vec_perm_1 (&dsecond
);
39640 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
39641 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
39645 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39646 permutation using two vperm2f128, followed by a vshufpd insn blending
39647 the two vectors together. */
39650 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
39652 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
39655 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
39665 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
39666 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
39667 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
39668 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
39669 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
39670 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
39671 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
39672 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
39673 dthird
.perm
[0] = (d
->perm
[0] % 2);
39674 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
39675 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
39676 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
39678 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39679 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39680 dthird
.op0
= dfirst
.target
;
39681 dthird
.op1
= dsecond
.target
;
39682 dthird
.one_operand_p
= false;
39684 canonicalize_perm (&dfirst
);
39685 canonicalize_perm (&dsecond
);
39687 ok
= expand_vec_perm_1 (&dfirst
)
39688 && expand_vec_perm_1 (&dsecond
)
39689 && expand_vec_perm_1 (&dthird
);
39696 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39697 permutation with two pshufb insns and an ior. We should have already
39698 failed all two instruction sequences. */
39701 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
39703 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
39704 unsigned int i
, nelt
, eltsz
;
39706 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39708 gcc_assert (!d
->one_operand_p
);
39711 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39713 /* Generate two permutation masks. If the required element is within
39714 the given vector it is shuffled into the proper lane. If the required
39715 element is in the other vector, force a zero into the lane by setting
39716 bit 7 in the permutation mask. */
39717 m128
= GEN_INT (-128);
39718 for (i
= 0; i
< nelt
; ++i
)
39720 unsigned j
, e
= d
->perm
[i
];
39721 unsigned which
= (e
>= nelt
);
39725 for (j
= 0; j
< eltsz
; ++j
)
39727 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
39728 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
39732 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
39733 vperm
= force_reg (V16QImode
, vperm
);
39735 l
= gen_reg_rtx (V16QImode
);
39736 op
= gen_lowpart (V16QImode
, d
->op0
);
39737 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
39739 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
39740 vperm
= force_reg (V16QImode
, vperm
);
39742 h
= gen_reg_rtx (V16QImode
);
39743 op
= gen_lowpart (V16QImode
, d
->op1
);
39744 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
39746 op
= gen_lowpart (V16QImode
, d
->target
);
39747 emit_insn (gen_iorv16qi3 (op
, l
, h
));
39752 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
39753 with two vpshufb insns, vpermq and vpor. We should have already failed
39754 all two or three instruction sequences. */
39757 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
39759 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
39760 unsigned int i
, nelt
, eltsz
;
39763 || !d
->one_operand_p
39764 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39771 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39773 /* Generate two permutation masks. If the required element is within
39774 the same lane, it is shuffled in. If the required element from the
39775 other lane, force a zero by setting bit 7 in the permutation mask.
39776 In the other mask the mask has non-negative elements if element
39777 is requested from the other lane, but also moved to the other lane,
39778 so that the result of vpshufb can have the two V2TImode halves
39780 m128
= GEN_INT (-128);
39781 for (i
= 0; i
< nelt
; ++i
)
39783 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39784 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39786 for (j
= 0; j
< eltsz
; ++j
)
39788 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
39789 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
39793 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39794 vperm
= force_reg (V32QImode
, vperm
);
39796 h
= gen_reg_rtx (V32QImode
);
39797 op
= gen_lowpart (V32QImode
, d
->op0
);
39798 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39800 /* Swap the 128-byte lanes of h into hp. */
39801 hp
= gen_reg_rtx (V4DImode
);
39802 op
= gen_lowpart (V4DImode
, h
);
39803 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
39806 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39807 vperm
= force_reg (V32QImode
, vperm
);
39809 l
= gen_reg_rtx (V32QImode
);
39810 op
= gen_lowpart (V32QImode
, d
->op0
);
39811 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39813 op
= gen_lowpart (V32QImode
, d
->target
);
39814 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
39819 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
39820 and extract-odd permutations of two V32QImode and V16QImode operand
39821 with two vpshufb insns, vpor and vpermq. We should have already
39822 failed all two or three instruction sequences. */
39825 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
39827 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
39828 unsigned int i
, nelt
, eltsz
;
39831 || d
->one_operand_p
39832 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39835 for (i
= 0; i
< d
->nelt
; ++i
)
39836 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
39843 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39845 /* Generate two permutation masks. In the first permutation mask
39846 the first quarter will contain indexes for the first half
39847 of the op0, the second quarter will contain bit 7 set, third quarter
39848 will contain indexes for the second half of the op0 and the
39849 last quarter bit 7 set. In the second permutation mask
39850 the first quarter will contain bit 7 set, the second quarter
39851 indexes for the first half of the op1, the third quarter bit 7 set
39852 and last quarter indexes for the second half of the op1.
39853 I.e. the first mask e.g. for V32QImode extract even will be:
39854 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
39855 (all values masked with 0xf except for -128) and second mask
39856 for extract even will be
39857 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
39858 m128
= GEN_INT (-128);
39859 for (i
= 0; i
< nelt
; ++i
)
39861 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39862 unsigned which
= d
->perm
[i
] >= nelt
;
39863 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
39865 for (j
= 0; j
< eltsz
; ++j
)
39867 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
39868 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
39872 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39873 vperm
= force_reg (V32QImode
, vperm
);
39875 l
= gen_reg_rtx (V32QImode
);
39876 op
= gen_lowpart (V32QImode
, d
->op0
);
39877 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39879 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39880 vperm
= force_reg (V32QImode
, vperm
);
39882 h
= gen_reg_rtx (V32QImode
);
39883 op
= gen_lowpart (V32QImode
, d
->op1
);
39884 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39886 ior
= gen_reg_rtx (V32QImode
);
39887 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
39889 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
39890 op
= gen_lowpart (V4DImode
, d
->target
);
39891 ior
= gen_lowpart (V4DImode
, ior
);
39892 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
39893 const1_rtx
, GEN_INT (3)));
39898 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
39899 and extract-odd permutations. */
39902 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
39909 t1
= gen_reg_rtx (V4DFmode
);
39910 t2
= gen_reg_rtx (V4DFmode
);
39912 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39913 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39914 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39916 /* Now an unpck[lh]pd will produce the result required. */
39918 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
39920 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
39926 int mask
= odd
? 0xdd : 0x88;
39928 t1
= gen_reg_rtx (V8SFmode
);
39929 t2
= gen_reg_rtx (V8SFmode
);
39930 t3
= gen_reg_rtx (V8SFmode
);
39932 /* Shuffle within the 128-bit lanes to produce:
39933 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
39934 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
39937 /* Shuffle the lanes around to produce:
39938 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
39939 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
39942 /* Shuffle within the 128-bit lanes to produce:
39943 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
39944 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
39946 /* Shuffle within the 128-bit lanes to produce:
39947 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
39948 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
39950 /* Shuffle the lanes around to produce:
39951 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
39952 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
39961 /* These are always directly implementable by expand_vec_perm_1. */
39962 gcc_unreachable ();
39966 return expand_vec_perm_pshufb2 (d
);
39969 /* We need 2*log2(N)-1 operations to achieve odd/even
39970 with interleave. */
39971 t1
= gen_reg_rtx (V8HImode
);
39972 t2
= gen_reg_rtx (V8HImode
);
39973 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
39974 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
39975 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
39976 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
39978 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
39980 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
39987 return expand_vec_perm_pshufb2 (d
);
39990 t1
= gen_reg_rtx (V16QImode
);
39991 t2
= gen_reg_rtx (V16QImode
);
39992 t3
= gen_reg_rtx (V16QImode
);
39993 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
39994 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
39995 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
39996 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
39997 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
39998 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40000 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40002 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40009 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40014 struct expand_vec_perm_d d_copy
= *d
;
40015 d_copy
.vmode
= V4DFmode
;
40016 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40017 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40018 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40019 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40022 t1
= gen_reg_rtx (V4DImode
);
40023 t2
= gen_reg_rtx (V4DImode
);
40025 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40026 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40027 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40029 /* Now an vpunpck[lh]qdq will produce the result required. */
40031 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40033 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40040 struct expand_vec_perm_d d_copy
= *d
;
40041 d_copy
.vmode
= V8SFmode
;
40042 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40043 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40044 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40045 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40048 t1
= gen_reg_rtx (V8SImode
);
40049 t2
= gen_reg_rtx (V8SImode
);
40051 /* Shuffle the lanes around into
40052 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40053 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40054 gen_lowpart (V4DImode
, d
->op0
),
40055 gen_lowpart (V4DImode
, d
->op1
),
40057 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40058 gen_lowpart (V4DImode
, d
->op0
),
40059 gen_lowpart (V4DImode
, d
->op1
),
40062 /* Swap the 2nd and 3rd position in each lane into
40063 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40064 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40065 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40066 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40067 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40069 /* Now an vpunpck[lh]qdq will produce
40070 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40072 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40073 gen_lowpart (V4DImode
, t1
),
40074 gen_lowpart (V4DImode
, t2
));
40076 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40077 gen_lowpart (V4DImode
, t1
),
40078 gen_lowpart (V4DImode
, t2
));
40083 gcc_unreachable ();
40089 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40090 extract-even and extract-odd permutations. */
40093 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40095 unsigned i
, odd
, nelt
= d
->nelt
;
40098 if (odd
!= 0 && odd
!= 1)
40101 for (i
= 1; i
< nelt
; ++i
)
40102 if (d
->perm
[i
] != 2 * i
+ odd
)
40105 return expand_vec_perm_even_odd_1 (d
, odd
);
40108 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40109 permutations. We assume that expand_vec_perm_1 has already failed. */
40112 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40114 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40115 enum machine_mode vmode
= d
->vmode
;
40116 unsigned char perm2
[4];
40124 /* These are special-cased in sse.md so that we can optionally
40125 use the vbroadcast instruction. They expand to two insns
40126 if the input happens to be in a register. */
40127 gcc_unreachable ();
40133 /* These are always implementable using standard shuffle patterns. */
40134 gcc_unreachable ();
40138 /* These can be implemented via interleave. We save one insn by
40139 stopping once we have promoted to V4SImode and then use pshufd. */
40143 rtx (*gen
) (rtx
, rtx
, rtx
)
40144 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40145 : gen_vec_interleave_lowv8hi
;
40149 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40150 : gen_vec_interleave_highv8hi
;
40155 dest
= gen_reg_rtx (vmode
);
40156 emit_insn (gen (dest
, op0
, op0
));
40157 vmode
= get_mode_wider_vector (vmode
);
40158 op0
= gen_lowpart (vmode
, dest
);
40160 while (vmode
!= V4SImode
);
40162 memset (perm2
, elt
, 4);
40163 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40172 /* For AVX2 broadcasts of the first element vpbroadcast* or
40173 vpermq should be used by expand_vec_perm_1. */
40174 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40178 gcc_unreachable ();
40182 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40183 broadcast permutations. */
40186 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40188 unsigned i
, elt
, nelt
= d
->nelt
;
40190 if (!d
->one_operand_p
)
40194 for (i
= 1; i
< nelt
; ++i
)
40195 if (d
->perm
[i
] != elt
)
40198 return expand_vec_perm_broadcast_1 (d
);
40201 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40202 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40203 all the shorter instruction sequences. */
40206 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40208 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40209 unsigned int i
, nelt
, eltsz
;
40213 || d
->one_operand_p
40214 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40221 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40223 /* Generate 4 permutation masks. If the required element is within
40224 the same lane, it is shuffled in. If the required element from the
40225 other lane, force a zero by setting bit 7 in the permutation mask.
40226 In the other mask the mask has non-negative elements if element
40227 is requested from the other lane, but also moved to the other lane,
40228 so that the result of vpshufb can have the two V2TImode halves
40230 m128
= GEN_INT (-128);
40231 for (i
= 0; i
< 32; ++i
)
40233 rperm
[0][i
] = m128
;
40234 rperm
[1][i
] = m128
;
40235 rperm
[2][i
] = m128
;
40236 rperm
[3][i
] = m128
;
40242 for (i
= 0; i
< nelt
; ++i
)
40244 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40245 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40246 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40248 for (j
= 0; j
< eltsz
; ++j
)
40249 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40250 used
[which
] = true;
40253 for (i
= 0; i
< 2; ++i
)
40255 if (!used
[2 * i
+ 1])
40260 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40261 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40262 vperm
= force_reg (V32QImode
, vperm
);
40263 h
[i
] = gen_reg_rtx (V32QImode
);
40264 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40265 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40268 /* Swap the 128-byte lanes of h[X]. */
40269 for (i
= 0; i
< 2; ++i
)
40271 if (h
[i
] == NULL_RTX
)
40273 op
= gen_reg_rtx (V4DImode
);
40274 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40275 const2_rtx
, GEN_INT (3), const0_rtx
,
40277 h
[i
] = gen_lowpart (V32QImode
, op
);
40280 for (i
= 0; i
< 2; ++i
)
40287 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40288 vperm
= force_reg (V32QImode
, vperm
);
40289 l
[i
] = gen_reg_rtx (V32QImode
);
40290 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40291 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40294 for (i
= 0; i
< 2; ++i
)
40298 op
= gen_reg_rtx (V32QImode
);
40299 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40306 gcc_assert (l
[0] && l
[1]);
40307 op
= gen_lowpart (V32QImode
, d
->target
);
40308 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40312 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40313 With all of the interface bits taken care of, perform the expansion
40314 in D and return true on success. */
40317 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40319 /* Try a single instruction expansion. */
40320 if (expand_vec_perm_1 (d
))
40323 /* Try sequences of two instructions. */
40325 if (expand_vec_perm_pshuflw_pshufhw (d
))
40328 if (expand_vec_perm_palignr (d
))
40331 if (expand_vec_perm_interleave2 (d
))
40334 if (expand_vec_perm_broadcast (d
))
40337 if (expand_vec_perm_vpermq_perm_1 (d
))
40340 if (expand_vec_perm_vperm2f128 (d
))
40343 /* Try sequences of three instructions. */
40345 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40348 if (expand_vec_perm_pshufb2 (d
))
40351 if (expand_vec_perm_interleave3 (d
))
40354 if (expand_vec_perm_vperm2f128_vblend (d
))
40357 /* Try sequences of four instructions. */
40359 if (expand_vec_perm_vpshufb2_vpermq (d
))
40362 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40365 /* ??? Look for narrow permutations whose element orderings would
40366 allow the promotion to a wider mode. */
40368 /* ??? Look for sequences of interleave or a wider permute that place
40369 the data into the correct lanes for a half-vector shuffle like
40370 pshuf[lh]w or vpermilps. */
40372 /* ??? Look for sequences of interleave that produce the desired results.
40373 The combinatorics of punpck[lh] get pretty ugly... */
40375 if (expand_vec_perm_even_odd (d
))
40378 /* Even longer sequences. */
40379 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40385 /* If a permutation only uses one operand, make it clear. Returns true
40386 if the permutation references both operands. */
40389 canonicalize_perm (struct expand_vec_perm_d
*d
)
40391 int i
, which
, nelt
= d
->nelt
;
40393 for (i
= which
= 0; i
< nelt
; ++i
)
40394 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40396 d
->one_operand_p
= true;
40403 if (!rtx_equal_p (d
->op0
, d
->op1
))
40405 d
->one_operand_p
= false;
40408 /* The elements of PERM do not suggest that only the first operand
40409 is used, but both operands are identical. Allow easier matching
40410 of the permutation by folding the permutation into the single
40415 for (i
= 0; i
< nelt
; ++i
)
40416 d
->perm
[i
] &= nelt
- 1;
40425 return (which
== 3);
40429 ix86_expand_vec_perm_const (rtx operands
[4])
40431 struct expand_vec_perm_d d
;
40432 unsigned char perm
[MAX_VECT_LEN
];
40437 d
.target
= operands
[0];
40438 d
.op0
= operands
[1];
40439 d
.op1
= operands
[2];
40442 d
.vmode
= GET_MODE (d
.target
);
40443 gcc_assert (VECTOR_MODE_P (d
.vmode
));
40444 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40445 d
.testing_p
= false;
40447 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
40448 gcc_assert (XVECLEN (sel
, 0) == nelt
);
40449 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
40451 for (i
= 0; i
< nelt
; ++i
)
40453 rtx e
= XVECEXP (sel
, 0, i
);
40454 int ei
= INTVAL (e
) & (2 * nelt
- 1);
40459 two_args
= canonicalize_perm (&d
);
40461 if (ix86_expand_vec_perm_const_1 (&d
))
40464 /* If the selector says both arguments are needed, but the operands are the
40465 same, the above tried to expand with one_operand_p and flattened selector.
40466 If that didn't work, retry without one_operand_p; we succeeded with that
40468 if (two_args
&& d
.one_operand_p
)
40470 d
.one_operand_p
= false;
40471 memcpy (d
.perm
, perm
, sizeof (perm
));
40472 return ix86_expand_vec_perm_const_1 (&d
);
40478 /* Implement targetm.vectorize.vec_perm_const_ok. */
40481 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
40482 const unsigned char *sel
)
40484 struct expand_vec_perm_d d
;
40485 unsigned int i
, nelt
, which
;
40489 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40490 d
.testing_p
= true;
40492 /* Given sufficient ISA support we can just return true here
40493 for selected vector modes. */
40494 if (GET_MODE_SIZE (d
.vmode
) == 16)
40496 /* All implementable with a single vpperm insn. */
40499 /* All implementable with 2 pshufb + 1 ior. */
40502 /* All implementable with shufpd or unpck[lh]pd. */
40507 /* Extract the values from the vector CST into the permutation
40509 memcpy (d
.perm
, sel
, nelt
);
40510 for (i
= which
= 0; i
< nelt
; ++i
)
40512 unsigned char e
= d
.perm
[i
];
40513 gcc_assert (e
< 2 * nelt
);
40514 which
|= (e
< nelt
? 1 : 2);
40517 /* For all elements from second vector, fold the elements to first. */
40519 for (i
= 0; i
< nelt
; ++i
)
40522 /* Check whether the mask can be applied to the vector type. */
40523 d
.one_operand_p
= (which
!= 3);
40525 /* Implementable with shufps or pshufd. */
40526 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
40529 /* Otherwise we have to go through the motions and see if we can
40530 figure out how to generate the requested permutation. */
40531 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
40532 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
40533 if (!d
.one_operand_p
)
40534 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
40537 ret
= ix86_expand_vec_perm_const_1 (&d
);
40544 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
40546 struct expand_vec_perm_d d
;
40552 d
.vmode
= GET_MODE (targ
);
40553 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40554 d
.one_operand_p
= false;
40555 d
.testing_p
= false;
40557 for (i
= 0; i
< nelt
; ++i
)
40558 d
.perm
[i
] = i
* 2 + odd
;
40560 /* We'll either be able to implement the permutation directly... */
40561 if (expand_vec_perm_1 (&d
))
40564 /* ... or we use the special-case patterns. */
40565 expand_vec_perm_even_odd_1 (&d
, odd
);
40569 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
40571 struct expand_vec_perm_d d
;
40572 unsigned i
, nelt
, base
;
40578 d
.vmode
= GET_MODE (targ
);
40579 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40580 d
.one_operand_p
= false;
40581 d
.testing_p
= false;
40583 base
= high_p
? nelt
/ 2 : 0;
40584 for (i
= 0; i
< nelt
/ 2; ++i
)
40586 d
.perm
[i
* 2] = i
+ base
;
40587 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
40590 /* Note that for AVX this isn't one instruction. */
40591 ok
= ix86_expand_vec_perm_const_1 (&d
);
40596 /* Expand a vector operation CODE for a V*QImode in terms of the
40597 same operation on V*HImode. */
40600 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
40602 enum machine_mode qimode
= GET_MODE (dest
);
40603 enum machine_mode himode
;
40604 rtx (*gen_il
) (rtx
, rtx
, rtx
);
40605 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
40606 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
40607 struct expand_vec_perm_d d
;
40608 bool ok
, full_interleave
;
40609 bool uns_p
= false;
40616 gen_il
= gen_vec_interleave_lowv16qi
;
40617 gen_ih
= gen_vec_interleave_highv16qi
;
40620 himode
= V16HImode
;
40621 gen_il
= gen_avx2_interleave_lowv32qi
;
40622 gen_ih
= gen_avx2_interleave_highv32qi
;
40625 gcc_unreachable ();
40628 op2_l
= op2_h
= op2
;
40632 /* Unpack data such that we've got a source byte in each low byte of
40633 each word. We don't care what goes into the high byte of each word.
40634 Rather than trying to get zero in there, most convenient is to let
40635 it be a copy of the low byte. */
40636 op2_l
= gen_reg_rtx (qimode
);
40637 op2_h
= gen_reg_rtx (qimode
);
40638 emit_insn (gen_il (op2_l
, op2
, op2
));
40639 emit_insn (gen_ih (op2_h
, op2
, op2
));
40642 op1_l
= gen_reg_rtx (qimode
);
40643 op1_h
= gen_reg_rtx (qimode
);
40644 emit_insn (gen_il (op1_l
, op1
, op1
));
40645 emit_insn (gen_ih (op1_h
, op1
, op1
));
40646 full_interleave
= qimode
== V16QImode
;
40654 op1_l
= gen_reg_rtx (himode
);
40655 op1_h
= gen_reg_rtx (himode
);
40656 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
40657 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
40658 full_interleave
= true;
40661 gcc_unreachable ();
40664 /* Perform the operation. */
40665 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
40667 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
40669 gcc_assert (res_l
&& res_h
);
40671 /* Merge the data back into the right place. */
40673 d
.op0
= gen_lowpart (qimode
, res_l
);
40674 d
.op1
= gen_lowpart (qimode
, res_h
);
40676 d
.nelt
= GET_MODE_NUNITS (qimode
);
40677 d
.one_operand_p
= false;
40678 d
.testing_p
= false;
40680 if (full_interleave
)
40682 /* For SSE2, we used an full interleave, so the desired
40683 results are in the even elements. */
40684 for (i
= 0; i
< 32; ++i
)
40689 /* For AVX, the interleave used above was not cross-lane. So the
40690 extraction is evens but with the second and third quarter swapped.
40691 Happily, that is even one insn shorter than even extraction. */
40692 for (i
= 0; i
< 32; ++i
)
40693 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
40696 ok
= ix86_expand_vec_perm_const_1 (&d
);
40699 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40700 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
40704 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
40705 bool uns_p
, bool odd_p
)
40707 enum machine_mode mode
= GET_MODE (op1
);
40708 enum machine_mode wmode
= GET_MODE (dest
);
40711 /* We only play even/odd games with vectors of SImode. */
40712 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
40714 /* If we're looking for the odd results, shift those members down to
40715 the even slots. For some cpus this is faster than a PSHUFD. */
40718 if (TARGET_XOP
&& mode
== V4SImode
)
40720 x
= force_reg (wmode
, CONST0_RTX (wmode
));
40721 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
40725 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
40726 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
40727 x
, NULL
, 1, OPTAB_DIRECT
);
40728 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
40729 x
, NULL
, 1, OPTAB_DIRECT
);
40730 op1
= gen_lowpart (mode
, op1
);
40731 op2
= gen_lowpart (mode
, op2
);
40734 if (mode
== V8SImode
)
40737 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
40739 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
40742 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
40743 else if (TARGET_SSE4_1
)
40744 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
40747 rtx s1
, s2
, t0
, t1
, t2
;
40749 /* The easiest way to implement this without PMULDQ is to go through
40750 the motions as if we are performing a full 64-bit multiply. With
40751 the exception that we need to do less shuffling of the elements. */
40753 /* Compute the sign-extension, aka highparts, of the two operands. */
40754 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40755 op1
, pc_rtx
, pc_rtx
);
40756 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40757 op2
, pc_rtx
, pc_rtx
);
40759 /* Multiply LO(A) * HI(B), and vice-versa. */
40760 t1
= gen_reg_rtx (wmode
);
40761 t2
= gen_reg_rtx (wmode
);
40762 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
40763 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
40765 /* Multiply LO(A) * LO(B). */
40766 t0
= gen_reg_rtx (wmode
);
40767 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
40769 /* Combine and shift the highparts into place. */
40770 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
40771 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
40774 /* Combine high and low parts. */
40775 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
40782 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
40783 bool uns_p
, bool high_p
)
40785 enum machine_mode wmode
= GET_MODE (dest
);
40786 enum machine_mode mode
= GET_MODE (op1
);
40787 rtx t1
, t2
, t3
, t4
, mask
;
40792 t1
= gen_reg_rtx (mode
);
40793 t2
= gen_reg_rtx (mode
);
40794 if (TARGET_XOP
&& !uns_p
)
40796 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
40797 shuffle the elements once so that all elements are in the right
40798 place for immediate use: { A C B D }. */
40799 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
40800 const1_rtx
, GEN_INT (3)));
40801 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
40802 const1_rtx
, GEN_INT (3)));
40806 /* Put the elements into place for the multiply. */
40807 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
40808 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
40811 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
40815 /* Shuffle the elements between the lanes. After this we
40816 have { A B E F | C D G H } for each operand. */
40817 t1
= gen_reg_rtx (V4DImode
);
40818 t2
= gen_reg_rtx (V4DImode
);
40819 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
40820 const0_rtx
, const2_rtx
,
40821 const1_rtx
, GEN_INT (3)));
40822 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
40823 const0_rtx
, const2_rtx
,
40824 const1_rtx
, GEN_INT (3)));
40826 /* Shuffle the elements within the lanes. After this we
40827 have { A A B B | C C D D } or { E E F F | G G H H }. */
40828 t3
= gen_reg_rtx (V8SImode
);
40829 t4
= gen_reg_rtx (V8SImode
);
40830 mask
= GEN_INT (high_p
40831 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
40832 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
40833 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
40834 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
40836 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
40841 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
40842 uns_p
, OPTAB_DIRECT
);
40843 t2
= expand_binop (mode
,
40844 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
40845 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
40846 gcc_assert (t1
&& t2
);
40848 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
40853 t1
= gen_reg_rtx (wmode
);
40854 t2
= gen_reg_rtx (wmode
);
40855 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
40856 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
40858 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
40862 gcc_unreachable ();
40867 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
40871 res_1
= gen_reg_rtx (V4SImode
);
40872 res_2
= gen_reg_rtx (V4SImode
);
40873 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
40874 op1
, op2
, true, false);
40875 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
40876 op1
, op2
, true, true);
40878 /* Move the results in element 2 down to element 1; we don't care
40879 what goes in elements 2 and 3. Then we can merge the parts
40880 back together with an interleave.
40882 Note that two other sequences were tried:
40883 (1) Use interleaves at the start instead of psrldq, which allows
40884 us to use a single shufps to merge things back at the end.
40885 (2) Use shufps here to combine the two vectors, then pshufd to
40886 put the elements in the correct order.
40887 In both cases the cost of the reformatting stall was too high
40888 and the overall sequence slower. */
40890 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
40891 const0_rtx
, const0_rtx
));
40892 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
40893 const0_rtx
, const0_rtx
));
40894 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
40896 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
40900 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
40902 enum machine_mode mode
= GET_MODE (op0
);
40903 rtx t1
, t2
, t3
, t4
, t5
, t6
;
40905 if (TARGET_XOP
&& mode
== V2DImode
)
40907 /* op1: A,B,C,D, op2: E,F,G,H */
40908 op1
= gen_lowpart (V4SImode
, op1
);
40909 op2
= gen_lowpart (V4SImode
, op2
);
40911 t1
= gen_reg_rtx (V4SImode
);
40912 t2
= gen_reg_rtx (V4SImode
);
40913 t3
= gen_reg_rtx (V2DImode
);
40914 t4
= gen_reg_rtx (V2DImode
);
40917 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
40923 /* t2: (B*E),(A*F),(D*G),(C*H) */
40924 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
40926 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
40927 emit_insn (gen_xop_phadddq (t3
, t2
));
40929 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
40930 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
40932 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
40933 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
40937 enum machine_mode nmode
;
40938 rtx (*umul
) (rtx
, rtx
, rtx
);
40940 if (mode
== V2DImode
)
40942 umul
= gen_vec_widen_umult_even_v4si
;
40945 else if (mode
== V4DImode
)
40947 umul
= gen_vec_widen_umult_even_v8si
;
40951 gcc_unreachable ();
40954 /* Multiply low parts. */
40955 t1
= gen_reg_rtx (mode
);
40956 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
40958 /* Shift input vectors right 32 bits so we can multiply high parts. */
40960 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
40961 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
40963 /* Multiply high parts by low parts. */
40964 t4
= gen_reg_rtx (mode
);
40965 t5
= gen_reg_rtx (mode
);
40966 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
40967 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
40969 /* Combine and shift the highparts back. */
40970 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
40971 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
40973 /* Combine high and low parts. */
40974 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
40977 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40978 gen_rtx_MULT (mode
, op1
, op2
));
40981 /* Expand an insert into a vector register through pinsr insn.
40982 Return true if successful. */
40985 ix86_expand_pinsr (rtx
*operands
)
40987 rtx dst
= operands
[0];
40988 rtx src
= operands
[3];
40990 unsigned int size
= INTVAL (operands
[1]);
40991 unsigned int pos
= INTVAL (operands
[2]);
40993 if (GET_CODE (dst
) == SUBREG
)
40995 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
40996 dst
= SUBREG_REG (dst
);
40999 if (GET_CODE (src
) == SUBREG
)
41000 src
= SUBREG_REG (src
);
41002 switch (GET_MODE (dst
))
41009 enum machine_mode srcmode
, dstmode
;
41010 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41012 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41017 if (!TARGET_SSE4_1
)
41019 dstmode
= V16QImode
;
41020 pinsr
= gen_sse4_1_pinsrb
;
41026 dstmode
= V8HImode
;
41027 pinsr
= gen_sse2_pinsrw
;
41031 if (!TARGET_SSE4_1
)
41033 dstmode
= V4SImode
;
41034 pinsr
= gen_sse4_1_pinsrd
;
41038 gcc_assert (TARGET_64BIT
);
41039 if (!TARGET_SSE4_1
)
41041 dstmode
= V2DImode
;
41042 pinsr
= gen_sse4_1_pinsrq
;
41049 dst
= gen_lowpart (dstmode
, dst
);
41050 src
= gen_lowpart (srcmode
, src
);
41054 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41063 /* This function returns the calling abi specific va_list type node.
41064 It returns the FNDECL specific va_list type. */
41067 ix86_fn_abi_va_list (tree fndecl
)
41070 return va_list_type_node
;
41071 gcc_assert (fndecl
!= NULL_TREE
);
41073 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41074 return ms_va_list_type_node
;
41076 return sysv_va_list_type_node
;
41079 /* Returns the canonical va_list type specified by TYPE. If there
41080 is no valid TYPE provided, it return NULL_TREE. */
41083 ix86_canonical_va_list_type (tree type
)
41087 /* Resolve references and pointers to va_list type. */
41088 if (TREE_CODE (type
) == MEM_REF
)
41089 type
= TREE_TYPE (type
);
41090 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41091 type
= TREE_TYPE (type
);
41092 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41093 type
= TREE_TYPE (type
);
41095 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41097 wtype
= va_list_type_node
;
41098 gcc_assert (wtype
!= NULL_TREE
);
41100 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41102 /* If va_list is an array type, the argument may have decayed
41103 to a pointer type, e.g. by being passed to another function.
41104 In that case, unwrap both types so that we can compare the
41105 underlying records. */
41106 if (TREE_CODE (htype
) == ARRAY_TYPE
41107 || POINTER_TYPE_P (htype
))
41109 wtype
= TREE_TYPE (wtype
);
41110 htype
= TREE_TYPE (htype
);
41113 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41114 return va_list_type_node
;
41115 wtype
= sysv_va_list_type_node
;
41116 gcc_assert (wtype
!= NULL_TREE
);
41118 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41120 /* If va_list is an array type, the argument may have decayed
41121 to a pointer type, e.g. by being passed to another function.
41122 In that case, unwrap both types so that we can compare the
41123 underlying records. */
41124 if (TREE_CODE (htype
) == ARRAY_TYPE
41125 || POINTER_TYPE_P (htype
))
41127 wtype
= TREE_TYPE (wtype
);
41128 htype
= TREE_TYPE (htype
);
41131 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41132 return sysv_va_list_type_node
;
41133 wtype
= ms_va_list_type_node
;
41134 gcc_assert (wtype
!= NULL_TREE
);
41136 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41138 /* If va_list is an array type, the argument may have decayed
41139 to a pointer type, e.g. by being passed to another function.
41140 In that case, unwrap both types so that we can compare the
41141 underlying records. */
41142 if (TREE_CODE (htype
) == ARRAY_TYPE
41143 || POINTER_TYPE_P (htype
))
41145 wtype
= TREE_TYPE (wtype
);
41146 htype
= TREE_TYPE (htype
);
41149 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41150 return ms_va_list_type_node
;
41153 return std_canonical_va_list_type (type
);
41156 /* Iterate through the target-specific builtin types for va_list.
41157 IDX denotes the iterator, *PTREE is set to the result type of
41158 the va_list builtin, and *PNAME to its internal type.
41159 Returns zero if there is no element for this index, otherwise
41160 IDX should be increased upon the next call.
41161 Note, do not iterate a base builtin's name like __builtin_va_list.
41162 Used from c_common_nodes_and_builtins. */
41165 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41175 *ptree
= ms_va_list_type_node
;
41176 *pname
= "__builtin_ms_va_list";
41180 *ptree
= sysv_va_list_type_node
;
41181 *pname
= "__builtin_sysv_va_list";
41189 #undef TARGET_SCHED_DISPATCH
41190 #define TARGET_SCHED_DISPATCH has_dispatch
41191 #undef TARGET_SCHED_DISPATCH_DO
41192 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41193 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41194 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41195 #undef TARGET_SCHED_REORDER
41196 #define TARGET_SCHED_REORDER ix86_sched_reorder
41197 #undef TARGET_SCHED_ADJUST_PRIORITY
41198 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41199 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41200 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
41202 /* The size of the dispatch window is the total number of bytes of
41203 object code allowed in a window. */
41204 #define DISPATCH_WINDOW_SIZE 16
41206 /* Number of dispatch windows considered for scheduling. */
41207 #define MAX_DISPATCH_WINDOWS 3
41209 /* Maximum number of instructions in a window. */
41212 /* Maximum number of immediate operands in a window. */
41215 /* Maximum number of immediate bits allowed in a window. */
41216 #define MAX_IMM_SIZE 128
41218 /* Maximum number of 32 bit immediates allowed in a window. */
41219 #define MAX_IMM_32 4
41221 /* Maximum number of 64 bit immediates allowed in a window. */
41222 #define MAX_IMM_64 2
41224 /* Maximum total of loads or prefetches allowed in a window. */
41227 /* Maximum total of stores allowed in a window. */
41228 #define MAX_STORE 1
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.
   Member names match the group_name[] table below.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};
41250 /* Number of allowable groups in a dispatch window. It is an array
41251 indexed by dispatch_group enum. 100 is used as a big number,
41252 because the number of these kind of operations does not have any
41253 effect in dispatch window, but we need them for other reasons in
41255 static unsigned int num_allowable_groups
[disp_last
] = {
41256 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41259 char group_name
[disp_last
+ 1][16] = {
41260 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41261 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41262 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
/* Instruction path.  */
enum insn_path
{
  no_path,      /* Not yet classified (used as the initial value).  */
  path_single,  /* Single micro op.  */
  path_double,  /* Double micro op.  */
  path_multi,   /* Instructions with more than 2 micro op..  */
  last_path
};
41274 /* sched_insn_info defines a window to the instructions scheduled in
41275 the basic block. It contains a pointer to the insn_info table and
41276 the instruction scheduled.
41278 Windows are allocated for each basic block and are linked
41280 typedef struct sched_insn_info_s
{
41282 enum dispatch_group group
;
41283 enum insn_path path
;
41288 /* Linked list of dispatch windows. This is a two way list of
41289 dispatch windows of a basic block. It contains information about
41290 the number of uops in the window and the total number of
41291 instructions and of bytes in the object code for this dispatch
41293 typedef struct dispatch_windows_s
{
41294 int num_insn
; /* Number of insn in the window. */
41295 int num_uops
; /* Number of uops in the window. */
41296 int window_size
; /* Number of bytes in the window. */
41297 int window_num
; /* Window number between 0 or 1. */
41298 int num_imm
; /* Number of immediates in an insn. */
41299 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41300 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41301 int imm_size
; /* Total immediates in the window. */
41302 int num_loads
; /* Total memory loads in the window. */
41303 int num_stores
; /* Total memory stores in the window. */
41304 int violation
; /* Violation exists in window. */
41305 sched_insn_info
*window
; /* Pointer to the window. */
41306 struct dispatch_windows_s
*next
;
41307 struct dispatch_windows_s
*prev
;
41308 } dispatch_windows
;
/* Immediate values used in an insn.  Fields match the counters
   maintained by find_constant_1.  */
typedef struct imm_info_s
{
  int imm;    /* Total number of immediates.  */
  int imm32;  /* Number of 32 bit immediates.  */
  int imm64;  /* Number of 64 bit immediates.  */
} imm_info;
41318 static dispatch_windows
*dispatch_window_list
;
41319 static dispatch_windows
*dispatch_window_list1
;
41321 /* Get dispatch group of insn. */
41323 static enum dispatch_group
41324 get_mem_group (rtx insn
)
41326 enum attr_memory memory
;
41328 if (INSN_CODE (insn
) < 0)
41329 return disp_no_group
;
41330 memory
= get_attr_memory (insn
);
41331 if (memory
== MEMORY_STORE
)
41334 if (memory
== MEMORY_LOAD
)
41337 if (memory
== MEMORY_BOTH
)
41338 return disp_load_store
;
41340 return disp_no_group
;
41343 /* Return true if insn is a compare instruction. */
41348 enum attr_type type
;
41350 type
= get_attr_type (insn
);
41351 return (type
== TYPE_TEST
41352 || type
== TYPE_ICMP
41353 || type
== TYPE_FCMP
41354 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41357 /* Return true if a dispatch violation encountered. */
41360 dispatch_violation (void)
41362 if (dispatch_window_list
->next
)
41363 return dispatch_window_list
->next
->violation
;
41364 return dispatch_window_list
->violation
;
41367 /* Return true if insn is a branch instruction. */
41370 is_branch (rtx insn
)
41372 return (CALL_P (insn
) || JUMP_P (insn
));
41375 /* Return true if insn is a prefetch instruction. */
41378 is_prefetch (rtx insn
)
41380 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
41383 /* This function initializes a dispatch window and the list container holding a
41384 pointer to the window. */
41387 init_window (int window_num
)
41390 dispatch_windows
*new_list
;
41392 if (window_num
== 0)
41393 new_list
= dispatch_window_list
;
41395 new_list
= dispatch_window_list1
;
41397 new_list
->num_insn
= 0;
41398 new_list
->num_uops
= 0;
41399 new_list
->window_size
= 0;
41400 new_list
->next
= NULL
;
41401 new_list
->prev
= NULL
;
41402 new_list
->window_num
= window_num
;
41403 new_list
->num_imm
= 0;
41404 new_list
->num_imm_32
= 0;
41405 new_list
->num_imm_64
= 0;
41406 new_list
->imm_size
= 0;
41407 new_list
->num_loads
= 0;
41408 new_list
->num_stores
= 0;
41409 new_list
->violation
= false;
41411 for (i
= 0; i
< MAX_INSN
; i
++)
41413 new_list
->window
[i
].insn
= NULL
;
41414 new_list
->window
[i
].group
= disp_no_group
;
41415 new_list
->window
[i
].path
= no_path
;
41416 new_list
->window
[i
].byte_len
= 0;
41417 new_list
->window
[i
].imm_bytes
= 0;
41422 /* This function allocates and initializes a dispatch window and the
41423 list container holding a pointer to the window. */
41425 static dispatch_windows
*
41426 allocate_window (void)
41428 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
41429 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
41434 /* This routine initializes the dispatch scheduling information. It
41435 initiates building dispatch scheduler tables and constructs the
41436 first dispatch window. */
41439 init_dispatch_sched (void)
41441 /* Allocate a dispatch list and a window. */
41442 dispatch_window_list
= allocate_window ();
41443 dispatch_window_list1
= allocate_window ();
41448 /* This function returns true if a branch is detected. End of a basic block
41449 does not have to be a branch, but here we assume only branches end a
41453 is_end_basic_block (enum dispatch_group group
)
41455 return group
== disp_branch
;
41458 /* This function is called when the end of a window processing is reached. */
41461 process_end_window (void)
41463 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
41464 if (dispatch_window_list
->next
)
41466 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
41467 gcc_assert (dispatch_window_list
->window_size
41468 + dispatch_window_list1
->window_size
<= 48);
41474 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41475 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41476 for 48 bytes of instructions. Note that these windows are not dispatch
41477 windows that their sizes are DISPATCH_WINDOW_SIZE. */
41479 static dispatch_windows
*
41480 allocate_next_window (int window_num
)
41482 if (window_num
== 0)
41484 if (dispatch_window_list
->next
)
41487 return dispatch_window_list
;
41490 dispatch_window_list
->next
= dispatch_window_list1
;
41491 dispatch_window_list1
->prev
= dispatch_window_list
;
41493 return dispatch_window_list1
;
41496 /* Increment the number of immediate operands of an instruction. */
41499 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
41504 switch ( GET_CODE (*in_rtx
))
41509 (imm_values
->imm
)++;
41510 if (x86_64_immediate_operand (*in_rtx
, SImode
))
41511 (imm_values
->imm32
)++;
41513 (imm_values
->imm64
)++;
41517 (imm_values
->imm
)++;
41518 (imm_values
->imm64
)++;
41522 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
41524 (imm_values
->imm
)++;
41525 (imm_values
->imm32
)++;
41536 /* Compute number of immediate operands of an instruction. */
41539 find_constant (rtx in_rtx
, imm_info
*imm_values
)
41541 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
41542 (rtx_function
) find_constant_1
, (void *) imm_values
);
41545 /* Return total size of immediate operands of an instruction along with number
41546 of corresponding immediate-operands. It initializes its parameters to zero
41547 befor calling FIND_CONSTANT.
41548 INSN is the input instruction. IMM is the total of immediates.
41549 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41553 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
41555 imm_info imm_values
= {0, 0, 0};
41557 find_constant (insn
, &imm_values
);
41558 *imm
= imm_values
.imm
;
41559 *imm32
= imm_values
.imm32
;
41560 *imm64
= imm_values
.imm64
;
41561 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
41564 /* This function indicates if an operand of an instruction is an
41568 has_immediate (rtx insn
)
41570 int num_imm_operand
;
41571 int num_imm32_operand
;
41572 int num_imm64_operand
;
41575 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41576 &num_imm64_operand
);
41580 /* Return single or double path for instructions. */
41582 static enum insn_path
41583 get_insn_path (rtx insn
)
41585 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
41587 if ((int)path
== 0)
41588 return path_single
;
41590 if ((int)path
== 1)
41591 return path_double
;
41596 /* Return insn dispatch group. */
41598 static enum dispatch_group
41599 get_insn_group (rtx insn
)
41601 enum dispatch_group group
= get_mem_group (insn
);
41605 if (is_branch (insn
))
41606 return disp_branch
;
41611 if (has_immediate (insn
))
41614 if (is_prefetch (insn
))
41615 return disp_prefetch
;
41617 return disp_no_group
;
41620 /* Count number of GROUP restricted instructions in a dispatch
41621 window WINDOW_LIST. */
41624 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
41626 enum dispatch_group group
= get_insn_group (insn
);
41628 int num_imm_operand
;
41629 int num_imm32_operand
;
41630 int num_imm64_operand
;
41632 if (group
== disp_no_group
)
41635 if (group
== disp_imm
)
41637 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41638 &num_imm64_operand
);
41639 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
41640 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
41641 || (num_imm32_operand
> 0
41642 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
41643 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
41644 || (num_imm64_operand
> 0
41645 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
41646 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
41647 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
41648 && num_imm64_operand
> 0
41649 && ((window_list
->num_imm_64
> 0
41650 && window_list
->num_insn
>= 2)
41651 || window_list
->num_insn
>= 3)))
41657 if ((group
== disp_load_store
41658 && (window_list
->num_loads
>= MAX_LOAD
41659 || window_list
->num_stores
>= MAX_STORE
))
41660 || ((group
== disp_load
41661 || group
== disp_prefetch
)
41662 && window_list
->num_loads
>= MAX_LOAD
)
41663 || (group
== disp_store
41664 && window_list
->num_stores
>= MAX_STORE
))
41670 /* This function returns true if insn satisfies dispatch rules on the
41671 last window scheduled.
   NOTE(review): lossy extraction -- the function header, braces and
   the return statements are missing from this chunk.  */
41674 fits_dispatch_window (rtx insn
)
/* Current dispatch window and its successor (window 1), if any.  */
41676 dispatch_windows
*window_list
= dispatch_window_list
;
41677 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
41678 unsigned int num_restrict
;
41679 enum dispatch_group group
= get_insn_group (insn
);
41680 enum insn_path path
= get_insn_path (insn
);
41683 /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
41684 instructions should be given the lowest priority in the
41685 scheduling process in Haifa scheduler to make sure they will be
41686 scheduled in the same dispatch window as the reference to them. */
41687 if (group
== disp_jcc
|| group
== disp_cmp
)
41690 /* Check nonrestricted. */
41691 if (group
== disp_no_group
|| group
== disp_branch
)
41694 /* Get last dispatch window. */
41695 if (window_list_next
)
41696 window_list
= window_list_next
;
/* Window 1: reject when the combined byte size of windows 0 and 1
   (plus this insn) reaches the 48-byte limit.  The first operand of
   the original `if` condition is missing from this extraction.  */
41698 if (window_list
->window_num
== 1)
41700 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
41703 || (min_insn_size (insn
) + sum
) >= 48)
41704 /* Window 1 is full. Go for next window. */
/* Compare the restriction count against the per-group allowance.  */
41708 num_restrict
= count_num_restricted (insn
, window_list
);
41710 if (num_restrict
> num_allowable_groups
[group
])
41713 /* See if it fits in the first window. */
41714 if (window_list
->window_num
== 0)
41716 /* The first window should have only single and double path
   uops (comment truncated in this extraction).  */
41718 if (path
== path_double
41719 && (window_list
->num_uops
+ 2) > MAX_INSN
)
41721 else if (path
!= path_single
)
41727 /* Add an instruction INSN with NUM_UOPS micro-operations to the
41728 dispatch window WINDOW_LIST.
   NOTE(review): lossy extraction -- function header, braces and the
   `int imm_size;` local are missing from this chunk.  */
41731 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
/* Encoded length and current fill level of the window.  */
41733 int byte_len
= min_insn_size (insn
);
41734 int num_insn
= window_list
->num_insn
;
41736 sched_insn_info
*window
= window_list
->window
;
41737 enum dispatch_group group
= get_insn_group (insn
);
41738 enum insn_path path
= get_insn_path (insn
);
41739 int num_imm_operand
;
41740 int num_imm32_operand
;
41741 int num_imm64_operand
;
/* Record a dispatch-rule violation (once per window); disp_cmp insns
   are exempt because they are deliberately scheduled late.  */
41743 if (!window_list
->violation
&& group
!= disp_cmp
41744 && !fits_dispatch_window (insn
))
41745 window_list
->violation
= true;
41747 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41748 &num_imm64_operand
);
41750 /* Initialize window with new instruction. */
41751 window
[num_insn
].insn
= insn
;
41752 window
[num_insn
].byte_len
= byte_len
;
41753 window
[num_insn
].group
= group
;
41754 window
[num_insn
].path
= path
;
41755 window
[num_insn
].imm_bytes
= imm_size
;
/* Accumulate window-wide totals (bytes, insns, uops, immediates).  */
41757 window_list
->window_size
+= byte_len
;
41758 window_list
->num_insn
= num_insn
+ 1;
41759 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
41760 window_list
->imm_size
+= imm_size
;
41761 window_list
->num_imm
+= num_imm_operand
;
41762 window_list
->num_imm_32
+= num_imm32_operand
;
41763 window_list
->num_imm_64
+= num_imm64_operand
;
/* Maintain the load/store counters; a load-store insn counts as
   both, and prefetches count as loads.  */
41765 if (group
== disp_store
)
41766 window_list
->num_stores
+= 1;
41767 else if (group
== disp_load
41768 || group
== disp_prefetch
)
41769 window_list
->num_loads
+= 1;
41770 else if (group
== disp_load_store
)
41772 window_list
->num_stores
+= 1;
41773 window_list
->num_loads
+= 1;
41777 /* Adds a scheduled instruction, INSN, to the current dispatch window.
41778 If the total bytes of instructions or the number of instructions in
41779 the window exceed allowable, it allocates a new window.
   NOTE(review): lossy extraction -- function header, braces, several
   int locals (byte_len, insn_num_uops, num_insn, num_uops, sum,
   window_num, insn_fits) and some returns/branch bodies are missing
   from this chunk.  */
41782 add_to_dispatch_window (rtx insn
)
41785 dispatch_windows
*window_list
;
41786 dispatch_windows
*next_list
;
41787 dispatch_windows
*window0_list
;
41788 enum insn_path path
;
41789 enum dispatch_group insn_group
;
/* Ignore insns with no recognized insn code.  */
41797 if (INSN_CODE (insn
) < 0)
41800 byte_len
= min_insn_size (insn
);
41801 window_list
= dispatch_window_list
;
41802 next_list
= window_list
->next
;
41803 path
= get_insn_path (insn
);
41804 insn_group
= get_insn_group (insn
);
41806 /* Get the last dispatch window. */
41808 window_list
= dispatch_window_list
->next
;
/* Derive the uop count from the decode path; the path_multi branch
   bodies are missing from this extraction.  */
41810 if (path
== path_single
)
41812 else if (path
== path_double
)
41815 insn_num_uops
= (int) path
;
41817 /* If current window is full, get a new window.
41818 Window number zero is full, if MAX_INSN uops are scheduled in it.
41819 Window number one is full, if window zero's bytes plus window
41820 one's bytes is 32, or if the bytes of the new instruction added
41821 to the total makes it greater than 48, or it has already MAX_INSN
41822 instructions in it. */
41823 num_insn
= window_list
->num_insn
;
41824 num_uops
= window_list
->num_uops
;
41825 window_num
= window_list
->window_num
;
41826 insn_fits
= fits_dispatch_window (insn
);
/* Window exhausted: flip to the other window index (0 <-> 1) and
   allocate it.  Part of the original condition is missing here.  */
41828 if (num_insn
>= MAX_INSN
41829 || num_uops
+ insn_num_uops
> MAX_INSN
41832 window_num
= ~window_num
& 1;
41833 window_list
= allocate_next_window (window_num
);
/* Window 0: add the insn; a full window ending in a branch closes
   the dispatch group immediately.  */
41836 if (window_num
== 0)
41838 add_insn_window (insn
, window_list
, insn_num_uops
);
41839 if (window_list
->num_insn
>= MAX_INSN
41840 && insn_group
== disp_branch
)
41842 process_end_window ();
/* Window 1: enforce the combined 48-byte limit of windows 0+1 before
   adding; on overflow, end the window pair and restart at window 0.  */
41846 else if (window_num
== 1)
41848 window0_list
= window_list
->prev
;
41849 sum
= window0_list
->window_size
+ window_list
->window_size
;
41851 || (byte_len
+ sum
) >= 48)
41853 process_end_window ();
41854 window_list
= dispatch_window_list
;
41857 add_insn_window (insn
, window_list
, insn_num_uops
);
/* Only window numbers 0 and 1 exist.  */
41860 gcc_unreachable ();
41862 if (is_end_basic_block (insn_group
))
41864 /* End of basic block is reached do end-basic-block process. */
41865 process_end_window ();
41870 /* Print the dispatch window, WINDOW_NUM, to FILE.
   NOTE(review): lossy extraction -- braces, the `int i;` local, the
   `else` keyword before the dispatch_window_list1 assignment, and a
   loop-body line are missing from this chunk.  */
41872 DEBUG_FUNCTION
static void
41873 debug_dispatch_window_file (FILE *file
, int window_num
)
41875 dispatch_windows
*list
;
/* Select window 0 or window 1 head by WINDOW_NUM.  */
41878 if (window_num
== 0)
41879 list
= dispatch_window_list
;
41881 list
= dispatch_window_list1
;
/* Dump the aggregate counters of the selected window.  */
41883 fprintf (file
, "Window #%d:\n", list
->window_num
);
41884 fprintf (file
, "  num_insn = %d, num_uops = %d, window_size = %d\n",
41885 list
->num_insn
, list
->num_uops
, list
->window_size
);
41886 fprintf (file
, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41887 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
41889 fprintf (file
, "  num_loads = %d, num_stores = %d\n", list
->num_loads
,
/* Per-slot dump; empty slots (null insn) are skipped.  */
41891 fprintf (file
, " insn info:\n");
41893 for (i
= 0; i
< MAX_INSN
; i
++)
41895 if (!list
->window
[i
].insn
)
41897 fprintf (file
, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
41898 i
, group_name
[list
->window
[i
].group
],
41899 i
, (void *)list
->window
[i
].insn
,
41900 i
, list
->window
[i
].path
,
41901 i
, list
->window
[i
].byte_len
,
41902 i
, list
->window
[i
].imm_bytes
);
41906 /* Print to stdout a dispatch window.
   Thin public wrapper over debug_dispatch_window_file for use from a
   debugger.  */
41908 DEBUG_FUNCTION
void
41909 debug_dispatch_window (int window_num
)
41911 debug_dispatch_window_file (stdout
, window_num
);
41914 /* Print INSN dispatch information to FILE.
   NOTE(review): lossy extraction -- braces and the `int byte_len;` /
   `int imm_size;` locals are missing from this chunk.  */
41916 DEBUG_FUNCTION
static void
41917 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
41920 enum insn_path path
;
41921 enum dispatch_group group
;
41923 int num_imm_operand
;
41924 int num_imm32_operand
;
41925 int num_imm64_operand
;
/* Skip insns with no recognized insn code.  */
41927 if (INSN_CODE (insn
) < 0)
/* Gather length, decode path, group and immediate info for INSN.  */
41930 byte_len
= min_insn_size (insn
);
41931 path
= get_insn_path (insn
);
41932 group
= get_insn_group (insn
);
41933 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41934 &num_imm64_operand
);
41936 fprintf (file
, " insn info:\n");
41937 fprintf (file
, "  group = %s, path = %d, byte_len = %d\n",
41938 group_name
[group
], path
, byte_len
);
41939 fprintf (file
, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41940 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
41943 /* Print to STDERR the status of the ready list with respect to
41944 dispatch windows.
   NOTE(review): despite the comment saying STDERR, the visible code
   writes to stdout; braces and the `int i;` local are missing from
   this extraction.  */
41946 DEBUG_FUNCTION
void
41947 debug_ready_dispatch (void)
41950 int no_ready
= number_in_ready ();
41952 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
/* Dump dispatch info for every insn currently on the ready list.  */
41954 for (i
= 0; i
< no_ready
; i
++)
41955 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
41958 /* This routine is the driver of the dispatch scheduler.
   MODE selects the action: DISPATCH_INIT resets the scheduler state,
   ADD_TO_DISPATCH_WINDOW feeds INSN into the current window.  */
41961 do_dispatch (rtx insn
, int mode
)
41963 if (mode
== DISPATCH_INIT
)
41964 init_dispatch_sched ();
41965 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
41966 add_to_dispatch_window (insn
);
41969 /* Return TRUE if Dispatch Scheduling is supported.
   ACTION selects the query (IS_DISPATCH_ON / IS_CMP /
   DISPATCH_VIOLATION / FITS_DISPATCH_WINDOW).  Only enabled on
   Bulldozer-family targets with -mdispatch-scheduler.
   NOTE(review): lossy extraction -- the switch statement header,
   several case labels/returns and braces are missing here.  */
41972 has_dispatch (rtx insn
, int action
)
41974 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
41975 && flag_dispatch_scheduler
)
41981 case IS_DISPATCH_ON
:
41986 return is_cmp (insn
);
41988 case DISPATCH_VIOLATION
:
41989 return dispatch_violation ();
41991 case FITS_DISPATCH_WINDOW
:
41992 return fits_dispatch_window (insn
);
41998 /* Implementation of reassociation_width target hook used by
41999 reassoc phase to identify parallelism level in reassociated
42000 tree. Statements tree_code is passed in OPC. Arguments type
42003 Currently parallel reassociation is enabled for Atom
42004 processors only and we set reassociation width to be 2
42005 because Atom may issue up to 2 instructions per cycle.
42007 Return value should be fixed if parallel reassociation is
42008 enabled for other processors.
   NOTE(review): lossy extraction -- the `int res = 1;` style default
   and the return are missing from this chunk.  */
42011 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42012 enum machine_mode mode
)
/* Width is raised only when the corresponding tuning flag enables
   parallel reassociation for integer or FP modes.  */
42016 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42018 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42024 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42025 place emms and femms instructions.
   Return the preferred SIMD vector mode for scalar MODE: 256-bit AVX
   vectors when AVX is on and not tuned to prefer 128-bit, otherwise
   SSE-width vectors.
   NOTE(review): lossy extraction -- the switch over MODE, its case
   labels and several returns are missing from this chunk.  */
42027 static enum machine_mode
42028 ix86_preferred_simd_mode (enum machine_mode mode
)
/* QI/HI/SI/DI element types: pick the AVX-256 or SSE-128 vector.  */
42036 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42038 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42040 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42042 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
/* SFmode: the returns for this and the DFmode branches below are
   missing from this extraction.  */
42045 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
/* DFmode: vectorization of double is gated on
   TARGET_VECTORIZE_DOUBLE.  */
42051 if (!TARGET_VECTORIZE_DOUBLE
)
42053 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42055 else if (TARGET_SSE2
)
42064 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vector sizes (returned as a bitmask of byte widths, 32|16); return
   0 to mean "only the preferred size".  */
42067 static unsigned int
42068 ix86_autovectorize_vector_sizes (void)
42070 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42075 /* Return class of registers which could be used for pseudo of MODE
42076 and of class RCLASS for spilling instead of memory. Return NO_REGS
42077 if it is not possible or non-profitable.
   NOTE(review): lossy extraction -- the two return statements
   (SSE_REGS / NO_REGS in the original) are missing here.  */
42079 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
/* Spill integer pseudos to SSE registers only when SSE is available,
   the tuning flag requests it, MMX is off, and the mode is a
   word-size integer (SImode, or DImode on 64-bit).  */
42081 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42082 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
42083 && INTEGER_CLASS_P (rclass
))
42088 /* Implement targetm.vectorize.init_cost.
   Allocate a 3-element accumulator (prologue/body/epilogue costs),
   zero it, and return it as the opaque cost-model data pointer.
   Ownership passes to the caller; freed by
   targetm.vectorize.destroy_cost_data.  */
42091 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42093 unsigned *cost
= XNEWVEC (unsigned, 3);
42094 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42098 /* Implement targetm.vectorize.add_stmt_cost.
   Accumulate the cost of COUNT copies of a statement of kind KIND
   (with vector type taken from STMT_INFO and misalignment MISALIGN)
   into the WHERE slot of the accumulator DATA, and return the value
   added.  Costing is a no-op unless -fvect-cost-model is active.
   NOTE(review): lossy extraction -- braces and the final
   `return retval;` are missing from this chunk.  */
42101 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42102 struct _stmt_vec_info
*stmt_info
, int misalign
,
42103 enum vect_cost_model_location where
)
42105 unsigned *cost
= (unsigned *) data
;
42106 unsigned retval
= 0;
42108 if (flag_vect_cost_model
)
42110 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42111 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42113 /* Statements in an inner loop relative to the loop being
42114 vectorized are weighted more heavily. The value here is
42115 arbitrary and could potentially be improved with analysis. */
42116 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42117 count
*= 50; /* FIXME. */
42119 retval
= (unsigned) (count
* stmt_cost
);
42120 cost
[where
] += retval
;
42126 /* Implement targetm.vectorize.finish_cost.
   Copy the three accumulated cost totals out of the opaque DATA
   accumulator into the caller-supplied *PROLOGUE_COST, *BODY_COST
   and *EPILOGUE_COST.  DATA is not freed here (see
   ix86_destroy_cost_data).  */
42129 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42130 unsigned *body_cost
, unsigned *epilogue_cost
)
42132 unsigned *cost
= (unsigned *) data
;
42133 *prologue_cost
= cost
[vect_prologue
];
42134 *body_cost
= cost
[vect_body
];
42135 *epilogue_cost
= cost
[vect_epilogue
];
42138 /* Implement targetm.vectorize.destroy_cost_data.
   Release the accumulator allocated by ix86_init_cost.
   NOTE(review): the body (a `free (data);` in the original) is
   missing from this extraction.  */
42141 ix86_destroy_cost_data (void *data
)
42146 /* Validate target specific memory model bits in VAL.
   Checks the x86 HLE (Hardware Lock Elision) prefix bits against the
   base C11 memory model: warns and falls back to SEQ_CST when unknown
   bits are set, when both ACQUIRE and RELEASE elision are requested,
   or when an HLE bit is combined with a weaker model than it needs.
   NOTE(review): lossy extraction -- braces, the `bool strong;`
   declaration, part of the first condition, and the final
   `return val;` are missing from this chunk.  */
42148 static unsigned HOST_WIDE_INT
42149 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42151 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
/* Reject unknown target bits, or both HLE prefixes at once.  */
42154 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42156 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42158 warning (OPT_Winvalid_memory_model
,
42159 "Unknown architecture specific memory model");
42160 return MEMMODEL_SEQ_CST
;
/* ACQ_REL and SEQ_CST satisfy both acquire and release semantics.  */
42162 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42163 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42165 warning (OPT_Winvalid_memory_model
,
42166 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42167 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42169 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42171 warning (OPT_Winvalid_memory_model
,
42172 "HLE_RELEASE not used with RELEASE or stronger memory model");
42173 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42178 /* Initialize the GCC target structure. */
42179 #undef TARGET_RETURN_IN_MEMORY
42180 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42182 #undef TARGET_LEGITIMIZE_ADDRESS
42183 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42185 #undef TARGET_ATTRIBUTE_TABLE
42186 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42187 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42188 # undef TARGET_MERGE_DECL_ATTRIBUTES
42189 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42192 #undef TARGET_COMP_TYPE_ATTRIBUTES
42193 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42195 #undef TARGET_INIT_BUILTINS
42196 #define TARGET_INIT_BUILTINS ix86_init_builtins
42197 #undef TARGET_BUILTIN_DECL
42198 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42199 #undef TARGET_EXPAND_BUILTIN
42200 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42202 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42203 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42204 ix86_builtin_vectorized_function
42206 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42207 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42209 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42210 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42212 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42213 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42215 #undef TARGET_BUILTIN_RECIPROCAL
42216 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42218 #undef TARGET_ASM_FUNCTION_EPILOGUE
42219 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42221 #undef TARGET_ENCODE_SECTION_INFO
42222 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42223 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42225 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42228 #undef TARGET_ASM_OPEN_PAREN
42229 #define TARGET_ASM_OPEN_PAREN ""
42230 #undef TARGET_ASM_CLOSE_PAREN
42231 #define TARGET_ASM_CLOSE_PAREN ""
42233 #undef TARGET_ASM_BYTE_OP
42234 #define TARGET_ASM_BYTE_OP ASM_BYTE
42236 #undef TARGET_ASM_ALIGNED_HI_OP
42237 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42238 #undef TARGET_ASM_ALIGNED_SI_OP
42239 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42241 #undef TARGET_ASM_ALIGNED_DI_OP
42242 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42245 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42246 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42248 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42249 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42251 #undef TARGET_ASM_UNALIGNED_HI_OP
42252 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42253 #undef TARGET_ASM_UNALIGNED_SI_OP
42254 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42255 #undef TARGET_ASM_UNALIGNED_DI_OP
42256 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42258 #undef TARGET_PRINT_OPERAND
42259 #define TARGET_PRINT_OPERAND ix86_print_operand
42260 #undef TARGET_PRINT_OPERAND_ADDRESS
42261 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42262 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42263 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42264 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42265 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42267 #undef TARGET_SCHED_INIT_GLOBAL
42268 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42269 #undef TARGET_SCHED_ADJUST_COST
42270 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42271 #undef TARGET_SCHED_ISSUE_RATE
42272 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42273 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42274 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42275 ia32_multipass_dfa_lookahead
42277 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42278 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42280 #undef TARGET_MEMMODEL_CHECK
42281 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42284 #undef TARGET_HAVE_TLS
42285 #define TARGET_HAVE_TLS true
42287 #undef TARGET_CANNOT_FORCE_CONST_MEM
42288 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42289 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42290 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42292 #undef TARGET_DELEGITIMIZE_ADDRESS
42293 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42295 #undef TARGET_MS_BITFIELD_LAYOUT_P
42296 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42299 #undef TARGET_BINDS_LOCAL_P
42300 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42302 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42303 #undef TARGET_BINDS_LOCAL_P
42304 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42307 #undef TARGET_ASM_OUTPUT_MI_THUNK
42308 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42309 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42310 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42312 #undef TARGET_ASM_FILE_START
42313 #define TARGET_ASM_FILE_START x86_file_start
42315 #undef TARGET_OPTION_OVERRIDE
42316 #define TARGET_OPTION_OVERRIDE ix86_option_override
42318 #undef TARGET_REGISTER_MOVE_COST
42319 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42320 #undef TARGET_MEMORY_MOVE_COST
42321 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42322 #undef TARGET_RTX_COSTS
42323 #define TARGET_RTX_COSTS ix86_rtx_costs
42324 #undef TARGET_ADDRESS_COST
42325 #define TARGET_ADDRESS_COST ix86_address_cost
42327 #undef TARGET_FIXED_CONDITION_CODE_REGS
42328 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42329 #undef TARGET_CC_MODES_COMPATIBLE
42330 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42332 #undef TARGET_MACHINE_DEPENDENT_REORG
42333 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42335 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42336 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42338 #undef TARGET_BUILD_BUILTIN_VA_LIST
42339 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42341 #undef TARGET_FOLD_BUILTIN
42342 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42344 #undef TARGET_COMPARE_VERSION_PRIORITY
42345 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42347 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42348 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42349 ix86_generate_version_dispatcher_body
42351 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42352 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42353 ix86_get_function_versions_dispatcher
42355 #undef TARGET_ENUM_VA_LIST_P
42356 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42358 #undef TARGET_FN_ABI_VA_LIST
42359 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42361 #undef TARGET_CANONICAL_VA_LIST_TYPE
42362 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42364 #undef TARGET_EXPAND_BUILTIN_VA_START
42365 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42367 #undef TARGET_MD_ASM_CLOBBERS
42368 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42370 #undef TARGET_PROMOTE_PROTOTYPES
42371 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42372 #undef TARGET_STRUCT_VALUE_RTX
42373 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42374 #undef TARGET_SETUP_INCOMING_VARARGS
42375 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42376 #undef TARGET_MUST_PASS_IN_STACK
42377 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42378 #undef TARGET_FUNCTION_ARG_ADVANCE
42379 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42380 #undef TARGET_FUNCTION_ARG
42381 #define TARGET_FUNCTION_ARG ix86_function_arg
42382 #undef TARGET_FUNCTION_ARG_BOUNDARY
42383 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42384 #undef TARGET_PASS_BY_REFERENCE
42385 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42386 #undef TARGET_INTERNAL_ARG_POINTER
42387 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42388 #undef TARGET_UPDATE_STACK_BOUNDARY
42389 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42390 #undef TARGET_GET_DRAP_RTX
42391 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42392 #undef TARGET_STRICT_ARGUMENT_NAMING
42393 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42394 #undef TARGET_STATIC_CHAIN
42395 #define TARGET_STATIC_CHAIN ix86_static_chain
42396 #undef TARGET_TRAMPOLINE_INIT
42397 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42398 #undef TARGET_RETURN_POPS_ARGS
42399 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42401 #undef TARGET_LEGITIMATE_COMBINED_INSN
42402 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42404 #undef TARGET_ASAN_SHADOW_OFFSET
42405 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42407 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42408 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42410 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42411 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42413 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42414 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42416 #undef TARGET_C_MODE_FOR_SUFFIX
42417 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42420 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42421 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42424 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42425 #undef TARGET_INSERT_ATTRIBUTES
42426 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42429 #undef TARGET_MANGLE_TYPE
42430 #define TARGET_MANGLE_TYPE ix86_mangle_type
42433 #undef TARGET_STACK_PROTECT_FAIL
42434 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42437 #undef TARGET_FUNCTION_VALUE
42438 #define TARGET_FUNCTION_VALUE ix86_function_value
42440 #undef TARGET_FUNCTION_VALUE_REGNO_P
42441 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42443 #undef TARGET_PROMOTE_FUNCTION_MODE
42444 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42446 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42447 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42449 #undef TARGET_INSTANTIATE_DECLS
42450 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42452 #undef TARGET_SECONDARY_RELOAD
42453 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42455 #undef TARGET_CLASS_MAX_NREGS
42456 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42458 #undef TARGET_PREFERRED_RELOAD_CLASS
42459 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42460 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42461 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42462 #undef TARGET_CLASS_LIKELY_SPILLED_P
42463 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42465 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42466 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42467 ix86_builtin_vectorization_cost
42468 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42469 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42470 ix86_vectorize_vec_perm_const_ok
42471 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42472 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42473 ix86_preferred_simd_mode
42474 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42475 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42476 ix86_autovectorize_vector_sizes
42477 #undef TARGET_VECTORIZE_INIT_COST
42478 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42479 #undef TARGET_VECTORIZE_ADD_STMT_COST
42480 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42481 #undef TARGET_VECTORIZE_FINISH_COST
42482 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42483 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42484 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42486 #undef TARGET_SET_CURRENT_FUNCTION
42487 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42489 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42490 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42492 #undef TARGET_OPTION_SAVE
42493 #define TARGET_OPTION_SAVE ix86_function_specific_save
42495 #undef TARGET_OPTION_RESTORE
42496 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42498 #undef TARGET_OPTION_PRINT
42499 #define TARGET_OPTION_PRINT ix86_function_specific_print
42501 #undef TARGET_OPTION_FUNCTION_VERSIONS
42502 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42504 #undef TARGET_CAN_INLINE_P
42505 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42507 #undef TARGET_EXPAND_TO_RTL_HOOK
42508 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42510 #undef TARGET_LEGITIMATE_ADDRESS_P
42511 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42513 #undef TARGET_LRA_P
42514 #define TARGET_LRA_P hook_bool_void_true
42516 #undef TARGET_REGISTER_PRIORITY
42517 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42519 #undef TARGET_LEGITIMATE_CONSTANT_P
42520 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42522 #undef TARGET_FRAME_POINTER_REQUIRED
42523 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42525 #undef TARGET_CAN_ELIMINATE
42526 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42528 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42529 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42531 #undef TARGET_ASM_CODE_END
42532 #define TARGET_ASM_CODE_END ix86_code_end
42534 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42535 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42538 #undef TARGET_INIT_LIBFUNCS
42539 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42542 #undef TARGET_SPILL_CLASS
42543 #define TARGET_SPILL_CLASS ix86_spill_class
42545 struct gcc_target targetm
= TARGET_INITIALIZER
;
42547 #include "gt-i386.h"