/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
#include "tree-pass.h"
#include "tree-flow.h"
/* NOTE(review): this chunk was recovered from a damaged extraction; several
   includes between the ones above (tm.h, rtl.h, tree.h, tm_p.h, regs.h,
   output.h, recog.h, expr.h, optabs.h, target.h, df.h, ...) were dropped.
   Restore the full include list from the upstream file before building.  */
66 static rtx
legitimize_dllimport_symbol (rtx
, bool);
68 #ifndef CHECK_STACK_LIMIT
69 #define CHECK_STACK_LIMIT (-1)
/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DI map to 0..3; any other mode falls into the "other"
   slot 4.  The extraction had lost the final ": 4)" default arm,
   leaving the conditional expression unterminated -- restored.  */
#define MODE_INDEX(mode)				\
  ((mode) == QImode ? 0					\
   : (mode) == HImode ? 1				\
   : (mode) == SImode ? 2				\
   : (mode) == DImode ? 3				\
   : 4)
/* Processor costs (relative to an add) */
/* COSTS_N_BYTES expresses a cost in bytes of code size, for the
   size-tuning cost table.  We assume COSTS_N_INSNS is defined as (N)*4
   and an addition is 2 bytes, so the two scales are comparable.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy (always call the library routine); used
   for the 64-bit slots of cost tables of 32-bit-only processors.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
87 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
88 COSTS_N_BYTES (2), /* cost of an add instruction */
89 COSTS_N_BYTES (3), /* cost of a lea instruction */
90 COSTS_N_BYTES (2), /* variable shift costs */
91 COSTS_N_BYTES (3), /* constant shift costs */
92 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
93 COSTS_N_BYTES (3), /* HI */
94 COSTS_N_BYTES (3), /* SI */
95 COSTS_N_BYTES (3), /* DI */
96 COSTS_N_BYTES (5)}, /* other */
97 0, /* cost of multiply per each bit set */
98 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
99 COSTS_N_BYTES (3), /* HI */
100 COSTS_N_BYTES (3), /* SI */
101 COSTS_N_BYTES (3), /* DI */
102 COSTS_N_BYTES (5)}, /* other */
103 COSTS_N_BYTES (3), /* cost of movsx */
104 COSTS_N_BYTES (3), /* cost of movzx */
105 0, /* "large" insn */
107 2, /* cost for loading QImode using movzbl */
108 {2, 2, 2}, /* cost of loading integer registers
109 in QImode, HImode and SImode.
110 Relative to reg-reg move (2). */
111 {2, 2, 2}, /* cost of storing integer registers */
112 2, /* cost of reg,reg fld/fst */
113 {2, 2, 2}, /* cost of loading fp registers
114 in SFmode, DFmode and XFmode */
115 {2, 2, 2}, /* cost of storing fp registers
116 in SFmode, DFmode and XFmode */
117 3, /* cost of moving MMX register */
118 {3, 3}, /* cost of loading MMX registers
119 in SImode and DImode */
120 {3, 3}, /* cost of storing MMX registers
121 in SImode and DImode */
122 3, /* cost of moving SSE register */
123 {3, 3, 3}, /* cost of loading SSE registers
124 in SImode, DImode and TImode */
125 {3, 3, 3}, /* cost of storing SSE registers
126 in SImode, DImode and TImode */
127 3, /* MMX or SSE register to integer */
128 0, /* size of l1 cache */
129 0, /* size of l2 cache */
130 0, /* size of prefetch block */
131 0, /* number of parallel prefetches */
133 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
134 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
135 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
136 COSTS_N_BYTES (2), /* cost of FABS instruction. */
137 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
138 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
139 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
140 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
141 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
142 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}}},
143 1, /* scalar_stmt_cost. */
144 1, /* scalar load_cost. */
145 1, /* scalar_store_cost. */
146 1, /* vec_stmt_cost. */
147 1, /* vec_to_scalar_cost. */
148 1, /* scalar_to_vec_cost. */
149 1, /* vec_align_load_cost. */
150 1, /* vec_unalign_load_cost. */
151 1, /* vec_store_cost. */
152 1, /* cond_taken_branch_cost. */
153 1, /* cond_not_taken_branch_cost. */
156 /* Processor costs (relative to an add) */
158 struct processor_costs i386_cost
= { /* 386 specific costs */
159 COSTS_N_INSNS (1), /* cost of an add instruction */
160 COSTS_N_INSNS (1), /* cost of a lea instruction */
161 COSTS_N_INSNS (3), /* variable shift costs */
162 COSTS_N_INSNS (2), /* constant shift costs */
163 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
164 COSTS_N_INSNS (6), /* HI */
165 COSTS_N_INSNS (6), /* SI */
166 COSTS_N_INSNS (6), /* DI */
167 COSTS_N_INSNS (6)}, /* other */
168 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
169 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
170 COSTS_N_INSNS (23), /* HI */
171 COSTS_N_INSNS (23), /* SI */
172 COSTS_N_INSNS (23), /* DI */
173 COSTS_N_INSNS (23)}, /* other */
174 COSTS_N_INSNS (3), /* cost of movsx */
175 COSTS_N_INSNS (2), /* cost of movzx */
176 15, /* "large" insn */
178 4, /* cost for loading QImode using movzbl */
179 {2, 4, 2}, /* cost of loading integer registers
180 in QImode, HImode and SImode.
181 Relative to reg-reg move (2). */
182 {2, 4, 2}, /* cost of storing integer registers */
183 2, /* cost of reg,reg fld/fst */
184 {8, 8, 8}, /* cost of loading fp registers
185 in SFmode, DFmode and XFmode */
186 {8, 8, 8}, /* cost of storing fp registers
187 in SFmode, DFmode and XFmode */
188 2, /* cost of moving MMX register */
189 {4, 8}, /* cost of loading MMX registers
190 in SImode and DImode */
191 {4, 8}, /* cost of storing MMX registers
192 in SImode and DImode */
193 2, /* cost of moving SSE register */
194 {4, 8, 16}, /* cost of loading SSE registers
195 in SImode, DImode and TImode */
196 {4, 8, 16}, /* cost of storing SSE registers
197 in SImode, DImode and TImode */
198 3, /* MMX or SSE register to integer */
199 0, /* size of l1 cache */
200 0, /* size of l2 cache */
201 0, /* size of prefetch block */
202 0, /* number of parallel prefetches */
204 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
205 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
206 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
207 COSTS_N_INSNS (22), /* cost of FABS instruction. */
208 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
209 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
210 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
211 DUMMY_STRINGOP_ALGS
},
212 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
, false}}},
213 DUMMY_STRINGOP_ALGS
},
214 1, /* scalar_stmt_cost. */
215 1, /* scalar load_cost. */
216 1, /* scalar_store_cost. */
217 1, /* vec_stmt_cost. */
218 1, /* vec_to_scalar_cost. */
219 1, /* scalar_to_vec_cost. */
220 1, /* vec_align_load_cost. */
221 2, /* vec_unalign_load_cost. */
222 1, /* vec_store_cost. */
223 3, /* cond_taken_branch_cost. */
224 1, /* cond_not_taken_branch_cost. */
228 struct processor_costs i486_cost
= { /* 486 specific costs */
229 COSTS_N_INSNS (1), /* cost of an add instruction */
230 COSTS_N_INSNS (1), /* cost of a lea instruction */
231 COSTS_N_INSNS (3), /* variable shift costs */
232 COSTS_N_INSNS (2), /* constant shift costs */
233 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
234 COSTS_N_INSNS (12), /* HI */
235 COSTS_N_INSNS (12), /* SI */
236 COSTS_N_INSNS (12), /* DI */
237 COSTS_N_INSNS (12)}, /* other */
238 1, /* cost of multiply per each bit set */
239 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
240 COSTS_N_INSNS (40), /* HI */
241 COSTS_N_INSNS (40), /* SI */
242 COSTS_N_INSNS (40), /* DI */
243 COSTS_N_INSNS (40)}, /* other */
244 COSTS_N_INSNS (3), /* cost of movsx */
245 COSTS_N_INSNS (2), /* cost of movzx */
246 15, /* "large" insn */
248 4, /* cost for loading QImode using movzbl */
249 {2, 4, 2}, /* cost of loading integer registers
250 in QImode, HImode and SImode.
251 Relative to reg-reg move (2). */
252 {2, 4, 2}, /* cost of storing integer registers */
253 2, /* cost of reg,reg fld/fst */
254 {8, 8, 8}, /* cost of loading fp registers
255 in SFmode, DFmode and XFmode */
256 {8, 8, 8}, /* cost of storing fp registers
257 in SFmode, DFmode and XFmode */
258 2, /* cost of moving MMX register */
259 {4, 8}, /* cost of loading MMX registers
260 in SImode and DImode */
261 {4, 8}, /* cost of storing MMX registers
262 in SImode and DImode */
263 2, /* cost of moving SSE register */
264 {4, 8, 16}, /* cost of loading SSE registers
265 in SImode, DImode and TImode */
266 {4, 8, 16}, /* cost of storing SSE registers
267 in SImode, DImode and TImode */
268 3, /* MMX or SSE register to integer */
269 4, /* size of l1 cache. 486 has 8kB cache
270 shared for code and data, so 4kB is
271 not really precise. */
272 4, /* size of l2 cache */
273 0, /* size of prefetch block */
274 0, /* number of parallel prefetches */
276 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
277 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
278 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
279 COSTS_N_INSNS (3), /* cost of FABS instruction. */
280 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
281 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
282 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
283 DUMMY_STRINGOP_ALGS
},
284 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
, false}}},
285 DUMMY_STRINGOP_ALGS
},
286 1, /* scalar_stmt_cost. */
287 1, /* scalar load_cost. */
288 1, /* scalar_store_cost. */
289 1, /* vec_stmt_cost. */
290 1, /* vec_to_scalar_cost. */
291 1, /* scalar_to_vec_cost. */
292 1, /* vec_align_load_cost. */
293 2, /* vec_unalign_load_cost. */
294 1, /* vec_store_cost. */
295 3, /* cond_taken_branch_cost. */
296 1, /* cond_not_taken_branch_cost. */
300 struct processor_costs pentium_cost
= {
301 COSTS_N_INSNS (1), /* cost of an add instruction */
302 COSTS_N_INSNS (1), /* cost of a lea instruction */
303 COSTS_N_INSNS (4), /* variable shift costs */
304 COSTS_N_INSNS (1), /* constant shift costs */
305 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
306 COSTS_N_INSNS (11), /* HI */
307 COSTS_N_INSNS (11), /* SI */
308 COSTS_N_INSNS (11), /* DI */
309 COSTS_N_INSNS (11)}, /* other */
310 0, /* cost of multiply per each bit set */
311 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
312 COSTS_N_INSNS (25), /* HI */
313 COSTS_N_INSNS (25), /* SI */
314 COSTS_N_INSNS (25), /* DI */
315 COSTS_N_INSNS (25)}, /* other */
316 COSTS_N_INSNS (3), /* cost of movsx */
317 COSTS_N_INSNS (2), /* cost of movzx */
318 8, /* "large" insn */
320 6, /* cost for loading QImode using movzbl */
321 {2, 4, 2}, /* cost of loading integer registers
322 in QImode, HImode and SImode.
323 Relative to reg-reg move (2). */
324 {2, 4, 2}, /* cost of storing integer registers */
325 2, /* cost of reg,reg fld/fst */
326 {2, 2, 6}, /* cost of loading fp registers
327 in SFmode, DFmode and XFmode */
328 {4, 4, 6}, /* cost of storing fp registers
329 in SFmode, DFmode and XFmode */
330 8, /* cost of moving MMX register */
331 {8, 8}, /* cost of loading MMX registers
332 in SImode and DImode */
333 {8, 8}, /* cost of storing MMX registers
334 in SImode and DImode */
335 2, /* cost of moving SSE register */
336 {4, 8, 16}, /* cost of loading SSE registers
337 in SImode, DImode and TImode */
338 {4, 8, 16}, /* cost of storing SSE registers
339 in SImode, DImode and TImode */
340 3, /* MMX or SSE register to integer */
341 8, /* size of l1 cache. */
342 8, /* size of l2 cache */
343 0, /* size of prefetch block */
344 0, /* number of parallel prefetches */
346 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
347 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
348 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
349 COSTS_N_INSNS (1), /* cost of FABS instruction. */
350 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
351 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
352 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
353 DUMMY_STRINGOP_ALGS
},
354 {{libcall
, {{-1, rep_prefix_4_byte
, false}}},
355 DUMMY_STRINGOP_ALGS
},
356 1, /* scalar_stmt_cost. */
357 1, /* scalar load_cost. */
358 1, /* scalar_store_cost. */
359 1, /* vec_stmt_cost. */
360 1, /* vec_to_scalar_cost. */
361 1, /* scalar_to_vec_cost. */
362 1, /* vec_align_load_cost. */
363 2, /* vec_unalign_load_cost. */
364 1, /* vec_store_cost. */
365 3, /* cond_taken_branch_cost. */
366 1, /* cond_not_taken_branch_cost. */
370 struct processor_costs pentiumpro_cost
= {
371 COSTS_N_INSNS (1), /* cost of an add instruction */
372 COSTS_N_INSNS (1), /* cost of a lea instruction */
373 COSTS_N_INSNS (1), /* variable shift costs */
374 COSTS_N_INSNS (1), /* constant shift costs */
375 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
376 COSTS_N_INSNS (4), /* HI */
377 COSTS_N_INSNS (4), /* SI */
378 COSTS_N_INSNS (4), /* DI */
379 COSTS_N_INSNS (4)}, /* other */
380 0, /* cost of multiply per each bit set */
381 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
382 COSTS_N_INSNS (17), /* HI */
383 COSTS_N_INSNS (17), /* SI */
384 COSTS_N_INSNS (17), /* DI */
385 COSTS_N_INSNS (17)}, /* other */
386 COSTS_N_INSNS (1), /* cost of movsx */
387 COSTS_N_INSNS (1), /* cost of movzx */
388 8, /* "large" insn */
390 2, /* cost for loading QImode using movzbl */
391 {4, 4, 4}, /* cost of loading integer registers
392 in QImode, HImode and SImode.
393 Relative to reg-reg move (2). */
394 {2, 2, 2}, /* cost of storing integer registers */
395 2, /* cost of reg,reg fld/fst */
396 {2, 2, 6}, /* cost of loading fp registers
397 in SFmode, DFmode and XFmode */
398 {4, 4, 6}, /* cost of storing fp registers
399 in SFmode, DFmode and XFmode */
400 2, /* cost of moving MMX register */
401 {2, 2}, /* cost of loading MMX registers
402 in SImode and DImode */
403 {2, 2}, /* cost of storing MMX registers
404 in SImode and DImode */
405 2, /* cost of moving SSE register */
406 {2, 2, 8}, /* cost of loading SSE registers
407 in SImode, DImode and TImode */
408 {2, 2, 8}, /* cost of storing SSE registers
409 in SImode, DImode and TImode */
410 3, /* MMX or SSE register to integer */
411 8, /* size of l1 cache. */
412 256, /* size of l2 cache */
413 32, /* size of prefetch block */
414 6, /* number of parallel prefetches */
416 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
417 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
418 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
419 COSTS_N_INSNS (2), /* cost of FABS instruction. */
420 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
421 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
422 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
423 (we ensure the alignment). For small blocks inline loop is still a
424 noticeable win, for bigger blocks either rep movsl or rep movsb is
425 way to go. Rep movsb has apparently more expensive startup time in CPU,
426 but after 4K the difference is down in the noise. */
427 {{rep_prefix_4_byte
, {{128, loop
, false}, {1024, unrolled_loop
, false},
428 {8192, rep_prefix_4_byte
, false},
429 {-1, rep_prefix_1_byte
, false}}},
430 DUMMY_STRINGOP_ALGS
},
431 {{rep_prefix_4_byte
, {{1024, unrolled_loop
, false},
432 {8192, rep_prefix_4_byte
, false},
433 {-1, libcall
, false}}},
434 DUMMY_STRINGOP_ALGS
},
435 1, /* scalar_stmt_cost. */
436 1, /* scalar load_cost. */
437 1, /* scalar_store_cost. */
438 1, /* vec_stmt_cost. */
439 1, /* vec_to_scalar_cost. */
440 1, /* scalar_to_vec_cost. */
441 1, /* vec_align_load_cost. */
442 2, /* vec_unalign_load_cost. */
443 1, /* vec_store_cost. */
444 3, /* cond_taken_branch_cost. */
445 1, /* cond_not_taken_branch_cost. */
449 struct processor_costs geode_cost
= {
450 COSTS_N_INSNS (1), /* cost of an add instruction */
451 COSTS_N_INSNS (1), /* cost of a lea instruction */
452 COSTS_N_INSNS (2), /* variable shift costs */
453 COSTS_N_INSNS (1), /* constant shift costs */
454 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
455 COSTS_N_INSNS (4), /* HI */
456 COSTS_N_INSNS (7), /* SI */
457 COSTS_N_INSNS (7), /* DI */
458 COSTS_N_INSNS (7)}, /* other */
459 0, /* cost of multiply per each bit set */
460 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
461 COSTS_N_INSNS (23), /* HI */
462 COSTS_N_INSNS (39), /* SI */
463 COSTS_N_INSNS (39), /* DI */
464 COSTS_N_INSNS (39)}, /* other */
465 COSTS_N_INSNS (1), /* cost of movsx */
466 COSTS_N_INSNS (1), /* cost of movzx */
467 8, /* "large" insn */
469 1, /* cost for loading QImode using movzbl */
470 {1, 1, 1}, /* cost of loading integer registers
471 in QImode, HImode and SImode.
472 Relative to reg-reg move (2). */
473 {1, 1, 1}, /* cost of storing integer registers */
474 1, /* cost of reg,reg fld/fst */
475 {1, 1, 1}, /* cost of loading fp registers
476 in SFmode, DFmode and XFmode */
477 {4, 6, 6}, /* cost of storing fp registers
478 in SFmode, DFmode and XFmode */
480 1, /* cost of moving MMX register */
481 {1, 1}, /* cost of loading MMX registers
482 in SImode and DImode */
483 {1, 1}, /* cost of storing MMX registers
484 in SImode and DImode */
485 1, /* cost of moving SSE register */
486 {1, 1, 1}, /* cost of loading SSE registers
487 in SImode, DImode and TImode */
488 {1, 1, 1}, /* cost of storing SSE registers
489 in SImode, DImode and TImode */
490 1, /* MMX or SSE register to integer */
491 64, /* size of l1 cache. */
492 128, /* size of l2 cache. */
493 32, /* size of prefetch block */
494 1, /* number of parallel prefetches */
496 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
497 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
498 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
499 COSTS_N_INSNS (1), /* cost of FABS instruction. */
500 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
501 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
502 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
503 DUMMY_STRINGOP_ALGS
},
504 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
505 DUMMY_STRINGOP_ALGS
},
506 1, /* scalar_stmt_cost. */
507 1, /* scalar load_cost. */
508 1, /* scalar_store_cost. */
509 1, /* vec_stmt_cost. */
510 1, /* vec_to_scalar_cost. */
511 1, /* scalar_to_vec_cost. */
512 1, /* vec_align_load_cost. */
513 2, /* vec_unalign_load_cost. */
514 1, /* vec_store_cost. */
515 3, /* cond_taken_branch_cost. */
516 1, /* cond_not_taken_branch_cost. */
520 struct processor_costs k6_cost
= {
521 COSTS_N_INSNS (1), /* cost of an add instruction */
522 COSTS_N_INSNS (2), /* cost of a lea instruction */
523 COSTS_N_INSNS (1), /* variable shift costs */
524 COSTS_N_INSNS (1), /* constant shift costs */
525 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
526 COSTS_N_INSNS (3), /* HI */
527 COSTS_N_INSNS (3), /* SI */
528 COSTS_N_INSNS (3), /* DI */
529 COSTS_N_INSNS (3)}, /* other */
530 0, /* cost of multiply per each bit set */
531 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
532 COSTS_N_INSNS (18), /* HI */
533 COSTS_N_INSNS (18), /* SI */
534 COSTS_N_INSNS (18), /* DI */
535 COSTS_N_INSNS (18)}, /* other */
536 COSTS_N_INSNS (2), /* cost of movsx */
537 COSTS_N_INSNS (2), /* cost of movzx */
538 8, /* "large" insn */
540 3, /* cost for loading QImode using movzbl */
541 {4, 5, 4}, /* cost of loading integer registers
542 in QImode, HImode and SImode.
543 Relative to reg-reg move (2). */
544 {2, 3, 2}, /* cost of storing integer registers */
545 4, /* cost of reg,reg fld/fst */
546 {6, 6, 6}, /* cost of loading fp registers
547 in SFmode, DFmode and XFmode */
548 {4, 4, 4}, /* cost of storing fp registers
549 in SFmode, DFmode and XFmode */
550 2, /* cost of moving MMX register */
551 {2, 2}, /* cost of loading MMX registers
552 in SImode and DImode */
553 {2, 2}, /* cost of storing MMX registers
554 in SImode and DImode */
555 2, /* cost of moving SSE register */
556 {2, 2, 8}, /* cost of loading SSE registers
557 in SImode, DImode and TImode */
558 {2, 2, 8}, /* cost of storing SSE registers
559 in SImode, DImode and TImode */
560 6, /* MMX or SSE register to integer */
561 32, /* size of l1 cache. */
562 32, /* size of l2 cache. Some models
563 have integrated l2 cache, but
564 optimizing for k6 is not important
565 enough to worry about that. */
566 32, /* size of prefetch block */
567 1, /* number of parallel prefetches */
569 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
570 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
571 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
572 COSTS_N_INSNS (2), /* cost of FABS instruction. */
573 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
574 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
575 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
576 DUMMY_STRINGOP_ALGS
},
577 {{libcall
, {{256, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
578 DUMMY_STRINGOP_ALGS
},
579 1, /* scalar_stmt_cost. */
580 1, /* scalar load_cost. */
581 1, /* scalar_store_cost. */
582 1, /* vec_stmt_cost. */
583 1, /* vec_to_scalar_cost. */
584 1, /* scalar_to_vec_cost. */
585 1, /* vec_align_load_cost. */
586 2, /* vec_unalign_load_cost. */
587 1, /* vec_store_cost. */
588 3, /* cond_taken_branch_cost. */
589 1, /* cond_not_taken_branch_cost. */
593 struct processor_costs athlon_cost
= {
594 COSTS_N_INSNS (1), /* cost of an add instruction */
595 COSTS_N_INSNS (2), /* cost of a lea instruction */
596 COSTS_N_INSNS (1), /* variable shift costs */
597 COSTS_N_INSNS (1), /* constant shift costs */
598 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
599 COSTS_N_INSNS (5), /* HI */
600 COSTS_N_INSNS (5), /* SI */
601 COSTS_N_INSNS (5), /* DI */
602 COSTS_N_INSNS (5)}, /* other */
603 0, /* cost of multiply per each bit set */
604 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
605 COSTS_N_INSNS (26), /* HI */
606 COSTS_N_INSNS (42), /* SI */
607 COSTS_N_INSNS (74), /* DI */
608 COSTS_N_INSNS (74)}, /* other */
609 COSTS_N_INSNS (1), /* cost of movsx */
610 COSTS_N_INSNS (1), /* cost of movzx */
611 8, /* "large" insn */
613 4, /* cost for loading QImode using movzbl */
614 {3, 4, 3}, /* cost of loading integer registers
615 in QImode, HImode and SImode.
616 Relative to reg-reg move (2). */
617 {3, 4, 3}, /* cost of storing integer registers */
618 4, /* cost of reg,reg fld/fst */
619 {4, 4, 12}, /* cost of loading fp registers
620 in SFmode, DFmode and XFmode */
621 {6, 6, 8}, /* cost of storing fp registers
622 in SFmode, DFmode and XFmode */
623 2, /* cost of moving MMX register */
624 {4, 4}, /* cost of loading MMX registers
625 in SImode and DImode */
626 {4, 4}, /* cost of storing MMX registers
627 in SImode and DImode */
628 2, /* cost of moving SSE register */
629 {4, 4, 6}, /* cost of loading SSE registers
630 in SImode, DImode and TImode */
631 {4, 4, 5}, /* cost of storing SSE registers
632 in SImode, DImode and TImode */
633 5, /* MMX or SSE register to integer */
634 64, /* size of l1 cache. */
635 256, /* size of l2 cache. */
636 64, /* size of prefetch block */
637 6, /* number of parallel prefetches */
639 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
640 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
641 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
642 COSTS_N_INSNS (2), /* cost of FABS instruction. */
643 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
644 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
645 /* For some reason, Athlon deals better with REP prefix (relative to loops)
646 compared to K8. Alignment becomes important after 8 bytes for memcpy and
647 128 bytes for memset. */
648 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
649 DUMMY_STRINGOP_ALGS
},
650 {{libcall
, {{2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
651 DUMMY_STRINGOP_ALGS
},
652 1, /* scalar_stmt_cost. */
653 1, /* scalar load_cost. */
654 1, /* scalar_store_cost. */
655 1, /* vec_stmt_cost. */
656 1, /* vec_to_scalar_cost. */
657 1, /* scalar_to_vec_cost. */
658 1, /* vec_align_load_cost. */
659 2, /* vec_unalign_load_cost. */
660 1, /* vec_store_cost. */
661 3, /* cond_taken_branch_cost. */
662 1, /* cond_not_taken_branch_cost. */
666 struct processor_costs k8_cost
= {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (2), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (4), /* HI */
673 COSTS_N_INSNS (3), /* SI */
674 COSTS_N_INSNS (4), /* DI */
675 COSTS_N_INSNS (5)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (26), /* HI */
679 COSTS_N_INSNS (42), /* SI */
680 COSTS_N_INSNS (74), /* DI */
681 COSTS_N_INSNS (74)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 8, /* "large" insn */
686 4, /* cost for loading QImode using movzbl */
687 {3, 4, 3}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {3, 4, 3}, /* cost of storing integer registers */
691 4, /* cost of reg,reg fld/fst */
692 {4, 4, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {6, 6, 8}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 2, /* cost of moving MMX register */
697 {3, 3}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {4, 4}, /* cost of storing MMX registers
700 in SImode and DImode */
701 2, /* cost of moving SSE register */
702 {4, 3, 6}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {4, 4, 5}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 5, /* MMX or SSE register to integer */
707 64, /* size of l1 cache. */
708 512, /* size of l2 cache. */
709 64, /* size of prefetch block */
710 /* New AMD processors never drop prefetches; if they cannot be performed
711 immediately, they are queued. We set number of simultaneous prefetches
712 to a large constant to reflect this (it probably is not a good idea not
713 to limit number of prefetches at all, as their execution also takes some
715 100, /* number of parallel prefetches */
717 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
718 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
719 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
720 COSTS_N_INSNS (2), /* cost of FABS instruction. */
721 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
722 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
723 /* K8 has optimized REP instruction for medium sized blocks, but for very
724 small blocks it is better to use loop. For large blocks, libcall can
725 do nontemporary accesses and beat inline considerably. */
726 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
727 {-1, rep_prefix_4_byte
, false}}},
728 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
729 {-1, libcall
, false}}}},
730 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
731 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
732 {libcall
, {{48, unrolled_loop
, false},
733 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
734 4, /* scalar_stmt_cost. */
735 2, /* scalar load_cost. */
736 2, /* scalar_store_cost. */
737 5, /* vec_stmt_cost. */
738 0, /* vec_to_scalar_cost. */
739 2, /* scalar_to_vec_cost. */
740 2, /* vec_align_load_cost. */
741 3, /* vec_unalign_load_cost. */
742 3, /* vec_store_cost. */
743 3, /* cond_taken_branch_cost. */
744 2, /* cond_not_taken_branch_cost. */
747 struct processor_costs amdfam10_cost
= {
748 COSTS_N_INSNS (1), /* cost of an add instruction */
749 COSTS_N_INSNS (2), /* cost of a lea instruction */
750 COSTS_N_INSNS (1), /* variable shift costs */
751 COSTS_N_INSNS (1), /* constant shift costs */
752 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
753 COSTS_N_INSNS (4), /* HI */
754 COSTS_N_INSNS (3), /* SI */
755 COSTS_N_INSNS (4), /* DI */
756 COSTS_N_INSNS (5)}, /* other */
757 0, /* cost of multiply per each bit set */
758 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
759 COSTS_N_INSNS (35), /* HI */
760 COSTS_N_INSNS (51), /* SI */
761 COSTS_N_INSNS (83), /* DI */
762 COSTS_N_INSNS (83)}, /* other */
763 COSTS_N_INSNS (1), /* cost of movsx */
764 COSTS_N_INSNS (1), /* cost of movzx */
765 8, /* "large" insn */
767 4, /* cost for loading QImode using movzbl */
768 {3, 4, 3}, /* cost of loading integer registers
769 in QImode, HImode and SImode.
770 Relative to reg-reg move (2). */
771 {3, 4, 3}, /* cost of storing integer registers */
772 4, /* cost of reg,reg fld/fst */
773 {4, 4, 12}, /* cost of loading fp registers
774 in SFmode, DFmode and XFmode */
775 {6, 6, 8}, /* cost of storing fp registers
776 in SFmode, DFmode and XFmode */
777 2, /* cost of moving MMX register */
778 {3, 3}, /* cost of loading MMX registers
779 in SImode and DImode */
780 {4, 4}, /* cost of storing MMX registers
781 in SImode and DImode */
782 2, /* cost of moving SSE register */
783 {4, 4, 3}, /* cost of loading SSE registers
784 in SImode, DImode and TImode */
785 {4, 4, 5}, /* cost of storing SSE registers
786 in SImode, DImode and TImode */
787 3, /* MMX or SSE register to integer */
789 MOVD reg64, xmmreg Double FSTORE 4
790 MOVD reg32, xmmreg Double FSTORE 4
792 MOVD reg64, xmmreg Double FADD 3
794 MOVD reg32, xmmreg Double FADD 3
796 64, /* size of l1 cache. */
797 512, /* size of l2 cache. */
798 64, /* size of prefetch block */
799 /* New AMD processors never drop prefetches; if they cannot be performed
800 immediately, they are queued. We set number of simultaneous prefetches
801 to a large constant to reflect this (it probably is not a good idea not
802 to limit number of prefetches at all, as their execution also takes some
804 100, /* number of parallel prefetches */
806 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
807 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
808 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
809 COSTS_N_INSNS (2), /* cost of FABS instruction. */
810 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
811 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
813 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
814 very small blocks it is better to use loop. For large blocks, libcall can
815 do nontemporary accesses and beat inline considerably. */
816 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
817 {-1, rep_prefix_4_byte
, false}}},
818 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
819 {-1, libcall
, false}}}},
820 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
821 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
822 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
823 {-1, libcall
, false}}}},
824 4, /* scalar_stmt_cost. */
825 2, /* scalar load_cost. */
826 2, /* scalar_store_cost. */
827 6, /* vec_stmt_cost. */
828 0, /* vec_to_scalar_cost. */
829 2, /* scalar_to_vec_cost. */
830 2, /* vec_align_load_cost. */
831 2, /* vec_unalign_load_cost. */
832 2, /* vec_store_cost. */
833 2, /* cond_taken_branch_cost. */
834 1, /* cond_not_taken_branch_cost. */
837 struct processor_costs bdver1_cost
= {
838 COSTS_N_INSNS (1), /* cost of an add instruction */
839 COSTS_N_INSNS (1), /* cost of a lea instruction */
840 COSTS_N_INSNS (1), /* variable shift costs */
841 COSTS_N_INSNS (1), /* constant shift costs */
842 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
843 COSTS_N_INSNS (4), /* HI */
844 COSTS_N_INSNS (4), /* SI */
845 COSTS_N_INSNS (6), /* DI */
846 COSTS_N_INSNS (6)}, /* other */
847 0, /* cost of multiply per each bit set */
848 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
849 COSTS_N_INSNS (35), /* HI */
850 COSTS_N_INSNS (51), /* SI */
851 COSTS_N_INSNS (83), /* DI */
852 COSTS_N_INSNS (83)}, /* other */
853 COSTS_N_INSNS (1), /* cost of movsx */
854 COSTS_N_INSNS (1), /* cost of movzx */
855 8, /* "large" insn */
857 4, /* cost for loading QImode using movzbl */
858 {5, 5, 4}, /* cost of loading integer registers
859 in QImode, HImode and SImode.
860 Relative to reg-reg move (2). */
861 {4, 4, 4}, /* cost of storing integer registers */
862 2, /* cost of reg,reg fld/fst */
863 {5, 5, 12}, /* cost of loading fp registers
864 in SFmode, DFmode and XFmode */
865 {4, 4, 8}, /* cost of storing fp registers
866 in SFmode, DFmode and XFmode */
867 2, /* cost of moving MMX register */
868 {4, 4}, /* cost of loading MMX registers
869 in SImode and DImode */
870 {4, 4}, /* cost of storing MMX registers
871 in SImode and DImode */
872 2, /* cost of moving SSE register */
873 {4, 4, 4}, /* cost of loading SSE registers
874 in SImode, DImode and TImode */
875 {4, 4, 4}, /* cost of storing SSE registers
876 in SImode, DImode and TImode */
877 2, /* MMX or SSE register to integer */
879 MOVD reg64, xmmreg Double FSTORE 4
880 MOVD reg32, xmmreg Double FSTORE 4
882 MOVD reg64, xmmreg Double FADD 3
884 MOVD reg32, xmmreg Double FADD 3
886 16, /* size of l1 cache. */
887 2048, /* size of l2 cache. */
888 64, /* size of prefetch block */
889 /* New AMD processors never drop prefetches; if they cannot be performed
890 immediately, they are queued. We set number of simultaneous prefetches
891 to a large constant to reflect this (it probably is not a good idea not
892 to limit number of prefetches at all, as their execution also takes some
894 100, /* number of parallel prefetches */
896 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
897 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
898 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
899 COSTS_N_INSNS (2), /* cost of FABS instruction. */
900 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
901 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
903 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
904 very small blocks it is better to use loop. For large blocks, libcall
905 can do nontemporary accesses and beat inline considerably. */
906 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
907 {-1, rep_prefix_4_byte
, false}}},
908 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
909 {-1, libcall
, false}}}},
910 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
911 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
912 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
913 {-1, libcall
, false}}}},
914 6, /* scalar_stmt_cost. */
915 4, /* scalar load_cost. */
916 4, /* scalar_store_cost. */
917 6, /* vec_stmt_cost. */
918 0, /* vec_to_scalar_cost. */
919 2, /* scalar_to_vec_cost. */
920 4, /* vec_align_load_cost. */
921 4, /* vec_unalign_load_cost. */
922 4, /* vec_store_cost. */
923 2, /* cond_taken_branch_cost. */
924 1, /* cond_not_taken_branch_cost. */
927 struct processor_costs bdver2_cost
= {
928 COSTS_N_INSNS (1), /* cost of an add instruction */
929 COSTS_N_INSNS (1), /* cost of a lea instruction */
930 COSTS_N_INSNS (1), /* variable shift costs */
931 COSTS_N_INSNS (1), /* constant shift costs */
932 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
933 COSTS_N_INSNS (4), /* HI */
934 COSTS_N_INSNS (4), /* SI */
935 COSTS_N_INSNS (6), /* DI */
936 COSTS_N_INSNS (6)}, /* other */
937 0, /* cost of multiply per each bit set */
938 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
939 COSTS_N_INSNS (35), /* HI */
940 COSTS_N_INSNS (51), /* SI */
941 COSTS_N_INSNS (83), /* DI */
942 COSTS_N_INSNS (83)}, /* other */
943 COSTS_N_INSNS (1), /* cost of movsx */
944 COSTS_N_INSNS (1), /* cost of movzx */
945 8, /* "large" insn */
947 4, /* cost for loading QImode using movzbl */
948 {5, 5, 4}, /* cost of loading integer registers
949 in QImode, HImode and SImode.
950 Relative to reg-reg move (2). */
951 {4, 4, 4}, /* cost of storing integer registers */
952 2, /* cost of reg,reg fld/fst */
953 {5, 5, 12}, /* cost of loading fp registers
954 in SFmode, DFmode and XFmode */
955 {4, 4, 8}, /* cost of storing fp registers
956 in SFmode, DFmode and XFmode */
957 2, /* cost of moving MMX register */
958 {4, 4}, /* cost of loading MMX registers
959 in SImode and DImode */
960 {4, 4}, /* cost of storing MMX registers
961 in SImode and DImode */
962 2, /* cost of moving SSE register */
963 {4, 4, 4}, /* cost of loading SSE registers
964 in SImode, DImode and TImode */
965 {4, 4, 4}, /* cost of storing SSE registers
966 in SImode, DImode and TImode */
967 2, /* MMX or SSE register to integer */
969 MOVD reg64, xmmreg Double FSTORE 4
970 MOVD reg32, xmmreg Double FSTORE 4
972 MOVD reg64, xmmreg Double FADD 3
974 MOVD reg32, xmmreg Double FADD 3
976 16, /* size of l1 cache. */
977 2048, /* size of l2 cache. */
978 64, /* size of prefetch block */
979 /* New AMD processors never drop prefetches; if they cannot be performed
980 immediately, they are queued. We set number of simultaneous prefetches
981 to a large constant to reflect this (it probably is not a good idea not
982 to limit number of prefetches at all, as their execution also takes some
984 100, /* number of parallel prefetches */
986 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
987 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
988 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
989 COSTS_N_INSNS (2), /* cost of FABS instruction. */
990 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
991 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
993 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
994 very small blocks it is better to use loop. For large blocks, libcall
995 can do nontemporary accesses and beat inline considerably. */
996 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
997 {-1, rep_prefix_4_byte
, false}}},
998 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
999 {-1, libcall
, false}}}},
1000 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1001 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1002 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1003 {-1, libcall
, false}}}},
1004 6, /* scalar_stmt_cost. */
1005 4, /* scalar load_cost. */
1006 4, /* scalar_store_cost. */
1007 6, /* vec_stmt_cost. */
1008 0, /* vec_to_scalar_cost. */
1009 2, /* scalar_to_vec_cost. */
1010 4, /* vec_align_load_cost. */
1011 4, /* vec_unalign_load_cost. */
1012 4, /* vec_store_cost. */
1013 2, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
1017 struct processor_costs bdver3_cost
= {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 16, /* size of l1 cache. */
1059 2048, /* size of l2 cache. */
1060 64, /* size of prefetch block */
1061 /* New AMD processors never drop prefetches; if they cannot be performed
1062 immediately, they are queued. We set number of simultaneous prefetches
1063 to a large constant to reflect this (it probably is not a good idea not
1064 to limit number of prefetches at all, as their execution also takes some
1066 100, /* number of parallel prefetches */
1067 2, /* Branch cost */
1068 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1069 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1070 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1071 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1072 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1073 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1075 /* BDVER3 has optimized REP instruction for medium sized blocks, but for
1076 very small blocks it is better to use loop. For large blocks, libcall
1077 can do nontemporary accesses and beat inline considerably. */
1078 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1079 {-1, rep_prefix_4_byte
, false}}},
1080 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1081 {-1, libcall
, false}}}},
1082 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1083 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1084 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1085 {-1, libcall
, false}}}},
1086 6, /* scalar_stmt_cost. */
1087 4, /* scalar load_cost. */
1088 4, /* scalar_store_cost. */
1089 6, /* vec_stmt_cost. */
1090 0, /* vec_to_scalar_cost. */
1091 2, /* scalar_to_vec_cost. */
1092 4, /* vec_align_load_cost. */
1093 4, /* vec_unalign_load_cost. */
1094 4, /* vec_store_cost. */
1095 2, /* cond_taken_branch_cost. */
1096 1, /* cond_not_taken_branch_cost. */
1099 struct processor_costs btver1_cost
= {
1100 COSTS_N_INSNS (1), /* cost of an add instruction */
1101 COSTS_N_INSNS (2), /* cost of a lea instruction */
1102 COSTS_N_INSNS (1), /* variable shift costs */
1103 COSTS_N_INSNS (1), /* constant shift costs */
1104 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1105 COSTS_N_INSNS (4), /* HI */
1106 COSTS_N_INSNS (3), /* SI */
1107 COSTS_N_INSNS (4), /* DI */
1108 COSTS_N_INSNS (5)}, /* other */
1109 0, /* cost of multiply per each bit set */
1110 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1111 COSTS_N_INSNS (35), /* HI */
1112 COSTS_N_INSNS (51), /* SI */
1113 COSTS_N_INSNS (83), /* DI */
1114 COSTS_N_INSNS (83)}, /* other */
1115 COSTS_N_INSNS (1), /* cost of movsx */
1116 COSTS_N_INSNS (1), /* cost of movzx */
1117 8, /* "large" insn */
1119 4, /* cost for loading QImode using movzbl */
1120 {3, 4, 3}, /* cost of loading integer registers
1121 in QImode, HImode and SImode.
1122 Relative to reg-reg move (2). */
1123 {3, 4, 3}, /* cost of storing integer registers */
1124 4, /* cost of reg,reg fld/fst */
1125 {4, 4, 12}, /* cost of loading fp registers
1126 in SFmode, DFmode and XFmode */
1127 {6, 6, 8}, /* cost of storing fp registers
1128 in SFmode, DFmode and XFmode */
1129 2, /* cost of moving MMX register */
1130 {3, 3}, /* cost of loading MMX registers
1131 in SImode and DImode */
1132 {4, 4}, /* cost of storing MMX registers
1133 in SImode and DImode */
1134 2, /* cost of moving SSE register */
1135 {4, 4, 3}, /* cost of loading SSE registers
1136 in SImode, DImode and TImode */
1137 {4, 4, 5}, /* cost of storing SSE registers
1138 in SImode, DImode and TImode */
1139 3, /* MMX or SSE register to integer */
1141 MOVD reg64, xmmreg Double FSTORE 4
1142 MOVD reg32, xmmreg Double FSTORE 4
1144 MOVD reg64, xmmreg Double FADD 3
1146 MOVD reg32, xmmreg Double FADD 3
1148 32, /* size of l1 cache. */
1149 512, /* size of l2 cache. */
1150 64, /* size of prefetch block */
1151 100, /* number of parallel prefetches */
1152 2, /* Branch cost */
1153 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1154 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1155 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1156 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1157 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1158 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1160 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1161 very small blocks it is better to use loop. For large blocks, libcall can
1162 do nontemporary accesses and beat inline considerably. */
1163 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1164 {-1, rep_prefix_4_byte
, false}}},
1165 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1166 {-1, libcall
, false}}}},
1167 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1168 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1169 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1170 {-1, libcall
, false}}}},
1171 4, /* scalar_stmt_cost. */
1172 2, /* scalar load_cost. */
1173 2, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 2, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 2, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 struct processor_costs btver2_cost
= {
1185 COSTS_N_INSNS (1), /* cost of an add instruction */
1186 COSTS_N_INSNS (2), /* cost of a lea instruction */
1187 COSTS_N_INSNS (1), /* variable shift costs */
1188 COSTS_N_INSNS (1), /* constant shift costs */
1189 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1190 COSTS_N_INSNS (4), /* HI */
1191 COSTS_N_INSNS (3), /* SI */
1192 COSTS_N_INSNS (4), /* DI */
1193 COSTS_N_INSNS (5)}, /* other */
1194 0, /* cost of multiply per each bit set */
1195 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1196 COSTS_N_INSNS (35), /* HI */
1197 COSTS_N_INSNS (51), /* SI */
1198 COSTS_N_INSNS (83), /* DI */
1199 COSTS_N_INSNS (83)}, /* other */
1200 COSTS_N_INSNS (1), /* cost of movsx */
1201 COSTS_N_INSNS (1), /* cost of movzx */
1202 8, /* "large" insn */
1204 4, /* cost for loading QImode using movzbl */
1205 {3, 4, 3}, /* cost of loading integer registers
1206 in QImode, HImode and SImode.
1207 Relative to reg-reg move (2). */
1208 {3, 4, 3}, /* cost of storing integer registers */
1209 4, /* cost of reg,reg fld/fst */
1210 {4, 4, 12}, /* cost of loading fp registers
1211 in SFmode, DFmode and XFmode */
1212 {6, 6, 8}, /* cost of storing fp registers
1213 in SFmode, DFmode and XFmode */
1214 2, /* cost of moving MMX register */
1215 {3, 3}, /* cost of loading MMX registers
1216 in SImode and DImode */
1217 {4, 4}, /* cost of storing MMX registers
1218 in SImode and DImode */
1219 2, /* cost of moving SSE register */
1220 {4, 4, 3}, /* cost of loading SSE registers
1221 in SImode, DImode and TImode */
1222 {4, 4, 5}, /* cost of storing SSE registers
1223 in SImode, DImode and TImode */
1224 3, /* MMX or SSE register to integer */
1226 MOVD reg64, xmmreg Double FSTORE 4
1227 MOVD reg32, xmmreg Double FSTORE 4
1229 MOVD reg64, xmmreg Double FADD 3
1231 MOVD reg32, xmmreg Double FADD 3
1233 32, /* size of l1 cache. */
1234 2048, /* size of l2 cache. */
1235 64, /* size of prefetch block */
1236 100, /* number of parallel prefetches */
1237 2, /* Branch cost */
1238 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1239 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1240 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1241 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1242 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1243 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1245 {{libcall
, {{6, loop
, false}, {14, unrolled_loop
, false},
1246 {-1, rep_prefix_4_byte
, false}}},
1247 {libcall
, {{16, loop
, false}, {8192, rep_prefix_8_byte
, false},
1248 {-1, libcall
, false}}}},
1249 {{libcall
, {{8, loop
, false}, {24, unrolled_loop
, false},
1250 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1251 {libcall
, {{48, unrolled_loop
, false}, {8192, rep_prefix_8_byte
, false},
1252 {-1, libcall
, false}}}},
1253 4, /* scalar_stmt_cost. */
1254 2, /* scalar load_cost. */
1255 2, /* scalar_store_cost. */
1256 6, /* vec_stmt_cost. */
1257 0, /* vec_to_scalar_cost. */
1258 2, /* scalar_to_vec_cost. */
1259 2, /* vec_align_load_cost. */
1260 2, /* vec_unalign_load_cost. */
1261 2, /* vec_store_cost. */
1262 2, /* cond_taken_branch_cost. */
1263 1, /* cond_not_taken_branch_cost. */
1267 struct processor_costs pentium4_cost
= {
1268 COSTS_N_INSNS (1), /* cost of an add instruction */
1269 COSTS_N_INSNS (3), /* cost of a lea instruction */
1270 COSTS_N_INSNS (4), /* variable shift costs */
1271 COSTS_N_INSNS (4), /* constant shift costs */
1272 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1273 COSTS_N_INSNS (15), /* HI */
1274 COSTS_N_INSNS (15), /* SI */
1275 COSTS_N_INSNS (15), /* DI */
1276 COSTS_N_INSNS (15)}, /* other */
1277 0, /* cost of multiply per each bit set */
1278 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1279 COSTS_N_INSNS (56), /* HI */
1280 COSTS_N_INSNS (56), /* SI */
1281 COSTS_N_INSNS (56), /* DI */
1282 COSTS_N_INSNS (56)}, /* other */
1283 COSTS_N_INSNS (1), /* cost of movsx */
1284 COSTS_N_INSNS (1), /* cost of movzx */
1285 16, /* "large" insn */
1287 2, /* cost for loading QImode using movzbl */
1288 {4, 5, 4}, /* cost of loading integer registers
1289 in QImode, HImode and SImode.
1290 Relative to reg-reg move (2). */
1291 {2, 3, 2}, /* cost of storing integer registers */
1292 2, /* cost of reg,reg fld/fst */
1293 {2, 2, 6}, /* cost of loading fp registers
1294 in SFmode, DFmode and XFmode */
1295 {4, 4, 6}, /* cost of storing fp registers
1296 in SFmode, DFmode and XFmode */
1297 2, /* cost of moving MMX register */
1298 {2, 2}, /* cost of loading MMX registers
1299 in SImode and DImode */
1300 {2, 2}, /* cost of storing MMX registers
1301 in SImode and DImode */
1302 12, /* cost of moving SSE register */
1303 {12, 12, 12}, /* cost of loading SSE registers
1304 in SImode, DImode and TImode */
1305 {2, 2, 8}, /* cost of storing SSE registers
1306 in SImode, DImode and TImode */
1307 10, /* MMX or SSE register to integer */
1308 8, /* size of l1 cache. */
1309 256, /* size of l2 cache. */
1310 64, /* size of prefetch block */
1311 6, /* number of parallel prefetches */
1312 2, /* Branch cost */
1313 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1314 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1315 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1316 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1317 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1318 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1319 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1320 DUMMY_STRINGOP_ALGS
},
1321 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1322 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1323 DUMMY_STRINGOP_ALGS
},
1324 1, /* scalar_stmt_cost. */
1325 1, /* scalar load_cost. */
1326 1, /* scalar_store_cost. */
1327 1, /* vec_stmt_cost. */
1328 1, /* vec_to_scalar_cost. */
1329 1, /* scalar_to_vec_cost. */
1330 1, /* vec_align_load_cost. */
1331 2, /* vec_unalign_load_cost. */
1332 1, /* vec_store_cost. */
1333 3, /* cond_taken_branch_cost. */
1334 1, /* cond_not_taken_branch_cost. */
1338 struct processor_costs nocona_cost
= {
1339 COSTS_N_INSNS (1), /* cost of an add instruction */
1340 COSTS_N_INSNS (1), /* cost of a lea instruction */
1341 COSTS_N_INSNS (1), /* variable shift costs */
1342 COSTS_N_INSNS (1), /* constant shift costs */
1343 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1344 COSTS_N_INSNS (10), /* HI */
1345 COSTS_N_INSNS (10), /* SI */
1346 COSTS_N_INSNS (10), /* DI */
1347 COSTS_N_INSNS (10)}, /* other */
1348 0, /* cost of multiply per each bit set */
1349 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1350 COSTS_N_INSNS (66), /* HI */
1351 COSTS_N_INSNS (66), /* SI */
1352 COSTS_N_INSNS (66), /* DI */
1353 COSTS_N_INSNS (66)}, /* other */
1354 COSTS_N_INSNS (1), /* cost of movsx */
1355 COSTS_N_INSNS (1), /* cost of movzx */
1356 16, /* "large" insn */
1357 17, /* MOVE_RATIO */
1358 4, /* cost for loading QImode using movzbl */
1359 {4, 4, 4}, /* cost of loading integer registers
1360 in QImode, HImode and SImode.
1361 Relative to reg-reg move (2). */
1362 {4, 4, 4}, /* cost of storing integer registers */
1363 3, /* cost of reg,reg fld/fst */
1364 {12, 12, 12}, /* cost of loading fp registers
1365 in SFmode, DFmode and XFmode */
1366 {4, 4, 4}, /* cost of storing fp registers
1367 in SFmode, DFmode and XFmode */
1368 6, /* cost of moving MMX register */
1369 {12, 12}, /* cost of loading MMX registers
1370 in SImode and DImode */
1371 {12, 12}, /* cost of storing MMX registers
1372 in SImode and DImode */
1373 6, /* cost of moving SSE register */
1374 {12, 12, 12}, /* cost of loading SSE registers
1375 in SImode, DImode and TImode */
1376 {12, 12, 12}, /* cost of storing SSE registers
1377 in SImode, DImode and TImode */
1378 8, /* MMX or SSE register to integer */
1379 8, /* size of l1 cache. */
1380 1024, /* size of l2 cache. */
1381 128, /* size of prefetch block */
1382 8, /* number of parallel prefetches */
1383 1, /* Branch cost */
1384 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1385 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1386 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1387 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1388 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1389 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1390 {{libcall
, {{12, loop_1_byte
, false}, {-1, rep_prefix_4_byte
, false}}},
1391 {libcall
, {{32, loop
, false}, {20000, rep_prefix_8_byte
, false},
1392 {100000, unrolled_loop
, false}, {-1, libcall
, false}}}},
1393 {{libcall
, {{6, loop_1_byte
, false}, {48, loop
, false},
1394 {20480, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1395 {libcall
, {{24, loop
, false}, {64, unrolled_loop
, false},
1396 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1397 1, /* scalar_stmt_cost. */
1398 1, /* scalar load_cost. */
1399 1, /* scalar_store_cost. */
1400 1, /* vec_stmt_cost. */
1401 1, /* vec_to_scalar_cost. */
1402 1, /* scalar_to_vec_cost. */
1403 1, /* vec_align_load_cost. */
1404 2, /* vec_unalign_load_cost. */
1405 1, /* vec_store_cost. */
1406 3, /* cond_taken_branch_cost. */
1407 1, /* cond_not_taken_branch_cost. */
1411 struct processor_costs atom_cost
= {
1412 COSTS_N_INSNS (1), /* cost of an add instruction */
1413 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1414 COSTS_N_INSNS (1), /* variable shift costs */
1415 COSTS_N_INSNS (1), /* constant shift costs */
1416 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1417 COSTS_N_INSNS (4), /* HI */
1418 COSTS_N_INSNS (3), /* SI */
1419 COSTS_N_INSNS (4), /* DI */
1420 COSTS_N_INSNS (2)}, /* other */
1421 0, /* cost of multiply per each bit set */
1422 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1423 COSTS_N_INSNS (26), /* HI */
1424 COSTS_N_INSNS (42), /* SI */
1425 COSTS_N_INSNS (74), /* DI */
1426 COSTS_N_INSNS (74)}, /* other */
1427 COSTS_N_INSNS (1), /* cost of movsx */
1428 COSTS_N_INSNS (1), /* cost of movzx */
1429 8, /* "large" insn */
1430 17, /* MOVE_RATIO */
1431 4, /* cost for loading QImode using movzbl */
1432 {4, 4, 4}, /* cost of loading integer registers
1433 in QImode, HImode and SImode.
1434 Relative to reg-reg move (2). */
1435 {4, 4, 4}, /* cost of storing integer registers */
1436 4, /* cost of reg,reg fld/fst */
1437 {12, 12, 12}, /* cost of loading fp registers
1438 in SFmode, DFmode and XFmode */
1439 {6, 6, 8}, /* cost of storing fp registers
1440 in SFmode, DFmode and XFmode */
1441 2, /* cost of moving MMX register */
1442 {8, 8}, /* cost of loading MMX registers
1443 in SImode and DImode */
1444 {8, 8}, /* cost of storing MMX registers
1445 in SImode and DImode */
1446 2, /* cost of moving SSE register */
1447 {8, 8, 8}, /* cost of loading SSE registers
1448 in SImode, DImode and TImode */
1449 {8, 8, 8}, /* cost of storing SSE registers
1450 in SImode, DImode and TImode */
1451 5, /* MMX or SSE register to integer */
1452 32, /* size of l1 cache. */
1453 256, /* size of l2 cache. */
1454 64, /* size of prefetch block */
1455 6, /* number of parallel prefetches */
1456 3, /* Branch cost */
1457 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1458 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1459 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1460 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1461 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1462 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1463 {{libcall
, {{11, loop
, false}, {-1, rep_prefix_4_byte
, false}}},
1464 {libcall
, {{32, loop
, false}, {64, rep_prefix_4_byte
, false},
1465 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1466 {{libcall
, {{8, loop
, false}, {15, unrolled_loop
, false},
1467 {2048, rep_prefix_4_byte
, false}, {-1, libcall
, false}}},
1468 {libcall
, {{24, loop
, false}, {32, unrolled_loop
, false},
1469 {8192, rep_prefix_8_byte
, false}, {-1, libcall
, false}}}},
1470 1, /* scalar_stmt_cost. */
1471 1, /* scalar load_cost. */
1472 1, /* scalar_store_cost. */
1473 1, /* vec_stmt_cost. */
1474 1, /* vec_to_scalar_cost. */
1475 1, /* scalar_to_vec_cost. */
1476 1, /* vec_align_load_cost. */
1477 2, /* vec_unalign_load_cost. */
1478 1, /* vec_store_cost. */
1479 3, /* cond_taken_branch_cost. */
1480 1, /* cond_not_taken_branch_cost. */
1483 /* Generic64 should produce code tuned for Nocona and K8. */
1485 struct processor_costs generic64_cost
= {
1486 COSTS_N_INSNS (1), /* cost of an add instruction */
1487 /* On all chips taken into consideration lea is 2 cycles and more. With
1488 this cost however our current implementation of synth_mult results in
1489 use of unnecessary temporary registers causing regression on several
1490 SPECfp benchmarks. */
1491 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1492 COSTS_N_INSNS (1), /* variable shift costs */
1493 COSTS_N_INSNS (1), /* constant shift costs */
1494 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1495 COSTS_N_INSNS (4), /* HI */
1496 COSTS_N_INSNS (3), /* SI */
1497 COSTS_N_INSNS (4), /* DI */
1498 COSTS_N_INSNS (2)}, /* other */
1499 0, /* cost of multiply per each bit set */
1500 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1501 COSTS_N_INSNS (26), /* HI */
1502 COSTS_N_INSNS (42), /* SI */
1503 COSTS_N_INSNS (74), /* DI */
1504 COSTS_N_INSNS (74)}, /* other */
1505 COSTS_N_INSNS (1), /* cost of movsx */
1506 COSTS_N_INSNS (1), /* cost of movzx */
1507 8, /* "large" insn */
1508 17, /* MOVE_RATIO */
1509 4, /* cost for loading QImode using movzbl */
1510 {4, 4, 4}, /* cost of loading integer registers
1511 in QImode, HImode and SImode.
1512 Relative to reg-reg move (2). */
1513 {4, 4, 4}, /* cost of storing integer registers */
1514 4, /* cost of reg,reg fld/fst */
1515 {12, 12, 12}, /* cost of loading fp registers
1516 in SFmode, DFmode and XFmode */
1517 {6, 6, 8}, /* cost of storing fp registers
1518 in SFmode, DFmode and XFmode */
1519 2, /* cost of moving MMX register */
1520 {8, 8}, /* cost of loading MMX registers
1521 in SImode and DImode */
1522 {8, 8}, /* cost of storing MMX registers
1523 in SImode and DImode */
1524 2, /* cost of moving SSE register */
1525 {8, 8, 8}, /* cost of loading SSE registers
1526 in SImode, DImode and TImode */
1527 {8, 8, 8}, /* cost of storing SSE registers
1528 in SImode, DImode and TImode */
1529 5, /* MMX or SSE register to integer */
1530 32, /* size of l1 cache. */
1531 512, /* size of l2 cache. */
1532 64, /* size of prefetch block */
1533 6, /* number of parallel prefetches */
1534 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1535 value is increased to perhaps more appropriate value of 5. */
1536 3, /* Branch cost */
1537 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1538 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1539 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1540 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1541 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1542 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1543 {DUMMY_STRINGOP_ALGS
,
1544 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1545 {-1, libcall
, false}}}},
1546 {DUMMY_STRINGOP_ALGS
,
1547 {libcall
, {{32, loop
, false}, {8192, rep_prefix_8_byte
, false},
1548 {-1, libcall
, false}}}},
1549 1, /* scalar_stmt_cost. */
1550 1, /* scalar load_cost. */
1551 1, /* scalar_store_cost. */
1552 1, /* vec_stmt_cost. */
1553 1, /* vec_to_scalar_cost. */
1554 1, /* scalar_to_vec_cost. */
1555 1, /* vec_align_load_cost. */
1556 2, /* vec_unalign_load_cost. */
1557 1, /* vec_store_cost. */
1558 3, /* cond_taken_branch_cost. */
1559 1, /* cond_not_taken_branch_cost. */
/* core_cost should produce code tuned for Core family of CPUs.  */
1564 struct processor_costs core_cost
= {
1565 COSTS_N_INSNS (1), /* cost of an add instruction */
1566 /* On all chips taken into consideration lea is 2 cycles and more. With
1567 this cost however our current implementation of synth_mult results in
1568 use of unnecessary temporary registers causing regression on several
1569 SPECfp benchmarks. */
1570 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1571 COSTS_N_INSNS (1), /* variable shift costs */
1572 COSTS_N_INSNS (1), /* constant shift costs */
1573 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1574 COSTS_N_INSNS (4), /* HI */
1575 COSTS_N_INSNS (3), /* SI */
1576 COSTS_N_INSNS (4), /* DI */
1577 COSTS_N_INSNS (2)}, /* other */
1578 0, /* cost of multiply per each bit set */
1579 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1580 COSTS_N_INSNS (26), /* HI */
1581 COSTS_N_INSNS (42), /* SI */
1582 COSTS_N_INSNS (74), /* DI */
1583 COSTS_N_INSNS (74)}, /* other */
1584 COSTS_N_INSNS (1), /* cost of movsx */
1585 COSTS_N_INSNS (1), /* cost of movzx */
1586 8, /* "large" insn */
1587 17, /* MOVE_RATIO */
1588 4, /* cost for loading QImode using movzbl */
1589 {4, 4, 4}, /* cost of loading integer registers
1590 in QImode, HImode and SImode.
1591 Relative to reg-reg move (2). */
1592 {4, 4, 4}, /* cost of storing integer registers */
1593 4, /* cost of reg,reg fld/fst */
1594 {12, 12, 12}, /* cost of loading fp registers
1595 in SFmode, DFmode and XFmode */
1596 {6, 6, 8}, /* cost of storing fp registers
1597 in SFmode, DFmode and XFmode */
1598 2, /* cost of moving MMX register */
1599 {8, 8}, /* cost of loading MMX registers
1600 in SImode and DImode */
1601 {8, 8}, /* cost of storing MMX registers
1602 in SImode and DImode */
1603 2, /* cost of moving SSE register */
1604 {8, 8, 8}, /* cost of loading SSE registers
1605 in SImode, DImode and TImode */
1606 {8, 8, 8}, /* cost of storing SSE registers
1607 in SImode, DImode and TImode */
1608 5, /* MMX or SSE register to integer */
1609 64, /* size of l1 cache. */
1610 512, /* size of l2 cache. */
1611 64, /* size of prefetch block */
1612 6, /* number of parallel prefetches */
1613 /* FIXME perhaps more appropriate value is 5. */
1614 3, /* Branch cost */
1615 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1616 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1617 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1618 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1619 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1620 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1621 {{libcall
, {{1024, rep_prefix_4_byte
, true}, {-1, libcall
, false}}},
1622 {libcall
, {{24, loop
, true}, {128, rep_prefix_8_byte
, true},
1623 {-1, libcall
, false}}}},
1624 {{libcall
, {{6, loop_1_byte
, true},
1626 {8192, rep_prefix_4_byte
, true},
1627 {-1, libcall
, false}}},
1628 {libcall
, {{24, loop
, true}, {512, rep_prefix_8_byte
, true},
1629 {-1, libcall
, false}}}},
1630 1, /* scalar_stmt_cost. */
1631 1, /* scalar load_cost. */
1632 1, /* scalar_store_cost. */
1633 1, /* vec_stmt_cost. */
1634 1, /* vec_to_scalar_cost. */
1635 1, /* scalar_to_vec_cost. */
1636 1, /* vec_align_load_cost. */
1637 2, /* vec_unalign_load_cost. */
1638 1, /* vec_store_cost. */
1639 3, /* cond_taken_branch_cost. */
1640 1, /* cond_not_taken_branch_cost. */
1643 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1646 struct processor_costs generic32_cost
= {
1647 COSTS_N_INSNS (1), /* cost of an add instruction */
1648 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1649 COSTS_N_INSNS (1), /* variable shift costs */
1650 COSTS_N_INSNS (1), /* constant shift costs */
1651 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1652 COSTS_N_INSNS (4), /* HI */
1653 COSTS_N_INSNS (3), /* SI */
1654 COSTS_N_INSNS (4), /* DI */
1655 COSTS_N_INSNS (2)}, /* other */
1656 0, /* cost of multiply per each bit set */
1657 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1658 COSTS_N_INSNS (26), /* HI */
1659 COSTS_N_INSNS (42), /* SI */
1660 COSTS_N_INSNS (74), /* DI */
1661 COSTS_N_INSNS (74)}, /* other */
1662 COSTS_N_INSNS (1), /* cost of movsx */
1663 COSTS_N_INSNS (1), /* cost of movzx */
1664 8, /* "large" insn */
1665 17, /* MOVE_RATIO */
1666 4, /* cost for loading QImode using movzbl */
1667 {4, 4, 4}, /* cost of loading integer registers
1668 in QImode, HImode and SImode.
1669 Relative to reg-reg move (2). */
1670 {4, 4, 4}, /* cost of storing integer registers */
1671 4, /* cost of reg,reg fld/fst */
1672 {12, 12, 12}, /* cost of loading fp registers
1673 in SFmode, DFmode and XFmode */
1674 {6, 6, 8}, /* cost of storing fp registers
1675 in SFmode, DFmode and XFmode */
1676 2, /* cost of moving MMX register */
1677 {8, 8}, /* cost of loading MMX registers
1678 in SImode and DImode */
1679 {8, 8}, /* cost of storing MMX registers
1680 in SImode and DImode */
1681 2, /* cost of moving SSE register */
1682 {8, 8, 8}, /* cost of loading SSE registers
1683 in SImode, DImode and TImode */
1684 {8, 8, 8}, /* cost of storing SSE registers
1685 in SImode, DImode and TImode */
1686 5, /* MMX or SSE register to integer */
1687 32, /* size of l1 cache. */
1688 256, /* size of l2 cache. */
1689 64, /* size of prefetch block */
1690 6, /* number of parallel prefetches */
1691 3, /* Branch cost */
1692 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1693 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1694 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1695 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1696 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1697 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1698 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1699 {-1, libcall
, false}}},
1700 DUMMY_STRINGOP_ALGS
},
1701 {{libcall
, {{32, loop
, false}, {8192, rep_prefix_4_byte
, false},
1702 {-1, libcall
, false}}},
1703 DUMMY_STRINGOP_ALGS
},
1704 1, /* scalar_stmt_cost. */
1705 1, /* scalar load_cost. */
1706 1, /* scalar_store_cost. */
1707 1, /* vec_stmt_cost. */
1708 1, /* vec_to_scalar_cost. */
1709 1, /* scalar_to_vec_cost. */
1710 1, /* vec_align_load_cost. */
1711 2, /* vec_unalign_load_cost. */
1712 1, /* vec_store_cost. */
1713 3, /* cond_taken_branch_cost. */
1714 1, /* cond_not_taken_branch_cost. */
1717 /* Set by -mtune. */
1718 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1720 /* Set by -mtune or -Os. */
1721 const struct processor_costs
*ix86_cost
= &pentium_cost
;
1723 /* Processor feature/optimization bitmasks. */
/* Each m_* is a single-bit mask keyed by its PROCESSOR_* enum value;
   tuning tables OR these together to say which processors a given
   feature/tuning applies to.  Intel processors first.  */
1724 #define m_386 (1<<PROCESSOR_I386)
1725 #define m_486 (1<<PROCESSOR_I486)
1726 #define m_PENT (1<<PROCESSOR_PENTIUM)
1727 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1728 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1729 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1730 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1731 #define m_CORE2 (1<<PROCESSOR_CORE2)
1732 #define m_COREI7 (1<<PROCESSOR_COREI7)
1733 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1734 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
1735 #define m_ATOM (1<<PROCESSOR_ATOM)
/* AMD and AMD-compatible processors.  */
1737 #define m_GEODE (1<<PROCESSOR_GEODE)
1738 #define m_K6 (1<<PROCESSOR_K6)
1739 #define m_K6_GEODE (m_K6 | m_GEODE)
1740 #define m_K8 (1<<PROCESSOR_K8)
1741 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1742 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1743 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1744 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1745 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1746 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1747 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1748 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1749 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
1750 #define m_BTVER (m_BTVER1 | m_BTVER2)
1751 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
/* "Generic" tuning targets (-mtune=generic, 32- and 64-bit).  */
1753 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1754 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1756 /* Generic instruction choice should be common subset of supported CPUs
1757 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1758 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1760 /* Feature tests against the various tunings. */
/* Indexed by X86_TUNE_*; nonzero when the feature is enabled for the
   selected tuning.  Derived from initial_ix86_tune_features (below)
   and the processor mask of the -mtune target.  */
1761 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
1763 /* Feature tests against the various tunings used to create ix86_tune_features
1764 based on the processor mask. */
1765 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
1766 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1767 negatively, so enabling for Generic64 seems like good code size
1768 tradeoff. We can't enable it for 32bit generic because it does not
1769 work well with PPro base chips. */
1770 m_386
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
1772 /* X86_TUNE_PUSH_MEMORY */
1773 m_386
| m_P4_NOCONA
| m_CORE_ALL
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1775 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1778 /* X86_TUNE_UNROLL_STRLEN */
1779 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE_ALL
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
1781 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1782 on simulation result. But after P4 was made, no performance benefit
1783 was observed with branch hints. It also increases the code size.
1784 As a result, icc never generates branch hints. */
1787 /* X86_TUNE_DOUBLE_WITH_ADD */
1790 /* X86_TUNE_USE_SAHF */
1791 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
1793 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1794 partial dependencies. */
1795 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1797 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1798 register stalls on Generic32 compilation setting as well. However
1799 in current implementation the partial register stalls are not eliminated
1800 very well - they can be introduced via subregs synthesized by combine
1801 and can happen in caller/callee saving sequences. Because this option
1802 pays back little on PPro based chips and is in conflict with partial reg
1803 dependencies used by Athlon/P4 based chips, it is better to leave it off
1804 for generic32 for now. */
1807 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1808 m_CORE_ALL
| m_GENERIC
,
1810 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1811 * on 16-bit immediate moves into memory on Core2 and Corei7. */
1812 m_CORE_ALL
| m_GENERIC
,
1814 /* X86_TUNE_USE_HIMODE_FIOP */
1815 m_386
| m_486
| m_K6_GEODE
,
1817 /* X86_TUNE_USE_SIMODE_FIOP */
1818 ~(m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
1820 /* X86_TUNE_USE_MOV0 */
1823 /* X86_TUNE_USE_CLTD */
1824 ~(m_PENT
| m_ATOM
| m_K6
),
1826 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1829 /* X86_TUNE_SPLIT_LONG_MOVES */
1832 /* X86_TUNE_READ_MODIFY_WRITE */
1835 /* X86_TUNE_READ_MODIFY */
1838 /* X86_TUNE_PROMOTE_QIMODE */
1839 m_386
| m_486
| m_PENT
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1841 /* X86_TUNE_FAST_PREFIX */
1842 ~(m_386
| m_486
| m_PENT
),
1844 /* X86_TUNE_SINGLE_STRINGOP */
1845 m_386
| m_P4_NOCONA
,
1847 /* X86_TUNE_QIMODE_MATH */
1850 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1851 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1852 might be considered for Generic32 if our scheme for avoiding partial
1853 stalls was more effective. */
1856 /* X86_TUNE_PROMOTE_QI_REGS */
1859 /* X86_TUNE_PROMOTE_HI_REGS */
1862 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1863 over esp addition. */
1864 m_386
| m_486
| m_PENT
| m_PPRO
,
1866 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1867 over esp addition. */
1870 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1871 over esp subtraction. */
1872 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
1874 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1875 over esp subtraction. */
1876 m_PENT
| m_K6_GEODE
,
1878 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1879 for DFmode copies */
1880 ~(m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
1882 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1883 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1885 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1886 conflict here in between PPro/Pentium4 based chips that thread 128bit
1887 SSE registers as single units versus K8 based chips that divide SSE
1888 registers to two 64bit halves. This knob promotes all store destinations
1889 to be 128bit to allow register renaming on 128bit SSE units, but usually
1890 results in one extra microop on 64bit SSE units. Experimental results
1891 shows that disabling this option on P4 brings over 20% SPECfp regression,
1892 while enabling it on K8 brings roughly 2.4% regression that can be partly
1893 masked by careful scheduling of moves. */
1894 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
1896 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1897 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
1899 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1902 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1905 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1906 are resolved on SSE register parts instead of whole registers, so we may
1907 maintain just lower part of scalar values in proper format leaving the
1908 upper part undefined. */
1911 /* X86_TUNE_SSE_TYPELESS_STORES */
1914 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1915 m_PPRO
| m_P4_NOCONA
,
1917 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1918 m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1920 /* X86_TUNE_PROLOGUE_USING_MOVE */
1921 m_PPRO
| m_ATHLON_K8
,
1923 /* X86_TUNE_EPILOGUE_USING_MOVE */
1924 m_PPRO
| m_ATHLON_K8
,
1926 /* X86_TUNE_SHIFT1 */
1929 /* X86_TUNE_USE_FFREEP */
1932 /* X86_TUNE_INTER_UNIT_MOVES */
1933 ~(m_AMD_MULTIPLE
| m_GENERIC
),
1935 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1936 ~(m_AMDFAM10
| m_BDVER
),
1938 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1939 than 4 branch instructions in the 16 byte window. */
1940 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1942 /* X86_TUNE_SCHEDULE */
1943 m_PENT
| m_PPRO
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
1945 /* X86_TUNE_USE_BT */
1946 m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
1948 /* X86_TUNE_USE_INCDEC */
1949 ~(m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_GENERIC
),
1951 /* X86_TUNE_PAD_RETURNS */
1952 m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
,
1954 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
1957 /* X86_TUNE_EXT_80387_CONSTANTS */
1958 m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
1960 /* X86_TUNE_AVOID_VECTOR_DECODE */
1961 m_CORE_ALL
| m_K8
| m_GENERIC64
,
1963 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1964 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1967 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1968 vector path on AMD machines. */
1969 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1971 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1973 m_CORE_ALL
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
1975 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1979 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1980 but one byte longer. */
1983 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1984 operand that cannot be represented using a modRM byte. The XOR
1985 replacement is long decoded, so this split helps here as well. */
1988 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1990 m_CORE_ALL
| m_AMDFAM10
| m_GENERIC
,
1992 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1993 from integer to FP. */
1996 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1997 with a subsequent conditional jump instruction into a single
1998 compare-and-branch uop. */
2001 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2002 will impact LEA instruction selection. */
2005 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2009 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2010 at -O3. For the moment, the prefetching seems badly tuned for Intel
2012 m_K6_GEODE
| m_AMD_MULTIPLE
,
2014 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2015 the auto-vectorizer. */
2018 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2019 during reassociation of integer computation. */
2022 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2023 during reassociation of fp computation. */
2026 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2027 regs instead of memory. */
2030 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2031 a conditional move. */
2035 /* Feature tests against the various architecture variations. */
/* Indexed by X86_ARCH_*; derived from initial_ix86_arch_features
   (below) based on the processor mask of the -march target.  */
2036 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2038 /* Feature tests against the various architecture variations, used to create
2039 ix86_arch_features based on the processor mask. */
2040 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2041 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2042 ~(m_386
| m_486
| m_PENT
| m_K6
),
2044 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2047 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2050 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2053 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors for which -maccumulate-outgoing-args is enabled by
   default.  */
2057 static const unsigned int x86_accumulate_outgoing_args
2058 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE_ALL
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors on which the "fancy" 387 math instructions are always
   usable.  NOTE(review): exact semantics inferred from the name --
   confirm at the point where this mask is tested.  */
2060 static const unsigned int x86_arch_always_fancy_math_387
2061 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE_ALL
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
/* Processors where -mavx256-split-unaligned-load (split unaligned
   256-bit AVX loads into two 128-bit halves) is the default.  */
2063 static const unsigned int x86_avx256_split_unaligned_load
2064 = m_COREI7
| m_GENERIC
;
/* Likewise for -mavx256-split-unaligned-store.  */
2066 static const unsigned int x86_avx256_split_unaligned_store
2067 = m_COREI7
| m_BDVER
| m_GENERIC
;
2069 /* In case the average insn count for single function invocation is
2070 lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
2072 #define FAST_PROLOGUE_INSN_COUNT 20
2074 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* Initializer macros presumably come from the target header (i386.h)
   -- TODO confirm.  Arrays are indexed by hard register number.  */
2075 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2076 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2077 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2079 /* Array of the smallest class containing reg number REGNO, indexed by
2080 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2082 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2084 /* ax, dx, cx, bx */
2085 AREG
, DREG
, CREG
, BREG
,
2086 /* si, di, bp, sp */
2087 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2089 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2090 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2093 /* flags, fpsr, fpcr, frame */
2094 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2096 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2099 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2102 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2103 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2104 /* SSE REX registers */
2105 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2109 /* The "default" register map used in 32bit mode. */
2111 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2113 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2114 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2115 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2116 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2117 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2118 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2119 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2122 /* The "default" register map used in 64bit mode. */
2124 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2126 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2127 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2128 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2129 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2130 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2131 8,9,10,11,12,13,14,15, /* extended integer registers */
2132 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2135 /* Define the register numbers to be used in Dwarf debugging information.
2136 The SVR4 reference port C compiler uses the following register numbers
2137 in its Dwarf output code:
2138 0 for %eax (gcc regno = 0)
2139 1 for %ecx (gcc regno = 2)
2140 2 for %edx (gcc regno = 1)
2141 3 for %ebx (gcc regno = 3)
2142 4 for %esp (gcc regno = 7)
2143 5 for %ebp (gcc regno = 6)
2144 6 for %esi (gcc regno = 4)
2145 7 for %edi (gcc regno = 5)
2146 The following three DWARF register numbers are never generated by
2147 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2148 believes these numbers have these meanings.
2149 8 for %eip (no gcc equivalent)
2150 9 for %eflags (gcc regno = 17)
2151 10 for %trapno (no gcc equivalent)
2152 It is not at all clear how we should number the FP stack registers
2153 for the x86 architecture. If the version of SDB on x86/svr4 were
2154 a bit less brain dead with respect to floating-point then we would
2155 have a precedent to follow with respect to DWARF register numbers
2156 for x86 FP registers, but the SDB on x86/svr4 is so completely
2157 broken with respect to FP registers that it is hardly worth thinking
2158 of it as something to strive for compatibility with.
2159 The version of x86/svr4 SDB I have at the moment does (partially)
2160 seem to believe that DWARF register number 11 is associated with
2161 the x86 register %st(0), but that's about all. Higher DWARF
2162 register numbers don't seem to be associated with anything in
2163 particular, and even for DWARF regno 11, SDB only seems to under-
2164 stand that it should say that a variable lives in %st(0) (when
2165 asked via an `=' command) if we said it was in DWARF regno 11,
2166 but SDB still prints garbage when asked for the value of the
2167 variable in question (via a `/' command).
2168 (Also note that the labels SDB prints for various FP stack regs
2169 when doing an `x' command are all wrong.)
2170 Note that these problems generally don't affect the native SVR4
2171 C compiler because it doesn't allow the use of -O with -g and
2172 because when it is *not* optimizing, it allocates a memory
2173 location for each floating-point variable, and the memory
2174 location is what gets described in the DWARF AT_location
2175 attribute for the variable in question.
2176 Regardless of the severe mental illness of the x86/svr4 SDB, we
2177 do something sensible here and we use the following DWARF
2178 register numbers. Note that these are all stack-top-relative
2180 11 for %st(0) (gcc regno = 8)
2181 12 for %st(1) (gcc regno = 9)
2182 13 for %st(2) (gcc regno = 10)
2183 14 for %st(3) (gcc regno = 11)
2184 15 for %st(4) (gcc regno = 12)
2185 16 for %st(5) (gcc regno = 13)
2186 17 for %st(6) (gcc regno = 14)
2187 18 for %st(7) (gcc regno = 15)
2189 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2191 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2192 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2193 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2194 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2195 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2196 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2197 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2200 /* Define parameter passing and return registers. */
2202 static int const x86_64_int_parameter_registers
[6] =
2204 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2207 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2209 CX_REG
, DX_REG
, R8_REG
, R9_REG
2212 static int const x86_64_int_return_registers
[4] =
2214 AX_REG
, DX_REG
, DI_REG
, SI_REG
2217 /* Define the structure for the machine field in struct function. */
2219 struct GTY(()) stack_local_entry
{
2220 unsigned short mode
;
2223 struct stack_local_entry
*next
;
2226 /* Structure describing stack frame layout.
2227 Stack grows downward:
2233 saved static chain if ix86_static_chain_on_stack
2235 saved frame pointer if frame_pointer_needed
2236 <- HARD_FRAME_POINTER
2242 <- sse_regs_save_offset
2245 [va_arg registers] |
2249 [padding2] | = to_allocate
2258 int outgoing_arguments_size
;
2260 /* The offsets relative to ARG_POINTER. */
2261 HOST_WIDE_INT frame_pointer_offset
;
2262 HOST_WIDE_INT hard_frame_pointer_offset
;
2263 HOST_WIDE_INT stack_pointer_offset
;
2264 HOST_WIDE_INT hfp_save_offset
;
2265 HOST_WIDE_INT reg_save_offset
;
2266 HOST_WIDE_INT sse_reg_save_offset
;
2268 /* When save_regs_using_mov is set, emit prologue using
2269 move instead of push instructions. */
2270 bool save_regs_using_mov
;
2273 /* Which cpu are we scheduling for. */
2274 enum attr_cpu ix86_schedule
;
2276 /* Which cpu are we optimizing for. */
2277 enum processor_type ix86_tune
;
2279 /* Which instruction set architecture to use. */
2280 enum processor_type ix86_arch
;
2282 /* True if processor has SSE prefetch instruction. */
2283 unsigned char x86_prefetch_sse
;
2285 /* -mstackrealign option */
/* Name of the attribute used to force argument-pointer realignment;
   kept in one file-scope string so the spelling exists exactly once.  */
2286 static const char ix86_force_align_arg_pointer_string
[]
2287 = "force_align_arg_pointer";
/* RTL pattern-generator hooks.  NOTE(review): these are presumably
   assigned to the SImode or DImode gen_* pattern variants depending on
   the target word size -- confirm where they are set (option override
   code is outside this chunk).  */
2289 static rtx (*ix86_gen_leave
) (void);
2290 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2291 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2292 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2293 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2294 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2295 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2296 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2297 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2298 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2299 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2300 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2302 /* Preferred alignment for stack boundary in bits. */
2303 unsigned int ix86_preferred_stack_boundary
;
2305 /* Alignment for incoming stack boundary in bits specified at
   the command line.  */
2307 static unsigned int ix86_user_incoming_stack_boundary
;
2309 /* Default alignment for incoming stack boundary in bits. */
2310 static unsigned int ix86_default_incoming_stack_boundary
;
2312 /* Alignment for incoming stack boundary in bits. */
2313 unsigned int ix86_incoming_stack_boundary
;
2315 /* Calling abi specific va_list type nodes. */
/* GTY(()) marks these as roots for the garbage-collected tree
   allocator.  */
2316 static GTY(()) tree sysv_va_list_type_node
;
2317 static GTY(()) tree ms_va_list_type_node
;
2319 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2320 char internal_label_prefix
[16];
2321 int internal_label_prefix_len
;
2323 /* Fence to use after loop using movnt. */
2326 /* Register class used for passing given 64bit part of the argument.
2327 These represent classes as documented by the PS ABI, with the exception
2328 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2329 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2331 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2332 whenever possible (upper half does contain padding). */
2333 enum x86_64_reg_class
2336 X86_64_INTEGER_CLASS
,
2337 X86_64_INTEGERSI_CLASS
,
2344 X86_64_COMPLEX_X87_CLASS
,
2348 #define MAX_CLASSES 4
2350 /* Table of constants used by fldpi, fldln2, etc.... */
2351 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
/* Nonzero once ext_80387_constants_table has been filled in.  */
2352 static bool ext_80387_constants_init
= 0;
2355 static struct machine_function
* ix86_init_machine_status (void);
2356 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2357 static bool ix86_function_value_regno_p (const unsigned int);
2358 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2360 static rtx
ix86_static_chain (const_tree
, bool);
2361 static int ix86_function_regparm (const_tree
, const_tree
);
2362 static void ix86_compute_frame_layout (struct ix86_frame
*);
2363 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2365 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2366 static tree
ix86_canonical_va_list_type (tree
);
2367 static void predict_jump (int);
2368 static unsigned int split_stack_prologue_scratch_regno (void);
2369 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
2371 enum ix86_function_specific_strings
2373 IX86_FUNCTION_SPECIFIC_ARCH
,
2374 IX86_FUNCTION_SPECIFIC_TUNE
,
2375 IX86_FUNCTION_SPECIFIC_MAX
2378 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2379 const char *, enum fpmath_unit
, bool);
2380 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2381 static void ix86_function_specific_save (struct cl_target_option
*);
2382 static void ix86_function_specific_restore (struct cl_target_option
*);
2383 static void ix86_function_specific_print (FILE *, int,
2384 struct cl_target_option
*);
2385 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2386 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2387 struct gcc_options
*);
2388 static bool ix86_can_inline_p (tree
, tree
);
2389 static void ix86_set_current_function (tree
);
2390 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2392 static enum calling_abi
ix86_function_abi (const_tree
);
2395 #ifndef SUBTARGET32_DEFAULT_CPU
2396 #define SUBTARGET32_DEFAULT_CPU "i386"
2399 /* Whether -mtune= or -march= were specified on the command line. */
2400 static int ix86_tune_defaulted
;
2401 static int ix86_arch_specified
;
2403 /* Vectorization library interface and handlers. */
/* Dispatch pointer selected by -mveclibabi=; the two handlers below
   map builtins to SVML- or ACML-style vectorized math routines.  */
2404 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2406 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2407 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
2409 /* Processor target table, indexed by processor number */
2412 const struct processor_costs
*cost
; /* Processor costs */
2413 const int align_loop
; /* Default alignments. */
2414 const int align_loop_max_skip
;
2415 const int align_jump
;
2416 const int align_jump_max_skip
;
2417 const int align_func
;
2420 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2422 {&i386_cost
, 4, 3, 4, 3, 4},
2423 {&i486_cost
, 16, 15, 16, 15, 16},
2424 {&pentium_cost
, 16, 7, 16, 7, 16},
2425 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2426 {&geode_cost
, 0, 0, 0, 0, 0},
2427 {&k6_cost
, 32, 7, 32, 7, 32},
2428 {&athlon_cost
, 16, 7, 16, 7, 16},
2429 {&pentium4_cost
, 0, 0, 0, 0, 0},
2430 {&k8_cost
, 16, 7, 16, 7, 16},
2431 {&nocona_cost
, 0, 0, 0, 0, 0},
2433 {&core_cost
, 16, 10, 16, 10, 16},
2435 {&core_cost
, 16, 10, 16, 10, 16},
2437 {&core_cost
, 16, 10, 16, 10, 16},
2438 {&generic32_cost
, 16, 7, 16, 7, 16},
2439 {&generic64_cost
, 16, 10, 16, 10, 16},
2440 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2441 {&bdver1_cost
, 32, 24, 32, 7, 32},
2442 {&bdver2_cost
, 32, 24, 32, 7, 32},
2443 {&bdver3_cost
, 32, 24, 32, 7, 32},
2444 {&btver1_cost
, 32, 24, 32, 7, 32},
2445 {&btver2_cost
, 32, 24, 32, 7, 32},
2446 {&atom_cost
, 16, 15, 16, 7, 16}
2449 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2483 gate_insert_vzeroupper (void)
2485 return TARGET_VZEROUPPER
;
2489 rest_of_handle_insert_vzeroupper (void)
2493 /* vzeroupper instructions are inserted immediately after reload to
2494 account for possible spills from 256bit registers. The pass
2495 reuses mode switching infrastructure by re-running mode insertion
2496 pass, so disable entities that have already been processed. */
2497 for (i
= 0; i
< MAX_386_ENTITIES
; i
++)
2498 ix86_optimize_mode_switching
[i
] = 0;
2500 ix86_optimize_mode_switching
[AVX_U128
] = 1;
2502 /* Call optimize_mode_switching. */
2503 pass_mode_switching
.pass
.execute ();
2507 struct rtl_opt_pass pass_insert_vzeroupper
=
2511 "vzeroupper", /* name */
2512 OPTGROUP_NONE
, /* optinfo_flags */
2513 gate_insert_vzeroupper
, /* gate */
2514 rest_of_handle_insert_vzeroupper
, /* execute */
2517 0, /* static_pass_number */
2518 TV_NONE
, /* tv_id */
2519 0, /* properties_required */
2520 0, /* properties_provided */
2521 0, /* properties_destroyed */
2522 0, /* todo_flags_start */
2523 TODO_df_finish
| TODO_verify_rtl_sharing
|
2524 0, /* todo_flags_finish */
2528 /* Return true if a red-zone is in use. */
2531 ix86_using_red_zone (void)
2533 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2536 /* Return a string that documents the current -m options. The caller is
2537 responsible for freeing the string. */
2540 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2541 const char *tune
, enum fpmath_unit fpmath
,
2544 struct ix86_target_opts
2546 const char *option
; /* option string */
2547 HOST_WIDE_INT mask
; /* isa mask options */
2550 /* This table is ordered so that options like -msse4.2 that imply
2551 preceding options while match those first. */
2552 static struct ix86_target_opts isa_opts
[] =
2554 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2555 { "-mfma", OPTION_MASK_ISA_FMA
},
2556 { "-mxop", OPTION_MASK_ISA_XOP
},
2557 { "-mlwp", OPTION_MASK_ISA_LWP
},
2558 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2559 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2560 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2561 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2562 { "-msse3", OPTION_MASK_ISA_SSE3
},
2563 { "-msse2", OPTION_MASK_ISA_SSE2
},
2564 { "-msse", OPTION_MASK_ISA_SSE
},
2565 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2566 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2567 { "-mmmx", OPTION_MASK_ISA_MMX
},
2568 { "-mabm", OPTION_MASK_ISA_ABM
},
2569 { "-mbmi", OPTION_MASK_ISA_BMI
},
2570 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2571 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2572 { "-mhle", OPTION_MASK_ISA_HLE
},
2573 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2574 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2575 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2576 { "-madx", OPTION_MASK_ISA_ADX
},
2577 { "-mtbm", OPTION_MASK_ISA_TBM
},
2578 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2579 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2580 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2581 { "-maes", OPTION_MASK_ISA_AES
},
2582 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2583 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2584 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2585 { "-mf16c", OPTION_MASK_ISA_F16C
},
2586 { "-mrtm", OPTION_MASK_ISA_RTM
},
2587 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2588 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2592 static struct ix86_target_opts flag_opts
[] =
2594 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2595 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2596 { "-m80387", MASK_80387
},
2597 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2598 { "-malign-double", MASK_ALIGN_DOUBLE
},
2599 { "-mcld", MASK_CLD
},
2600 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2601 { "-mieee-fp", MASK_IEEE_FP
},
2602 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2603 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2604 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2605 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2606 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2607 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2608 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2609 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2610 { "-mrecip", MASK_RECIP
},
2611 { "-mrtd", MASK_RTD
},
2612 { "-msseregparm", MASK_SSEREGPARM
},
2613 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2614 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2615 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2616 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2617 { "-mvzeroupper", MASK_VZEROUPPER
},
2618 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2619 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2620 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2623 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2626 char target_other
[40];
2636 memset (opts
, '\0', sizeof (opts
));
2638 /* Add -march= option. */
2641 opts
[num
][0] = "-march=";
2642 opts
[num
++][1] = arch
;
2645 /* Add -mtune= option. */
2648 opts
[num
][0] = "-mtune=";
2649 opts
[num
++][1] = tune
;
2652 /* Add -m32/-m64/-mx32. */
2653 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2655 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2659 isa
&= ~ (OPTION_MASK_ISA_64BIT
2660 | OPTION_MASK_ABI_64
2661 | OPTION_MASK_ABI_X32
);
2665 opts
[num
++][0] = abi
;
2667 /* Pick out the options in isa options. */
2668 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2670 if ((isa
& isa_opts
[i
].mask
) != 0)
2672 opts
[num
++][0] = isa_opts
[i
].option
;
2673 isa
&= ~ isa_opts
[i
].mask
;
2677 if (isa
&& add_nl_p
)
2679 opts
[num
++][0] = isa_other
;
2680 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2684 /* Add flag options. */
2685 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2687 if ((flags
& flag_opts
[i
].mask
) != 0)
2689 opts
[num
++][0] = flag_opts
[i
].option
;
2690 flags
&= ~ flag_opts
[i
].mask
;
2694 if (flags
&& add_nl_p
)
2696 opts
[num
++][0] = target_other
;
2697 sprintf (target_other
, "(other flags: %#x)", flags
);
2700 /* Add -fpmath= option. */
2703 opts
[num
][0] = "-mfpmath=";
2704 switch ((int) fpmath
)
2707 opts
[num
++][1] = "387";
2711 opts
[num
++][1] = "sse";
2714 case FPMATH_387
| FPMATH_SSE
:
2715 opts
[num
++][1] = "sse+387";
2727 gcc_assert (num
< ARRAY_SIZE (opts
));
2729 /* Size the string. */
2731 sep_len
= (add_nl_p
) ? 3 : 1;
2732 for (i
= 0; i
< num
; i
++)
2735 for (j
= 0; j
< 2; j
++)
2737 len
+= strlen (opts
[i
][j
]);
2740 /* Build the string. */
2741 ret
= ptr
= (char *) xmalloc (len
);
2744 for (i
= 0; i
< num
; i
++)
2748 for (j
= 0; j
< 2; j
++)
2749 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2756 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2764 for (j
= 0; j
< 2; j
++)
2767 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2769 line_len
+= len2
[j
];
2774 gcc_assert (ret
+ len
>= ptr
);
/* Return true if profiling code should be emitted before the
   prologue rather than after it; this is the case when flag_fentry
   (-mfentry) is in effect.  Return false otherwise.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   -m options to stderr.  Not referenced from within the compiler
   itself, hence ATTRIBUTE_UNUSED.
   NOTE(review): the final argument line of the ix86_target_string
   call and the if/free scaffolding were dropped by the extraction
   and are restored from the standard GCC 4.8 definition — confirm
   against the original file.  */
void ATTRIBUTE_UNUSED
ix86_debug_options (void)
{
  /* ix86_target_string returns a freshly xmalloc'ed string (or NULL
     when there is nothing to report); we own and must free it.  */
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
2808 /* Override various settings based on options. If MAIN_ARGS_P, the
2809 options are from the command line, otherwise they are from
2813 ix86_option_override_internal (bool main_args_p
)
2816 unsigned int ix86_arch_mask
, ix86_tune_mask
;
2817 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
2822 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2823 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2824 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2825 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2826 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2827 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2828 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2829 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2830 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2831 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2832 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2833 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2834 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2835 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2836 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2837 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2838 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2839 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2840 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2841 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2842 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2843 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2844 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2845 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2846 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2847 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2848 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2849 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2850 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2851 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2852 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2853 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2854 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2855 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2856 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2857 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2858 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2859 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2860 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2861 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2863 /* if this reaches 64, need to widen struct pta flags below */
2867 const char *const name
; /* processor name or nickname. */
2868 const enum processor_type processor
;
2869 const enum attr_cpu schedule
;
2870 const unsigned HOST_WIDE_INT flags
;
2872 const processor_alias_table
[] =
2874 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2875 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2876 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2877 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2878 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2879 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2880 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2881 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2882 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2883 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2884 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2885 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
2886 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2887 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2888 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2889 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
2890 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2891 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2892 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2893 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2894 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2895 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
2896 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2897 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
2898 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2899 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2900 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
2901 {"core2", PROCESSOR_CORE2
, CPU_CORE2
,
2902 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2903 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
2904 {"corei7", PROCESSOR_COREI7
, CPU_COREI7
,
2905 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2906 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
2907 {"corei7-avx", PROCESSOR_COREI7
, CPU_COREI7
,
2908 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2909 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2910 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
2911 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2912 {"core-avx-i", PROCESSOR_COREI7
, CPU_COREI7
,
2913 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2914 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2915 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2916 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
2917 {"core-avx2", PROCESSOR_HASWELL
, CPU_COREI7
,
2918 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2919 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2920 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2921 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2922 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
2924 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2925 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2926 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
2927 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2928 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2929 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2930 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2931 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2932 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
2933 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2934 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
2935 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
2936 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
2937 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2938 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2939 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2940 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
2941 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
2942 {"x86-64", PROCESSOR_K8
, CPU_K8
,
2943 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
2944 {"k8", PROCESSOR_K8
, CPU_K8
,
2945 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2946 | PTA_SSE2
| PTA_NO_SAHF
},
2947 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
2948 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2949 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2950 {"opteron", PROCESSOR_K8
, CPU_K8
,
2951 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2952 | PTA_SSE2
| PTA_NO_SAHF
},
2953 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
2954 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2955 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2956 {"athlon64", PROCESSOR_K8
, CPU_K8
,
2957 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2958 | PTA_SSE2
| PTA_NO_SAHF
},
2959 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
2960 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2961 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
2962 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
2963 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2964 | PTA_SSE2
| PTA_NO_SAHF
},
2965 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2966 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2967 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2968 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
2969 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
2970 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
2971 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2974 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2975 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2976 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
2977 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2978 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2979 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
2980 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2981 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
},
2982 {"bdver3", PROCESSOR_BDVER3
, CPU_BDVER3
,
2983 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2984 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
2985 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2986 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
2987 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
2989 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
2990 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2991 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
2992 | PTA_FXSR
| PTA_XSAVE
},
2993 {"btver2", PROCESSOR_BTVER2
, CPU_BTVER2
,
2994 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2995 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
2996 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
2997 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
2998 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3000 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3001 PTA_HLE
/* flags are only used for -march switch. */ },
3002 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3004 | PTA_HLE
/* flags are only used for -march switch. */ },
3007 /* -mrecip options. */
3010 const char *string
; /* option name */
3011 unsigned int mask
; /* mask bits to set */
3013 const recip_options
[] =
3015 { "all", RECIP_MASK_ALL
},
3016 { "none", RECIP_MASK_NONE
},
3017 { "div", RECIP_MASK_DIV
},
3018 { "sqrt", RECIP_MASK_SQRT
},
3019 { "vec-div", RECIP_MASK_VEC_DIV
},
3020 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3023 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3025 /* Set up prefix/suffix so the error messages refer to either the command
3026 line argument, or the attribute(target). */
3035 prefix
= "option(\"";
3040 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3041 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3042 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3043 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3044 #ifdef TARGET_BI_ARCH
3047 #if TARGET_BI_ARCH == 1
3048 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3049 is on and OPTION_MASK_ABI_X32 is off. We turn off
3050 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3053 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3055 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3056 on and OPTION_MASK_ABI_64 is off. We turn off
3057 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3060 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3067 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3068 OPTION_MASK_ABI_64 for TARGET_X32. */
3069 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3070 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3072 else if (TARGET_LP64
)
3074 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3075 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3076 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3077 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3080 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3081 SUBTARGET_OVERRIDE_OPTIONS
;
3084 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3085 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3088 /* -fPIC is the default for x86_64. */
3089 if (TARGET_MACHO
&& TARGET_64BIT
)
3092 /* Need to check -mtune=generic first. */
3093 if (ix86_tune_string
)
3095 if (!strcmp (ix86_tune_string
, "generic")
3096 || !strcmp (ix86_tune_string
, "i686")
3097 /* As special support for cross compilers we read -mtune=native
3098 as -mtune=generic. With native compilers we won't see the
3099 -mtune=native, as it was changed by the driver. */
3100 || !strcmp (ix86_tune_string
, "native"))
3103 ix86_tune_string
= "generic64";
3105 ix86_tune_string
= "generic32";
3107 /* If this call is for setting the option attribute, allow the
3108 generic32/generic64 that was previously set. */
3109 else if (!main_args_p
3110 && (!strcmp (ix86_tune_string
, "generic32")
3111 || !strcmp (ix86_tune_string
, "generic64")))
3113 else if (!strncmp (ix86_tune_string
, "generic", 7))
3114 error ("bad value (%s) for %stune=%s %s",
3115 ix86_tune_string
, prefix
, suffix
, sw
);
3116 else if (!strcmp (ix86_tune_string
, "x86-64"))
3117 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3118 "%stune=k8%s or %stune=generic%s instead as appropriate",
3119 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3123 if (ix86_arch_string
)
3124 ix86_tune_string
= ix86_arch_string
;
3125 if (!ix86_tune_string
)
3127 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3128 ix86_tune_defaulted
= 1;
3131 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3132 need to use a sensible tune option. */
3133 if (!strcmp (ix86_tune_string
, "generic")
3134 || !strcmp (ix86_tune_string
, "x86-64")
3135 || !strcmp (ix86_tune_string
, "i686"))
3138 ix86_tune_string
= "generic64";
3140 ix86_tune_string
= "generic32";
3144 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3146 /* rep; movq isn't available in 32-bit code. */
3147 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3148 ix86_stringop_alg
= no_stringop
;
3151 if (!ix86_arch_string
)
3152 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3154 ix86_arch_specified
= 1;
3156 if (global_options_set
.x_ix86_pmode
)
3158 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3159 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3160 error ("address mode %qs not supported in the %s bit mode",
3161 TARGET_64BIT
? "short" : "long",
3162 TARGET_64BIT
? "64" : "32");
3165 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3167 if (!global_options_set
.x_ix86_abi
)
3168 ix86_abi
= DEFAULT_ABI
;
3170 if (global_options_set
.x_ix86_cmodel
)
3172 switch (ix86_cmodel
)
3177 ix86_cmodel
= CM_SMALL_PIC
;
3179 error ("code model %qs not supported in the %s bit mode",
3186 ix86_cmodel
= CM_MEDIUM_PIC
;
3188 error ("code model %qs not supported in the %s bit mode",
3190 else if (TARGET_X32
)
3191 error ("code model %qs not supported in x32 mode",
3198 ix86_cmodel
= CM_LARGE_PIC
;
3200 error ("code model %qs not supported in the %s bit mode",
3202 else if (TARGET_X32
)
3203 error ("code model %qs not supported in x32 mode",
3209 error ("code model %s does not support PIC mode", "32");
3211 error ("code model %qs not supported in the %s bit mode",
3218 error ("code model %s does not support PIC mode", "kernel");
3219 ix86_cmodel
= CM_32
;
3222 error ("code model %qs not supported in the %s bit mode",
3232 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3233 use of rip-relative addressing. This eliminates fixups that
3234 would otherwise be needed if this object is to be placed in a
3235 DLL, and is essentially just as efficient as direct addressing. */
3236 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3237 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3238 else if (TARGET_64BIT
&& TARGET_RDOS
)
3239 ix86_cmodel
= CM_MEDIUM_PIC
, flag_pic
= 1;
3240 else if (TARGET_64BIT
)
3241 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3243 ix86_cmodel
= CM_32
;
3245 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3247 error ("-masm=intel not supported in this configuration");
3248 ix86_asm_dialect
= ASM_ATT
;
3250 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3251 sorry ("%i-bit mode not compiled in",
3252 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3254 for (i
= 0; i
< pta_size
; i
++)
3255 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3257 ix86_schedule
= processor_alias_table
[i
].schedule
;
3258 ix86_arch
= processor_alias_table
[i
].processor
;
3259 /* Default cpu tuning to the architecture. */
3260 ix86_tune
= ix86_arch
;
3262 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3263 error ("CPU you selected does not support x86-64 "
3266 if (processor_alias_table
[i
].flags
& PTA_MMX
3267 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3268 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3269 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3270 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3271 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3272 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3273 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3274 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3275 if (processor_alias_table
[i
].flags
& PTA_SSE
3276 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3277 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3278 if (processor_alias_table
[i
].flags
& PTA_SSE2
3279 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3280 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3281 if (processor_alias_table
[i
].flags
& PTA_SSE3
3282 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3283 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3284 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3285 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3286 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3287 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3288 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3289 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3290 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3291 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3292 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3293 if (processor_alias_table
[i
].flags
& PTA_AVX
3294 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3295 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3296 if (processor_alias_table
[i
].flags
& PTA_AVX2
3297 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3298 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3299 if (processor_alias_table
[i
].flags
& PTA_FMA
3300 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3301 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3302 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3303 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3304 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3305 if (processor_alias_table
[i
].flags
& PTA_FMA4
3306 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3307 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3308 if (processor_alias_table
[i
].flags
& PTA_XOP
3309 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3310 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3311 if (processor_alias_table
[i
].flags
& PTA_LWP
3312 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3313 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3314 if (processor_alias_table
[i
].flags
& PTA_ABM
3315 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3316 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3317 if (processor_alias_table
[i
].flags
& PTA_BMI
3318 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3319 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3320 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3321 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3322 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3323 if (processor_alias_table
[i
].flags
& PTA_TBM
3324 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3325 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3326 if (processor_alias_table
[i
].flags
& PTA_BMI2
3327 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3328 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3329 if (processor_alias_table
[i
].flags
& PTA_CX16
3330 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3331 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3332 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3333 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3334 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3335 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3336 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3337 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3338 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3339 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3340 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3341 if (processor_alias_table
[i
].flags
& PTA_AES
3342 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3343 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3344 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3345 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3346 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3347 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3348 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3349 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3350 if (processor_alias_table
[i
].flags
& PTA_RDRND
3351 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3352 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3353 if (processor_alias_table
[i
].flags
& PTA_F16C
3354 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3355 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3356 if (processor_alias_table
[i
].flags
& PTA_RTM
3357 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3358 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3359 if (processor_alias_table
[i
].flags
& PTA_HLE
3360 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3361 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3362 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3363 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3364 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3365 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3366 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3367 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3368 if (processor_alias_table
[i
].flags
& PTA_ADX
3369 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3370 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3371 if (processor_alias_table
[i
].flags
& PTA_FXSR
3372 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3373 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3374 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3375 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3376 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3377 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3378 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3379 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3380 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3381 x86_prefetch_sse
= true;
3386 if (!strcmp (ix86_arch_string
, "generic"))
3387 error ("generic CPU can be used only for %stune=%s %s",
3388 prefix
, suffix
, sw
);
3389 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3390 error ("bad value (%s) for %sarch=%s %s",
3391 ix86_arch_string
, prefix
, suffix
, sw
);
3393 ix86_arch_mask
= 1u << ix86_arch
;
3394 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3395 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3397 for (i
= 0; i
< pta_size
; i
++)
3398 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3400 ix86_schedule
= processor_alias_table
[i
].schedule
;
3401 ix86_tune
= processor_alias_table
[i
].processor
;
3404 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3406 if (ix86_tune_defaulted
)
3408 ix86_tune_string
= "x86-64";
3409 for (i
= 0; i
< pta_size
; i
++)
3410 if (! strcmp (ix86_tune_string
,
3411 processor_alias_table
[i
].name
))
3413 ix86_schedule
= processor_alias_table
[i
].schedule
;
3414 ix86_tune
= processor_alias_table
[i
].processor
;
3417 error ("CPU you selected does not support x86-64 "
3423 /* Adjust tuning when compiling for 32-bit ABI. */
3426 case PROCESSOR_GENERIC64
:
3427 ix86_tune
= PROCESSOR_GENERIC32
;
3428 ix86_schedule
= CPU_PENTIUMPRO
;
3435 /* Intel CPUs have always interpreted SSE prefetch instructions as
3436 NOPs; so, we can enable SSE prefetch instructions even when
3437 -mtune (rather than -march) points us to a processor that has them.
3438 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3439 higher processors. */
3441 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3442 x86_prefetch_sse
= true;
3446 if (ix86_tune_specified
&& i
== pta_size
)
3447 error ("bad value (%s) for %stune=%s %s",
3448 ix86_tune_string
, prefix
, suffix
, sw
);
3450 ix86_tune_mask
= 1u << ix86_tune
;
3451 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3452 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3454 #ifndef USE_IX86_FRAME_POINTER
3455 #define USE_IX86_FRAME_POINTER 0
3458 #ifndef USE_X86_64_FRAME_POINTER
3459 #define USE_X86_64_FRAME_POINTER 0
3462 /* Set the default values for switches whose default depends on TARGET_64BIT
3463 in case they weren't overwritten by command line options. */
3466 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3467 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3468 if (flag_asynchronous_unwind_tables
== 2)
3469 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3470 if (flag_pcc_struct_return
== 2)
3471 flag_pcc_struct_return
= 0;
3475 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3476 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3477 if (flag_asynchronous_unwind_tables
== 2)
3478 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3479 if (flag_pcc_struct_return
== 2)
3480 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3483 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3485 ix86_cost
= &ix86_size_cost
;
3487 ix86_cost
= ix86_tune_cost
;
3489 /* Arrange to set up i386_stack_locals for all functions. */
3490 init_machine_status
= ix86_init_machine_status
;
3492 /* Validate -mregparm= value. */
3493 if (global_options_set
.x_ix86_regparm
)
3496 warning (0, "-mregparm is ignored in 64-bit mode");
3497 if (ix86_regparm
> REGPARM_MAX
)
3499 error ("-mregparm=%d is not between 0 and %d",
3500 ix86_regparm
, REGPARM_MAX
);
3505 ix86_regparm
= REGPARM_MAX
;
3507 /* Default align_* from the processor table. */
3508 if (align_loops
== 0)
3510 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3511 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3513 if (align_jumps
== 0)
3515 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3516 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3518 if (align_functions
== 0)
3520 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3523 /* Provide default for -mbranch-cost= value. */
3524 if (!global_options_set
.x_ix86_branch_cost
)
3525 ix86_branch_cost
= ix86_cost
->branch_cost
;
3529 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3531 /* Enable by default the SSE and MMX builtins. Do allow the user to
3532 explicitly disable any of these. In particular, disabling SSE and
3533 MMX for kernel code is extremely useful. */
3534 if (!ix86_arch_specified
)
3536 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3537 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3540 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3544 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3546 if (!ix86_arch_specified
)
3548 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3550 /* i386 ABI does not specify red zone. It still makes sense to use it
3551 when programmer takes care to stack from being destroyed. */
3552 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3553 target_flags
|= MASK_NO_RED_ZONE
;
3556 /* Keep nonleaf frame pointers. */
3557 if (flag_omit_frame_pointer
)
3558 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3559 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3560 flag_omit_frame_pointer
= 1;
3562 /* If we're doing fast math, we don't care about comparison order
3563 wrt NaNs. This lets us use a shorter comparison sequence. */
3564 if (flag_finite_math_only
)
3565 target_flags
&= ~MASK_IEEE_FP
;
3567 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3568 since the insns won't need emulation. */
3569 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3570 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3572 /* Likewise, if the target doesn't have a 387, or we've specified
3573 software floating point, don't use 387 inline intrinsics. */
3575 target_flags
|= MASK_NO_FANCY_MATH_387
;
3577 /* Turn on MMX builtins for -msse. */
3579 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3581 /* Enable SSE prefetch. */
3582 if (TARGET_SSE
|| TARGET_PRFCHW
)
3583 x86_prefetch_sse
= true;
3585 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3586 if (TARGET_SSE4_2
|| TARGET_ABM
)
3587 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3589 /* Turn on lzcnt instruction for -mabm. */
3591 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3593 /* Validate -mpreferred-stack-boundary= value or default it to
3594 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3595 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3596 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3598 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3599 int max
= (TARGET_SEH
? 4 : 12);
3601 if (ix86_preferred_stack_boundary_arg
< min
3602 || ix86_preferred_stack_boundary_arg
> max
)
3605 error ("-mpreferred-stack-boundary is not supported "
3608 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3609 ix86_preferred_stack_boundary_arg
, min
, max
);
3612 ix86_preferred_stack_boundary
3613 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3616 /* Set the default value for -mstackrealign. */
3617 if (ix86_force_align_arg_pointer
== -1)
3618 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3620 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3622 /* Validate -mincoming-stack-boundary= value or default it to
3623 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3624 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3625 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3627 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3628 || ix86_incoming_stack_boundary_arg
> 12)
3629 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3630 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3633 ix86_user_incoming_stack_boundary
3634 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3635 ix86_incoming_stack_boundary
3636 = ix86_user_incoming_stack_boundary
;
3640 /* Accept -msseregparm only if at least SSE support is enabled. */
3641 if (TARGET_SSEREGPARM
3643 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3645 if (global_options_set
.x_ix86_fpmath
)
3647 if (ix86_fpmath
& FPMATH_SSE
)
3651 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3652 ix86_fpmath
= FPMATH_387
;
3654 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3656 warning (0, "387 instruction set disabled, using SSE arithmetics");
3657 ix86_fpmath
= FPMATH_SSE
;
3662 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3664 /* If the i387 is disabled, then do not return values in it. */
3666 target_flags
&= ~MASK_FLOAT_RETURNS
;
3668 /* Use external vectorized library in vectorizing intrinsics. */
3669 if (global_options_set
.x_ix86_veclibabi_type
)
3670 switch (ix86_veclibabi_type
)
3672 case ix86_veclibabi_type_svml
:
3673 ix86_veclib_handler
= ix86_veclibabi_svml
;
3676 case ix86_veclibabi_type_acml
:
3677 ix86_veclib_handler
= ix86_veclibabi_acml
;
3684 if ((!USE_IX86_FRAME_POINTER
3685 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3686 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3688 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3690 /* ??? Unwind info is not correct around the CFG unless either a frame
3691 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3692 unwind info generation to be aware of the CFG and propagating states
3694 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3695 || flag_exceptions
|| flag_non_call_exceptions
)
3696 && flag_omit_frame_pointer
3697 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3699 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3700 warning (0, "unwind tables currently require either a frame pointer "
3701 "or %saccumulate-outgoing-args%s for correctness",
3703 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3706 /* If stack probes are required, the space used for large function
3707 arguments on the stack must also be probed, so enable
3708 -maccumulate-outgoing-args so this happens in the prologue. */
3709 if (TARGET_STACK_PROBE
3710 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3712 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3713 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3714 "for correctness", prefix
, suffix
);
3715 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3718 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3721 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3722 p
= strchr (internal_label_prefix
, 'X');
3723 internal_label_prefix_len
= p
- internal_label_prefix
;
3727 /* When scheduling description is not available, disable scheduler pass
3728 so it won't slow down the compilation and make x87 code slower. */
3729 if (!TARGET_SCHEDULE
)
3730 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3732 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3733 ix86_tune_cost
->simultaneous_prefetches
,
3734 global_options
.x_param_values
,
3735 global_options_set
.x_param_values
);
3736 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3737 ix86_tune_cost
->prefetch_block
,
3738 global_options
.x_param_values
,
3739 global_options_set
.x_param_values
);
3740 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3741 ix86_tune_cost
->l1_cache_size
,
3742 global_options
.x_param_values
,
3743 global_options_set
.x_param_values
);
3744 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3745 ix86_tune_cost
->l2_cache_size
,
3746 global_options
.x_param_values
,
3747 global_options_set
.x_param_values
);
3749 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3750 if (flag_prefetch_loop_arrays
< 0
3752 && (optimize
>= 3 || flag_profile_use
)
3753 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3754 flag_prefetch_loop_arrays
= 1;
3756 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3757 can be optimized to ap = __builtin_next_arg (0). */
3758 if (!TARGET_64BIT
&& !flag_split_stack
)
3759 targetm
.expand_builtin_va_start
= NULL
;
3763 ix86_gen_leave
= gen_leave_rex64
;
3764 if (Pmode
== DImode
)
3766 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3767 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3768 ix86_gen_tls_local_dynamic_base_64
3769 = gen_tls_local_dynamic_base_64_di
;
3773 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3774 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3775 ix86_gen_tls_local_dynamic_base_64
3776 = gen_tls_local_dynamic_base_64_si
;
3781 ix86_gen_leave
= gen_leave
;
3782 ix86_gen_monitor
= gen_sse3_monitor
;
3785 if (Pmode
== DImode
)
3787 ix86_gen_add3
= gen_adddi3
;
3788 ix86_gen_sub3
= gen_subdi3
;
3789 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3790 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3791 ix86_gen_andsp
= gen_anddi3
;
3792 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3793 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3794 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3798 ix86_gen_add3
= gen_addsi3
;
3799 ix86_gen_sub3
= gen_subsi3
;
3800 ix86_gen_sub3_carry
= gen_subsi3_carry
;
3801 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
3802 ix86_gen_andsp
= gen_andsi3
;
3803 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
3804 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
3805 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
3809 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3811 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
3814 if (!TARGET_64BIT
&& flag_pic
)
3816 if (flag_fentry
> 0)
3817 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3821 else if (TARGET_SEH
)
3823 if (flag_fentry
== 0)
3824 sorry ("-mno-fentry isn%'t compatible with SEH");
3827 else if (flag_fentry
< 0)
3829 #if defined(PROFILE_BEFORE_PROLOGUE)
3838 /* When not optimize for size, enable vzeroupper optimization for
3839 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3840 AVX unaligned load/store. */
3843 if (flag_expensive_optimizations
3844 && !(target_flags_explicit
& MASK_VZEROUPPER
))
3845 target_flags
|= MASK_VZEROUPPER
;
3846 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
3847 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
3848 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
3849 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
3850 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
3851 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
3852 /* Enable 128-bit AVX instruction generation
3853 for the auto-vectorizer. */
3854 if (TARGET_AVX128_OPTIMAL
3855 && !(target_flags_explicit
& MASK_PREFER_AVX128
))
3856 target_flags
|= MASK_PREFER_AVX128
;
3861 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3862 target_flags
&= ~MASK_VZEROUPPER
;
3865 if (ix86_recip_name
)
3867 char *p
= ASTRDUP (ix86_recip_name
);
3869 unsigned int mask
, i
;
3872 while ((q
= strtok (p
, ",")) != NULL
)
3883 if (!strcmp (q
, "default"))
3884 mask
= RECIP_MASK_ALL
;
3887 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
3888 if (!strcmp (q
, recip_options
[i
].string
))
3890 mask
= recip_options
[i
].mask
;
3894 if (i
== ARRAY_SIZE (recip_options
))
3896 error ("unknown option for -mrecip=%s", q
);
3898 mask
= RECIP_MASK_NONE
;
3902 recip_mask_explicit
|= mask
;
3904 recip_mask
&= ~mask
;
3911 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
3912 else if (target_flags_explicit
& MASK_RECIP
)
3913 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
3915 /* Default long double to 64-bit for Bionic. */
3916 if (TARGET_HAS_BIONIC
3917 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
3918 target_flags
|= MASK_LONG_DOUBLE_64
;
3920 /* Save the initial options in case the user does function specific
3923 target_option_default_node
= target_option_current_node
3924 = build_target_option_node ();
3927 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3930 ix86_option_override (void)
3932 static struct register_pass_info insert_vzeroupper_info
3933 = { &pass_insert_vzeroupper
.pass
, "reload",
3934 1, PASS_POS_INSERT_AFTER
3937 ix86_option_override_internal (true);
3940 /* This needs to be done at start up. It's convenient to do it here. */
3941 register_pass (&insert_vzeroupper_info
);
3944 /* Update register usage after having seen the compiler flags. */
3947 ix86_conditional_register_usage (void)
3952 /* The PIC register, if it exists, is fixed. */
3953 j
= PIC_OFFSET_TABLE_REGNUM
;
3954 if (j
!= INVALID_REGNUM
)
3955 fixed_regs
[j
] = call_used_regs
[j
] = 1;
3957 /* For 32-bit targets, squash the REX registers. */
3960 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
3961 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3962 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
3963 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3966 /* See the definition of CALL_USED_REGISTERS in i386.h. */
3967 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
3968 : TARGET_64BIT
? (1 << 2)
3971 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
3973 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3975 /* Set/reset conditionally defined registers from
3976 CALL_USED_REGISTERS initializer. */
3977 if (call_used_regs
[i
] > 1)
3978 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
3980 /* Calculate registers of CLOBBERED_REGS register set
3981 as call used registers from GENERAL_REGS register set. */
3982 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
3983 && call_used_regs
[i
])
3984 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
3987 /* If MMX is disabled, squash the registers. */
3989 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3990 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
3991 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3993 /* If SSE is disabled, squash the registers. */
3995 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
3996 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
3997 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
3999 /* If the FPU is disabled, squash the registers. */
4000 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4001 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4002 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4003 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4007 /* Save the current options */
4010 ix86_function_specific_save (struct cl_target_option
*ptr
)
4012 ptr
->arch
= ix86_arch
;
4013 ptr
->schedule
= ix86_schedule
;
4014 ptr
->tune
= ix86_tune
;
4015 ptr
->branch_cost
= ix86_branch_cost
;
4016 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4017 ptr
->arch_specified
= ix86_arch_specified
;
4018 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4019 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4020 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4022 /* The fields are char but the variables are not; make sure the
4023 values fit in the fields. */
4024 gcc_assert (ptr
->arch
== ix86_arch
);
4025 gcc_assert (ptr
->schedule
== ix86_schedule
);
4026 gcc_assert (ptr
->tune
== ix86_tune
);
4027 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4030 /* Restore the current options */
4033 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4035 enum processor_type old_tune
= ix86_tune
;
4036 enum processor_type old_arch
= ix86_arch
;
4037 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4040 ix86_arch
= (enum processor_type
) ptr
->arch
;
4041 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4042 ix86_tune
= (enum processor_type
) ptr
->tune
;
4043 ix86_branch_cost
= ptr
->branch_cost
;
4044 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4045 ix86_arch_specified
= ptr
->arch_specified
;
4046 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4047 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4048 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4050 /* Recreate the arch feature tests if the arch changed */
4051 if (old_arch
!= ix86_arch
)
4053 ix86_arch_mask
= 1u << ix86_arch
;
4054 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4055 ix86_arch_features
[i
]
4056 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4059 /* Recreate the tune optimization tests */
4060 if (old_tune
!= ix86_tune
)
4062 ix86_tune_mask
= 1u << ix86_tune
;
4063 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4064 ix86_tune_features
[i
]
4065 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4069 /* Print the current options */
4072 ix86_function_specific_print (FILE *file
, int indent
,
4073 struct cl_target_option
*ptr
)
4076 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4077 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4079 fprintf (file
, "%*sarch = %d (%s)\n",
4082 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4083 ? cpu_names
[ptr
->arch
]
4086 fprintf (file
, "%*stune = %d (%s)\n",
4089 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4090 ? cpu_names
[ptr
->tune
]
4093 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4097 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4098 free (target_string
);
4103 /* Inner function to process the attribute((target(...))), take an argument and
4104 set the current options from the argument. If we have a list, recursively go
4108 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4109 struct gcc_options
*enum_opts_set
)
4114 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4115 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4116 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4117 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4118 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4134 enum ix86_opt_type type
;
4139 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4140 IX86_ATTR_ISA ("abm", OPT_mabm
),
4141 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4142 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4143 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4144 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4145 IX86_ATTR_ISA ("aes", OPT_maes
),
4146 IX86_ATTR_ISA ("avx", OPT_mavx
),
4147 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4148 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4149 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4150 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4151 IX86_ATTR_ISA ("sse", OPT_msse
),
4152 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4153 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4154 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4155 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4156 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4157 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4158 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4159 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4160 IX86_ATTR_ISA ("fma", OPT_mfma
),
4161 IX86_ATTR_ISA ("xop", OPT_mxop
),
4162 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4163 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4164 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4165 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4166 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4167 IX86_ATTR_ISA ("hle", OPT_mhle
),
4168 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4169 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4170 IX86_ATTR_ISA ("adx", OPT_madx
),
4171 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4172 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4173 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4176 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4178 /* string options */
4179 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4180 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4183 IX86_ATTR_YES ("cld",
4187 IX86_ATTR_NO ("fancy-math-387",
4188 OPT_mfancy_math_387
,
4189 MASK_NO_FANCY_MATH_387
),
4191 IX86_ATTR_YES ("ieee-fp",
4195 IX86_ATTR_YES ("inline-all-stringops",
4196 OPT_minline_all_stringops
,
4197 MASK_INLINE_ALL_STRINGOPS
),
4199 IX86_ATTR_YES ("inline-stringops-dynamically",
4200 OPT_minline_stringops_dynamically
,
4201 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4203 IX86_ATTR_NO ("align-stringops",
4204 OPT_mno_align_stringops
,
4205 MASK_NO_ALIGN_STRINGOPS
),
4207 IX86_ATTR_YES ("recip",
4213 /* If this is a list, recurse to get the options. */
4214 if (TREE_CODE (args
) == TREE_LIST
)
4218 for (; args
; args
= TREE_CHAIN (args
))
4219 if (TREE_VALUE (args
)
4220 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4221 p_strings
, enum_opts_set
))
4227 else if (TREE_CODE (args
) != STRING_CST
)
4229 error ("attribute %<target%> argument not a string");
4233 /* Handle multiple arguments separated by commas. */
4234 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4236 while (next_optstr
&& *next_optstr
!= '\0')
4238 char *p
= next_optstr
;
4240 char *comma
= strchr (next_optstr
, ',');
4241 const char *opt_string
;
4242 size_t len
, opt_len
;
4247 enum ix86_opt_type type
= ix86_opt_unknown
;
4253 len
= comma
- next_optstr
;
4254 next_optstr
= comma
+ 1;
4262 /* Recognize no-xxx. */
4263 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4272 /* Find the option. */
4275 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4277 type
= attrs
[i
].type
;
4278 opt_len
= attrs
[i
].len
;
4279 if (ch
== attrs
[i
].string
[0]
4280 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4283 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4286 mask
= attrs
[i
].mask
;
4287 opt_string
= attrs
[i
].string
;
4292 /* Process the option. */
4295 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4299 else if (type
== ix86_opt_isa
)
4301 struct cl_decoded_option decoded
;
4303 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4304 ix86_handle_option (&global_options
, &global_options_set
,
4305 &decoded
, input_location
);
4308 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4310 if (type
== ix86_opt_no
)
4311 opt_set_p
= !opt_set_p
;
4314 target_flags
|= mask
;
4316 target_flags
&= ~mask
;
4319 else if (type
== ix86_opt_str
)
4323 error ("option(\"%s\") was already specified", opt_string
);
4327 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4330 else if (type
== ix86_opt_enum
)
4335 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4337 set_option (&global_options
, enum_opts_set
, opt
, value
,
4338 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4342 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4354 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4357 ix86_valid_target_attribute_tree (tree args
)
4359 const char *orig_arch_string
= ix86_arch_string
;
4360 const char *orig_tune_string
= ix86_tune_string
;
4361 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4362 int orig_tune_defaulted
= ix86_tune_defaulted
;
4363 int orig_arch_specified
= ix86_arch_specified
;
4364 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4367 struct cl_target_option
*def
4368 = TREE_TARGET_OPTION (target_option_default_node
);
4369 struct gcc_options enum_opts_set
;
4371 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4373 /* Process each of the options on the chain. */
4374 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4376 return error_mark_node
;
4378 /* If the changed options are different from the default, rerun
4379 ix86_option_override_internal, and then save the options away.
4380 The string options are are attribute options, and will be undone
4381 when we copy the save structure. */
4382 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4383 || target_flags
!= def
->x_target_flags
4384 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4385 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4386 || enum_opts_set
.x_ix86_fpmath
)
4388 /* If we are using the default tune= or arch=, undo the string assigned,
4389 and use the default. */
4390 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4391 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4392 else if (!orig_arch_specified
)
4393 ix86_arch_string
= NULL
;
4395 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4396 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4397 else if (orig_tune_defaulted
)
4398 ix86_tune_string
= NULL
;
4400 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4401 if (enum_opts_set
.x_ix86_fpmath
)
4402 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4403 else if (!TARGET_64BIT
&& TARGET_SSE
)
4405 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4406 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4409 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4410 ix86_option_override_internal (false);
4412 /* Add any builtin functions with the new isa if any. */
4413 ix86_add_new_builtins (ix86_isa_flags
);
4415 /* Save the current options unless we are validating options for
4417 t
= build_target_option_node ();
4419 ix86_arch_string
= orig_arch_string
;
4420 ix86_tune_string
= orig_tune_string
;
4421 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4423 /* Free up memory allocated to hold the strings */
4424 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4425 free (option_strings
[i
]);
4431 /* Hook to validate attribute((target("string"))). */
4434 ix86_valid_target_attribute_p (tree fndecl
,
4435 tree
ARG_UNUSED (name
),
4437 int ARG_UNUSED (flags
))
4439 struct cl_target_option cur_target
;
4442 /* attribute((target("default"))) does nothing, beyond
4443 affecting multi-versioning. */
4444 if (TREE_VALUE (args
)
4445 && TREE_CODE (TREE_VALUE (args
)) == STRING_CST
4446 && TREE_CHAIN (args
) == NULL_TREE
4447 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args
)), "default") == 0)
4450 tree old_optimize
= build_optimization_node ();
4451 tree new_target
, new_optimize
;
4452 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4454 /* If the function changed the optimization levels as well as setting target
4455 options, start with the optimizations specified. */
4456 if (func_optimize
&& func_optimize
!= old_optimize
)
4457 cl_optimization_restore (&global_options
,
4458 TREE_OPTIMIZATION (func_optimize
));
4460 /* The target attributes may also change some optimization flags, so update
4461 the optimization options if necessary. */
4462 cl_target_option_save (&cur_target
, &global_options
);
4463 new_target
= ix86_valid_target_attribute_tree (args
);
4464 new_optimize
= build_optimization_node ();
4466 if (new_target
== error_mark_node
)
4469 else if (fndecl
&& new_target
)
4471 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4473 if (old_optimize
!= new_optimize
)
4474 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4477 cl_target_option_restore (&global_options
, &cur_target
);
4479 if (old_optimize
!= new_optimize
)
4480 cl_optimization_restore (&global_options
,
4481 TREE_OPTIMIZATION (old_optimize
));
4487 /* Hook to determine if one function can safely inline another. */
4490 ix86_can_inline_p (tree caller
, tree callee
)
4493 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4494 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4496 /* If callee has no option attributes, then it is ok to inline. */
4500 /* If caller has no option attributes, but callee does then it is not ok to
4502 else if (!caller_tree
)
4507 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4508 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4510 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4511 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4513 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4514 != callee_opts
->x_ix86_isa_flags
)
4517 /* See if we have the same non-isa options. */
4518 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4521 /* See if arch, tune, etc. are the same. */
4522 else if (caller_opts
->arch
!= callee_opts
->arch
)
4525 else if (caller_opts
->tune
!= callee_opts
->tune
)
4528 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4531 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4542 /* Remember the last target of ix86_set_current_function. */
4543 static GTY(()) tree ix86_previous_fndecl
;
4545 /* Establish appropriate back-end context for processing the function
4546 FNDECL. The argument might be NULL to indicate processing at top
4547 level, outside of any function scope. */
4549 ix86_set_current_function (tree fndecl
)
4551 /* Only change the context if the function changes. This hook is called
4552 several times in the course of compiling a function, and we don't want to
4553 slow things down too much or call target_reinit when it isn't safe. */
4554 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4556 tree old_tree
= (ix86_previous_fndecl
4557 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4560 tree new_tree
= (fndecl
4561 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4564 ix86_previous_fndecl
= fndecl
;
4565 if (old_tree
== new_tree
)
4570 cl_target_option_restore (&global_options
,
4571 TREE_TARGET_OPTION (new_tree
));
4577 struct cl_target_option
*def
4578 = TREE_TARGET_OPTION (target_option_current_node
);
4580 cl_target_option_restore (&global_options
, def
);
4587 /* Return true if this goes in large data/bss. */
4590 ix86_in_large_data_p (tree exp
)
4592 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4595 /* Functions are never large data. */
4596 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4599 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4601 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4602 if (strcmp (section
, ".ldata") == 0
4603 || strcmp (section
, ".lbss") == 0)
4609 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4611 /* If this is an incomplete type with size 0, then we can't put it
4612 in data because it might be too big when completed. */
4613 if (!size
|| size
> ix86_section_threshold
)
4620 /* Switch to the appropriate section for output of DECL.
4621 DECL is either a `VAR_DECL' node or a constant of some sort.
4622 RELOC indicates whether forming the initial value of DECL requires
4623 link-time relocations. */
4625 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4629 x86_64_elf_select_section (tree decl
, int reloc
,
4630 unsigned HOST_WIDE_INT align
)
4632 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4633 && ix86_in_large_data_p (decl
))
4635 const char *sname
= NULL
;
4636 unsigned int flags
= SECTION_WRITE
;
4637 switch (categorize_decl_for_section (decl
, reloc
))
4642 case SECCAT_DATA_REL
:
4643 sname
= ".ldata.rel";
4645 case SECCAT_DATA_REL_LOCAL
:
4646 sname
= ".ldata.rel.local";
4648 case SECCAT_DATA_REL_RO
:
4649 sname
= ".ldata.rel.ro";
4651 case SECCAT_DATA_REL_RO_LOCAL
:
4652 sname
= ".ldata.rel.ro.local";
4656 flags
|= SECTION_BSS
;
4659 case SECCAT_RODATA_MERGE_STR
:
4660 case SECCAT_RODATA_MERGE_STR_INIT
:
4661 case SECCAT_RODATA_MERGE_CONST
:
4665 case SECCAT_SRODATA
:
4672 /* We don't split these for medium model. Place them into
4673 default sections and hope for best. */
4678 /* We might get called with string constants, but get_named_section
4679 doesn't like them as they are not DECLs. Also, we need to set
4680 flags in that case. */
4682 return get_section (sname
, flags
, NULL
);
4683 return get_named_section (decl
, sname
, reloc
);
4686 return default_elf_select_section (decl
, reloc
, align
);
4689 /* Build up a unique section name, expressed as a
4690 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4691 RELOC indicates whether the initial value of EXP requires
4692 link-time relocations. */
4694 static void ATTRIBUTE_UNUSED
4695 x86_64_elf_unique_section (tree decl
, int reloc
)
4697 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4698 && ix86_in_large_data_p (decl
))
4700 const char *prefix
= NULL
;
4701 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4702 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4704 switch (categorize_decl_for_section (decl
, reloc
))
4707 case SECCAT_DATA_REL
:
4708 case SECCAT_DATA_REL_LOCAL
:
4709 case SECCAT_DATA_REL_RO
:
4710 case SECCAT_DATA_REL_RO_LOCAL
:
4711 prefix
= one_only
? ".ld" : ".ldata";
4714 prefix
= one_only
? ".lb" : ".lbss";
4717 case SECCAT_RODATA_MERGE_STR
:
4718 case SECCAT_RODATA_MERGE_STR_INIT
:
4719 case SECCAT_RODATA_MERGE_CONST
:
4720 prefix
= one_only
? ".lr" : ".lrodata";
4722 case SECCAT_SRODATA
:
4729 /* We don't split these for medium model. Place them into
4730 default sections and hope for best. */
4735 const char *name
, *linkonce
;
4738 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4739 name
= targetm
.strip_name_encoding (name
);
4741 /* If we're using one_only, then there needs to be a .gnu.linkonce
4742 prefix to the section name. */
4743 linkonce
= one_only
? ".gnu.linkonce" : "";
4745 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4747 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
4751 default_unique_section (decl
, reloc
);
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
4776 /* Utility function for targets to use in implementing
4777 ASM_OUTPUT_ALIGNED_BSS. */
4780 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4781 const char *name
, unsigned HOST_WIDE_INT size
,
4784 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4785 && size
> (unsigned int)ix86_section_threshold
)
4786 switch_to_section (get_named_section (decl
, ".lbss", 0));
4788 switch_to_section (bss_section
);
4789 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4790 #ifdef ASM_DECLARE_OBJECT_NAME
4791 last_assemble_variable_decl
= decl
;
4792 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
4794 /* Standard thing is just output label for the object. */
4795 ASM_OUTPUT_LABEL (file
, name
);
4796 #endif /* ASM_DECLARE_OBJECT_NAME */
4797 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
4800 /* Decide whether we must probe the stack before any space allocation
4801 on this target. It's essentially TARGET_STACK_PROBE except when
4802 -fstack-check causes the stack to be already probed differently. */
4805 ix86_target_stack_probe (void)
4807 /* Do not probe the stack twice if static stack checking is enabled. */
4808 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
4811 return TARGET_STACK_PROBE
;
4814 /* Decide whether we can make a sibling call to a function. DECL is the
4815 declaration of the function being targeted by the call and EXP is the
4816 CALL_EXPR representing the call. */
4819 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
4821 tree type
, decl_or_type
;
4824 /* If we are generating position-independent code, we cannot sibcall
4825 optimize any indirect call, or a direct call to a global function,
4826 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4830 && (!decl
|| !targetm
.binds_local_p (decl
)))
4833 /* If we need to align the outgoing stack, then sibcalling would
4834 unalign the stack, which may break the called function. */
4835 if (ix86_minimum_incoming_stack_boundary (true)
4836 < PREFERRED_STACK_BOUNDARY
)
4841 decl_or_type
= decl
;
4842 type
= TREE_TYPE (decl
);
4846 /* We're looking at the CALL_EXPR, we need the type of the function. */
4847 type
= CALL_EXPR_FN (exp
); /* pointer expression */
4848 type
= TREE_TYPE (type
); /* pointer type */
4849 type
= TREE_TYPE (type
); /* function type */
4850 decl_or_type
= type
;
4853 /* Check that the return value locations are the same. Like
4854 if we are returning floats on the 80387 register stack, we cannot
4855 make a sibcall from a function that doesn't return a float to a
4856 function that does or, conversely, from a function that does return
4857 a float to a function that doesn't; the necessary stack adjustment
4858 would not be executed. This is also the place we notice
4859 differences in the return value ABI. Note that it is ok for one
4860 of the functions to have void return type as long as the return
4861 value of the other is passed in a register. */
4862 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
4863 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4865 if (STACK_REG_P (a
) || STACK_REG_P (b
))
4867 if (!rtx_equal_p (a
, b
))
4870 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4872 else if (!rtx_equal_p (a
, b
))
4877 /* The SYSV ABI has more call-clobbered registers;
4878 disallow sibcalls from MS to SYSV. */
4879 if (cfun
->machine
->call_abi
== MS_ABI
4880 && ix86_function_type_abi (type
) == SYSV_ABI
)
4885 /* If this call is indirect, we'll need to be able to use a
4886 call-clobbered register for the address of the target function.
4887 Make sure that all such registers are not used for passing
4888 parameters. Note that DLLIMPORT functions are indirect. */
4890 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
4892 if (ix86_function_regparm (type
, NULL
) >= 3)
4894 /* ??? Need to count the actual number of registers to be used,
4895 not the possible number of registers. Fix later. */
4901 /* Otherwise okay. That also includes certain types of indirect calls. */
4905 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4906 and "sseregparm" calling convention attributes;
4907 arguments as in struct attribute_spec.handler. */
4910 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4912 int flags ATTRIBUTE_UNUSED
,
4915 if (TREE_CODE (*node
) != FUNCTION_TYPE
4916 && TREE_CODE (*node
) != METHOD_TYPE
4917 && TREE_CODE (*node
) != FIELD_DECL
4918 && TREE_CODE (*node
) != TYPE_DECL
)
4920 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4922 *no_add_attrs
= true;
4926 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4927 if (is_attribute_p ("regparm", name
))
4931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4933 error ("fastcall and regparm attributes are not compatible");
4936 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4938 error ("regparam and thiscall attributes are not compatible");
4941 cst
= TREE_VALUE (args
);
4942 if (TREE_CODE (cst
) != INTEGER_CST
)
4944 warning (OPT_Wattributes
,
4945 "%qE attribute requires an integer constant argument",
4947 *no_add_attrs
= true;
4949 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4951 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4953 *no_add_attrs
= true;
4961 /* Do not warn when emulating the MS ABI. */
4962 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4963 && TREE_CODE (*node
) != METHOD_TYPE
)
4964 || ix86_function_type_abi (*node
) != MS_ABI
)
4965 warning (OPT_Wattributes
, "%qE attribute ignored",
4967 *no_add_attrs
= true;
4971 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4972 if (is_attribute_p ("fastcall", name
))
4974 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4976 error ("fastcall and cdecl attributes are not compatible");
4978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4980 error ("fastcall and stdcall attributes are not compatible");
4982 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4984 error ("fastcall and regparm attributes are not compatible");
4986 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4988 error ("fastcall and thiscall attributes are not compatible");
4992 /* Can combine stdcall with fastcall (redundant), regparm and
4994 else if (is_attribute_p ("stdcall", name
))
4996 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4998 error ("stdcall and cdecl attributes are not compatible");
5000 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5002 error ("stdcall and fastcall attributes are not compatible");
5004 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5006 error ("stdcall and thiscall attributes are not compatible");
5010 /* Can combine cdecl with regparm and sseregparm. */
5011 else if (is_attribute_p ("cdecl", name
))
5013 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5015 error ("stdcall and cdecl attributes are not compatible");
5017 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5019 error ("fastcall and cdecl attributes are not compatible");
5021 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5023 error ("cdecl and thiscall attributes are not compatible");
5026 else if (is_attribute_p ("thiscall", name
))
5028 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5029 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5031 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5033 error ("stdcall and thiscall attributes are not compatible");
5035 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5037 error ("fastcall and thiscall attributes are not compatible");
5039 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5041 error ("cdecl and thiscall attributes are not compatible");
5045 /* Can combine sseregparm with all attributes. */
5050 /* The transactional memory builtins are implicitly regparm or fastcall
5051 depending on the ABI. Override the generic do-nothing attribute that
5052 these builtins were declared with, and replace it with one of the two
5053 attributes that we expect elsewhere. */
5056 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5057 tree args ATTRIBUTE_UNUSED
,
5058 int flags ATTRIBUTE_UNUSED
,
5063 /* In no case do we want to add the placeholder attribute. */
5064 *no_add_attrs
= true;
5066 /* The 64-bit ABI is unchanged for transactional memory. */
5070 /* ??? Is there a better way to validate 32-bit windows? We have
5071 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5072 if (CHECK_STACK_LIMIT
> 0)
5073 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5076 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5077 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5079 decl_attributes (node
, alt
, flags
);
5084 /* This function determines from TYPE the calling-convention. */
5087 ix86_get_callcvt (const_tree type
)
5089 unsigned int ret
= 0;
5094 return IX86_CALLCVT_CDECL
;
5096 attrs
= TYPE_ATTRIBUTES (type
);
5097 if (attrs
!= NULL_TREE
)
5099 if (lookup_attribute ("cdecl", attrs
))
5100 ret
|= IX86_CALLCVT_CDECL
;
5101 else if (lookup_attribute ("stdcall", attrs
))
5102 ret
|= IX86_CALLCVT_STDCALL
;
5103 else if (lookup_attribute ("fastcall", attrs
))
5104 ret
|= IX86_CALLCVT_FASTCALL
;
5105 else if (lookup_attribute ("thiscall", attrs
))
5106 ret
|= IX86_CALLCVT_THISCALL
;
5108 /* Regparam isn't allowed for thiscall and fastcall. */
5109 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5111 if (lookup_attribute ("regparm", attrs
))
5112 ret
|= IX86_CALLCVT_REGPARM
;
5113 if (lookup_attribute ("sseregparm", attrs
))
5114 ret
|= IX86_CALLCVT_SSEREGPARM
;
5117 if (IX86_BASE_CALLCVT(ret
) != 0)
5121 is_stdarg
= stdarg_p (type
);
5122 if (TARGET_RTD
&& !is_stdarg
)
5123 return IX86_CALLCVT_STDCALL
| ret
;
5127 || TREE_CODE (type
) != METHOD_TYPE
5128 || ix86_function_type_abi (type
) != MS_ABI
)
5129 return IX86_CALLCVT_CDECL
| ret
;
5131 return IX86_CALLCVT_THISCALL
;
5134 /* Return 0 if the attributes for two types are incompatible, 1 if they
5135 are compatible, and 2 if they are nearly compatible (which causes a
5136 warning to be generated). */
5139 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5141 unsigned int ccvt1
, ccvt2
;
5143 if (TREE_CODE (type1
) != FUNCTION_TYPE
5144 && TREE_CODE (type1
) != METHOD_TYPE
)
5147 ccvt1
= ix86_get_callcvt (type1
);
5148 ccvt2
= ix86_get_callcvt (type2
);
5151 if (ix86_function_regparm (type1
, NULL
)
5152 != ix86_function_regparm (type2
, NULL
))
5158 /* Return the regparm value for a function with the indicated TYPE and DECL.
5159 DECL may be NULL when calling function indirectly
5160 or considering a libcall. */
5163 ix86_function_regparm (const_tree type
, const_tree decl
)
5170 return (ix86_function_type_abi (type
) == SYSV_ABI
5171 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5172 ccvt
= ix86_get_callcvt (type
);
5173 regparm
= ix86_regparm
;
5175 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5177 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5180 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5184 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5186 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5189 /* Use register calling convention for local functions when possible. */
5191 && TREE_CODE (decl
) == FUNCTION_DECL
5193 && !(profile_flag
&& !flag_fentry
))
5195 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5196 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5197 if (i
&& i
->local
&& i
->can_change_signature
)
5199 int local_regparm
, globals
= 0, regno
;
5201 /* Make sure no regparm register is taken by a
5202 fixed register variable. */
5203 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5204 if (fixed_regs
[local_regparm
])
5207 /* We don't want to use regparm(3) for nested functions as
5208 these use a static chain pointer in the third argument. */
5209 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5212 /* In 32-bit mode save a register for the split stack. */
5213 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5216 /* Each fixed register usage increases register pressure,
5217 so less registers should be used for argument passing.
5218 This functionality can be overriden by an explicit
5220 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5221 if (fixed_regs
[regno
])
5225 = globals
< local_regparm
? local_regparm
- globals
: 0;
5227 if (local_regparm
> regparm
)
5228 regparm
= local_regparm
;
5235 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5236 DFmode (2) arguments in SSE registers for a function with the
5237 indicated TYPE and DECL. DECL may be NULL when calling function
5238 indirectly or considering a libcall. Otherwise return 0. */
5241 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5243 gcc_assert (!TARGET_64BIT
);
5245 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5246 by the sseregparm attribute. */
5247 if (TARGET_SSEREGPARM
5248 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5255 error ("calling %qD with attribute sseregparm without "
5256 "SSE/SSE2 enabled", decl
);
5258 error ("calling %qT with attribute sseregparm without "
5259 "SSE/SSE2 enabled", type
);
5267 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5268 (and DFmode for SSE2) arguments in SSE registers. */
5269 if (decl
&& TARGET_SSE_MATH
&& optimize
5270 && !(profile_flag
&& !flag_fentry
))
5272 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5273 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5274 if (i
&& i
->local
&& i
->can_change_signature
)
5275 return TARGET_SSE2
? 2 : 1;
5281 /* Return true if EAX is live at the start of the function. Used by
5282 ix86_expand_prologue to determine if we need special help before
5283 calling allocate_stack_worker. */
5286 ix86_eax_live_at_start_p (void)
5288 /* Cheat. Don't bother working forward from ix86_function_regparm
5289 to the function type to whether an actual argument is located in
5290 eax. Instead just look at cfg info, which is still close enough
5291 to correct at this point. This gives false positives for broken
5292 functions that might use uninitialized data that happens to be
5293 allocated in eax, but who cares? */
5294 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
5298 ix86_keep_aggregate_return_pointer (tree fntype
)
5304 attr
= lookup_attribute ("callee_pop_aggregate_return",
5305 TYPE_ATTRIBUTES (fntype
));
5307 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5309 /* For 32-bit MS-ABI the default is to keep aggregate
5311 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5314 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5317 /* Value is the number of bytes of arguments automatically
5318 popped when returning from a subroutine call.
5319 FUNDECL is the declaration node of the function (as a tree),
5320 FUNTYPE is the data type of the function (as a tree),
5321 or for a library call it is an identifier node for the subroutine name.
5322 SIZE is the number of bytes of arguments passed on the stack.
5324 On the 80386, the RTD insn may be used to pop them if the number
5325 of args is fixed, but if the number is variable then the caller
5326 must pop them all. RTD can't be used for library calls now
5327 because the library is compiled with the Unix compiler.
5328 Use of RTD is a selectable option, since it is incompatible with
5329 standard Unix calling sequences. If the option is not selected,
5330 the caller must always pop the args.
5332 The attribute stdcall is equivalent to RTD on a per module basis. */
5335 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5339 /* None of the 64-bit ABIs pop arguments. */
5343 ccvt
= ix86_get_callcvt (funtype
);
5345 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5346 | IX86_CALLCVT_THISCALL
)) != 0
5347 && ! stdarg_p (funtype
))
5350 /* Lose any fake structure return argument if it is passed on the stack. */
5351 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5352 && !ix86_keep_aggregate_return_pointer (funtype
))
5354 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5356 return GET_MODE_SIZE (Pmode
);
5362 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5365 ix86_legitimate_combined_insn (rtx insn
)
5367 /* Check operand constraints in case hard registers were propagated
5368 into insn pattern. This check prevents combine pass from
5369 generating insn patterns with invalid hard register operands.
5370 These invalid insns can eventually confuse reload to error out
5371 with a spill failure. See also PRs 46829 and 46843. */
5372 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5376 extract_insn (insn
);
5377 preprocess_constraints ();
5379 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5381 rtx op
= recog_data
.operand
[i
];
5382 enum machine_mode mode
= GET_MODE (op
);
5383 struct operand_alternative
*op_alt
;
5388 /* A unary operator may be accepted by the predicate, but it
5389 is irrelevant for matching constraints. */
5393 if (GET_CODE (op
) == SUBREG
)
5395 if (REG_P (SUBREG_REG (op
))
5396 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5397 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5398 GET_MODE (SUBREG_REG (op
)),
5401 op
= SUBREG_REG (op
);
5404 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5407 op_alt
= recog_op_alt
[i
];
5409 /* Operand has no constraints, anything is OK. */
5410 win
= !recog_data
.n_alternatives
;
5412 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5414 if (op_alt
[j
].anything_ok
5415 || (op_alt
[j
].matches
!= -1
5417 (recog_data
.operand
[i
],
5418 recog_data
.operand
[op_alt
[j
].matches
]))
5419 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5434 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5436 static unsigned HOST_WIDE_INT
5437 ix86_asan_shadow_offset (void)
5439 return (unsigned HOST_WIDE_INT
) 1 << (TARGET_LP64
? 44 : 29);
5442 /* Argument support functions. */
5444 /* Return true when register may be used to pass function parameters. */
5446 ix86_function_arg_regno_p (int regno
)
5449 const int *parm_regs
;
5454 return (regno
< REGPARM_MAX
5455 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5457 return (regno
< REGPARM_MAX
5458 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5459 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5460 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5461 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5466 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5471 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5472 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5476 /* TODO: The function should depend on current function ABI but
5477 builtins.c would need updating then. Therefore we use the
5480 /* RAX is used as hidden argument to va_arg functions. */
5481 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5484 if (ix86_abi
== MS_ABI
)
5485 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5487 parm_regs
= x86_64_int_parameter_registers
;
5488 for (i
= 0; i
< (ix86_abi
== MS_ABI
5489 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5490 if (regno
== parm_regs
[i
])
5495 /* Return if we do not know how to pass TYPE solely in registers. */
5498 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5500 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5503 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5504 The layout_type routine is crafty and tries to trick us into passing
5505 currently unsupported vector types on the stack by using TImode. */
5506 return (!TARGET_64BIT
&& mode
== TImode
5507 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5510 /* It returns the size, in bytes, of the area reserved for arguments passed
5511 in registers for the function represented by fndecl dependent to the used
5514 ix86_reg_parm_stack_space (const_tree fndecl
)
5516 enum calling_abi call_abi
= SYSV_ABI
;
5517 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5518 call_abi
= ix86_function_abi (fndecl
);
5520 call_abi
= ix86_function_type_abi (fndecl
);
5521 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5526 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5529 ix86_function_type_abi (const_tree fntype
)
5531 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5533 enum calling_abi abi
= ix86_abi
;
5534 if (abi
== SYSV_ABI
)
5536 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5539 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
5547 ix86_function_ms_hook_prologue (const_tree fn
)
5549 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5551 if (decl_function_context (fn
) != NULL_TREE
)
5552 error_at (DECL_SOURCE_LOCATION (fn
),
5553 "ms_hook_prologue is not compatible with nested function");
5560 static enum calling_abi
5561 ix86_function_abi (const_tree fndecl
)
5565 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5568 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5571 ix86_cfun_abi (void)
5575 return cfun
->machine
->call_abi
;
5578 /* Write the extra assembler code needed to declare a function properly. */
5581 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5584 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5588 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5589 unsigned int filler_cc
= 0xcccccccc;
5591 for (i
= 0; i
< filler_count
; i
+= 4)
5592 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5595 #ifdef SUBTARGET_ASM_UNWIND_INIT
5596 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5599 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5601 /* Output magic byte marker, if hot-patch attribute is set. */
5606 /* leaq [%rsp + 0], %rsp */
5607 asm_fprintf (asm_out_file
, ASM_BYTE
5608 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5612 /* movl.s %edi, %edi
5614 movl.s %esp, %ebp */
5615 asm_fprintf (asm_out_file
, ASM_BYTE
5616 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5622 extern void init_regs (void);
5624 /* Implementation of call abi switching target hook. Specific to FNDECL
5625 the specific call register sets are set. See also
5626 ix86_conditional_register_usage for more details. */
5628 ix86_call_abi_override (const_tree fndecl
)
5630 if (fndecl
== NULL_TREE
)
5631 cfun
->machine
->call_abi
= ix86_abi
;
5633 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5636 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5637 expensive re-initialization of init_regs each time we switch function context
5638 since this is needed only during RTL expansion. */
5640 ix86_maybe_switch_abi (void)
5643 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5647 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5648 for a call to a function whose data type is FNTYPE.
5649 For a library call, FNTYPE is 0. */
5652 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5653 tree fntype
, /* tree ptr for function decl */
5654 rtx libname
, /* SYMBOL_REF of library name or 0 */
5658 struct cgraph_local_info
*i
;
5660 memset (cum
, 0, sizeof (*cum
));
5664 i
= cgraph_local_info (fndecl
);
5665 cum
->call_abi
= ix86_function_abi (fndecl
);
5670 cum
->call_abi
= ix86_function_type_abi (fntype
);
5673 cum
->caller
= caller
;
5675 /* Set up the number of registers to use for passing arguments. */
5677 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5678 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5679 "or subtarget optimization implying it");
5680 cum
->nregs
= ix86_regparm
;
5683 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5684 ? X86_64_REGPARM_MAX
5685 : X86_64_MS_REGPARM_MAX
);
5689 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5692 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5693 ? X86_64_SSE_REGPARM_MAX
5694 : X86_64_MS_SSE_REGPARM_MAX
);
5698 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5699 cum
->warn_avx
= true;
5700 cum
->warn_sse
= true;
5701 cum
->warn_mmx
= true;
5703 /* Because type might mismatch in between caller and callee, we need to
5704 use actual type of function for local calls.
5705 FIXME: cgraph_analyze can be told to actually record if function uses
5706 va_start so for local functions maybe_vaarg can be made aggressive
5708 FIXME: once typesytem is fixed, we won't need this code anymore. */
5709 if (i
&& i
->local
&& i
->can_change_signature
)
5710 fntype
= TREE_TYPE (fndecl
);
5711 cum
->maybe_vaarg
= (fntype
5712 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5717 /* If there are variable arguments, then we won't pass anything
5718 in registers in 32-bit mode. */
5719 if (stdarg_p (fntype
))
5730 /* Use ecx and edx registers if function has fastcall attribute,
5731 else look for regparm information. */
5734 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5735 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5738 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5740 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5746 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5749 /* Set up the number of SSE registers used for passing SFmode
5750 and DFmode arguments. Warn for mismatching ABI. */
5751 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5755 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5756 But in the case of vector types, it is some vector mode.
5758 When we have only some of our vector isa extensions enabled, then there
5759 are some modes for which vector_mode_supported_p is false. For these
5760 modes, the generic vector support in gcc will choose some non-vector mode
5761 in order to implement the type. By computing the natural mode, we'll
5762 select the proper ABI location for the operand and not depend on whatever
5763 the middle-end decides to do with these vector types.
5765 The midde-end can't deal with the vector types > 16 bytes. In this
5766 case, we return the original mode and warn ABI change if CUM isn't
5769 static enum machine_mode
5770 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
5772 enum machine_mode mode
= TYPE_MODE (type
);
5774 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
5776 HOST_WIDE_INT size
= int_size_in_bytes (type
);
5777 if ((size
== 8 || size
== 16 || size
== 32)
5778 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5779 && TYPE_VECTOR_SUBPARTS (type
) > 1)
5781 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
5783 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
5784 mode
= MIN_MODE_VECTOR_FLOAT
;
5786 mode
= MIN_MODE_VECTOR_INT
;
5788 /* Get the mode which has this inner mode and number of units. */
5789 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
5790 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
5791 && GET_MODE_INNER (mode
) == innermode
)
5793 if (size
== 32 && !TARGET_AVX
)
5795 static bool warnedavx
;
5802 warning (0, "AVX vector argument without AVX "
5803 "enabled changes the ABI");
5805 return TYPE_MODE (type
);
5807 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
5809 static bool warnedsse
;
5816 warning (0, "SSE vector argument without SSE "
5817 "enabled changes the ABI");
5832 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5833 this may not agree with the mode that the type system has chosen for the
5834 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5835 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5838 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
5843 if (orig_mode
!= BLKmode
)
5844 tmp
= gen_rtx_REG (orig_mode
, regno
);
5847 tmp
= gen_rtx_REG (mode
, regno
);
5848 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
5849 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
5855 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5856 of this code is to classify each 8bytes of incoming argument by the register
5857 class and assign registers accordingly. */
5859 /* Return the union class of CLASS1 and CLASS2.
5860 See the x86-64 PS ABI for details. */
5862 static enum x86_64_reg_class
5863 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
5865 /* Rule #1: If both classes are equal, this is the resulting class. */
5866 if (class1
== class2
)
5869 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5871 if (class1
== X86_64_NO_CLASS
)
5873 if (class2
== X86_64_NO_CLASS
)
5876 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5877 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
5878 return X86_64_MEMORY_CLASS
;
5880 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5881 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
5882 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
5883 return X86_64_INTEGERSI_CLASS
;
5884 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
5885 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
5886 return X86_64_INTEGER_CLASS
;
5888 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5890 if (class1
== X86_64_X87_CLASS
5891 || class1
== X86_64_X87UP_CLASS
5892 || class1
== X86_64_COMPLEX_X87_CLASS
5893 || class2
== X86_64_X87_CLASS
5894 || class2
== X86_64_X87UP_CLASS
5895 || class2
== X86_64_COMPLEX_X87_CLASS
)
5896 return X86_64_MEMORY_CLASS
;
5898 /* Rule #6: Otherwise class SSE is used. */
5899 return X86_64_SSE_CLASS
;
5902 /* Classify the argument of type TYPE and mode MODE.
5903 CLASSES will be filled by the register class used to pass each word
5904 of the operand. The number of words is returned. In case the parameter
5905 should be passed in memory, 0 is returned. As a special case for zero
5906 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5908 BIT_OFFSET is used internally for handling records and specifies offset
5909 of the offset in bits modulo 256 to avoid overflow cases.
5911 See the x86-64 PS ABI for details.
5915 classify_argument (enum machine_mode mode
, const_tree type
,
5916 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
5918 HOST_WIDE_INT bytes
=
5919 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
5921 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5923 /* Variable sized entities are always passed/returned in memory. */
5927 if (mode
!= VOIDmode
5928 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5931 /* Special case check for pointer to shared, on 64-bit target. */
5932 if (TARGET_64BIT
&& mode
== TImode
5933 && type
&& TREE_CODE (type
) == POINTER_TYPE
5934 && upc_shared_type_p (TREE_TYPE (type
)))
5936 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
5940 if (type
&& AGGREGATE_TYPE_P (type
))
5944 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5946 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5950 for (i
= 0; i
< words
; i
++)
5951 classes
[i
] = X86_64_NO_CLASS
;
5953 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5954 signalize memory class, so handle it as special case. */
5957 classes
[0] = X86_64_NO_CLASS
;
5961 /* Classify each field of record and merge classes. */
5962 switch (TREE_CODE (type
))
5965 /* And now merge the fields of structure. */
5966 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5968 if (TREE_CODE (field
) == FIELD_DECL
)
5972 if (TREE_TYPE (field
) == error_mark_node
)
5975 /* Bitfields are always classified as integer. Handle them
5976 early, since later code would consider them to be
5977 misaligned integers. */
5978 if (DECL_BIT_FIELD (field
))
5980 for (i
= (int_bit_position (field
)
5981 + (bit_offset
% 64)) / 8 / 8;
5982 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5983 + tree_low_cst (DECL_SIZE (field
), 0)
5986 merge_classes (X86_64_INTEGER_CLASS
,
5993 type
= TREE_TYPE (field
);
5995 /* Flexible array member is ignored. */
5996 if (TYPE_MODE (type
) == BLKmode
5997 && TREE_CODE (type
) == ARRAY_TYPE
5998 && TYPE_SIZE (type
) == NULL_TREE
5999 && TYPE_DOMAIN (type
) != NULL_TREE
6000 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6005 if (!warned
&& warn_psabi
)
6008 inform (input_location
,
6009 "the ABI of passing struct with"
6010 " a flexible array member has"
6011 " changed in GCC 4.4");
6015 num
= classify_argument (TYPE_MODE (type
), type
,
6017 (int_bit_position (field
)
6018 + bit_offset
) % 256);
6021 pos
= (int_bit_position (field
)
6022 + (bit_offset
% 64)) / 8 / 8;
6023 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6025 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6032 /* Arrays are handled as small records. */
6035 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6036 TREE_TYPE (type
), subclasses
, bit_offset
);
6040 /* The partial classes are now full classes. */
6041 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6042 subclasses
[0] = X86_64_SSE_CLASS
;
6043 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6044 && !((bit_offset
% 64) == 0 && bytes
== 4))
6045 subclasses
[0] = X86_64_INTEGER_CLASS
;
6047 for (i
= 0; i
< words
; i
++)
6048 classes
[i
] = subclasses
[i
% num
];
6053 case QUAL_UNION_TYPE
:
6054 /* Unions are similar to RECORD_TYPE but offset is always 0.
6056 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6058 if (TREE_CODE (field
) == FIELD_DECL
)
6062 if (TREE_TYPE (field
) == error_mark_node
)
6065 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6066 TREE_TYPE (field
), subclasses
,
6070 for (i
= 0; i
< num
; i
++)
6071 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6082 /* When size > 16 bytes, if the first one isn't
6083 X86_64_SSE_CLASS or any other ones aren't
6084 X86_64_SSEUP_CLASS, everything should be passed in
6086 if (classes
[0] != X86_64_SSE_CLASS
)
6089 for (i
= 1; i
< words
; i
++)
6090 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6094 /* Final merger cleanup. */
6095 for (i
= 0; i
< words
; i
++)
6097 /* If one class is MEMORY, everything should be passed in
6099 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6102 /* The X86_64_SSEUP_CLASS should be always preceded by
6103 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6104 if (classes
[i
] == X86_64_SSEUP_CLASS
6105 && classes
[i
- 1] != X86_64_SSE_CLASS
6106 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6108 /* The first one should never be X86_64_SSEUP_CLASS. */
6109 gcc_assert (i
!= 0);
6110 classes
[i
] = X86_64_SSE_CLASS
;
6113 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6114 everything should be passed in memory. */
6115 if (classes
[i
] == X86_64_X87UP_CLASS
6116 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6120 /* The first one should never be X86_64_X87UP_CLASS. */
6121 gcc_assert (i
!= 0);
6122 if (!warned
&& warn_psabi
)
6125 inform (input_location
,
6126 "the ABI of passing union with long double"
6127 " has changed in GCC 4.4");
6135 /* Compute alignment needed. We align all types to natural boundaries with
6136 exception of XFmode that is aligned to 64bits. */
6137 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6139 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6142 mode_alignment
= 128;
6143 else if (mode
== XCmode
)
6144 mode_alignment
= 256;
6145 if (COMPLEX_MODE_P (mode
))
6146 mode_alignment
/= 2;
6147 /* Misaligned fields are always returned in memory. */
6148 if (bit_offset
% mode_alignment
)
6152 /* for V1xx modes, just use the base mode */
6153 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6154 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6155 mode
= GET_MODE_INNER (mode
);
6157 /* Classification of atomic types. */
6162 classes
[0] = X86_64_SSE_CLASS
;
6165 classes
[0] = X86_64_SSE_CLASS
;
6166 classes
[1] = X86_64_SSEUP_CLASS
;
6176 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6180 classes
[0] = X86_64_INTEGERSI_CLASS
;
6183 else if (size
<= 64)
6185 classes
[0] = X86_64_INTEGER_CLASS
;
6188 else if (size
<= 64+32)
6190 classes
[0] = X86_64_INTEGER_CLASS
;
6191 classes
[1] = X86_64_INTEGERSI_CLASS
;
6194 else if (size
<= 64+64)
6196 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6204 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6208 /* OImode shouldn't be used directly. */
6213 if (!(bit_offset
% 64))
6214 classes
[0] = X86_64_SSESF_CLASS
;
6216 classes
[0] = X86_64_SSE_CLASS
;
6219 classes
[0] = X86_64_SSEDF_CLASS
;
6222 classes
[0] = X86_64_X87_CLASS
;
6223 classes
[1] = X86_64_X87UP_CLASS
;
6226 classes
[0] = X86_64_SSE_CLASS
;
6227 classes
[1] = X86_64_SSEUP_CLASS
;
6230 classes
[0] = X86_64_SSE_CLASS
;
6231 if (!(bit_offset
% 64))
6237 if (!warned
&& warn_psabi
)
6240 inform (input_location
,
6241 "the ABI of passing structure with complex float"
6242 " member has changed in GCC 4.4");
6244 classes
[1] = X86_64_SSESF_CLASS
;
6248 classes
[0] = X86_64_SSEDF_CLASS
;
6249 classes
[1] = X86_64_SSEDF_CLASS
;
6252 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6255 /* This modes is larger than 16 bytes. */
6263 classes
[0] = X86_64_SSE_CLASS
;
6264 classes
[1] = X86_64_SSEUP_CLASS
;
6265 classes
[2] = X86_64_SSEUP_CLASS
;
6266 classes
[3] = X86_64_SSEUP_CLASS
;
6274 classes
[0] = X86_64_SSE_CLASS
;
6275 classes
[1] = X86_64_SSEUP_CLASS
;
6283 classes
[0] = X86_64_SSE_CLASS
;
6289 gcc_assert (VECTOR_MODE_P (mode
));
6294 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6296 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6297 classes
[0] = X86_64_INTEGERSI_CLASS
;
6299 classes
[0] = X86_64_INTEGER_CLASS
;
6300 classes
[1] = X86_64_INTEGER_CLASS
;
6301 return 1 + (bytes
> 8);
6305 /* Examine the argument and return set number of register required in each
6306 class. Return 0 iff parameter should be passed in memory. */
6308 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6309 int *int_nregs
, int *sse_nregs
)
6311 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6312 int n
= classify_argument (mode
, type
, regclass
, 0);
6318 for (n
--; n
>= 0; n
--)
6319 switch (regclass
[n
])
6321 case X86_64_INTEGER_CLASS
:
6322 case X86_64_INTEGERSI_CLASS
:
6325 case X86_64_SSE_CLASS
:
6326 case X86_64_SSESF_CLASS
:
6327 case X86_64_SSEDF_CLASS
:
6330 case X86_64_NO_CLASS
:
6331 case X86_64_SSEUP_CLASS
:
6333 case X86_64_X87_CLASS
:
6334 case X86_64_X87UP_CLASS
:
6338 case X86_64_COMPLEX_X87_CLASS
:
6339 return in_return
? 2 : 0;
6340 case X86_64_MEMORY_CLASS
:
6346 /* Construct container for the argument used by GCC interface. See
6347 FUNCTION_ARG for the detailed description. */
6350 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6351 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6352 const int *intreg
, int sse_regno
)
6354 /* The following variables hold the static issued_error state. */
6355 static bool issued_sse_arg_error
;
6356 static bool issued_sse_ret_error
;
6357 static bool issued_x87_ret_error
;
6359 enum machine_mode tmpmode
;
6361 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6362 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6366 int needed_sseregs
, needed_intregs
;
6367 rtx exp
[MAX_CLASSES
];
6370 n
= classify_argument (mode
, type
, regclass
, 0);
6373 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6376 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6379 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6380 some less clueful developer tries to use floating-point anyway. */
6381 if (needed_sseregs
&& !TARGET_SSE
)
6385 if (!issued_sse_ret_error
)
6387 error ("SSE register return with SSE disabled");
6388 issued_sse_ret_error
= true;
6391 else if (!issued_sse_arg_error
)
6393 error ("SSE register argument with SSE disabled");
6394 issued_sse_arg_error
= true;
6399 /* Likewise, error if the ABI requires us to return values in the
6400 x87 registers and the user specified -mno-80387. */
6401 if (!TARGET_80387
&& in_return
)
6402 for (i
= 0; i
< n
; i
++)
6403 if (regclass
[i
] == X86_64_X87_CLASS
6404 || regclass
[i
] == X86_64_X87UP_CLASS
6405 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6407 if (!issued_x87_ret_error
)
6409 error ("x87 register return with x87 disabled");
6410 issued_x87_ret_error
= true;
6415 /* First construct simple cases. Avoid SCmode, since we want to use
6416 single register to pass this type. */
6417 if (n
== 1 && mode
!= SCmode
)
6418 switch (regclass
[0])
6420 case X86_64_INTEGER_CLASS
:
6421 case X86_64_INTEGERSI_CLASS
:
6422 return gen_rtx_REG (mode
, intreg
[0]);
6423 case X86_64_SSE_CLASS
:
6424 case X86_64_SSESF_CLASS
:
6425 case X86_64_SSEDF_CLASS
:
6426 if (mode
!= BLKmode
)
6427 return gen_reg_or_parallel (mode
, orig_mode
,
6428 SSE_REGNO (sse_regno
));
6430 case X86_64_X87_CLASS
:
6431 case X86_64_COMPLEX_X87_CLASS
:
6432 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6433 case X86_64_NO_CLASS
:
6434 /* Zero sized array, struct or class. */
6440 && regclass
[0] == X86_64_SSE_CLASS
6441 && regclass
[1] == X86_64_SSEUP_CLASS
6443 return gen_reg_or_parallel (mode
, orig_mode
,
6444 SSE_REGNO (sse_regno
));
6446 && regclass
[0] == X86_64_SSE_CLASS
6447 && regclass
[1] == X86_64_SSEUP_CLASS
6448 && regclass
[2] == X86_64_SSEUP_CLASS
6449 && regclass
[3] == X86_64_SSEUP_CLASS
6451 return gen_reg_or_parallel (mode
, orig_mode
,
6452 SSE_REGNO (sse_regno
));
6454 && regclass
[0] == X86_64_X87_CLASS
6455 && regclass
[1] == X86_64_X87UP_CLASS
)
6456 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6459 && regclass
[0] == X86_64_INTEGER_CLASS
6460 && regclass
[1] == X86_64_INTEGER_CLASS
6461 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6462 && intreg
[0] + 1 == intreg
[1])
6463 return gen_rtx_REG (mode
, intreg
[0]);
6465 /* Otherwise figure out the entries of the PARALLEL. */
6466 for (i
= 0; i
< n
; i
++)
6470 switch (regclass
[i
])
6472 case X86_64_NO_CLASS
:
6474 case X86_64_INTEGER_CLASS
:
6475 case X86_64_INTEGERSI_CLASS
:
6476 /* Merge TImodes on aligned occasions here too. */
6477 if (i
* 8 + 8 > bytes
)
6479 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6480 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6484 /* We've requested 24 bytes we
6485 don't have mode for. Use DImode. */
6486 if (tmpmode
== BLKmode
)
6489 = gen_rtx_EXPR_LIST (VOIDmode
,
6490 gen_rtx_REG (tmpmode
, *intreg
),
6494 case X86_64_SSESF_CLASS
:
6496 = gen_rtx_EXPR_LIST (VOIDmode
,
6497 gen_rtx_REG (SFmode
,
6498 SSE_REGNO (sse_regno
)),
6502 case X86_64_SSEDF_CLASS
:
6504 = gen_rtx_EXPR_LIST (VOIDmode
,
6505 gen_rtx_REG (DFmode
,
6506 SSE_REGNO (sse_regno
)),
6510 case X86_64_SSE_CLASS
:
6518 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6528 && regclass
[1] == X86_64_SSEUP_CLASS
6529 && regclass
[2] == X86_64_SSEUP_CLASS
6530 && regclass
[3] == X86_64_SSEUP_CLASS
);
6538 = gen_rtx_EXPR_LIST (VOIDmode
,
6539 gen_rtx_REG (tmpmode
,
6540 SSE_REGNO (sse_regno
)),
6549 /* Empty aligned struct, union or class. */
6553 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6554 for (i
= 0; i
< nexps
; i
++)
6555 XVECEXP (ret
, 0, i
) = exp
[i
];
6559 /* Update the data in CUM to advance over an argument of mode MODE
6560 and data type TYPE. (TYPE is null for libcalls where that information
6561 may not be available.) */
6564 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6565 const_tree type
, HOST_WIDE_INT bytes
,
6566 HOST_WIDE_INT words
)
6582 cum
->words
+= words
;
6583 cum
->nregs
-= words
;
6584 cum
->regno
+= words
;
6586 if (cum
->nregs
<= 0)
6594 /* OImode shouldn't be used directly. */
6598 if (cum
->float_in_sse
< 2)
6601 if (cum
->float_in_sse
< 1)
6618 if (!type
|| !AGGREGATE_TYPE_P (type
))
6620 cum
->sse_words
+= words
;
6621 cum
->sse_nregs
-= 1;
6622 cum
->sse_regno
+= 1;
6623 if (cum
->sse_nregs
<= 0)
6637 if (!type
|| !AGGREGATE_TYPE_P (type
))
6639 cum
->mmx_words
+= words
;
6640 cum
->mmx_nregs
-= 1;
6641 cum
->mmx_regno
+= 1;
6642 if (cum
->mmx_nregs
<= 0)
6653 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6654 const_tree type
, HOST_WIDE_INT words
, bool named
)
6656 int int_nregs
, sse_nregs
;
6658 /* Unnamed 256bit vector mode parameters are passed on stack. */
6659 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6662 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6663 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6665 cum
->nregs
-= int_nregs
;
6666 cum
->sse_nregs
-= sse_nregs
;
6667 cum
->regno
+= int_nregs
;
6668 cum
->sse_regno
+= sse_nregs
;
6672 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6673 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6674 cum
->words
+= words
;
6679 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6680 HOST_WIDE_INT words
)
6682 /* Otherwise, this should be passed indirect. */
6683 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6685 cum
->words
+= words
;
6693 /* Update the data in CUM to advance over an argument of mode MODE and
6694 data type TYPE. (TYPE is null for libcalls where that information
6695 may not be available.) */
6698 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6699 const_tree type
, bool named
)
6701 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6702 HOST_WIDE_INT bytes
, words
;
6704 if (mode
== BLKmode
)
6705 bytes
= int_size_in_bytes (type
);
6707 bytes
= GET_MODE_SIZE (mode
);
6708 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6711 mode
= type_natural_mode (type
, NULL
);
6713 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6714 function_arg_advance_ms_64 (cum
, bytes
, words
);
6715 else if (TARGET_64BIT
)
6716 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6718 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6721 /* Define where to put the arguments to a function.
6722 Value is zero to push the argument on the stack,
6723 or a hard register in which to store the argument.
6725 MODE is the argument's machine mode.
6726 TYPE is the data type of the argument (as a tree).
6727 This is null for libcalls where that information may
6729 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6730 the preceding args and about the function being called.
6731 NAMED is nonzero if this argument is a named parameter
6732 (otherwise it is an extra parameter matching an ellipsis). */
6735 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6736 enum machine_mode orig_mode
, const_tree type
,
6737 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6739 static bool warnedsse
, warnedmmx
;
6741 /* Avoid the AL settings for the Unix64 ABI. */
6742 if (mode
== VOIDmode
)
6758 if (words
<= cum
->nregs
)
6760 int regno
= cum
->regno
;
6762 /* Fastcall allocates the first two DWORD (SImode) or
6763 smaller arguments to ECX and EDX if it isn't an
6769 || (type
&& AGGREGATE_TYPE_P (type
)))
6772 /* ECX not EAX is the first allocated register. */
6773 if (regno
== AX_REG
)
6776 return gen_rtx_REG (mode
, regno
);
6781 if (cum
->float_in_sse
< 2)
6784 if (cum
->float_in_sse
< 1)
6788 /* In 32bit, we pass TImode in xmm registers. */
6795 if (!type
|| !AGGREGATE_TYPE_P (type
))
6797 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6800 warning (0, "SSE vector argument without SSE enabled "
6804 return gen_reg_or_parallel (mode
, orig_mode
,
6805 cum
->sse_regno
+ FIRST_SSE_REG
);
6810 /* OImode shouldn't be used directly. */
6819 if (!type
|| !AGGREGATE_TYPE_P (type
))
6822 return gen_reg_or_parallel (mode
, orig_mode
,
6823 cum
->sse_regno
+ FIRST_SSE_REG
);
6833 if (!type
|| !AGGREGATE_TYPE_P (type
))
6835 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6838 warning (0, "MMX vector argument without MMX enabled "
6842 return gen_reg_or_parallel (mode
, orig_mode
,
6843 cum
->mmx_regno
+ FIRST_MMX_REG
);
6852 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6853 enum machine_mode orig_mode
, const_tree type
, bool named
)
6855 /* Handle a hidden AL argument containing number of registers
6856 for varargs x86-64 functions. */
6857 if (mode
== VOIDmode
)
6858 return GEN_INT (cum
->maybe_vaarg
6859 ? (cum
->sse_nregs
< 0
6860 ? X86_64_SSE_REGPARM_MAX
6875 /* Unnamed 256bit vector mode parameters are passed on stack. */
6881 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6883 &x86_64_int_parameter_registers
[cum
->regno
],
6888 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6889 enum machine_mode orig_mode
, bool named
,
6890 HOST_WIDE_INT bytes
)
6894 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6895 We use value of -2 to specify that current function call is MSABI. */
6896 if (mode
== VOIDmode
)
6897 return GEN_INT (-2);
6899 /* If we've run out of registers, it goes on the stack. */
6900 if (cum
->nregs
== 0)
6903 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6905 /* Only floating point modes are passed in anything but integer regs. */
6906 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6909 regno
= cum
->regno
+ FIRST_SSE_REG
;
6914 /* Unnamed floating parameters are passed in both the
6915 SSE and integer registers. */
6916 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6917 t2
= gen_rtx_REG (mode
, regno
);
6918 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6919 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6920 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6923 /* Handle aggregated types passed in register. */
6924 if (orig_mode
== BLKmode
)
6926 if (bytes
> 0 && bytes
<= 8)
6927 mode
= (bytes
> 4 ? DImode
: SImode
);
6928 if (mode
== BLKmode
)
6932 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
6935 /* Return where to put the arguments to a function.
6936 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6938 MODE is the argument's machine mode. TYPE is the data type of the
6939 argument. It is null for libcalls where that information may not be
6940 available. CUM gives information about the preceding args and about
6941 the function being called. NAMED is nonzero if this argument is a
6942 named parameter (otherwise it is an extra parameter matching an
6946 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
6947 const_tree type
, bool named
)
6949 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6950 enum machine_mode mode
= omode
;
6951 HOST_WIDE_INT bytes
, words
;
6954 if (mode
== BLKmode
)
6955 bytes
= int_size_in_bytes (type
);
6957 bytes
= GET_MODE_SIZE (mode
);
6958 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6960 /* To simplify the code below, represent vector types with a vector mode
6961 even if MMX/SSE are not active. */
6962 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6963 mode
= type_natural_mode (type
, cum
);
6965 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6966 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
6967 else if (TARGET_64BIT
)
6968 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
6970 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
6975 /* A C expression that indicates when an argument must be passed by
6976 reference. If nonzero for an argument, a copy of that argument is
6977 made in memory and a pointer to the argument is passed instead of
6978 the argument itself. The pointer is passed in whatever way is
6979 appropriate for passing a pointer to that type. */
6982 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
6983 enum machine_mode mode ATTRIBUTE_UNUSED
,
6984 const_tree type
, bool named ATTRIBUTE_UNUSED
)
6986 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6988 /* See Windows x64 Software Convention. */
6989 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6991 int msize
= (int) GET_MODE_SIZE (mode
);
6994 /* Arrays are passed by reference. */
6995 if (TREE_CODE (type
) == ARRAY_TYPE
)
6998 if (AGGREGATE_TYPE_P (type
))
7000 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7001 are passed by reference. */
7002 msize
= int_size_in_bytes (type
);
7006 /* __m128 is passed by reference. */
7008 case 1: case 2: case 4: case 8:
7014 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7020 /* Return true when TYPE should be 128bit aligned for 32bit argument
7021 passing ABI. XXX: This function is obsolete and is only used for
7022 checking psABI compatibility with previous versions of GCC. */
7025 ix86_compat_aligned_value_p (const_tree type
)
7027 enum machine_mode mode
= TYPE_MODE (type
);
7028 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7032 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7034 if (TYPE_ALIGN (type
) < 128)
7037 if (AGGREGATE_TYPE_P (type
))
7039 /* Walk the aggregates recursively. */
7040 switch (TREE_CODE (type
))
7044 case QUAL_UNION_TYPE
:
7048 /* Walk all the structure fields. */
7049 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7051 if (TREE_CODE (field
) == FIELD_DECL
7052 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7059 /* Just for use if some languages passes arrays by value. */
7060 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7071 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7072 XXX: This function is obsolete and is only used for checking psABI
7073 compatibility with previous versions of GCC. */
7076 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7077 const_tree type
, unsigned int align
)
7079 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7080 natural boundaries. */
7081 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7083 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7084 make an exception for SSE modes since these require 128bit
7087 The handling here differs from field_alignment. ICC aligns MMX
7088 arguments to 4 byte boundaries, while structure fields are aligned
7089 to 8 byte boundaries. */
7092 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7093 align
= PARM_BOUNDARY
;
7097 if (!ix86_compat_aligned_value_p (type
))
7098 align
= PARM_BOUNDARY
;
7101 if (align
> BIGGEST_ALIGNMENT
)
7102 align
= BIGGEST_ALIGNMENT
;
7106 /* Return true when TYPE should be 128bit aligned for 32bit argument
7110 ix86_contains_aligned_value_p (const_tree type
)
7112 enum machine_mode mode
= TYPE_MODE (type
);
7114 if (mode
== XFmode
|| mode
== XCmode
)
7117 if (TYPE_ALIGN (type
) < 128)
7120 if (AGGREGATE_TYPE_P (type
))
7122 /* Walk the aggregates recursively. */
7123 switch (TREE_CODE (type
))
7127 case QUAL_UNION_TYPE
:
7131 /* Walk all the structure fields. */
7132 for (field
= TYPE_FIELDS (type
);
7134 field
= DECL_CHAIN (field
))
7136 if (TREE_CODE (field
) == FIELD_DECL
7137 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7144 /* Just for use if some languages passes arrays by value. */
7145 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7154 return TYPE_ALIGN (type
) >= 128;
7159 /* Gives the alignment boundary, in bits, of an argument with the
7160 specified mode and type. */
7163 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7168 /* Since the main variant type is used for call, we convert it to
7169 the main variant type. */
7170 type
= TYPE_MAIN_VARIANT (type
);
7171 align
= TYPE_ALIGN (type
);
7174 align
= GET_MODE_ALIGNMENT (mode
);
7175 if (align
< PARM_BOUNDARY
)
7176 align
= PARM_BOUNDARY
;
7180 unsigned int saved_align
= align
;
7184 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7187 if (mode
== XFmode
|| mode
== XCmode
)
7188 align
= PARM_BOUNDARY
;
7190 else if (!ix86_contains_aligned_value_p (type
))
7191 align
= PARM_BOUNDARY
;
7194 align
= PARM_BOUNDARY
;
7199 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7203 inform (input_location
,
7204 "The ABI for passing parameters with %d-byte"
7205 " alignment has changed in GCC 4.6",
7206 align
/ BITS_PER_UNIT
);
7213 /* Return true if N is a possible register number of function value. */
7216 ix86_function_value_regno_p (const unsigned int regno
)
7223 case FIRST_FLOAT_REG
:
7224 /* TODO: The function should depend on current function ABI but
7225 builtins.c would need updating then. Therefore we use the
7227 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7229 return TARGET_FLOAT_RETURNS_IN_80387
;
7235 if (TARGET_MACHO
|| TARGET_64BIT
)
7243 /* Define how to find the value returned by a function.
7244 VALTYPE is the data type of the value (as a tree).
7245 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7246 otherwise, FUNC is 0. */
7249 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7250 const_tree fntype
, const_tree fn
)
7254 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7255 we normally prevent this case when mmx is not available. However
7256 some ABIs may require the result to be returned like DImode. */
7257 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7258 regno
= FIRST_MMX_REG
;
7260 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7261 we prevent this case when sse is not available. However some ABIs
7262 may require the result to be returned like integer TImode. */
7263 else if (mode
== TImode
7264 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7265 regno
= FIRST_SSE_REG
;
7267 /* 32-byte vector modes in %ymm0. */
7268 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7269 regno
= FIRST_SSE_REG
;
7271 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7272 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7273 regno
= FIRST_FLOAT_REG
;
7275 /* Most things go in %eax. */
7278 /* Override FP return register with %xmm0 for local functions when
7279 SSE math is enabled or for functions with sseregparm attribute. */
7280 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7282 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7283 if ((sse_level
>= 1 && mode
== SFmode
)
7284 || (sse_level
== 2 && mode
== DFmode
))
7285 regno
= FIRST_SSE_REG
;
7288 /* OImode shouldn't be used directly. */
7289 gcc_assert (mode
!= OImode
);
7291 return gen_rtx_REG (orig_mode
, regno
);
7295 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7300 /* Handle libcalls, which don't provide a type node. */
7301 if (valtype
== NULL
)
7315 regno
= FIRST_SSE_REG
;
7319 regno
= FIRST_FLOAT_REG
;
7327 return gen_rtx_REG (mode
, regno
);
7329 else if (POINTER_TYPE_P (valtype
)
7330 && !upc_shared_type_p (TREE_TYPE (valtype
)))
7332 /* Pointers are always returned in word_mode. */
7336 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7337 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7338 x86_64_int_return_registers
, 0);
7340 /* For zero sized structures, construct_container returns NULL, but we
7341 need to keep rest of compiler happy by returning meaningful value. */
7343 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7349 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7352 unsigned int regno
= AX_REG
;
7356 switch (GET_MODE_SIZE (mode
))
7359 if (valtype
!= NULL_TREE
7360 && !VECTOR_INTEGER_TYPE_P (valtype
)
7361 && !VECTOR_INTEGER_TYPE_P (valtype
)
7362 && !INTEGRAL_TYPE_P (valtype
)
7363 && !VECTOR_FLOAT_TYPE_P (valtype
))
7365 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7366 && !COMPLEX_MODE_P (mode
))
7367 regno
= FIRST_SSE_REG
;
7371 if (mode
== SFmode
|| mode
== DFmode
)
7372 regno
= FIRST_SSE_REG
;
7378 return gen_rtx_REG (orig_mode
, regno
);
7382 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7383 enum machine_mode orig_mode
, enum machine_mode mode
)
7385 const_tree fn
, fntype
;
7388 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7389 fn
= fntype_or_decl
;
7390 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7392 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7393 return function_value_ms_64 (orig_mode
, mode
, valtype
);
7394 else if (TARGET_64BIT
)
7395 return function_value_64 (orig_mode
, mode
, valtype
);
7397 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7401 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7402 bool outgoing ATTRIBUTE_UNUSED
)
7404 enum machine_mode mode
, orig_mode
;
7406 orig_mode
= TYPE_MODE (valtype
);
7407 mode
= type_natural_mode (valtype
, NULL
);
7408 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7411 /* Pointer function arguments and return values are promoted to
7414 static enum machine_mode
7415 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7416 int *punsignedp
, const_tree fntype
,
7419 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7421 if (upc_shared_type_p (TREE_TYPE (type
)))
7424 return TYPE_MODE (upc_pts_rep_type_node
);
7426 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7429 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7433 /* Return true if a structure, union or array with MODE containing FIELD
7434 should be accessed using BLKmode. */
7437 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7439 /* Union with XFmode must be in BLKmode. */
7440 return (mode
== XFmode
7441 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7442 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7446 ix86_libcall_value (enum machine_mode mode
)
7448 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7451 /* Return true iff type is returned in memory. */
7453 static bool ATTRIBUTE_UNUSED
7454 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7458 if (mode
== BLKmode
)
7461 size
= int_size_in_bytes (type
);
7463 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7466 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7468 /* User-created vectors small enough to fit in EAX. */
7472 /* MMX/3dNow values are returned in MM0,
7473 except when it doesn't exits or the ABI prescribes otherwise. */
7475 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7477 /* SSE values are returned in XMM0, except when it doesn't exist. */
7481 /* AVX values are returned in YMM0, except when it doesn't exist. */
7492 /* OImode shouldn't be used directly. */
7493 gcc_assert (mode
!= OImode
);
7498 static bool ATTRIBUTE_UNUSED
7499 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7501 int needed_intregs
, needed_sseregs
;
7502 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7505 static bool ATTRIBUTE_UNUSED
7506 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7508 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7510 /* __m128 is returned in xmm0. */
7511 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
) || INTEGRAL_TYPE_P (type
)
7512 || VECTOR_FLOAT_TYPE_P (type
))
7513 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7514 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7517 /* Otherwise, the size must be exactly in [1248]. */
7518 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7522 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7524 #ifdef SUBTARGET_RETURN_IN_MEMORY
7525 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7527 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7531 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7532 return return_in_memory_ms_64 (type
, mode
);
7534 return return_in_memory_64 (type
, mode
);
7537 return return_in_memory_32 (type
, mode
);
7541 /* When returning SSE vector types, we have a choice of either
7542 (1) being abi incompatible with a -march switch, or
7543 (2) generating an error.
7544 Given no good solution, I think the safest thing is one warning.
7545 The user won't be able to use -Werror, but....
7547 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7548 called in response to actually generating a caller or callee that
7549 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7550 via aggregate_value_p for general type probing from tree-ssa. */
7553 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7555 static bool warnedsse
, warnedmmx
;
7557 if (!TARGET_64BIT
&& type
)
7559 /* Look at the return type of the function, not the function type. */
7560 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7562 if (!TARGET_SSE
&& !warnedsse
)
7565 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7568 warning (0, "SSE vector return without SSE enabled "
7573 if (!TARGET_MMX
&& !warnedmmx
)
7575 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7578 warning (0, "MMX vector return without MMX enabled "
7588 /* Create the va_list data type. */
7590 /* Returns the calling convention specific va_list date type.
7591 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7594 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7596 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7598 /* For i386 we use plain pointer to argument area. */
7599 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7600 return build_pointer_type (char_type_node
);
7602 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7603 type_decl
= build_decl (BUILTINS_LOCATION
,
7604 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7606 f_gpr
= build_decl (BUILTINS_LOCATION
,
7607 FIELD_DECL
, get_identifier ("gp_offset"),
7608 unsigned_type_node
);
7609 f_fpr
= build_decl (BUILTINS_LOCATION
,
7610 FIELD_DECL
, get_identifier ("fp_offset"),
7611 unsigned_type_node
);
7612 f_ovf
= build_decl (BUILTINS_LOCATION
,
7613 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7615 f_sav
= build_decl (BUILTINS_LOCATION
,
7616 FIELD_DECL
, get_identifier ("reg_save_area"),
7619 va_list_gpr_counter_field
= f_gpr
;
7620 va_list_fpr_counter_field
= f_fpr
;
7622 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7623 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7624 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7625 DECL_FIELD_CONTEXT (f_sav
) = record
;
7627 TYPE_STUB_DECL (record
) = type_decl
;
7628 TYPE_NAME (record
) = type_decl
;
7629 TYPE_FIELDS (record
) = f_gpr
;
7630 DECL_CHAIN (f_gpr
) = f_fpr
;
7631 DECL_CHAIN (f_fpr
) = f_ovf
;
7632 DECL_CHAIN (f_ovf
) = f_sav
;
7634 layout_type (record
);
7636 /* The correct type is an array type of one element. */
7637 return build_array_type (record
, build_index_type (size_zero_node
));
7640 /* Setup the builtin va_list data type and for 64-bit the additional
7641 calling convention specific va_list data types. */
7644 ix86_build_builtin_va_list (void)
7646 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7648 /* Initialize abi specific va_list builtin types. */
7652 if (ix86_abi
== MS_ABI
)
7654 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7655 if (TREE_CODE (t
) != RECORD_TYPE
)
7656 t
= build_variant_type_copy (t
);
7657 sysv_va_list_type_node
= t
;
7662 if (TREE_CODE (t
) != RECORD_TYPE
)
7663 t
= build_variant_type_copy (t
);
7664 sysv_va_list_type_node
= t
;
7666 if (ix86_abi
!= MS_ABI
)
7668 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7669 if (TREE_CODE (t
) != RECORD_TYPE
)
7670 t
= build_variant_type_copy (t
);
7671 ms_va_list_type_node
= t
;
7676 if (TREE_CODE (t
) != RECORD_TYPE
)
7677 t
= build_variant_type_copy (t
);
7678 ms_va_list_type_node
= t
;
7685 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7688 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7694 /* GPR size of varargs save area. */
7695 if (cfun
->va_list_gpr_size
)
7696 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7698 ix86_varargs_gpr_size
= 0;
7700 /* FPR size of varargs save area. We don't need it if we don't pass
7701 anything in SSE registers. */
7702 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7703 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7705 ix86_varargs_fpr_size
= 0;
7707 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7710 save_area
= frame_pointer_rtx
;
7711 set
= get_varargs_alias_set ();
7713 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7714 if (max
> X86_64_REGPARM_MAX
)
7715 max
= X86_64_REGPARM_MAX
;
7717 for (i
= cum
->regno
; i
< max
; i
++)
7719 mem
= gen_rtx_MEM (word_mode
,
7720 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7721 MEM_NOTRAP_P (mem
) = 1;
7722 set_mem_alias_set (mem
, set
);
7723 emit_move_insn (mem
,
7724 gen_rtx_REG (word_mode
,
7725 x86_64_int_parameter_registers
[i
]));
7728 if (ix86_varargs_fpr_size
)
7730 enum machine_mode smode
;
7733 /* Now emit code to save SSE registers. The AX parameter contains number
7734 of SSE parameter registers used to call this function, though all we
7735 actually check here is the zero/non-zero status. */
7737 label
= gen_label_rtx ();
7738 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7739 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7742 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7743 we used movdqa (i.e. TImode) instead? Perhaps even better would
7744 be if we could determine the real mode of the data, via a hook
7745 into pass_stdarg. Ignore all that for now. */
7747 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7748 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7750 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7751 if (max
> X86_64_SSE_REGPARM_MAX
)
7752 max
= X86_64_SSE_REGPARM_MAX
;
7754 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7756 mem
= plus_constant (Pmode
, save_area
,
7757 i
* 16 + ix86_varargs_gpr_size
);
7758 mem
= gen_rtx_MEM (smode
, mem
);
7759 MEM_NOTRAP_P (mem
) = 1;
7760 set_mem_alias_set (mem
, set
);
7761 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7763 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7771 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7773 alias_set_type set
= get_varargs_alias_set ();
7776 /* Reset to zero, as there might be a sysv vaarg used
7778 ix86_varargs_gpr_size
= 0;
7779 ix86_varargs_fpr_size
= 0;
7781 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7785 mem
= gen_rtx_MEM (Pmode
,
7786 plus_constant (Pmode
, virtual_incoming_args_rtx
,
7787 i
* UNITS_PER_WORD
));
7788 MEM_NOTRAP_P (mem
) = 1;
7789 set_mem_alias_set (mem
, set
);
7791 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7792 emit_move_insn (mem
, reg
);
7797 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7798 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7801 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7802 CUMULATIVE_ARGS next_cum
;
7805 /* This argument doesn't appear to be used anymore. Which is good,
7806 because the old code here didn't suppress rtl generation. */
7807 gcc_assert (!no_rtl
);
7812 fntype
= TREE_TYPE (current_function_decl
);
7814 /* For varargs, we do not want to skip the dummy va_dcl argument.
7815 For stdargs, we do want to skip the last named argument. */
7817 if (stdarg_p (fntype
))
7818 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7821 if (cum
->call_abi
== MS_ABI
)
7822 setup_incoming_varargs_ms_64 (&next_cum
);
7824 setup_incoming_varargs_64 (&next_cum
);
7827 /* Checks if TYPE is of kind va_list char *. */
7830 is_va_list_char_pointer (tree type
)
7834 /* For 32-bit it is always true. */
7837 canonic
= ix86_canonical_va_list_type (type
);
7838 return (canonic
== ms_va_list_type_node
7839 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
7842 /* Implement va_start. */
7845 ix86_va_start (tree valist
, rtx nextarg
)
7847 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7848 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7849 tree gpr
, fpr
, ovf
, sav
, t
;
7853 if (flag_split_stack
7854 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7856 unsigned int scratch_regno
;
7858 /* When we are splitting the stack, we can't refer to the stack
7859 arguments using internal_arg_pointer, because they may be on
7860 the old stack. The split stack prologue will arrange to
7861 leave a pointer to the old stack arguments in a scratch
7862 register, which we here copy to a pseudo-register. The split
7863 stack prologue can't set the pseudo-register directly because
7864 it (the prologue) runs before any registers have been saved. */
7866 scratch_regno
= split_stack_prologue_scratch_regno ();
7867 if (scratch_regno
!= INVALID_REGNUM
)
7871 reg
= gen_reg_rtx (Pmode
);
7872 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7875 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7879 push_topmost_sequence ();
7880 emit_insn_after (seq
, entry_of_function ());
7881 pop_topmost_sequence ();
7885 /* Only 64bit target needs something special. */
7886 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7888 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7889 std_expand_builtin_va_start (valist
, nextarg
);
7894 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7895 next
= expand_binop (ptr_mode
, add_optab
,
7896 cfun
->machine
->split_stack_varargs_pointer
,
7897 crtl
->args
.arg_offset_rtx
,
7898 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7899 convert_move (va_r
, next
, 0);
7904 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7905 f_fpr
= DECL_CHAIN (f_gpr
);
7906 f_ovf
= DECL_CHAIN (f_fpr
);
7907 f_sav
= DECL_CHAIN (f_ovf
);
7909 valist
= build_simple_mem_ref (valist
);
7910 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7911 /* The following should be folded into the MEM_REF offset. */
7912 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7914 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7916 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7918 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7921 /* Count number of gp and fp argument registers used. */
7922 words
= crtl
->args
.info
.words
;
7923 n_gpr
= crtl
->args
.info
.regno
;
7924 n_fpr
= crtl
->args
.info
.sse_regno
;
7926 if (cfun
->va_list_gpr_size
)
7928 type
= TREE_TYPE (gpr
);
7929 t
= build2 (MODIFY_EXPR
, type
,
7930 gpr
, build_int_cst (type
, n_gpr
* 8));
7931 TREE_SIDE_EFFECTS (t
) = 1;
7932 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7935 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7937 type
= TREE_TYPE (fpr
);
7938 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7939 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7940 TREE_SIDE_EFFECTS (t
) = 1;
7941 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7944 /* Find the overflow area. */
7945 type
= TREE_TYPE (ovf
);
7946 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7947 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7949 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7950 t
= make_tree (type
, ovf_rtx
);
7952 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7953 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7954 TREE_SIDE_EFFECTS (t
) = 1;
7955 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7957 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
7959 /* Find the register save area.
7960 Prologue of the function save it right above stack frame. */
7961 type
= TREE_TYPE (sav
);
7962 t
= make_tree (type
, frame_pointer_rtx
);
7963 if (!ix86_varargs_gpr_size
)
7964 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7965 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7966 TREE_SIDE_EFFECTS (t
) = 1;
7967 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7971 /* Implement va_arg. */
7974 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7977 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7978 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7979 tree gpr
, fpr
, ovf
, sav
, t
;
7981 tree lab_false
, lab_over
= NULL_TREE
;
7986 enum machine_mode nat_mode
;
7987 unsigned int arg_boundary
;
7989 /* Only 64bit target needs something special. */
7990 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7991 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7993 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7994 f_fpr
= DECL_CHAIN (f_gpr
);
7995 f_ovf
= DECL_CHAIN (f_fpr
);
7996 f_sav
= DECL_CHAIN (f_ovf
);
7998 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7999 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8000 valist
= build_va_arg_indirect_ref (valist
);
8001 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8002 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8003 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8005 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8007 type
= build_pointer_type (type
);
8008 size
= int_size_in_bytes (type
);
8009 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8011 nat_mode
= type_natural_mode (type
, NULL
);
8020 /* Unnamed 256bit vector mode parameters are passed on stack. */
8021 if (!TARGET_64BIT_MS_ABI
)
8028 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8029 type
, 0, X86_64_REGPARM_MAX
,
8030 X86_64_SSE_REGPARM_MAX
, intreg
,
8035 /* Pull the value out of the saved registers. */
8037 addr
= create_tmp_var (ptr_type_node
, "addr");
8041 int needed_intregs
, needed_sseregs
;
8043 tree int_addr
, sse_addr
;
8045 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8046 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8048 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8050 need_temp
= (!REG_P (container
)
8051 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8052 || TYPE_ALIGN (type
) > 128));
8054 /* In case we are passing structure, verify that it is consecutive block
8055 on the register save area. If not we need to do moves. */
8056 if (!need_temp
&& !REG_P (container
))
8058 /* Verify that all registers are strictly consecutive */
8059 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8063 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8065 rtx slot
= XVECEXP (container
, 0, i
);
8066 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8067 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8075 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8077 rtx slot
= XVECEXP (container
, 0, i
);
8078 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8079 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8091 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8092 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8095 /* First ensure that we fit completely in registers. */
8098 t
= build_int_cst (TREE_TYPE (gpr
),
8099 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8100 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8101 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8102 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8103 gimplify_and_add (t
, pre_p
);
8107 t
= build_int_cst (TREE_TYPE (fpr
),
8108 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8109 + X86_64_REGPARM_MAX
* 8);
8110 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8111 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8112 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8113 gimplify_and_add (t
, pre_p
);
8116 /* Compute index to start of area used for integer regs. */
8119 /* int_addr = gpr + sav; */
8120 t
= fold_build_pointer_plus (sav
, gpr
);
8121 gimplify_assign (int_addr
, t
, pre_p
);
8125 /* sse_addr = fpr + sav; */
8126 t
= fold_build_pointer_plus (sav
, fpr
);
8127 gimplify_assign (sse_addr
, t
, pre_p
);
8131 int i
, prev_size
= 0;
8132 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8135 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8136 gimplify_assign (addr
, t
, pre_p
);
8138 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8140 rtx slot
= XVECEXP (container
, 0, i
);
8141 rtx reg
= XEXP (slot
, 0);
8142 enum machine_mode mode
= GET_MODE (reg
);
8148 tree dest_addr
, dest
;
8149 int cur_size
= GET_MODE_SIZE (mode
);
8151 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8152 prev_size
= INTVAL (XEXP (slot
, 1));
8153 if (prev_size
+ cur_size
> size
)
8155 cur_size
= size
- prev_size
;
8156 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8157 if (mode
== BLKmode
)
8160 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8161 if (mode
== GET_MODE (reg
))
8162 addr_type
= build_pointer_type (piece_type
);
8164 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8166 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8169 if (SSE_REGNO_P (REGNO (reg
)))
8171 src_addr
= sse_addr
;
8172 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8176 src_addr
= int_addr
;
8177 src_offset
= REGNO (reg
) * 8;
8179 src_addr
= fold_convert (addr_type
, src_addr
);
8180 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8182 dest_addr
= fold_convert (daddr_type
, addr
);
8183 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8184 if (cur_size
== GET_MODE_SIZE (mode
))
8186 src
= build_va_arg_indirect_ref (src_addr
);
8187 dest
= build_va_arg_indirect_ref (dest_addr
);
8189 gimplify_assign (dest
, src
, pre_p
);
8194 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8195 3, dest_addr
, src_addr
,
8196 size_int (cur_size
));
8197 gimplify_and_add (copy
, pre_p
);
8199 prev_size
+= cur_size
;
8205 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8206 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8207 gimplify_assign (gpr
, t
, pre_p
);
8212 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8213 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8214 gimplify_assign (fpr
, t
, pre_p
);
8217 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8219 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8222 /* ... otherwise out of the overflow area. */
8224 /* When we align parameter on stack for caller, if the parameter
8225 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8226 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8227 here with caller. */
8228 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8229 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8230 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8232 /* Care for on-stack alignment if needed. */
8233 if (arg_boundary
<= 64 || size
== 0)
8237 HOST_WIDE_INT align
= arg_boundary
/ 8;
8238 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8239 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8240 build_int_cst (TREE_TYPE (t
), -align
));
8243 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8244 gimplify_assign (addr
, t
, pre_p
);
8246 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8247 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8250 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8252 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8253 addr
= fold_convert (ptrtype
, addr
);
8256 addr
= build_va_arg_indirect_ref (addr
);
8257 return build_va_arg_indirect_ref (addr
);
8260 /* Return true if OPNUM's MEM should be matched
8261 in movabs* patterns. */
8264 ix86_check_movabs (rtx insn
, int opnum
)
8268 set
= PATTERN (insn
);
8269 if (GET_CODE (set
) == PARALLEL
)
8270 set
= XVECEXP (set
, 0, 0);
8271 gcc_assert (GET_CODE (set
) == SET
);
8272 mem
= XEXP (set
, opnum
);
8273 while (GET_CODE (mem
) == SUBREG
)
8274 mem
= SUBREG_REG (mem
);
8275 gcc_assert (MEM_P (mem
));
8276 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8279 /* Initialize the table of extra 80387 mathematical constants. */
8282 init_ext_80387_constants (void)
8284 static const char * cst
[5] =
8286 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8287 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8288 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8289 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8290 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8294 for (i
= 0; i
< 5; i
++)
8296 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8297 /* Ensure each constant is rounded to XFmode precision. */
8298 real_convert (&ext_80387_constants_table
[i
],
8299 XFmode
, &ext_80387_constants_table
[i
]);
8302 ext_80387_constants_init
= 1;
8305 /* Return non-zero if the constant is something that
8306 can be loaded with a special instruction. */
8309 standard_80387_constant_p (rtx x
)
8311 enum machine_mode mode
= GET_MODE (x
);
8315 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8318 if (x
== CONST0_RTX (mode
))
8320 if (x
== CONST1_RTX (mode
))
8323 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8325 /* For XFmode constants, try to find a special 80387 instruction when
8326 optimizing for size or on those CPUs that benefit from them. */
8328 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8332 if (! ext_80387_constants_init
)
8333 init_ext_80387_constants ();
8335 for (i
= 0; i
< 5; i
++)
8336 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8340 /* Load of the constant -0.0 or -1.0 will be split as
8341 fldz;fchs or fld1;fchs sequence. */
8342 if (real_isnegzero (&r
))
8344 if (real_identical (&r
, &dconstm1
))
8350 /* Return the opcode of the special instruction to be used to load
8354 standard_80387_constant_opcode (rtx x
)
8356 switch (standard_80387_constant_p (x
))
8380 /* Return the CONST_DOUBLE representing the 80387 constant that is
8381 loaded by the specified special instruction. The argument IDX
8382 matches the return value from standard_80387_constant_p. */
8385 standard_80387_constant_rtx (int idx
)
8389 if (! ext_80387_constants_init
)
8390 init_ext_80387_constants ();
8406 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8410 /* Return 1 if X is all 0s and 2 if x is all 1s
8411 in supported SSE/AVX vector mode. */
8414 standard_sse_constant_p (rtx x
)
8416 enum machine_mode mode
= GET_MODE (x
);
8418 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8420 if (vector_all_ones_operand (x
, mode
))
8442 /* Return the opcode of the special instruction to be used to load
8446 standard_sse_constant_opcode (rtx insn
, rtx x
)
8448 switch (standard_sse_constant_p (x
))
8451 switch (get_attr_mode (insn
))
8454 return "%vpxor\t%0, %d0";
8456 return "%vxorpd\t%0, %d0";
8458 return "%vxorps\t%0, %d0";
8461 return "vpxor\t%x0, %x0, %x0";
8463 return "vxorpd\t%x0, %x0, %x0";
8465 return "vxorps\t%x0, %x0, %x0";
8473 return "vpcmpeqd\t%0, %0, %0";
8475 return "pcmpeqd\t%0, %0";
8483 /* Returns true if OP contains a symbol reference */
8486 symbolic_reference_mentioned_p (rtx op
)
8491 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8494 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8495 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8501 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8502 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8506 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8513 /* Return true if it is appropriate to emit `ret' instructions in the
8514 body of a function. Do this only if the epilogue is simple, needing a
8515 couple of insns. Prior to reloading, we can't tell how many registers
8516 must be saved, so return false then. Return false if there is no frame
8517 marker to de-allocate. */
8520 ix86_can_use_return_insn_p (void)
8522 struct ix86_frame frame
;
8524 if (! reload_completed
|| frame_pointer_needed
)
8527 /* Don't allow more than 32k pop, since that's all we can do
8528 with one instruction. */
8529 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8532 ix86_compute_frame_layout (&frame
);
8533 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8534 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8537 /* Value should be nonzero if functions must have frame pointers.
8538 Zero means the frame pointer need not be set up (and parms may
8539 be accessed via the stack pointer) in functions that seem suitable. */
8542 ix86_frame_pointer_required (void)
8544 /* If we accessed previous frames, then the generated code expects
8545 to be able to access the saved ebp value in our frame. */
8546 if (cfun
->machine
->accesses_prev_frame
)
8549 /* Several x86 os'es need a frame pointer for other reasons,
8550 usually pertaining to setjmp. */
8551 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8554 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8555 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8558 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8559 allocation is 4GB. */
8560 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8563 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8564 turns off the frame pointer by default. Turn it back on now if
8565 we've not got a leaf function. */
8566 if (TARGET_OMIT_LEAF_FRAME_POINTER
8568 || ix86_current_function_calls_tls_descriptor
))
8571 if (crtl
->profile
&& !flag_fentry
)
8577 /* Record that the current function accesses previous call frames. */
8580 ix86_setup_frame_addresses (void)
8582 cfun
->machine
->accesses_prev_frame
= 1;
8585 #ifndef USE_HIDDEN_LINKONCE
8586 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8587 # define USE_HIDDEN_LINKONCE 1
8589 # define USE_HIDDEN_LINKONCE 0
8593 static int pic_labels_used
;
8595 /* Fills in the label name that should be used for a pc thunk for
8596 the given register. */
8599 get_pc_thunk_name (char name
[32], unsigned int regno
)
8601 gcc_assert (!TARGET_64BIT
);
8603 if (USE_HIDDEN_LINKONCE
)
8604 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8606 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8610 /* This function generates code for -fpic that loads %ebx with
8611 the return address of the caller and then returns. */
8614 ix86_code_end (void)
8619 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8624 if (!(pic_labels_used
& (1 << regno
)))
8627 get_pc_thunk_name (name
, regno
);
8629 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8630 get_identifier (name
),
8631 build_function_type_list (void_type_node
, NULL_TREE
));
8632 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8633 NULL_TREE
, void_type_node
);
8634 TREE_PUBLIC (decl
) = 1;
8635 TREE_STATIC (decl
) = 1;
8636 DECL_IGNORED_P (decl
) = 1;
8641 switch_to_section (darwin_sections
[text_coal_section
]);
8642 fputs ("\t.weak_definition\t", asm_out_file
);
8643 assemble_name (asm_out_file
, name
);
8644 fputs ("\n\t.private_extern\t", asm_out_file
);
8645 assemble_name (asm_out_file
, name
);
8646 putc ('\n', asm_out_file
);
8647 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8648 DECL_WEAK (decl
) = 1;
8652 if (USE_HIDDEN_LINKONCE
)
8654 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8656 targetm
.asm_out
.unique_section (decl
, 0);
8657 switch_to_section (get_named_section (decl
, NULL
, 0));
8659 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8660 fputs ("\t.hidden\t", asm_out_file
);
8661 assemble_name (asm_out_file
, name
);
8662 putc ('\n', asm_out_file
);
8663 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8667 switch_to_section (text_section
);
8668 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8671 DECL_INITIAL (decl
) = make_node (BLOCK
);
8672 current_function_decl
= decl
;
8673 init_function_start (decl
);
8674 first_function_block_is_cold
= false;
8675 /* Make sure unwind info is emitted for the thunk if needed. */
8676 final_start_function (emit_barrier (), asm_out_file
, 1);
8678 /* Pad stack IP move with 4 instructions (two NOPs count
8679 as one instruction). */
8680 if (TARGET_PAD_SHORT_FUNCTION
)
8685 fputs ("\tnop\n", asm_out_file
);
8688 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8689 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8690 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8691 fputs ("\tret\n", asm_out_file
);
8692 final_end_function ();
8693 init_insn_lengths ();
8694 free_after_compilation (cfun
);
8696 current_function_decl
= NULL
;
8699 if (flag_split_stack
)
8700 file_end_indicate_split_stack ();
8703 /* Emit code for the SET_GOT patterns. */
8706 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8712 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8714 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8715 xops
[2] = gen_rtx_MEM (Pmode
,
8716 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8717 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8719 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8720 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8721 an unadorned address. */
8722 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8723 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8724 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8728 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8732 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8734 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8737 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8738 is what will be referenced by the Mach-O PIC subsystem. */
8740 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8743 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8744 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8749 get_pc_thunk_name (name
, REGNO (dest
));
8750 pic_labels_used
|= 1 << REGNO (dest
);
8752 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8753 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8754 output_asm_insn ("call\t%X2", xops
);
8755 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8756 is what will be referenced by the Mach-O PIC subsystem. */
8759 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8761 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8762 CODE_LABEL_NUMBER (label
));
8767 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8772 /* Generate an "push" pattern for input ARG. */
8777 struct machine_function
*m
= cfun
->machine
;
8779 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8780 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8781 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
8783 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8784 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8786 return gen_rtx_SET (VOIDmode
,
8787 gen_rtx_MEM (word_mode
,
8788 gen_rtx_PRE_DEC (Pmode
,
8789 stack_pointer_rtx
)),
8793 /* Generate an "pop" pattern for input ARG. */
8798 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
8799 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
8801 return gen_rtx_SET (VOIDmode
,
8803 gen_rtx_MEM (word_mode
,
8804 gen_rtx_POST_INC (Pmode
,
8805 stack_pointer_rtx
)));
8808 /* Return >= 0 if there is an unused call-clobbered register available
8809 for the entire function. */
8812 ix86_select_alt_pic_regnum (void)
8816 && !ix86_current_function_calls_tls_descriptor
)
8819 /* Can't use the same register for both PIC and DRAP. */
8821 drap
= REGNO (crtl
->drap_reg
);
8824 for (i
= 2; i
>= 0; --i
)
8825 if (i
!= drap
&& !df_regs_ever_live_p (i
))
8829 return INVALID_REGNUM
;
8832 /* Return TRUE if we need to save REGNO. */
8835 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
8837 if (pic_offset_table_rtx
8838 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
8839 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
8841 || crtl
->calls_eh_return
8842 || crtl
->uses_const_pool
))
8843 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
8845 if (crtl
->calls_eh_return
&& maybe_eh_return
)
8850 unsigned test
= EH_RETURN_DATA_REGNO (i
);
8851 if (test
== INVALID_REGNUM
)
8858 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8861 return (df_regs_ever_live_p (regno
)
8862 && !call_used_regs
[regno
]
8863 && !fixed_regs
[regno
]
8864 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
8867 /* Return number of saved general prupose registers. */
8870 ix86_nsaved_regs (void)
8875 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8876 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8881 /* Return number of saved SSE registrers. */
8884 ix86_nsaved_sseregs (void)
8889 if (!TARGET_64BIT_MS_ABI
)
8891 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8892 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
8897 /* Given FROM and TO register numbers, say whether this elimination is
8898 allowed. If stack alignment is needed, we can only replace argument
8899 pointer with hard frame pointer, or replace frame pointer with stack
8900 pointer. Otherwise, frame pointer elimination is automatically
8901 handled and all other eliminations are valid. */
8904 ix86_can_eliminate (const int from
, const int to
)
8906 if (stack_realign_fp
)
8907 return ((from
== ARG_POINTER_REGNUM
8908 && to
== HARD_FRAME_POINTER_REGNUM
)
8909 || (from
== FRAME_POINTER_REGNUM
8910 && to
== STACK_POINTER_REGNUM
));
8912 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
8915 /* Return the offset between two registers, one to be eliminated, and the other
8916 its replacement, at the start of a routine. */
8919 ix86_initial_elimination_offset (int from
, int to
)
8921 struct ix86_frame frame
;
8922 ix86_compute_frame_layout (&frame
);
8924 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
8925 return frame
.hard_frame_pointer_offset
;
8926 else if (from
== FRAME_POINTER_REGNUM
8927 && to
== HARD_FRAME_POINTER_REGNUM
)
8928 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
8931 gcc_assert (to
== STACK_POINTER_REGNUM
);
8933 if (from
== ARG_POINTER_REGNUM
)
8934 return frame
.stack_pointer_offset
;
8936 gcc_assert (from
== FRAME_POINTER_REGNUM
);
8937 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
8941 /* In a dynamically-aligned function, we can't know the offset from
8942 stack pointer to frame pointer, so we must ensure that setjmp
8943 eliminates fp against the hard fp (%ebp) rather than trying to
8944 index from %esp up to the top of the frame across a gap that is
8945 of unknown (at compile-time) size. */
8947 ix86_builtin_setjmp_frame_value (void)
8949 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
8952 /* When using -fsplit-stack, the allocation routines set a field in
8953 the TCB to the bottom of the stack plus this much space, measured
8956 #define SPLIT_STACK_AVAILABLE 256
8958 /* Fill structure ix86_frame about frame of currently computed function. */
8961 ix86_compute_frame_layout (struct ix86_frame
*frame
)
8963 unsigned HOST_WIDE_INT stack_alignment_needed
;
8964 HOST_WIDE_INT offset
;
8965 unsigned HOST_WIDE_INT preferred_alignment
;
8966 HOST_WIDE_INT size
= get_frame_size ();
8967 HOST_WIDE_INT to_allocate
;
8969 frame
->nregs
= ix86_nsaved_regs ();
8970 frame
->nsseregs
= ix86_nsaved_sseregs ();
8972 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8973 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
8975 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
8976 function prologues and leaf. */
8977 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
8978 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
8979 || ix86_current_function_calls_tls_descriptor
))
8981 preferred_alignment
= 16;
8982 stack_alignment_needed
= 16;
8983 crtl
->preferred_stack_boundary
= 128;
8984 crtl
->stack_alignment_needed
= 128;
8987 gcc_assert (!size
|| stack_alignment_needed
);
8988 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
8989 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
8991 /* For SEH we have to limit the amount of code movement into the prologue.
8992 At present we do this via a BLOCKAGE, at which point there's very little
8993 scheduling that can be done, which means that there's very little point
8994 in doing anything except PUSHs. */
8996 cfun
->machine
->use_fast_prologue_epilogue
= false;
8998 /* During reload iteration the amount of registers saved can change.
8999 Recompute the value as needed. Do not recompute when amount of registers
9000 didn't change as reload does multiple calls to the function and does not
9001 expect the decision to change within single iteration. */
9002 else if (!optimize_function_for_size_p (cfun
)
9003 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9005 int count
= frame
->nregs
;
9006 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9008 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9010 /* The fast prologue uses move instead of push to save registers. This
9011 is significantly longer, but also executes faster as modern hardware
9012 can execute the moves in parallel, but can't do that for push/pop.
9014 Be careful about choosing what prologue to emit: When function takes
9015 many instructions to execute we may use slow version as well as in
9016 case function is known to be outside hot spot (this is known with
9017 feedback only). Weight the size of function by number of registers
9018 to save as it is cheap to use one or two push instructions but very
9019 slow to use many of them. */
9021 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9022 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9023 || (flag_branch_probabilities
9024 && node
->frequency
< NODE_FREQUENCY_HOT
))
9025 cfun
->machine
->use_fast_prologue_epilogue
= false;
9027 cfun
->machine
->use_fast_prologue_epilogue
9028 = !expensive_function_p (count
);
9031 frame
->save_regs_using_mov
9032 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9033 /* If static stack checking is enabled and done with probes,
9034 the registers need to be saved before allocating the frame. */
9035 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9037 /* Skip return address. */
9038 offset
= UNITS_PER_WORD
;
9040 /* Skip pushed static chain. */
9041 if (ix86_static_chain_on_stack
)
9042 offset
+= UNITS_PER_WORD
;
9044 /* Skip saved base pointer. */
9045 if (frame_pointer_needed
)
9046 offset
+= UNITS_PER_WORD
;
9047 frame
->hfp_save_offset
= offset
;
9049 /* The traditional frame pointer location is at the top of the frame. */
9050 frame
->hard_frame_pointer_offset
= offset
;
9052 /* Register save area */
9053 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9054 frame
->reg_save_offset
= offset
;
9056 /* On SEH target, registers are pushed just before the frame pointer
9059 frame
->hard_frame_pointer_offset
= offset
;
9061 /* Align and set SSE register save area. */
9062 if (frame
->nsseregs
)
9064 /* The only ABI that has saved SSE registers (Win64) also has a
9065 16-byte aligned default stack, and thus we don't need to be
9066 within the re-aligned local stack frame to save them. */
9067 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9068 offset
= (offset
+ 16 - 1) & -16;
9069 offset
+= frame
->nsseregs
* 16;
9071 frame
->sse_reg_save_offset
= offset
;
9073 /* The re-aligned stack starts here. Values before this point are not
9074 directly comparable with values below this point. In order to make
9075 sure that no value happens to be the same before and after, force
9076 the alignment computation below to add a non-zero value. */
9077 if (stack_realign_fp
)
9078 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9081 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9082 offset
+= frame
->va_arg_size
;
9084 /* Align start of frame for local function. */
9085 if (stack_realign_fp
9086 || offset
!= frame
->sse_reg_save_offset
9089 || cfun
->calls_alloca
9090 || ix86_current_function_calls_tls_descriptor
)
9091 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9093 /* Frame pointer points here. */
9094 frame
->frame_pointer_offset
= offset
;
9098 /* Add outgoing arguments area. Can be skipped if we eliminated
9099 all the function calls as dead code.
9100 Skipping is however impossible when function calls alloca. Alloca
9101 expander assumes that last crtl->outgoing_args_size
9102 of stack frame are unused. */
9103 if (ACCUMULATE_OUTGOING_ARGS
9104 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9105 || ix86_current_function_calls_tls_descriptor
))
9107 offset
+= crtl
->outgoing_args_size
;
9108 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9111 frame
->outgoing_arguments_size
= 0;
9113 /* Align stack boundary. Only needed if we're calling another function
9115 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9116 || ix86_current_function_calls_tls_descriptor
)
9117 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9119 /* We've reached end of stack frame. */
9120 frame
->stack_pointer_offset
= offset
;
9122 /* Size prologue needs to allocate. */
9123 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9125 if ((!to_allocate
&& frame
->nregs
<= 1)
9126 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9127 frame
->save_regs_using_mov
= false;
9129 if (ix86_using_red_zone ()
9130 && crtl
->sp_is_unchanging
9132 && !ix86_current_function_calls_tls_descriptor
)
9134 frame
->red_zone_size
= to_allocate
;
9135 if (frame
->save_regs_using_mov
)
9136 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9137 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9138 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9141 frame
->red_zone_size
= 0;
9142 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9144 /* The SEH frame pointer location is near the bottom of the frame.
9145 This is enforced by the fact that the difference between the
9146 stack pointer and the frame pointer is limited to 240 bytes in
9147 the unwind data structure. */
9152 /* If we can leave the frame pointer where it is, do so. Also, returns
9153 the establisher frame for __builtin_frame_address (0). */
9154 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9155 if (diff
<= SEH_MAX_FRAME_SIZE
9156 && (diff
> 240 || (diff
& 15) != 0)
9157 && !crtl
->accesses_prior_frames
)
9159 /* Ideally we'd determine what portion of the local stack frame
9160 (within the constraint of the lowest 240) is most heavily used.
9161 But without that complication, simply bias the frame pointer
9162 by 128 bytes so as to maximize the amount of the local stack
9163 frame that is addressable with 8-bit offsets. */
9164 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9169 /* This is semi-inlined memory_address_length, but simplified
9170 since we know that we're always dealing with reg+offset, and
9171 to avoid having to create and discard all that rtl. */
9174 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9180 /* EBP and R13 cannot be encoded without an offset. */
9181 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9183 else if (IN_RANGE (offset
, -128, 127))
9186 /* ESP and R12 must be encoded with a SIB byte. */
9187 if (regno
== SP_REG
|| regno
== R12_REG
)
9193 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9194 The valid base registers are taken from CFUN->MACHINE->FS. */
9197 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9199 const struct machine_function
*m
= cfun
->machine
;
9200 rtx base_reg
= NULL
;
9201 HOST_WIDE_INT base_offset
= 0;
9203 if (m
->use_fast_prologue_epilogue
)
9205 /* Choose the base register most likely to allow the most scheduling
9206 opportunities. Generally FP is valid throughout the function,
9207 while DRAP must be reloaded within the epilogue. But choose either
9208 over the SP due to increased encoding size. */
9212 base_reg
= hard_frame_pointer_rtx
;
9213 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9215 else if (m
->fs
.drap_valid
)
9217 base_reg
= crtl
->drap_reg
;
9218 base_offset
= 0 - cfa_offset
;
9220 else if (m
->fs
.sp_valid
)
9222 base_reg
= stack_pointer_rtx
;
9223 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9228 HOST_WIDE_INT toffset
;
9231 /* Choose the base register with the smallest address encoding.
9232 With a tie, choose FP > DRAP > SP. */
9235 base_reg
= stack_pointer_rtx
;
9236 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9237 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9239 if (m
->fs
.drap_valid
)
9241 toffset
= 0 - cfa_offset
;
9242 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9245 base_reg
= crtl
->drap_reg
;
9246 base_offset
= toffset
;
9252 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9253 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9256 base_reg
= hard_frame_pointer_rtx
;
9257 base_offset
= toffset
;
9262 gcc_assert (base_reg
!= NULL
);
9264 return plus_constant (Pmode
, base_reg
, base_offset
);
9267 /* Emit code to save registers in the prologue. */
9270 ix86_emit_save_regs (void)
9275 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9276 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9278 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9279 RTX_FRAME_RELATED_P (insn
) = 1;
9283 /* Emit a single register save at CFA - CFA_OFFSET. */
9286 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9287 HOST_WIDE_INT cfa_offset
)
9289 struct machine_function
*m
= cfun
->machine
;
9290 rtx reg
= gen_rtx_REG (mode
, regno
);
9291 rtx mem
, addr
, base
, insn
;
9293 addr
= choose_baseaddr (cfa_offset
);
9294 mem
= gen_frame_mem (mode
, addr
);
9296 /* For SSE saves, we need to indicate the 128-bit alignment. */
9297 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9299 insn
= emit_move_insn (mem
, reg
);
9300 RTX_FRAME_RELATED_P (insn
) = 1;
9303 if (GET_CODE (base
) == PLUS
)
9304 base
= XEXP (base
, 0);
9305 gcc_checking_assert (REG_P (base
));
9307 /* When saving registers into a re-aligned local stack frame, avoid
9308 any tricky guessing by dwarf2out. */
9309 if (m
->fs
.realigned
)
9311 gcc_checking_assert (stack_realign_drap
);
9313 if (regno
== REGNO (crtl
->drap_reg
))
9315 /* A bit of a hack. We force the DRAP register to be saved in
9316 the re-aligned stack frame, which provides us with a copy
9317 of the CFA that will last past the prologue. Install it. */
9318 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9319 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9320 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9321 mem
= gen_rtx_MEM (mode
, addr
);
9322 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9326 /* The frame pointer is a stable reference within the
9327 aligned frame. Use it. */
9328 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9329 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9330 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9331 mem
= gen_rtx_MEM (mode
, addr
);
9332 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9333 gen_rtx_SET (VOIDmode
, mem
, reg
));
9337 /* The memory may not be relative to the current CFA register,
9338 which means that we may need to generate a new pattern for
9339 use by the unwind info. */
9340 else if (base
!= m
->fs
.cfa_reg
)
9342 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9343 m
->fs
.cfa_offset
- cfa_offset
);
9344 mem
= gen_rtx_MEM (mode
, addr
);
9345 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9349 /* Emit code to save registers using MOV insns.
9350 First register is stored at CFA - CFA_OFFSET. */
9352 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9356 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9357 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9359 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9360 cfa_offset
-= UNITS_PER_WORD
;
9364 /* Emit code to save SSE registers using MOV insns.
9365 First register is stored at CFA - CFA_OFFSET. */
9367 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9371 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9372 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9374 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9379 static GTY(()) rtx queued_cfa_restores
;
9381 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9382 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9383 Don't add the note if the previously saved value will be left untouched
9384 within stack red-zone till return, as unwinders can find the same value
9385 in the register and on the stack. */
9388 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9390 if (!crtl
->shrink_wrapped
9391 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9396 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9397 RTX_FRAME_RELATED_P (insn
) = 1;
9401 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9404 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9407 ix86_add_queued_cfa_restore_notes (rtx insn
)
9410 if (!queued_cfa_restores
)
9412 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9414 XEXP (last
, 1) = REG_NOTES (insn
);
9415 REG_NOTES (insn
) = queued_cfa_restores
;
9416 queued_cfa_restores
= NULL_RTX
;
9417 RTX_FRAME_RELATED_P (insn
) = 1;
9420 /* Expand prologue or epilogue stack adjustment.
9421 The pattern exist to put a dependency on all ebp-based memory accesses.
9422 STYLE should be negative if instructions should be marked as frame related,
9423 zero if %r11 register is live and cannot be freely used and positive
9427 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9428 int style
, bool set_cfa
)
9430 struct machine_function
*m
= cfun
->machine
;
9432 bool add_frame_related_expr
= false;
9434 if (Pmode
== SImode
)
9435 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9436 else if (x86_64_immediate_operand (offset
, DImode
))
9437 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9441 /* r11 is used by indirect sibcall return as well, set before the
9442 epilogue and used after the epilogue. */
9444 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9447 gcc_assert (src
!= hard_frame_pointer_rtx
9448 && dest
!= hard_frame_pointer_rtx
);
9449 tmp
= hard_frame_pointer_rtx
;
9451 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9453 add_frame_related_expr
= true;
9455 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9458 insn
= emit_insn (insn
);
9460 ix86_add_queued_cfa_restore_notes (insn
);
9466 gcc_assert (m
->fs
.cfa_reg
== src
);
9467 m
->fs
.cfa_offset
+= INTVAL (offset
);
9468 m
->fs
.cfa_reg
= dest
;
9470 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9471 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9472 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9473 RTX_FRAME_RELATED_P (insn
) = 1;
9477 RTX_FRAME_RELATED_P (insn
) = 1;
9478 if (add_frame_related_expr
)
9480 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9481 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9482 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9486 if (dest
== stack_pointer_rtx
)
9488 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9489 bool valid
= m
->fs
.sp_valid
;
9491 if (src
== hard_frame_pointer_rtx
)
9493 valid
= m
->fs
.fp_valid
;
9494 ooffset
= m
->fs
.fp_offset
;
9496 else if (src
== crtl
->drap_reg
)
9498 valid
= m
->fs
.drap_valid
;
9503 /* Else there are two possibilities: SP itself, which we set
9504 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9505 taken care of this by hand along the eh_return path. */
9506 gcc_checking_assert (src
== stack_pointer_rtx
9507 || offset
== const0_rtx
);
9510 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9511 m
->fs
.sp_valid
= valid
;
9515 /* Find an available register to be used as dynamic realign argument
9516 pointer regsiter. Such a register will be written in prologue and
9517 used in begin of body, so it must not be
9518 1. parameter passing register.
9520 We reuse static-chain register if it is available. Otherwise, we
9521 use DI for i386 and R13 for x86-64. We chose R13 since it has
9524 Return: the regno of chosen register. */
9527 find_drap_reg (void)
9529 tree decl
= cfun
->decl
;
9533 /* Use R13 for nested function or function need static chain.
9534 Since function with tail call may use any caller-saved
9535 registers in epilogue, DRAP must not use caller-saved
9536 register in such case. */
9537 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9544 /* Use DI for nested function or function need static chain.
9545 Since function with tail call may use any caller-saved
9546 registers in epilogue, DRAP must not use caller-saved
9547 register in such case. */
9548 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9551 /* Reuse static chain register if it isn't used for parameter
9553 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9555 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9556 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9563 /* Return minimum incoming stack alignment. */
9566 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9568 unsigned int incoming_stack_boundary
;
9570 /* Prefer the one specified at command line. */
9571 if (ix86_user_incoming_stack_boundary
)
9572 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9573 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9574 if -mstackrealign is used, it isn't used for sibcall check and
9575 estimated stack alignment is 128bit. */
9578 && ix86_force_align_arg_pointer
9579 && crtl
->stack_alignment_estimated
== 128)
9580 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9582 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9584 /* Incoming stack alignment can be changed on individual functions
9585 via force_align_arg_pointer attribute. We use the smallest
9586 incoming stack boundary. */
9587 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9588 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9589 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9590 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9592 /* The incoming stack frame has to be aligned at least at
9593 parm_stack_boundary. */
9594 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9595 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9597 /* Stack at entrance of main is aligned by runtime. We use the
9598 smallest incoming stack boundary. */
9599 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9600 && DECL_NAME (current_function_decl
)
9601 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9602 && DECL_FILE_SCOPE_P (current_function_decl
))
9603 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9605 return incoming_stack_boundary
;
9608 /* Update incoming stack boundary and estimated stack alignment. */
9611 ix86_update_stack_boundary (void)
9613 ix86_incoming_stack_boundary
9614 = ix86_minimum_incoming_stack_boundary (false);
9616 /* x86_64 vararg needs 16byte stack alignment for register save
9620 && crtl
->stack_alignment_estimated
< 128)
9621 crtl
->stack_alignment_estimated
= 128;
9624 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9625 needed or an rtx for DRAP otherwise. */
9628 ix86_get_drap_rtx (void)
9630 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9631 crtl
->need_drap
= true;
9633 if (stack_realign_drap
)
9635 /* Assign DRAP to vDRAP and returns vDRAP */
9636 unsigned int regno
= find_drap_reg ();
9641 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9642 crtl
->drap_reg
= arg_ptr
;
9645 drap_vreg
= copy_to_reg (arg_ptr
);
9649 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9652 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9653 RTX_FRAME_RELATED_P (insn
) = 1;
9661 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9664 ix86_internal_arg_pointer (void)
9666 return virtual_incoming_args_rtx
;
9669 struct scratch_reg
{
9674 /* Return a short-lived scratch register for use on function entry.
9675 In 32-bit mode, it is valid only after the registers are saved
9676 in the prologue. This register must be released by means of
9677 release_scratch_register_on_entry once it is dead. */
9680 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9688 /* We always use R11 in 64-bit mode. */
9693 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9695 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9697 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9698 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9699 int regparm
= ix86_function_regparm (fntype
, decl
);
9701 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9703 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9704 for the static chain register. */
9705 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9706 && drap_regno
!= AX_REG
)
9708 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9709 for the static chain register. */
9710 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
9712 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
9714 /* ecx is the static chain register. */
9715 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
9717 && drap_regno
!= CX_REG
)
9719 else if (ix86_save_reg (BX_REG
, true))
9721 /* esi is the static chain register. */
9722 else if (!(regparm
== 3 && static_chain_p
)
9723 && ix86_save_reg (SI_REG
, true))
9725 else if (ix86_save_reg (DI_REG
, true))
9729 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9734 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9737 rtx insn
= emit_insn (gen_push (sr
->reg
));
9738 RTX_FRAME_RELATED_P (insn
) = 1;
9742 /* Release a scratch register obtained from the preceding function. */
9745 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9749 struct machine_function
*m
= cfun
->machine
;
9750 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9752 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9753 RTX_FRAME_RELATED_P (insn
) = 1;
9754 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9755 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9756 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9757 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9761 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9763 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9766 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9768 /* We skip the probe for the first interval + a small dope of 4 words and
9769 probe that many bytes past the specified size to maintain a protection
9770 area at the botton of the stack. */
9771 const int dope
= 4 * UNITS_PER_WORD
;
9772 rtx size_rtx
= GEN_INT (size
), last
;
9774 /* See if we have a constant small number of probes to generate. If so,
9775 that's the easy case. The run-time loop is made up of 11 insns in the
9776 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9777 for n # of intervals. */
9778 if (size
<= 5 * PROBE_INTERVAL
)
9780 HOST_WIDE_INT i
, adjust
;
9781 bool first_probe
= true;
9783 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9784 values of N from 1 until it exceeds SIZE. If only one probe is
9785 needed, this will not generate any code. Then adjust and probe
9786 to PROBE_INTERVAL + SIZE. */
9787 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9791 adjust
= 2 * PROBE_INTERVAL
+ dope
;
9792 first_probe
= false;
9795 adjust
= PROBE_INTERVAL
;
9797 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9798 plus_constant (Pmode
, stack_pointer_rtx
,
9800 emit_stack_probe (stack_pointer_rtx
);
9804 adjust
= size
+ PROBE_INTERVAL
+ dope
;
9806 adjust
= size
+ PROBE_INTERVAL
- i
;
9808 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9809 plus_constant (Pmode
, stack_pointer_rtx
,
9811 emit_stack_probe (stack_pointer_rtx
);
9813 /* Adjust back to account for the additional first interval. */
9814 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9815 plus_constant (Pmode
, stack_pointer_rtx
,
9816 PROBE_INTERVAL
+ dope
)));
9819 /* Otherwise, do the same as above, but in a loop. Note that we must be
9820 extra careful with variables wrapping around because we might be at
9821 the very top (or the very bottom) of the address space and we have
9822 to be able to handle this case properly; in particular, we use an
9823 equality test for the loop condition. */
9826 HOST_WIDE_INT rounded_size
;
9827 struct scratch_reg sr
;
9829 get_scratch_register_on_entry (&sr
);
9832 /* Step 1: round SIZE to the previous multiple of the interval. */
9834 rounded_size
= size
& -PROBE_INTERVAL
;
9837 /* Step 2: compute initial and final value of the loop counter. */
9839 /* SP = SP_0 + PROBE_INTERVAL. */
9840 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9841 plus_constant (Pmode
, stack_pointer_rtx
,
9842 - (PROBE_INTERVAL
+ dope
))));
9844 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9845 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
9846 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
9847 gen_rtx_PLUS (Pmode
, sr
.reg
,
9848 stack_pointer_rtx
)));
9853 while (SP != LAST_ADDR)
9855 SP = SP + PROBE_INTERVAL
9859 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9860 values of N from 1 until it is equal to ROUNDED_SIZE. */
9862 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
9865 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9866 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9868 if (size
!= rounded_size
)
9870 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9871 plus_constant (Pmode
, stack_pointer_rtx
,
9872 rounded_size
- size
)));
9873 emit_stack_probe (stack_pointer_rtx
);
9876 /* Adjust back to account for the additional first interval. */
9877 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9878 plus_constant (Pmode
, stack_pointer_rtx
,
9879 PROBE_INTERVAL
+ dope
)));
9881 release_scratch_register_on_entry (&sr
);
9884 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
9886 /* Even if the stack pointer isn't the CFA register, we need to correctly
9887 describe the adjustments made to it, in particular differentiate the
9888 frame-related ones from the frame-unrelated ones. */
9891 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
9892 XVECEXP (expr
, 0, 0)
9893 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9894 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
9895 XVECEXP (expr
, 0, 1)
9896 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9897 plus_constant (Pmode
, stack_pointer_rtx
,
9898 PROBE_INTERVAL
+ dope
+ size
));
9899 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
9900 RTX_FRAME_RELATED_P (last
) = 1;
9902 cfun
->machine
->fs
.sp_offset
+= size
;
9905 /* Make sure nothing is scheduled before we are done. */
9906 emit_insn (gen_blockage ());
9909 /* Adjust the stack pointer up to REG while probing it. */
9912 output_adjust_stack_and_probe (rtx reg
)
9914 static int labelno
= 0;
9915 char loop_lab
[32], end_lab
[32];
9918 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
9919 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
9921 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
9923 /* Jump to END_LAB if SP == LAST_ADDR. */
9924 xops
[0] = stack_pointer_rtx
;
9926 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
9927 fputs ("\tje\t", asm_out_file
);
9928 assemble_name_raw (asm_out_file
, end_lab
);
9929 fputc ('\n', asm_out_file
);
9931 /* SP = SP + PROBE_INTERVAL. */
9932 xops
[1] = GEN_INT (PROBE_INTERVAL
);
9933 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
9936 xops
[1] = const0_rtx
;
9937 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
9939 fprintf (asm_out_file
, "\tjmp\t");
9940 assemble_name_raw (asm_out_file
, loop_lab
);
9941 fputc ('\n', asm_out_file
);
9943 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
9948 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9949 inclusive. These are offsets from the current stack pointer. */
9952 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
9954 /* See if we have a constant small number of probes to generate. If so,
9955 that's the easy case. The run-time loop is made up of 7 insns in the
9956 generic case while the compile-time loop is made up of n insns for n #
9958 if (size
<= 7 * PROBE_INTERVAL
)
9962 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9963 it exceeds SIZE. If only one probe is needed, this will not
9964 generate any code. Then probe at FIRST + SIZE. */
9965 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
9966 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9969 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
9973 /* Otherwise, do the same as above, but in a loop. Note that we must be
9974 extra careful with variables wrapping around because we might be at
9975 the very top (or the very bottom) of the address space and we have
9976 to be able to handle this case properly; in particular, we use an
9977 equality test for the loop condition. */
9980 HOST_WIDE_INT rounded_size
, last
;
9981 struct scratch_reg sr
;
9983 get_scratch_register_on_entry (&sr
);
9986 /* Step 1: round SIZE to the previous multiple of the interval. */
9988 rounded_size
= size
& -PROBE_INTERVAL
;
9991 /* Step 2: compute initial and final value of the loop counter. */
9993 /* TEST_OFFSET = FIRST. */
9994 emit_move_insn (sr
.reg
, GEN_INT (-first
));
9996 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9997 last
= first
+ rounded_size
;
10000 /* Step 3: the loop
10002 while (TEST_ADDR != LAST_ADDR)
10004 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10008 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10009 until it is equal to ROUNDED_SIZE. */
10011 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10014 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10015 that SIZE is equal to ROUNDED_SIZE. */
10017 if (size
!= rounded_size
)
10018 emit_stack_probe (plus_constant (Pmode
,
10019 gen_rtx_PLUS (Pmode
,
10022 rounded_size
- size
));
10024 release_scratch_register_on_entry (&sr
);
10027 /* Make sure nothing is scheduled before we are done. */
10028 emit_insn (gen_blockage ());
10031 /* Probe a range of stack addresses from REG to END, inclusive. These are
10032 offsets from the current stack pointer. */
10035 output_probe_stack_range (rtx reg
, rtx end
)
10037 static int labelno
= 0;
10038 char loop_lab
[32], end_lab
[32];
10041 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10042 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10044 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10046 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10049 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10050 fputs ("\tje\t", asm_out_file
);
10051 assemble_name_raw (asm_out_file
, end_lab
);
10052 fputc ('\n', asm_out_file
);
10054 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10055 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10056 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10058 /* Probe at TEST_ADDR. */
10059 xops
[0] = stack_pointer_rtx
;
10061 xops
[2] = const0_rtx
;
10062 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10064 fprintf (asm_out_file
, "\tjmp\t");
10065 assemble_name_raw (asm_out_file
, loop_lab
);
10066 fputc ('\n', asm_out_file
);
10068 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10073 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10074 to be generated in correct form. */
10076 ix86_finalize_stack_realign_flags (void)
10078 /* Check if stack realign is really needed after reload, and
10079 stores result in cfun */
10080 unsigned int incoming_stack_boundary
10081 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10082 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10083 unsigned int stack_realign
= (incoming_stack_boundary
10085 ? crtl
->max_used_stack_slot_alignment
10086 : crtl
->stack_alignment_needed
));
10088 if (crtl
->stack_realign_finalized
)
10090 /* After stack_realign_needed is finalized, we can't no longer
10092 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10096 /* If the only reason for frame_pointer_needed is that we conservatively
10097 assumed stack realignment might be needed, but in the end nothing that
10098 needed the stack alignment had been spilled, clear frame_pointer_needed
10099 and say we don't need stack realignment. */
10101 && !crtl
->need_drap
10102 && frame_pointer_needed
10104 && flag_omit_frame_pointer
10105 && crtl
->sp_is_unchanging
10106 && !ix86_current_function_calls_tls_descriptor
10107 && !crtl
->accesses_prior_frames
10108 && !cfun
->calls_alloca
10109 && !crtl
->calls_eh_return
10110 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10111 && !ix86_frame_pointer_required ()
10112 && get_frame_size () == 0
10113 && ix86_nsaved_sseregs () == 0
10114 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
10116 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10119 CLEAR_HARD_REG_SET (prologue_used
);
10120 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10121 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10122 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10123 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10124 HARD_FRAME_POINTER_REGNUM
);
10128 FOR_BB_INSNS (bb
, insn
)
10129 if (NONDEBUG_INSN_P (insn
)
10130 && requires_stack_frame_p (insn
, prologue_used
,
10131 set_up_by_prologue
))
10133 crtl
->stack_realign_needed
= stack_realign
;
10134 crtl
->stack_realign_finalized
= true;
10139 frame_pointer_needed
= false;
10140 stack_realign
= false;
10141 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10142 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10143 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10144 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10145 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
10146 df_finish_pass (true);
10147 df_scan_alloc (NULL
);
10149 df_compute_regs_ever_live (true);
10153 crtl
->stack_realign_needed
= stack_realign
;
10154 crtl
->stack_realign_finalized
= true;
10157 /* Expand the prologue into a bunch of separate insns. */
10160 ix86_expand_prologue (void)
10162 struct machine_function
*m
= cfun
->machine
;
10165 struct ix86_frame frame
;
10166 HOST_WIDE_INT allocate
;
10167 bool int_registers_saved
;
10168 bool sse_registers_saved
;
10170 ix86_finalize_stack_realign_flags ();
10172 /* DRAP should not coexist with stack_realign_fp */
10173 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10175 memset (&m
->fs
, 0, sizeof (m
->fs
));
10177 /* Initialize CFA state for before the prologue. */
10178 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10179 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10181 /* Track SP offset to the CFA. We continue tracking this after we've
10182 swapped the CFA register away from SP. In the case of re-alignment
10183 this is fudged; we're interested to offsets within the local frame. */
10184 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10185 m
->fs
.sp_valid
= true;
10187 ix86_compute_frame_layout (&frame
);
10189 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10191 /* We should have already generated an error for any use of
10192 ms_hook on a nested function. */
10193 gcc_checking_assert (!ix86_static_chain_on_stack
);
10195 /* Check if profiling is active and we shall use profiling before
10196 prologue variant. If so sorry. */
10197 if (crtl
->profile
&& flag_fentry
!= 0)
10198 sorry ("ms_hook_prologue attribute isn%'t compatible "
10199 "with -mfentry for 32-bit");
10201 /* In ix86_asm_output_function_label we emitted:
10202 8b ff movl.s %edi,%edi
10204 8b ec movl.s %esp,%ebp
10206 This matches the hookable function prologue in Win32 API
10207 functions in Microsoft Windows XP Service Pack 2 and newer.
10208 Wine uses this to enable Windows apps to hook the Win32 API
10209 functions provided by Wine.
10211 What that means is that we've already set up the frame pointer. */
10213 if (frame_pointer_needed
10214 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10218 /* We've decided to use the frame pointer already set up.
10219 Describe this to the unwinder by pretending that both
10220 push and mov insns happen right here.
10222 Putting the unwind info here at the end of the ms_hook
10223 is done so that we can make absolutely certain we get
10224 the required byte sequence at the start of the function,
10225 rather than relying on an assembler that can produce
10226 the exact encoding required.
10228 However it does mean (in the unpatched case) that we have
10229 a 1 insn window where the asynchronous unwind info is
10230 incorrect. However, if we placed the unwind info at
10231 its correct location we would have incorrect unwind info
10232 in the patched case. Which is probably all moot since
10233 I don't expect Wine generates dwarf2 unwind info for the
10234 system libraries that use this feature. */
10236 insn
= emit_insn (gen_blockage ());
10238 push
= gen_push (hard_frame_pointer_rtx
);
10239 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10240 stack_pointer_rtx
);
10241 RTX_FRAME_RELATED_P (push
) = 1;
10242 RTX_FRAME_RELATED_P (mov
) = 1;
10244 RTX_FRAME_RELATED_P (insn
) = 1;
10245 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10246 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10248 /* Note that gen_push incremented m->fs.cfa_offset, even
10249 though we didn't emit the push insn here. */
10250 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10251 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10252 m
->fs
.fp_valid
= true;
10256 /* The frame pointer is not needed so pop %ebp again.
10257 This leaves us with a pristine state. */
10258 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10262 /* The first insn of a function that accepts its static chain on the
10263 stack is to push the register that would be filled in by a direct
10264 call. This insn will be skipped by the trampoline. */
10265 else if (ix86_static_chain_on_stack
)
10267 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10268 emit_insn (gen_blockage ());
10270 /* We don't want to interpret this push insn as a register save,
10271 only as a stack adjustment. The real copy of the register as
10272 a save will be done later, if needed. */
10273 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10274 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10275 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10276 RTX_FRAME_RELATED_P (insn
) = 1;
10279 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10280 of DRAP is needed and stack realignment is really needed after reload */
10281 if (stack_realign_drap
)
10283 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10285 /* Only need to push parameter pointer reg if it is caller saved. */
10286 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10288 /* Push arg pointer reg */
10289 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10290 RTX_FRAME_RELATED_P (insn
) = 1;
10293 /* Grab the argument pointer. */
10294 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10295 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10296 RTX_FRAME_RELATED_P (insn
) = 1;
10297 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10298 m
->fs
.cfa_offset
= 0;
10300 /* Align the stack. */
10301 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10303 GEN_INT (-align_bytes
)));
10304 RTX_FRAME_RELATED_P (insn
) = 1;
10306 /* Replicate the return address on the stack so that return
10307 address can be reached via (argp - 1) slot. This is needed
10308 to implement macro RETURN_ADDR_RTX and intrinsic function
10309 expand_builtin_return_addr etc. */
10310 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10311 t
= gen_frame_mem (word_mode
, t
);
10312 insn
= emit_insn (gen_push (t
));
10313 RTX_FRAME_RELATED_P (insn
) = 1;
10315 /* For the purposes of frame and register save area addressing,
10316 we've started over with a new frame. */
10317 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10318 m
->fs
.realigned
= true;
10321 int_registers_saved
= (frame
.nregs
== 0);
10322 sse_registers_saved
= (frame
.nsseregs
== 0);
10324 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10326 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10327 slower on all targets. Also sdb doesn't like it. */
10328 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10329 RTX_FRAME_RELATED_P (insn
) = 1;
10331 /* Push registers now, before setting the frame pointer
10333 if (!int_registers_saved
10335 && !frame
.save_regs_using_mov
)
10337 ix86_emit_save_regs ();
10338 int_registers_saved
= true;
10339 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10342 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10344 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10345 RTX_FRAME_RELATED_P (insn
) = 1;
10347 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10348 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10349 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10350 m
->fs
.fp_valid
= true;
10354 if (!int_registers_saved
)
10356 /* If saving registers via PUSH, do so now. */
10357 if (!frame
.save_regs_using_mov
)
10359 ix86_emit_save_regs ();
10360 int_registers_saved
= true;
10361 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10364 /* When using red zone we may start register saving before allocating
10365 the stack frame saving one cycle of the prologue. However, avoid
10366 doing this if we have to probe the stack; at least on x86_64 the
10367 stack probe can turn into a call that clobbers a red zone location. */
10368 else if (ix86_using_red_zone ()
10369 && (! TARGET_STACK_PROBE
10370 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10372 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10373 int_registers_saved
= true;
10377 if (stack_realign_fp
)
10379 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10380 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10382 /* The computation of the size of the re-aligned stack frame means
10383 that we must allocate the size of the register save area before
10384 performing the actual alignment. Otherwise we cannot guarantee
10385 that there's enough storage above the realignment point. */
10386 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10387 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10388 GEN_INT (m
->fs
.sp_offset
10389 - frame
.sse_reg_save_offset
),
10392 /* Align the stack. */
10393 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10395 GEN_INT (-align_bytes
)));
10397 /* For the purposes of register save area addressing, the stack
10398 pointer is no longer valid. As for the value of sp_offset,
10399 see ix86_compute_frame_layout, which we need to match in order
10400 to pass verification of stack_pointer_offset at the end. */
10401 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10402 m
->fs
.sp_valid
= false;
10405 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10407 if (flag_stack_usage_info
)
10409 /* We start to count from ARG_POINTER. */
10410 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10412 /* If it was realigned, take into account the fake frame. */
10413 if (stack_realign_drap
)
10415 if (ix86_static_chain_on_stack
)
10416 stack_size
+= UNITS_PER_WORD
;
10418 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10419 stack_size
+= UNITS_PER_WORD
;
10421 /* This over-estimates by 1 minimal-stack-alignment-unit but
10422 mitigates that by counting in the new return address slot. */
10423 current_function_dynamic_stack_size
10424 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10427 current_function_static_stack_size
= stack_size
;
10430 /* On SEH target with very large frame size, allocate an area to save
10431 SSE registers (as the very large allocation won't be described). */
10433 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10434 && !sse_registers_saved
)
10436 HOST_WIDE_INT sse_size
=
10437 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10439 gcc_assert (int_registers_saved
);
10441 /* No need to do stack checking as the area will be immediately
10443 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10444 GEN_INT (-sse_size
), -1,
10445 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10446 allocate
-= sse_size
;
10447 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10448 sse_registers_saved
= true;
10451 /* The stack has already been decremented by the instruction calling us
10452 so probe if the size is non-negative to preserve the protection area. */
10453 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10455 /* We expect the registers to be saved when probes are used. */
10456 gcc_assert (int_registers_saved
);
10458 if (STACK_CHECK_MOVING_SP
)
10460 ix86_adjust_stack_and_probe (allocate
);
10465 HOST_WIDE_INT size
= allocate
;
10467 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10468 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10470 if (TARGET_STACK_PROBE
)
10471 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10473 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10479 else if (!ix86_target_stack_probe ()
10480 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10482 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10483 GEN_INT (-allocate
), -1,
10484 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10488 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10490 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10491 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
10492 bool eax_live
= false;
10493 bool r10_live
= false;
10496 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10497 if (!TARGET_64BIT_MS_ABI
)
10498 eax_live
= ix86_eax_live_at_start_p ();
10500 /* Note that SEH directives need to continue tracking the stack
10501 pointer even after the frame pointer has been set up. */
10504 insn
= emit_insn (gen_push (eax
));
10505 allocate
-= UNITS_PER_WORD
;
10506 if (sp_is_cfa_reg
|| TARGET_SEH
)
10509 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10510 RTX_FRAME_RELATED_P (insn
) = 1;
10516 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10517 insn
= emit_insn (gen_push (r10
));
10518 allocate
-= UNITS_PER_WORD
;
10519 if (sp_is_cfa_reg
|| TARGET_SEH
)
10522 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
10523 RTX_FRAME_RELATED_P (insn
) = 1;
10527 emit_move_insn (eax
, GEN_INT (allocate
));
10528 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10530 /* Use the fact that AX still contains ALLOCATE. */
10531 adjust_stack_insn
= (Pmode
== DImode
10532 ? gen_pro_epilogue_adjust_stack_di_sub
10533 : gen_pro_epilogue_adjust_stack_si_sub
);
10535 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10536 stack_pointer_rtx
, eax
));
10538 if (sp_is_cfa_reg
|| TARGET_SEH
)
10541 m
->fs
.cfa_offset
+= allocate
;
10542 RTX_FRAME_RELATED_P (insn
) = 1;
10543 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10544 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10545 plus_constant (Pmode
, stack_pointer_rtx
,
10548 m
->fs
.sp_offset
+= allocate
;
10550 if (r10_live
&& eax_live
)
10552 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10553 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10554 gen_frame_mem (word_mode
, t
));
10555 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10556 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10557 gen_frame_mem (word_mode
, t
));
10559 else if (eax_live
|| r10_live
)
10561 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10562 emit_move_insn (gen_rtx_REG (word_mode
,
10563 (eax_live
? AX_REG
: R10_REG
)),
10564 gen_frame_mem (word_mode
, t
));
10567 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10569 /* If we havn't already set up the frame pointer, do so now. */
10570 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10572 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10573 GEN_INT (frame
.stack_pointer_offset
10574 - frame
.hard_frame_pointer_offset
));
10575 insn
= emit_insn (insn
);
10576 RTX_FRAME_RELATED_P (insn
) = 1;
10577 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10579 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10580 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10581 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10582 m
->fs
.fp_valid
= true;
10585 if (!int_registers_saved
)
10586 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10587 if (!sse_registers_saved
)
10588 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10590 pic_reg_used
= false;
10591 if (pic_offset_table_rtx
10592 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10595 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10597 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10598 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10600 pic_reg_used
= true;
10607 if (ix86_cmodel
== CM_LARGE_PIC
)
10609 rtx label
, tmp_reg
;
10611 gcc_assert (Pmode
== DImode
);
10612 label
= gen_label_rtx ();
10613 emit_label (label
);
10614 LABEL_PRESERVE_P (label
) = 1;
10615 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10616 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10617 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10619 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10620 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10621 pic_offset_table_rtx
, tmp_reg
));
10624 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10628 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10629 RTX_FRAME_RELATED_P (insn
) = 1;
10630 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10634 /* In the pic_reg_used case, make sure that the got load isn't deleted
10635 when mcount needs it. Blockage to avoid call movement across mcount
10636 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10638 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10639 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10641 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10643 /* vDRAP is setup but after reload it turns out stack realign
10644 isn't necessary, here we will emit prologue to setup DRAP
10645 without stack realign adjustment */
10646 t
= choose_baseaddr (0);
10647 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10650 /* Prevent instructions from being scheduled into register save push
10651 sequence when access to the redzone area is done through frame pointer.
10652 The offset between the frame pointer and the stack pointer is calculated
10653 relative to the value of the stack pointer at the end of the function
10654 prologue, and moving instructions that access redzone area via frame
10655 pointer inside push sequence violates this assumption. */
10656 if (frame_pointer_needed
&& frame
.red_zone_size
)
10657 emit_insn (gen_memory_blockage ());
10659 /* Emit cld instruction if stringops are used in the function. */
10660 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10661 emit_insn (gen_cld ());
10663 /* SEH requires that the prologue end within 256 bytes of the start of
10664 the function. Prevent instruction schedules that would extend that.
10665 Further, prevent alloca modifications to the stack pointer from being
10666 combined with prologue modifications. */
10668 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10671 /* Emit code to restore REG using a POP insn. */
10674 ix86_emit_restore_reg_using_pop (rtx reg
)
10676 struct machine_function
*m
= cfun
->machine
;
10677 rtx insn
= emit_insn (gen_pop (reg
));
10679 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10680 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10682 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10683 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10685 /* Previously we'd represented the CFA as an expression
10686 like *(%ebp - 8). We've just popped that value from
10687 the stack, which means we need to reset the CFA to
10688 the drap register. This will remain until we restore
10689 the stack pointer. */
10690 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10691 RTX_FRAME_RELATED_P (insn
) = 1;
10693 /* This means that the DRAP register is valid for addressing too. */
10694 m
->fs
.drap_valid
= true;
10698 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10700 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10701 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10702 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10703 RTX_FRAME_RELATED_P (insn
) = 1;
10705 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10708 /* When the frame pointer is the CFA, and we pop it, we are
10709 swapping back to the stack pointer as the CFA. This happens
10710 for stack frames that don't allocate other data, so we assume
10711 the stack pointer is now pointing at the return address, i.e.
10712 the function entry state, which makes the offset be 1 word. */
10713 if (reg
== hard_frame_pointer_rtx
)
10715 m
->fs
.fp_valid
= false;
10716 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10718 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10719 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10721 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10722 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10723 GEN_INT (m
->fs
.cfa_offset
)));
10724 RTX_FRAME_RELATED_P (insn
) = 1;
10729 /* Emit code to restore saved registers using POP insns. */
10732 ix86_emit_restore_regs_using_pop (void)
10734 unsigned int regno
;
10736 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10737 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10738 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10741 /* Emit code and notes for the LEAVE instruction. */
10744 ix86_emit_leave (void)
10746 struct machine_function
*m
= cfun
->machine
;
10747 rtx insn
= emit_insn (ix86_gen_leave ());
10749 ix86_add_queued_cfa_restore_notes (insn
);
10751 gcc_assert (m
->fs
.fp_valid
);
10752 m
->fs
.sp_valid
= true;
10753 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10754 m
->fs
.fp_valid
= false;
10756 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10758 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10759 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10761 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10762 plus_constant (Pmode
, stack_pointer_rtx
,
10764 RTX_FRAME_RELATED_P (insn
) = 1;
10766 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10770 /* Emit code to restore saved registers using MOV insns.
10771 First register is restored from CFA - CFA_OFFSET. */
10773 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10774 bool maybe_eh_return
)
10776 struct machine_function
*m
= cfun
->machine
;
10777 unsigned int regno
;
10779 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10780 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10782 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10785 mem
= choose_baseaddr (cfa_offset
);
10786 mem
= gen_frame_mem (word_mode
, mem
);
10787 insn
= emit_move_insn (reg
, mem
);
10789 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10791 /* Previously we'd represented the CFA as an expression
10792 like *(%ebp - 8). We've just popped that value from
10793 the stack, which means we need to reset the CFA to
10794 the drap register. This will remain until we restore
10795 the stack pointer. */
10796 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10797 RTX_FRAME_RELATED_P (insn
) = 1;
10799 /* This means that the DRAP register is valid for addressing. */
10800 m
->fs
.drap_valid
= true;
10803 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10805 cfa_offset
-= UNITS_PER_WORD
;
10809 /* Emit code to restore saved registers using MOV insns.
10810 First register is restored from CFA - CFA_OFFSET. */
10812 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10813 bool maybe_eh_return
)
10815 unsigned int regno
;
10817 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10818 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10820 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
10823 mem
= choose_baseaddr (cfa_offset
);
10824 mem
= gen_rtx_MEM (V4SFmode
, mem
);
10825 set_mem_align (mem
, 128);
10826 emit_move_insn (reg
, mem
);
10828 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
10834 /* Restore function stack, frame, and registers. */
10837 ix86_expand_epilogue (int style
)
10839 struct machine_function
*m
= cfun
->machine
;
10840 struct machine_frame_state frame_state_save
= m
->fs
;
10841 struct ix86_frame frame
;
10842 bool restore_regs_via_mov
;
10845 ix86_finalize_stack_realign_flags ();
10846 ix86_compute_frame_layout (&frame
);
10848 m
->fs
.sp_valid
= (!frame_pointer_needed
10849 || (crtl
->sp_is_unchanging
10850 && !stack_realign_fp
));
10851 gcc_assert (!m
->fs
.sp_valid
10852 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10854 /* The FP must be valid if the frame pointer is present. */
10855 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
10856 gcc_assert (!m
->fs
.fp_valid
10857 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
10859 /* We must have *some* valid pointer to the stack frame. */
10860 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
10862 /* The DRAP is never valid at this point. */
10863 gcc_assert (!m
->fs
.drap_valid
);
10865 /* See the comment about red zone and frame
10866 pointer usage in ix86_expand_prologue. */
10867 if (frame_pointer_needed
&& frame
.red_zone_size
)
10868 emit_insn (gen_memory_blockage ());
10870 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
10871 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
10873 /* Determine the CFA offset of the end of the red-zone. */
10874 m
->fs
.red_zone_offset
= 0;
10875 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
10877 /* The red-zone begins below the return address. */
10878 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
10880 /* When the register save area is in the aligned portion of
10881 the stack, determine the maximum runtime displacement that
10882 matches up with the aligned frame. */
10883 if (stack_realign_drap
)
10884 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
10888 /* Special care must be taken for the normal return case of a function
10889 using eh_return: the eax and edx registers are marked as saved, but
10890 not restored along this path. Adjust the save location to match. */
10891 if (crtl
->calls_eh_return
&& style
!= 2)
10892 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
10894 /* EH_RETURN requires the use of moves to function properly. */
10895 if (crtl
->calls_eh_return
)
10896 restore_regs_via_mov
= true;
10897 /* SEH requires the use of pops to identify the epilogue. */
10898 else if (TARGET_SEH
)
10899 restore_regs_via_mov
= false;
10900 /* If we're only restoring one register and sp is not valid then
10901 using a move instruction to restore the register since it's
10902 less work than reloading sp and popping the register. */
10903 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
10904 restore_regs_via_mov
= true;
10905 else if (TARGET_EPILOGUE_USING_MOVE
10906 && cfun
->machine
->use_fast_prologue_epilogue
10907 && (frame
.nregs
> 1
10908 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
10909 restore_regs_via_mov
= true;
10910 else if (frame_pointer_needed
10912 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
10913 restore_regs_via_mov
= true;
10914 else if (frame_pointer_needed
10915 && TARGET_USE_LEAVE
10916 && cfun
->machine
->use_fast_prologue_epilogue
10917 && frame
.nregs
== 1)
10918 restore_regs_via_mov
= true;
10920 restore_regs_via_mov
= false;
10922 if (restore_regs_via_mov
|| frame
.nsseregs
)
10924 /* Ensure that the entire register save area is addressable via
10925 the stack pointer, if we will restore via sp. */
10927 && m
->fs
.sp_offset
> 0x7fffffff
10928 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
10929 && (frame
.nsseregs
+ frame
.nregs
) != 0)
10931 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10932 GEN_INT (m
->fs
.sp_offset
10933 - frame
.sse_reg_save_offset
),
10935 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10939 /* If there are any SSE registers to restore, then we have to do it
10940 via moves, since there's obviously no pop for SSE regs. */
10941 if (frame
.nsseregs
)
10942 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
10945 if (restore_regs_via_mov
)
10950 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
10952 /* eh_return epilogues need %ecx added to the stack pointer. */
10955 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
10957 /* Stack align doesn't work with eh_return. */
10958 gcc_assert (!stack_realign_drap
);
10959 /* Neither does regparm nested functions. */
10960 gcc_assert (!ix86_static_chain_on_stack
);
10962 if (frame_pointer_needed
)
10964 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
10965 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
10966 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
10968 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
10969 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
10971 /* Note that we use SA as a temporary CFA, as the return
10972 address is at the proper place relative to it. We
10973 pretend this happens at the FP restore insn because
10974 prior to this insn the FP would be stored at the wrong
10975 offset relative to SA, and after this insn we have no
10976 other reasonable register to use for the CFA. We don't
10977 bother resetting the CFA to the SP for the duration of
10978 the return insn. */
10979 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10980 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
10981 ix86_add_queued_cfa_restore_notes (insn
);
10982 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
10983 RTX_FRAME_RELATED_P (insn
) = 1;
10985 m
->fs
.cfa_reg
= sa
;
10986 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
10987 m
->fs
.fp_valid
= false;
10989 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
10990 const0_rtx
, style
, false);
10994 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
10995 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
10996 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
10997 ix86_add_queued_cfa_restore_notes (insn
);
10999 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11000 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11002 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11003 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11004 plus_constant (Pmode
, stack_pointer_rtx
,
11006 RTX_FRAME_RELATED_P (insn
) = 1;
11009 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11010 m
->fs
.sp_valid
= true;
11015 /* SEH requires that the function end with (1) a stack adjustment
11016 if necessary, (2) a sequence of pops, and (3) a return or
11017 jump instruction. Prevent insns from the function body from
11018 being scheduled into this sequence. */
11021 /* Prevent a catch region from being adjacent to the standard
11022 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11023 several other flags that would be interesting to test are
11025 if (flag_non_call_exceptions
)
11026 emit_insn (gen_nops (const1_rtx
));
11028 emit_insn (gen_blockage ());
11031 /* First step is to deallocate the stack frame so that we can
11032 pop the registers. Also do it on SEH target for very large
11033 frame as the emitted instructions aren't allowed by the ABI in
11035 if (!m
->fs
.sp_valid
11037 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11038 >= SEH_MAX_FRAME_SIZE
)))
11040 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11041 GEN_INT (m
->fs
.fp_offset
11042 - frame
.reg_save_offset
),
11045 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11047 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11048 GEN_INT (m
->fs
.sp_offset
11049 - frame
.reg_save_offset
),
11051 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11054 ix86_emit_restore_regs_using_pop ();
11057 /* If we used a stack pointer and haven't already got rid of it,
11059 if (m
->fs
.fp_valid
)
11061 /* If the stack pointer is valid and pointing at the frame
11062 pointer store address, then we only need a pop. */
11063 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11064 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11065 /* Leave results in shorter dependency chains on CPUs that are
11066 able to grok it fast. */
11067 else if (TARGET_USE_LEAVE
11068 || optimize_function_for_size_p (cfun
)
11069 || !cfun
->machine
->use_fast_prologue_epilogue
)
11070 ix86_emit_leave ();
11073 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11074 hard_frame_pointer_rtx
,
11075 const0_rtx
, style
, !using_drap
);
11076 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11082 int param_ptr_offset
= UNITS_PER_WORD
;
11085 gcc_assert (stack_realign_drap
);
11087 if (ix86_static_chain_on_stack
)
11088 param_ptr_offset
+= UNITS_PER_WORD
;
11089 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11090 param_ptr_offset
+= UNITS_PER_WORD
;
11092 insn
= emit_insn (gen_rtx_SET
11093 (VOIDmode
, stack_pointer_rtx
,
11094 gen_rtx_PLUS (Pmode
,
11096 GEN_INT (-param_ptr_offset
))));
11097 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11098 m
->fs
.cfa_offset
= param_ptr_offset
;
11099 m
->fs
.sp_offset
= param_ptr_offset
;
11100 m
->fs
.realigned
= false;
11102 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11103 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11104 GEN_INT (param_ptr_offset
)));
11105 RTX_FRAME_RELATED_P (insn
) = 1;
11107 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11108 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11111 /* At this point the stack pointer must be valid, and we must have
11112 restored all of the registers. We may not have deallocated the
11113 entire stack frame. We've delayed this until now because it may
11114 be possible to merge the local stack deallocation with the
11115 deallocation forced by ix86_static_chain_on_stack. */
11116 gcc_assert (m
->fs
.sp_valid
);
11117 gcc_assert (!m
->fs
.fp_valid
);
11118 gcc_assert (!m
->fs
.realigned
);
11119 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11121 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11122 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11126 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11128 /* Sibcall epilogues don't want a return instruction. */
11131 m
->fs
= frame_state_save
;
11135 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11137 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11139 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11140 address, do explicit add, and jump indirectly to the caller. */
11142 if (crtl
->args
.pops_args
>= 65536)
11144 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11147 /* There is no "pascal" calling convention in any 64bit ABI. */
11148 gcc_assert (!TARGET_64BIT
);
11150 insn
= emit_insn (gen_pop (ecx
));
11151 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11152 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11154 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11155 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11156 add_reg_note (insn
, REG_CFA_REGISTER
,
11157 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11158 RTX_FRAME_RELATED_P (insn
) = 1;
11160 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11162 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11165 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11168 emit_jump_insn (gen_simple_return_internal ());
11170 /* Restore the state back to the state from the prologue,
11171 so that it's correct for the next epilogue. */
11172 m
->fs
= frame_state_save
;
/* NOTE(review): this chunk is a line-shattered extraction; original lines
   11189-11190 (the `while` header walking insns backwards) and 11200-11203
   (the `if` deciding whether to emit the nop) are missing here — confirm
   against upstream gcc-4.8 config/i386/i386.c before any code change.  */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: restores the real PIC register number
   and, on Mach-O (presumably guarded by TARGET_MACHO in the elided lines),
   emits a trailing nop when the function would otherwise end in a label.  */
11175 /* Reset from the function's potential modifications. */
11178 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11179 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11181 if (pic_offset_table_rtx
)
11182 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11184 /* Mach-O doesn't support labels at the end of objects, so if
11185 it looks like we might want one, insert a NOP. */
/* Scan backwards from the last insn; remember any deleted-debug-label
   note seen on the way (loop header elided from this extraction).  */
11187 rtx insn
= get_last_insn ();
11188 rtx deleted_debug_label
= NULL_RTX
;
11191 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11193 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11194 notes only, instead set their CODE_LABEL_NUMBER to -1,
11195 otherwise there would be code generation differences
11196 in between -g and -g0. */
11197 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11198 deleted_debug_label
= insn
;
11199 insn
= PREV_INSN (insn
);
11204 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11205 fputs ("\tnop\n", file
);
/* Otherwise neutralize the deleted debug labels so -g and -g0 emit
   identical code: mark them with label number -1.  */
11206 else if (deleted_debug_label
)
11207 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11208 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11209 CODE_LABEL_NUMBER (insn
) = -1;
/* NOTE(review): shattered extraction; several lines (the early returns for
   the 64-bit case, the CX_REG/DX_REG/INVALID_REGNUM returns in the
   is_fastcall / is_thiscall / regparm branches) are elided — confirm
   against upstream before editing.  */
11215 /* Return a scratch register to use in the split stack prologue. The
11216 split stack prologue is used for -fsplit-stack. It is the first
11217 instructions in the function, even before the regular prologue.
11218 The scratch register can be any caller-saved register which is not
11219 used for parameters or for the static chain. */
11221 static unsigned int
11222 split_stack_prologue_scratch_regno (void)
11228 bool is_fastcall
, is_thiscall
;
/* Inspect the current function's type attributes to find out which
   argument registers the calling convention already consumes.  */
11231 is_fastcall
= (lookup_attribute ("fastcall",
11232 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11234 is_thiscall
= (lookup_attribute ("thiscall",
11235 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11237 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
/* fastcall uses both %ecx and %edx for arguments; a nested function
   additionally needs the static-chain register, so nothing is free.  */
11241 if (DECL_STATIC_CHAIN (cfun
->decl
))
11243 sorry ("-fsplit-stack does not support fastcall with "
11244 "nested function");
11245 return INVALID_REGNUM
;
11249 else if (is_thiscall
)
11251 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11255 else if (regparm
< 3)
11257 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11263 sorry ("-fsplit-stack does not support 2 register "
11264 " parameters for a nested function");
11265 return INVALID_REGNUM
;
11272 /* FIXME: We could make this work by pushing a register
11273 around the addition and comparison. */
11274 sorry ("-fsplit-stack does not support 3 register parameters");
11275 return INVALID_REGNUM
;
/* Lazily-initialized SYMBOL_REFs for the libgcc __morestack entry points;
   GTY(()) roots them for the garbage collector.  Set on first use in
   ix86_expand_split_stack_prologue below.  */
11280 /* A SYMBOL_REF for the function which allocates new stackspace for
11283 static GTY(()) rtx split_stack_fn
;
11285 /* A SYMBOL_REF for the more stack function when using the large
11288 static GTY(()) rtx split_stack_fn_large
;
/* NOTE(review): shattered extraction with many elided lines (braces, the
   `else` arms, some call arguments such as the PLUS offset at 11347 and the
   ix86_expand_call trailing arguments).  Do not edit without diffing
   against upstream gcc-4.8 config/i386/i386.c.  */
/* Emit the -fsplit-stack prologue: compare the stack pointer (minus the
   frame size when large) against the TCB stack limit and, on shortage,
   call __morestack / __morestack_large_model to obtain more stack.  */
11290 /* Handle -fsplit-stack. These are the first instructions in the
11291 function, even before the regular prologue. */
11294 ix86_expand_split_stack_prologue (void)
11296 struct ix86_frame frame
;
11297 HOST_WIDE_INT allocate
;
11298 unsigned HOST_WIDE_INT args_size
;
11299 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11300 rtx scratch_reg
= NULL_RTX
;
11301 rtx varargs_label
= NULL_RTX
;
11304 gcc_assert (flag_split_stack
&& reload_completed
);
11306 ix86_finalize_stack_realign_flags ();
11307 ix86_compute_frame_layout (&frame
);
11308 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11310 /* This is the label we will branch to if we have enough stack
11311 space. We expect the basic block reordering pass to reverse this
11312 branch if optimizing, so that we branch in the unlikely case. */
11313 label
= gen_label_rtx ();
11315 /* We need to compare the stack pointer minus the frame size with
11316 the stack boundary in the TCB. The stack boundary always gives
11317 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11318 can compare directly. Otherwise we need to do an addition. */
/* The limit lives behind an UNSPEC_STACK_CHECK memory reference
   (thread-local, in the TCB).  */
11320 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11321 UNSPEC_STACK_CHECK
);
11322 limit
= gen_rtx_CONST (Pmode
, limit
);
11323 limit
= gen_rtx_MEM (Pmode
, limit
);
11324 if (allocate
< SPLIT_STACK_AVAILABLE
)
11325 current
= stack_pointer_rtx
;
11328 unsigned int scratch_regno
;
11331 /* We need a scratch register to hold the stack pointer minus
11332 the required frame size. Since this is the very start of the
11333 function, the scratch register can be any caller-saved
11334 register which is not used for parameters. */
11335 offset
= GEN_INT (- allocate
);
11336 scratch_regno
= split_stack_prologue_scratch_regno ();
11337 if (scratch_regno
== INVALID_REGNUM
)
11339 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11340 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11342 /* We don't use ix86_gen_add3 in this case because it will
11343 want to split to lea, but when not optimizing the insn
11344 will not be split after this point. */
11345 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11346 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11351 emit_move_insn (scratch_reg
, offset
);
11352 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11353 stack_pointer_rtx
));
11355 current
= scratch_reg
;
/* Branch to LABEL (enough stack) when current >= limit.  */
11358 ix86_expand_branch (GEU
, current
, limit
, label
);
11359 jump_insn
= get_last_insn ();
11360 JUMP_LABEL (jump_insn
) = label
;
11362 /* Mark the jump as very likely to be taken. */
11363 add_reg_note (jump_insn
, REG_BR_PROB
,
11364 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11366 if (split_stack_fn
== NULL_RTX
)
11367 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11368 fn
= split_stack_fn
;
11370 /* Get more stack space. We pass in the desired stack space and the
11371 size of the arguments to copy to the new stack. In 32-bit mode
11372 we push the parameters; __morestack will return on a new stack
11373 anyhow. In 64-bit mode we pass the parameters in r10 and
11375 allocate_rtx
= GEN_INT (allocate
);
11376 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11377 call_fusage
= NULL_RTX
;
/* 64-bit parameter-passing path (the TARGET_64BIT guard is elided in
   this extraction): arguments go in %r10/%r11.  */
11382 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11383 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11385 /* If this function uses a static chain, it will be in %r10.
11386 Preserve it across the call to __morestack. */
11387 if (DECL_STATIC_CHAIN (cfun
->decl
))
11391 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11392 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11393 use_reg (&call_fusage
, rax
);
11396 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11398 HOST_WIDE_INT argval
;
11400 gcc_assert (Pmode
== DImode
);
11401 /* When using the large model we need to load the address
11402 into a register, and we've run out of registers. So we
11403 switch to a different calling convention, and we call a
11404 different function: __morestack_large. We pass the
11405 argument size in the upper 32 bits of r10 and pass the
11406 frame size in the lower 32 bits. */
11407 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11408 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11410 if (split_stack_fn_large
== NULL_RTX
)
11411 split_stack_fn_large
=
11412 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11414 if (ix86_cmodel
== CM_LARGE_PIC
)
/* Large PIC model: materialize the GOT base into %r10 and load the
   callee address from the GOT into %r11.  */
11418 label
= gen_label_rtx ();
11419 emit_label (label
);
11420 LABEL_PRESERVE_P (label
) = 1;
11421 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11422 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11423 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11424 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11426 x
= gen_rtx_CONST (Pmode
, x
);
11427 emit_move_insn (reg11
, x
);
11428 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11429 x
= gen_const_mem (Pmode
, x
);
11430 emit_move_insn (reg11
, x
);
11433 emit_move_insn (reg11
, split_stack_fn_large
);
/* Pack args_size into the upper 32 bits and the frame size into the
   lower 32 bits of %r10 (double-shift avoids UB for a 32-bit shift).  */
11437 argval
= ((args_size
<< 16) << 16) + allocate
;
11438 emit_move_insn (reg10
, GEN_INT (argval
));
11442 emit_move_insn (reg10
, allocate_rtx
);
11443 emit_move_insn (reg11
, GEN_INT (args_size
));
11444 use_reg (&call_fusage
, reg11
);
11447 use_reg (&call_fusage
, reg10
);
/* 32-bit path: push the two parameters on the stack.  */
11451 emit_insn (gen_push (GEN_INT (args_size
)));
11452 emit_insn (gen_push (allocate_rtx
));
11454 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11455 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11457 add_function_usage_to (call_insn
, call_fusage
);
11459 /* In order to make call/return prediction work right, we now need
11460 to execute a return instruction. See
11461 libgcc/config/i386/morestack.S for the details on how this works.
11463 For flow purposes gcc must not see this as a return
11464 instruction--we need control flow to continue at the subsequent
11465 label. Therefore, we use an unspec. */
11466 gcc_assert (crtl
->args
.pops_args
< 65536);
11467 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11469 /* If we are in 64-bit mode and this function uses a static chain,
11470 we saved %r10 in %rax before calling _morestack. */
11471 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11472 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11473 gen_rtx_REG (word_mode
, AX_REG
));
11475 /* If this function calls va_start, we need to store a pointer to
11476 the arguments on the old stack, because they may not have been
11477 all copied to the new stack. At this point the old stack can be
11478 found at the frame pointer value used by __morestack, because
11479 __morestack has set that up before calling back to us. Here we
11480 store that pointer in a scratch register, and in
11481 ix86_expand_prologue we store the scratch register in a stack
11483 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11485 unsigned int scratch_regno
;
11489 scratch_regno
= split_stack_prologue_scratch_regno ();
11490 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11491 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
/* Stack layout below %rbp/%ebp on entry from __morestack (the layout
   diagrams here lost their surrounding comment markers in extraction):  */
11495 return address within this function
11496 return address of caller of this function
11498 So we add three words to get to the stack arguments.
11502 return address within this function
11503 first argument to __morestack
11504 second argument to __morestack
11505 return address of caller of this function
11507 So we add five words to get to the stack arguments.
11509 words
= TARGET_64BIT
? 3 : 5;
11510 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11511 gen_rtx_PLUS (Pmode
, frame_reg
,
11512 GEN_INT (words
* UNITS_PER_WORD
))));
11514 varargs_label
= gen_label_rtx ();
11515 emit_jump_insn (gen_jump (varargs_label
));
11516 JUMP_LABEL (get_last_insn ()) = varargs_label
;
/* Fall-through target: enough stack was available, no call emitted.  */
11521 emit_label (label
);
11522 LABEL_NUSES (label
) = 1;
11524 /* If this function calls va_start, we now have to set the scratch
11525 register for the case where we do not call __morestack. In this
11526 case we need to set it based on the stack pointer. */
11527 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11529 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11530 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11531 GEN_INT (UNITS_PER_WORD
))));
11533 emit_label (varargs_label
);
11534 LABEL_NUSES (varargs_label
) = 1;
/* TARGET_EXTRA_LIVE_ON_ENTRY hook: when the split-stack prologue will
   initialize a varargs scratch register, mark that register live on entry
   so dataflow does not consider it dead.  (Return type line and braces are
   elided in this extraction.)  */
11538 /* We may have to tell the dataflow pass that the split stack prologue
11539 is initializing a scratch register. */
11542 ix86_live_on_entry (bitmap regs
)
11544 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11546 gcc_assert (flag_split_stack
);
11547 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
/* Predicate: is OP (the SUBREG_REG of an address SUBREG) usable inside a
   memory address?  Requires an integer-mode register no wider than a word,
   and ultimately a non-eliminable hard register.  NOTE(review): the early
   REG_P check and the `return false` lines are elided in this extraction.  */
11551 /* Determine if op is suitable SUBREG RTX for address. */
11554 ix86_address_subreg_operand (rtx op
)
11556 enum machine_mode mode
;
11561 mode
= GET_MODE (op
);
11563 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11566 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11567 failures when the register is one word out of a two word structure. */
11568 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11571 /* Allow only SUBREGs of non-eliminable hard registers. */
11572 return register_no_elim_operand (op
, mode
);
/* NOTE(review): heavily elided extraction — the `retval = -1` ASHIFT
   handling, most `return 0` failure paths, the PLUS-flattening loop header,
   and several switch case labels are missing.  Diff against upstream
   gcc-4.8 before editing.  */
/* Decompose ADDR into base + index*scale + disp (+ segment), filling *OUT.
   Per the header comment: 0 = malformed, -1 = contains ASHIFT (lea-only).  */
11575 /* Extract the parts of an RTL expression that is a valid memory address
11576 for an instruction. Return 0 if the structure of the address is
11577 grossly off. Return -1 if the address contains ASHIFT, so it is not
11578 strictly valid, but still used for computing length of lea instruction. */
11581 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11583 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11584 rtx base_reg
, index_reg
;
11585 HOST_WIDE_INT scale
= 1;
11586 rtx scale_rtx
= NULL_RTX
;
11589 enum ix86_address_seg seg
= SEG_DEFAULT
;
11591 /* Allow zero-extended SImode addresses,
11592 they will be emitted with addr32 prefix. */
11593 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11595 if (GET_CODE (addr
) == ZERO_EXTEND
11596 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11598 addr
= XEXP (addr
, 0);
11599 if (CONST_INT_P (addr
))
11602 else if (GET_CODE (addr
) == AND
11603 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11605 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11606 if (addr
== NULL_RTX
)
11609 if (CONST_INT_P (addr
))
11614 /* Allow SImode subregs of DImode addresses,
11615 they will be emitted with addr32 prefix. */
11616 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11618 if (GET_CODE (addr
) == SUBREG
11619 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11621 addr
= SUBREG_REG (addr
);
11622 if (CONST_INT_P (addr
))
/* Dispatch on the top-level rtx code of ADDR (REG case elided).  */
11629 else if (GET_CODE (addr
) == SUBREG
)
11631 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11636 else if (GET_CODE (addr
) == PLUS
)
11638 rtx addends
[4], op
;
/* Flatten the (at most 4-addend) PLUS tree, then classify each addend
   as index*scale, UNSPEC_TP segment, base, or displacement.  */
11646 addends
[n
++] = XEXP (op
, 1);
11649 while (GET_CODE (op
) == PLUS
);
11654 for (i
= n
; i
>= 0; --i
)
11657 switch (GET_CODE (op
))
11662 index
= XEXP (op
, 0);
11663 scale_rtx
= XEXP (op
, 1);
11669 index
= XEXP (op
, 0);
11670 tmp
= XEXP (op
, 1);
11671 if (!CONST_INT_P (tmp
))
11673 scale
= INTVAL (tmp
);
11674 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11676 scale
= 1 << scale
;
11681 if (GET_CODE (op
) != UNSPEC
)
11686 if (XINT (op
, 1) == UNSPEC_TP
11687 && TARGET_TLS_DIRECT_SEG_REFS
11688 && seg
== SEG_DEFAULT
)
11689 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11695 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11722 else if (GET_CODE (addr
) == MULT
)
11724 index
= XEXP (addr
, 0); /* index*scale */
11725 scale_rtx
= XEXP (addr
, 1);
11727 else if (GET_CODE (addr
) == ASHIFT
)
11729 /* We're called for lea too, which implements ashift on occasion. */
11730 index
= XEXP (addr
, 0);
11731 tmp
= XEXP (addr
, 1);
11732 if (!CONST_INT_P (tmp
))
11734 scale
= INTVAL (tmp
);
11735 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11737 scale
= 1 << scale
;
11740 else if (CONST_INT_P (addr
))
11742 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11745 /* Constant addresses are sign extended to 64bit, we have to
11746 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11748 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11754 disp
= addr
; /* displacement */
/* Validate the index (plain-REG arm elided in this extraction).  */
11760 else if (GET_CODE (index
) == SUBREG
11761 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11767 /* Address override works only on the (%reg) part of %fs:(%reg). */
11768 if (seg
!= SEG_DEFAULT
11769 && ((base
&& GET_MODE (base
) != word_mode
)
11770 || (index
&& GET_MODE (index
) != word_mode
)))
11773 /* Extract the integral value of scale. */
11776 if (!CONST_INT_P (scale_rtx
))
11778 scale
= INTVAL (scale_rtx
);
11781 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11782 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11784 /* Avoid useless 0 displacement. */
11785 if (disp
== const0_rtx
&& (base
|| index
))
11788 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11789 if (base_reg
&& index_reg
&& scale
== 1
11790 && (index_reg
== arg_pointer_rtx
11791 || index_reg
== frame_pointer_rtx
11792 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
/* Swap base and index so sp/fp/argp end up in the base position.  */
11795 tmp
= base
, base
= index
, index
= tmp
;
11796 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11799 /* Special case: %ebp cannot be encoded as a base without a displacement.
11803 && (base_reg
== hard_frame_pointer_rtx
11804 || base_reg
== frame_pointer_rtx
11805 || base_reg
== arg_pointer_rtx
11806 || (REG_P (base_reg
)
11807 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11808 || REGNO (base_reg
) == R13_REG
))))
11811 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11812 Avoid this by transforming to [%esi+0].
11813 Reload calls address legitimization without cfun defined, so we need
11814 to test cfun for being non-NULL. */
11815 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11816 && base_reg
&& !index_reg
&& !disp
11817 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11820 /* Special case: encode reg+reg instead of reg*2. */
11821 if (!base
&& index
&& scale
== 2)
11822 base
= index
, base_reg
= index_reg
, scale
= 1;
11824 /* Special case: scaling cannot be encoded without base or displacement. */
11825 if (!base
&& !disp
&& index
&& scale
!= 1)
/* Fill in the output structure (base/disp/seg stores elided here).  */
11829 out
->index
= index
;
11831 out
->scale
= scale
;
/* NOTE(review): elided extraction — the cost-returning statements and the
   guard conditions around the two pseudo-register tests are missing;
   confirm against upstream before editing.  */
/* TARGET_ADDRESS_COST hook: estimate the cost of address X, preferring
   addresses that tie up fewer (hard) registers.  */
11837 /* Return cost of the memory address x.
11838 For i386, it is better to use a complex address than let gcc copy
11839 the address into a reg and make a new pseudo. But not if the address
11840 requires to two regs - that would mean more pseudos with longer
11843 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
11844 addr_space_t as ATTRIBUTE_UNUSED
,
11845 bool speed ATTRIBUTE_UNUSED
)
11847 struct ix86_address parts
;
11849 int ok
= ix86_decompose_address (x
, &parts
);
/* Strip SUBREGs so the REG tests below see the underlying registers.  */
11853 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
11854 parts
.base
= SUBREG_REG (parts
.base
);
11855 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
11856 parts
.index
= SUBREG_REG (parts
.index
);
11858 /* Attempt to minimize number of registers in the address. */
11860 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
11862 && (!REG_P (parts
.index
)
11863 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
11867 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
11869 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
11870 && parts
.base
!= parts
.index
)
11873 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
11874 since it's predecode logic can't detect the length of instructions
11875 and it degenerates to vector decoded. Increase cost of such
11876 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11877 to split such addresses or even refuse such addresses at all.
11879 Following addressing modes are affected:
11884 The first and last case may be avoidable by explicitly coding the zero in
11885 memory address, but I don't have AMD-K6 machine handy to check this
11889 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11890 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
11891 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
/* Predicate: DISP is the Mach-O pic-base offset unspec used to address
   local data under -fPIC on Darwin.  (Return-type line and braces are
   elided in this extraction.)  */
11897 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
11898 this is used for to form addresses to local data when -fPIC is in
11902 darwin_local_data_pic (rtx disp
)
11904 return (GET_CODE (disp
) == UNSPEC
11905 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
/* NOTE(review): elided extraction — the switch case labels (CONST,
   SYMBOL_REF, CONST_DOUBLE/CONST_VECTOR...) and most `return true/false`
   lines are missing; confirm against upstream before editing.  */
/* TARGET_LEGITIMATE_CONSTANT_P hook: is X a constant the move patterns can
   materialize directly?  Rejects TLS and DLLIMPORT symbols and most
   non-trivial UNSPECs.  */
11908 /* Determine if a given RTX is a valid constant. We already know this
11909 satisfies CONSTANT_P. */
11912 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
11914 switch (GET_CODE (x
))
11919 if (GET_CODE (x
) == PLUS
)
11921 if (!CONST_INT_P (XEXP (x
, 1)))
11926 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
11929 /* Only some unspecs are valid as "constants". */
11930 if (GET_CODE (x
) == UNSPEC
)
11931 switch (XINT (x
, 1))
11934 case UNSPEC_GOTOFF
:
11935 case UNSPEC_PLTOFF
:
11936 return TARGET_64BIT
;
11938 case UNSPEC_NTPOFF
:
11939 x
= XVECEXP (x
, 0, 0);
11940 return (GET_CODE (x
) == SYMBOL_REF
11941 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
11942 case UNSPEC_DTPOFF
:
11943 x
= XVECEXP (x
, 0, 0);
11944 return (GET_CODE (x
) == SYMBOL_REF
11945 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
11950 /* We must have drilled down to a symbol. */
11951 if (GET_CODE (x
) == LABEL_REF
)
11953 if (GET_CODE (x
) != SYMBOL_REF
)
11958 /* TLS symbols are never valid. */
11959 if (SYMBOL_REF_TLS_MODEL (x
))
11962 /* DLLIMPORT symbols are never valid. */
11963 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
11964 && SYMBOL_REF_DLLIMPORT_P (x
))
11968 /* mdynamic-no-pic */
11969 if (MACHO_DYNAMIC_NO_PIC_P
)
11970 return machopic_symbol_defined_p (x
);
11975 if (GET_MODE (x
) == TImode
11976 && x
!= CONST0_RTX (TImode
)
11982 if (!standard_sse_constant_p (x
))
11989 /* Otherwise we handle everything else in the move patterns. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: X may be spilled to the constant
   pool exactly when it is an integral constant/vector (case labels elided
   here) or otherwise a legitimate constant.  */
11993 /* Determine if it's legal to put X into the constant pool. This
11994 is not possible for the address of thread-local symbols, which
11995 is checked above. */
11998 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12000 /* We can always put integral constants and vectors in memory. */
12001 switch (GET_CODE (x
))
12011 return !ix86_legitimate_constant_p (mode
, x
);
/* NOTE(review): elided extraction — several case labels (e.g. SYMBOL_REF /
   LABEL_REF at the end, UNSPEC_TPOFF/UNSPEC_NTPOFF around line 12040) and
   `return true` lines are missing; confirm against upstream.  */
/* Is constant X directly usable as an operand while generating PIC code?
   Drills through CONST (+ offset) to check the inner UNSPEC or defers to
   legitimate_pic_address_disp_p.  */
12015 /* Nonzero if the constant value X is a legitimate general operand
12016 when generating PIC code. It is given that flag_pic is on and
12017 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12020 legitimate_pic_operand_p (rtx x
)
12024 switch (GET_CODE (x
))
12027 inner
= XEXP (x
, 0);
12028 if (GET_CODE (inner
) == PLUS
12029 && CONST_INT_P (XEXP (inner
, 1)))
12030 inner
= XEXP (inner
, 0);
12032 /* Only some unspecs are valid as "constants". */
12033 if (GET_CODE (inner
) == UNSPEC
)
12034 switch (XINT (inner
, 1))
12037 case UNSPEC_GOTOFF
:
12038 case UNSPEC_PLTOFF
:
12039 return TARGET_64BIT
;
12041 x
= XVECEXP (inner
, 0, 0);
12042 return (GET_CODE (x
) == SYMBOL_REF
12043 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12044 case UNSPEC_MACHOPIC_OFFSET
:
12045 return legitimate_pic_address_disp_p (x
);
12053 return legitimate_pic_address_disp_p (x
);
/* NOTE(review): garbled extract -- stray line-number tokens; many original
   lines (returns, braces, case labels) are elided.  Comments only.  */
12060 /* Determine if a given CONST RTX is a valid memory displacement
/* ... in PIC mode (per the function name); the rest of this header comment
   is elided in this extract.  */
12064 legitimate_pic_address_disp_p (rtx disp
)
12068 /* In 64bit mode we can allow direct addresses of symbols and labels
12069 when they are not dynamic symbols. */
12072 rtx op0
= disp
, op1
;
12074 switch (GET_CODE (disp
))
/* CONST arm (label elided): split (const (plus sym off)) and bound the
   offset to the signed 25-bit window used for RIP-relative addressing.  */
12080 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12082 op0
= XEXP (XEXP (disp
, 0), 0);
12083 op1
= XEXP (XEXP (disp
, 0), 1);
12084 if (!CONST_INT_P (op1
)
12085 || INTVAL (op1
) >= 16*1024*1024
12086 || INTVAL (op1
) < -16*1024*1024)
12088 if (GET_CODE (op0
) == LABEL_REF
)
12090 if (GET_CODE (op0
) == CONST
12091 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12092 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12094 if (GET_CODE (op0
) == UNSPEC
12095 && XINT (op0
, 1) == UNSPEC_PCREL
)
12097 if (GET_CODE (op0
) != SYMBOL_REF
)
12102 /* TLS references should always be enclosed in UNSPEC. */
12103 if (SYMBOL_REF_TLS_MODEL (op0
))
12105 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12106 && ix86_cmodel
!= CM_LARGE_PIC
)
/* Strip a CONST wrapper before classifying the UNSPEC displacement.  */
12114 if (GET_CODE (disp
) != CONST
)
12116 disp
= XEXP (disp
, 0);
12120 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12121 of GOT tables. We should not need these anyway. */
12122 if (GET_CODE (disp
) != UNSPEC
12123 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12124 && XINT (disp
, 1) != UNSPEC_GOTOFF
12125 && XINT (disp
, 1) != UNSPEC_PCREL
12126 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12129 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12130 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
/* 32-bit path (per surrounding logic): allow sym+const_int, then look at
   the symbolic part alone.  */
12136 if (GET_CODE (disp
) == PLUS
)
12138 if (!CONST_INT_P (XEXP (disp
, 1)))
12140 disp
= XEXP (disp
, 0);
12144 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12147 if (GET_CODE (disp
) != UNSPEC
)
/* Classify which UNSPEC relocations are acceptable displacements.  */
12150 switch (XINT (disp
, 1))
12155 /* We need to check for both symbols and labels because VxWorks loads
12156 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12158 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12159 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12160 case UNSPEC_GOTOFF
:
12161 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12162 While the ABI specifies also a 32bit relocation, we don't produce it in
12163 the small PIC model at all. */
12164 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12165 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12167 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12169 case UNSPEC_GOTTPOFF
:
12170 case UNSPEC_GOTNTPOFF
:
12171 case UNSPEC_INDNTPOFF
:
/* Initial-exec TLS relocations: operand must be an IE-model symbol.  */
12174 disp
= XVECEXP (disp
, 0, 0);
12175 return (GET_CODE (disp
) == SYMBOL_REF
12176 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12177 case UNSPEC_NTPOFF
:
12178 disp
= XVECEXP (disp
, 0, 0);
12179 return (GET_CODE (disp
) == SYMBOL_REF
12180 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12181 case UNSPEC_DTPOFF
:
12182 disp
= XVECEXP (disp
, 0, 0);
12183 return (GET_CODE (disp
) == SYMBOL_REF
12184 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
/* NOTE(review): garbled extract -- stray line-number tokens, elided lines
   (declarations of base/index, braces, the WIN handling).  Comments only.  */
12190 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12191 replace the input X, or the original X if no replacement is called for.
12192 The output parameter *WIN is 1 if the calling macro should goto WIN,
12193 0 if it should not. */
12196 ix86_legitimize_reload_address (rtx x
,
12197 enum machine_mode mode ATTRIBUTE_UNUSED
,
12198 int opnum
, int type
,
12199 int ind_levels ATTRIBUTE_UNUSED
)
12201 /* Reload can generate:
12203 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12207 This RTX is rejected from ix86_legitimate_address_p due to
12208 non-strictness of base register 97. Following this rejection,
12209 reload pushes all three components into separate registers,
12210 creating invalid memory address RTX.
12212 Following code reloads only the invalid part of the
12213 memory address RTX. */
/* Match (plus (plus ... reg) reg): reload the inner reg as a base and the
   outer reg as an index, but only those that fail the strict checks.  */
12215 if (GET_CODE (x
) == PLUS
12216 && REG_P (XEXP (x
, 1))
12217 && GET_CODE (XEXP (x
, 0)) == PLUS
12218 && REG_P (XEXP (XEXP (x
, 0), 1)))
12221 bool something_reloaded
= false;
12223 base
= XEXP (XEXP (x
, 0), 1);
12224 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12226 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12227 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12228 opnum
, (enum reload_type
) type
);
12229 something_reloaded
= true;
12232 index
= XEXP (x
, 1);
12233 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12235 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12236 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12237 opnum
, (enum reload_type
) type
);
12238 something_reloaded
= true;
/* At least one of the two registers must have needed a reload, or we
   should never have matched this shape.  */
12241 gcc_assert (something_reloaded
);
/* NOTE(review): garbled extract -- stray line-number tokens; many lines
   (return statements, braces, some conditions) are elided.  Comments only;
   code left byte-identical.  */
12248 /* Recognizes RTL expressions that are valid memory addresses for an
12249 instruction. The MODE argument is the machine mode for the MEM
12250 expression that wants to use this address.
12252 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12253 convert common non-canonical forms to canonical form so that they will
12257 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12258 rtx addr
, bool strict
)
12260 struct ix86_address parts
;
12261 rtx base
, index
, disp
;
12262 HOST_WIDE_INT scale
;
/* Split ADDR into base/index/displacement/scale; reject if it doesn't
   decompose.  */
12264 if (ix86_decompose_address (addr
, &parts
) <= 0)
12265 /* Decomposition failed. */
12269 index
= parts
.index
;
12271 scale
= parts
.scale
;
12273 /* Validate base register. */
12280 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12281 reg
= SUBREG_REG (base
);
12283 /* Base is not a register. */
12286 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12289 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12290 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12291 /* Base is not valid. */
12295 /* Validate index register. */
12302 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12303 reg
= SUBREG_REG (index
);
12305 /* Index is not a register. */
12308 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12311 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12312 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12313 /* Index is not valid. */
12317 /* Index and base should have the same mode. */
12319 && GET_MODE (base
) != GET_MODE (index
))
12322 /* Validate scale factor. */
12326 /* Scale without index. */
/* NOTE(review): the scale==1 acceptance path is elided; only 2/4/8 are
   checked here.  */
12329 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12330 /* Scale is not a valid multiplier. */
12334 /* Validate displacement. */
12337 if (GET_CODE (disp
) == CONST
12338 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12339 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12340 switch (XINT (XEXP (disp
, 0), 1))
12342 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12343 used. While ABI specify also 32bit relocations, we don't produce
12344 them at all and use IP relative instead. */
12346 case UNSPEC_GOTOFF
:
12347 gcc_assert (flag_pic
);
12349 goto is_legitimate_pic
;
12351 /* 64bit address unspec. */
12354 case UNSPEC_GOTPCREL
:
12356 gcc_assert (flag_pic
);
12357 goto is_legitimate_pic
;
12359 case UNSPEC_GOTTPOFF
:
12360 case UNSPEC_GOTNTPOFF
:
12361 case UNSPEC_INDNTPOFF
:
12362 case UNSPEC_NTPOFF
:
12363 case UNSPEC_DTPOFF
:
12366 case UNSPEC_STACK_CHECK
:
12367 gcc_assert (flag_split_stack
);
12371 /* Invalid address unspec. */
/* Symbolic displacement: PIC / Mach-O handling (the is_legitimate_pic
   label itself is elided from this extract).  */
12375 else if (SYMBOLIC_CONST (disp
)
12379 && MACHOPIC_INDIRECT
12380 && !machopic_operand_p (disp
)
12386 if (TARGET_64BIT
&& (index
|| base
))
12388 /* foo@dtpoff(%rX) is ok. */
12389 if (GET_CODE (disp
) != CONST
12390 || GET_CODE (XEXP (disp
, 0)) != PLUS
12391 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12392 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12393 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12394 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12395 /* Non-constant pic memory reference. */
12398 else if ((!TARGET_MACHO
|| flag_pic
)
12399 && ! legitimate_pic_address_disp_p (disp
))
12400 /* Displacement is an invalid pic construct. */
12403 else if (MACHO_DYNAMIC_NO_PIC_P
12404 && !ix86_legitimate_constant_p (Pmode
, disp
))
12405 /* displacement must be referenced via non_lazy_pointer */
12409 /* This code used to verify that a symbolic pic displacement
12410 includes the pic_offset_table_rtx register.
12412 While this is good idea, unfortunately these constructs may
12413 be created by "adds using lea" optimization for incorrect
12422 This code is nonsensical, but results in addressing
12423 GOT table with pic_offset_table_rtx base. We can't
12424 just refuse it easily, since it gets matched by
12425 "addsi3" pattern, that later gets split to lea in the
12426 case output register differs from input. While this
12427 can be handled by separate addsi pattern for this case
12428 that never results in lea, this seems to be easier and
12429 correct fix for crash to disable this test. */
12431 else if (GET_CODE (disp
) != LABEL_REF
12432 && !CONST_INT_P (disp
)
12433 && (GET_CODE (disp
) != CONST
12434 || !ix86_legitimate_constant_p (Pmode
, disp
))
12435 && (GET_CODE (disp
) != SYMBOL_REF
12436 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12437 /* Displacement is not constant. */
12439 else if (TARGET_64BIT
12440 && !x86_64_immediate_operand (disp
, VOIDmode
))
12441 /* Displacement is out of range. */
12445 /* Everything looks valid. */
12449 /* Determine if a given RTX is a valid constant address. */
/* A constant address must satisfy CONSTANT_P and pass the strict
   (strict = 1) legitimate-address check in Pmode.  NOTE(review): the
   return type line is elided in this extract.  */
12452 constant_address_p (rtx x
)
12454 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12457 /* Return a unique alias set for the GOT. */
12459 static alias_set_type
12460 ix86_GOT_alias_set (void)
/* Lazily create the alias set on first use and cache it in a function-
   static.  NOTE(review): the guard test and return are elided in this
   extract; -1 is the visible "not yet allocated" initializer.  */
12462 static alias_set_type set
= -1;
12464 set
= new_alias_set ();
/* NOTE(review): garbled extract -- stray line-number tokens; braces,
   returns, some UNSPEC arguments and several else-arms are elided.
   Comments only; code left byte-identical.  */
12468 /* Return a legitimate reference for ORIG (an address) using the
12469 register REG. If REG is 0, a new pseudo is generated.
12471 There are two types of references that must be handled:
12473 1. Global data references must load the address from the GOT, via
12474 the PIC reg. An insn is emitted to do this load, and the reg is
12477 2. Static data references, constant pool addresses, and code labels
12478 compute the address as an offset from the GOT, whose base is in
12479 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12480 differentiate them from global data objects. The returned
12481 address is the PIC reg + an unspec constant.
12483 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12484 reg also appears in the address. */
12487 legitimize_pic_address (rtx orig
, rtx reg
)
12490 rtx new_rtx
= orig
;
/* 32-bit Darwin: hand the whole job to the generic Mach-O machinery.  */
12493 if (TARGET_MACHO
&& !TARGET_64BIT
)
12496 reg
= gen_reg_rtx (Pmode
);
12497 /* Use the generic Mach-O PIC machinery. */
12498 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
/* 64-bit: already-legitimate displacements pass through (body elided).  */
12502 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12504 else if (TARGET_64BIT
12505 && ix86_cmodel
!= CM_SMALL_PIC
12506 && gotoff_operand (addr
, Pmode
))
12509 /* This symbol may be referenced via a displacement from the PIC
12510 base address (@GOTOFF). */
12512 if (reload_in_progress
)
12513 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12514 if (GET_CODE (addr
) == CONST
)
12515 addr
= XEXP (addr
, 0);
12516 if (GET_CODE (addr
) == PLUS
)
12518 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12520 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12523 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12524 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
/* Materialize the @GOTOFF constant, then add the PIC base to it.  */
12526 tmpreg
= gen_reg_rtx (Pmode
);
12529 emit_move_insn (tmpreg
, new_rtx
);
12533 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12534 tmpreg
, 1, OPTAB_DIRECT
);
12537 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
/* 32-bit @GOTOFF: same wrapping, but the PLUS can be used directly.  */
12539 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12541 /* This symbol may be referenced via a displacement from the PIC
12542 base address (@GOTOFF). */
12544 if (reload_in_progress
)
12545 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12546 if (GET_CODE (addr
) == CONST
)
12547 addr
= XEXP (addr
, 0);
12548 if (GET_CODE (addr
) == PLUS
)
12550 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12552 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12555 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12556 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12557 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12561 emit_move_insn (reg
, new_rtx
);
/* Non-TLS symbols (and VxWorks text labels) go through the GOT.  */
12565 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12566 /* We can't use @GOTOFF for text labels on VxWorks;
12567 see gotoff_operand. */
12568 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12570 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12572 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12573 return legitimize_dllimport_symbol (addr
, true);
12574 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12575 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12576 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12578 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12579 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12583 /* For x64 PE-COFF there is no GOT table. So we use address
12585 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12587 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12588 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12591 reg
= gen_reg_rtx (Pmode
);
12592 emit_move_insn (reg
, new_rtx
);
/* 64-bit small/medium PIC: RIP-relative GOT load (@GOTPCREL).  */
12595 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12597 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12598 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12599 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12600 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12603 reg
= gen_reg_rtx (Pmode
);
12604 /* Use directly gen_movsi, otherwise the address is loaded
12605 into register for CSE. We don't want to CSE this addresses,
12606 instead we CSE addresses from the GOT table, so skip this. */
12607 emit_insn (gen_movsi (reg
, new_rtx
));
12612 /* This symbol must be referenced via a load from the
12613 Global Offset Table (@GOT). */
12615 if (reload_in_progress
)
12616 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12617 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12618 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12620 new_rtx
= force_reg (Pmode
, new_rtx
);
12621 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12622 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12623 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12626 reg
= gen_reg_rtx (Pmode
);
12627 emit_move_insn (reg
, new_rtx
);
/* Fallback arms: plain constants and CONST/PLUS compositions.  */
12633 if (CONST_INT_P (addr
)
12634 && !x86_64_immediate_operand (addr
, VOIDmode
))
12638 emit_move_insn (reg
, addr
);
12642 new_rtx
= force_reg (Pmode
, addr
);
12644 else if (GET_CODE (addr
) == CONST
)
12646 addr
= XEXP (addr
, 0);
12648 /* We must match stuff we generate before. Assume the only
12649 unspecs that can get here are ours. Not that we could do
12650 anything with them anyway.... */
12651 if (GET_CODE (addr
) == UNSPEC
12652 || (GET_CODE (addr
) == PLUS
12653 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12655 gcc_assert (GET_CODE (addr
) == PLUS
);
12657 if (GET_CODE (addr
) == PLUS
)
12659 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12661 /* Check first to see if this is a constant offset from a @GOTOFF
12662 symbol reference. */
12663 if (gotoff_operand (op0
, Pmode
)
12664 && CONST_INT_P (op1
))
12668 if (reload_in_progress
)
12669 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12670 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12672 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12673 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12674 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12678 emit_move_insn (reg
, new_rtx
);
/* Large offsets can't ride along as immediates on x86-64.  */
12684 if (INTVAL (op1
) < -16*1024*1024
12685 || INTVAL (op1
) >= 16*1024*1024)
12687 if (!x86_64_immediate_operand (op1
, Pmode
))
12688 op1
= force_reg (Pmode
, op1
);
12689 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
/* General PLUS: legitimize both halves recursively, then recombine.  */
12695 rtx base
= legitimize_pic_address (op0
, reg
);
12696 enum machine_mode mode
= GET_MODE (base
);
12698 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
12700 if (CONST_INT_P (new_rtx
))
12702 if (INTVAL (new_rtx
) < -16*1024*1024
12703 || INTVAL (new_rtx
) >= 16*1024*1024)
12705 if (!x86_64_immediate_operand (new_rtx
, mode
))
12706 new_rtx
= force_reg (mode
, new_rtx
);
12708 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
12711 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
/* Re-associate so any constant term ends up outermost.  */
12715 if (GET_CODE (new_rtx
) == PLUS
12716 && CONSTANT_P (XEXP (new_rtx
, 1)))
12718 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
12719 new_rtx
= XEXP (new_rtx
, 1);
12721 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
12729 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Builds (unspec [const0] UNSPEC_TP) in ptr_mode; widens SImode->DImode via
   ZERO_EXTEND when TP_MODE differs.  NOTE(review): the return type, braces,
   the TO_REG guard and the final return are elided in this extract.  */
12732 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12734 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
)
;
12736 if (GET_MODE (tp
) != tp_mode
)
12738 gcc_assert (GET_MODE (tp
) == SImode
);
12739 gcc_assert (tp_mode
== DImode
);
12741 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12745 tp
= copy_to_mode_reg (tp_mode
, tp
);
12750 /* Construct the SYMBOL_REF for the tls_get_addr function. */
/* Cached across calls; GTY(()) keeps the rtx alive across GC.  */
12752 static GTY(()) rtx ix86_tls_symbol
;
/* Lazily build and return the SYMBOL_REF.  GNU TLS on 32-bit uses the
   triple-underscore "___tls_get_addr" name; otherwise "__tls_get_addr".  */
12755 ix86_tls_get_addr (void)
12757 if (!ix86_tls_symbol
)
12760 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12761 ? "___tls_get_addr" : "__tls_get_addr");
12763 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12766 return ix86_tls_symbol
;
12769 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12771 static GTY(()) rtx ix86_tls_module_base_symbol
;
/* Lazily create the cached _TLS_MODULE_BASE_ SYMBOL_REF and tag it with
   the global-dynamic TLS model in its symbol flags.  */
12774 ix86_tls_module_base (void)
12776 if (!ix86_tls_module_base_symbol
)
12778 ix86_tls_module_base_symbol
12779 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12781 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12782 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12785 return ix86_tls_module_base_symbol
;
/* NOTE(review): garbled extract -- stray line-number tokens; the switch
   head, many braces, returns and some else-arms are elided.  The four TLS
   access models (GD/LD/IE/LE) are still recognizable.  Comments only.  */
12788 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12789 false if we expect this to be used for a memory address and true if
12790 we expect to load the address into a register. */
12793 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12795 rtx dest
, base
, off
;
12796 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12797 enum machine_mode tp_mode
= Pmode
;
/* --- Global-dynamic: call __tls_get_addr (or the GNU2 sequence).  */
12802 case TLS_MODEL_GLOBAL_DYNAMIC
:
12803 dest
= gen_reg_rtx (Pmode
);
12808 pic
= pic_offset_table_rtx
;
12811 pic
= gen_reg_rtx (Pmode
);
12812 emit_insn (gen_set_got (pic
));
12816 if (TARGET_GNU2_TLS
)
12819 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
12821 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
12823 tp
= get_thread_pointer (Pmode
, true);
12824 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
12826 if (GET_MODE (x
) != Pmode
)
12827 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12829 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
/* Classic GD path: libcall to tls_get_addr, result in %rax.  */
12833 rtx caddr
= ix86_tls_get_addr ();
12837 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12842 (ix86_gen_tls_global_dynamic_64 (rax
, x
, caddr
));
12843 insns
= get_insns ();
12846 if (GET_MODE (x
) != Pmode
)
12847 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12849 RTL_CONST_CALL_P (insns
) = 1;
12850 emit_libcall_block (insns
, dest
, rax
, x
);
12853 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
/* --- Local-dynamic: compute the module base once, add @DTPOFF.  */
12857 case TLS_MODEL_LOCAL_DYNAMIC
:
12858 base
= gen_reg_rtx (Pmode
);
12863 pic
= pic_offset_table_rtx
;
12866 pic
= gen_reg_rtx (Pmode
);
12867 emit_insn (gen_set_got (pic
));
12871 if (TARGET_GNU2_TLS
)
12873 rtx tmp
= ix86_tls_module_base ();
12876 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
12878 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
12880 tp
= get_thread_pointer (Pmode
, true);
12881 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
12882 gen_rtx_MINUS (Pmode
, tmp
, tp
));
12886 rtx caddr
= ix86_tls_get_addr ();
12890 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
12895 (ix86_gen_tls_local_dynamic_base_64 (rax
, caddr
));
12896 insns
= get_insns ();
12899 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12900 share the LD_BASE result with other LD model accesses. */
12901 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
12902 UNSPEC_TLS_LD_BASE
);
12904 RTL_CONST_CALL_P (insns
) = 1;
12905 emit_libcall_block (insns
, base
, rax
, eqv
);
12908 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
12911 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
12912 off
= gen_rtx_CONST (Pmode
, off
);
12914 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
12916 if (TARGET_GNU2_TLS
)
12918 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
12920 if (GET_MODE (x
) != Pmode
)
12921 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
12923 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
/* --- Initial-exec: load the TP offset from the GOT.  */
12927 case TLS_MODEL_INITIAL_EXEC
:
12930 if (TARGET_SUN_TLS
&& !TARGET_X32
)
12932 /* The Sun linker took the AMD64 TLS spec literally
12933 and can only handle %rax as destination of the
12934 initial executable code sequence. */
12936 dest
= gen_reg_rtx (DImode
);
12937 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
12941 /* Generate DImode references to avoid %fs:(%reg32)
12942 problems and linker IE->LE relaxation bug. */
12945 type
= UNSPEC_GOTNTPOFF
;
12949 if (reload_in_progress
)
12950 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12951 pic
= pic_offset_table_rtx
;
12952 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
12954 else if (!TARGET_ANY_GNU_TLS
)
12956 pic
= gen_reg_rtx (Pmode
);
12957 emit_insn (gen_set_got (pic
));
12958 type
= UNSPEC_GOTTPOFF
;
12963 type
= UNSPEC_INDNTPOFF
;
12966 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
12967 off
= gen_rtx_CONST (tp_mode
, off
);
12969 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
12970 off
= gen_const_mem (tp_mode
, off
);
12971 set_mem_alias_set (off
, ix86_GOT_alias_set ());
12973 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12975 base
= get_thread_pointer (tp_mode
,
12976 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12977 off
= force_reg (tp_mode
, off
);
12978 return gen_rtx_PLUS (tp_mode
, base
, off
);
12982 base
= get_thread_pointer (Pmode
, true);
12983 dest
= gen_reg_rtx (Pmode
);
12984 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
/* --- Local-exec: offset is a link-time constant from the TP.  */
12988 case TLS_MODEL_LOCAL_EXEC
:
12989 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
12990 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12991 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
12992 off
= gen_rtx_CONST (Pmode
, off
);
12994 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
12996 base
= get_thread_pointer (Pmode
,
12997 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
12998 return gen_rtx_PLUS (Pmode
, base
, off
);
13002 base
= get_thread_pointer (Pmode
, true);
13003 dest
= gen_reg_rtx (Pmode
);
13004 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13009 gcc_unreachable ();
/* NOTE(review): garbled extract -- stray line-number tokens; declarations
   (to, name, imp_name, rtl, loc), braces and the returns are elided.
   Comments only; code left byte-identical.  */
13015 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* GC-managed hash table mapping a decl to its synthesized __imp_ VAR_DECL;
   entries are kept alive only while the source decl is marked.  */
13018 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13019 htab_t dllimport_map
;
13022 get_dllimport_decl (tree decl
)
13024 struct tree_map
*h
, in
;
13027 const char *prefix
;
13028 size_t namelen
, prefixlen
;
/* Create the map lazily, then probe it with the decl's pointer hash.  */
13033 if (!dllimport_map
)
13034 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13036 in
.hash
= htab_hash_pointer (decl
);
13037 in
.base
.from
= decl
;
13038 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13039 h
= (struct tree_map
*) *loc
;
/* Cache miss: build an artificial, external, read-only pointer VAR_DECL
   standing in for the __imp_ indirection cell.  */
13043 *loc
= h
= ggc_alloc_tree_map ();
13045 h
->base
.from
= decl
;
13046 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13047 VAR_DECL
, NULL
, ptr_type_node
);
13048 DECL_ARTIFICIAL (to
) = 1;
13049 DECL_IGNORED_P (to
) = 1;
13050 DECL_EXTERNAL (to
) = 1;
13051 TREE_READONLY (to
) = 1;
/* Assemble the "*__imp_" / "*__imp__" name from the stripped symbol.  */
13053 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13054 name
= targetm
.strip_name_encoding (name
);
13055 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13056 ? "*__imp_" : "*__imp__";
13057 namelen
= strlen (name
);
13058 prefixlen
= strlen (prefix
);
13059 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13060 memcpy (imp_name
, prefix
, prefixlen
);
13061 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13063 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13064 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13065 SET_SYMBOL_REF_DECL (rtl
, to
);
13066 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
/* The decl's RTL is a constant load through the __imp_ cell, aliased
   with the GOT alias set.  */
13068 rtl
= gen_const_mem (Pmode
, rtl
);
13069 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13071 SET_DECL_RTL (to
, rtl
);
13072 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13077 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13078 true if we require the result be a register. */
/* Looks up (or creates) the __imp_ decl for SYMBOL's decl and returns its
   DECL_RTL, forced into a register when WANT_REG.  NOTE(review): return
   type, braces and the final return are elided in this extract.  */
13081 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13086 gcc_assert (SYMBOL_REF_DECL (symbol
));
13087 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13089 x
= DECL_RTL (imp_decl
);
13091 x
= force_reg (Pmode
, x
);
/* NOTE(review): garbled extract -- stray line-number tokens; the `changed'
   declaration, several guards, braces and returns are elided.  Comments
   only; code left byte-identical.  */
13095 /* Try machine-dependent ways of modifying an illegitimate address
13096 to be legitimate. If we find one, return the new, valid address.
13097 This macro is used in only one place: `memory_address' in explow.c.
13099 OLDX is the address as it was before break_out_memory_refs was called.
13100 In some cases it is useful to look at this to decide what needs to be done.
13102 It is always safe for this macro to do nothing. It exists to recognize
13103 opportunities to optimize the output.
13105 For the 80386, we handle X+REG by loading X into a register R and
13106 using R+REG. R will go in a general reg and indexing will be used.
13107 However, if REG is a broken-out memory address or multiplication,
13108 nothing needs to be done because REG can certainly go in a general reg.
13110 When -fpic is used, special handling is needed for symbolic references.
13111 See comments by legitimize_pic_address in i386.c for details. */
13114 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13115 enum machine_mode mode
)
/* TLS symbols (bare or inside CONST+PLUS) are handled first.  */
13120 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13122 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13123 if (GET_CODE (x
) == CONST
13124 && GET_CODE (XEXP (x
, 0)) == PLUS
13125 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13126 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13128 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13129 (enum tls_model
) log
, false);
13130 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
/* Then dllimport symbols, PIC symbolic constants, and Mach-O no-PIC.  */
13133 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13135 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13136 return legitimize_dllimport_symbol (x
, true);
13137 if (GET_CODE (x
) == CONST
13138 && GET_CODE (XEXP (x
, 0)) == PLUS
13139 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13140 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13142 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13143 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13147 if (flag_pic
&& SYMBOLIC_CONST (x
))
13148 return legitimize_pic_address (x
, 0);
13151 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13152 return machopic_indirect_data_reference (x
, 0);
13155 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13156 if (GET_CODE (x
) == ASHIFT
13157 && CONST_INT_P (XEXP (x
, 1))
13158 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13161 log
= INTVAL (XEXP (x
, 1));
13162 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13163 GEN_INT (1 << log
));
13166 if (GET_CODE (x
) == PLUS
)
13168 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13170 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13171 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13172 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13175 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13176 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13177 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13178 GEN_INT (1 << log
));
13181 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13182 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13183 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13186 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13187 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13188 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13189 GEN_INT (1 << log
));
13192 /* Put multiply first if it isn't already. */
13193 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13195 rtx tmp
= XEXP (x
, 0);
13196 XEXP (x
, 0) = XEXP (x
, 1);
13201 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13202 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13203 created by virtual register instantiation, register elimination, and
13204 similar optimizations. */
13205 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13208 x
= gen_rtx_PLUS (Pmode
,
13209 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13210 XEXP (XEXP (x
, 1), 0)),
13211 XEXP (XEXP (x
, 1), 1));
13215 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13216 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13217 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13218 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13219 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13220 && CONSTANT_P (XEXP (x
, 1)))
13223 rtx other
= NULL_RTX
;
13225 if (CONST_INT_P (XEXP (x
, 1)))
13227 constant
= XEXP (x
, 1);
13228 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13230 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13232 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13233 other
= XEXP (x
, 1);
13241 x
= gen_rtx_PLUS (Pmode
,
13242 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13243 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13244 plus_constant (Pmode
, other
,
13245 INTVAL (constant
)));
/* Early exit if the canonicalizations already made X legitimate.  */
13249 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13252 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13255 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13258 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13261 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13265 && REG_P (XEXP (x
, 1))
13266 && REG_P (XEXP (x
, 0)))
13269 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13272 x
= legitimize_pic_address (x
, 0);
13275 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
/* Last resort: force the non-register half of the PLUS into a temp.  */
13278 if (REG_P (XEXP (x
, 0)))
13280 rtx temp
= gen_reg_rtx (Pmode
);
13281 rtx val
= force_operand (XEXP (x
, 1), temp
);
13284 val
= convert_to_mode (Pmode
, val
, 1);
13285 emit_move_insn (temp
, val
);
13288 XEXP (x
, 1) = temp
;
13292 else if (REG_P (XEXP (x
, 1)))
13294 rtx temp
= gen_reg_rtx (Pmode
);
13295 rtx val
= force_operand (XEXP (x
, 0), temp
);
13298 val
= convert_to_mode (Pmode
, val
, 1);
13299 emit_move_insn (temp
, val
);
13302 XEXP (x
, 0) = temp
;
13310 /* Print an integer constant expression in assembler syntax. Addition
13311 and subtraction are the only arithmetic that may appear in these
13312 expressions. FILE is the stdio stream to write to, X is the rtx, and
13313 CODE is the operand print code from the output string. */
13316 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13320 switch (GET_CODE (x
))
13323 gcc_assert (flag_pic
);
13328 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13329 output_addr_const (file
, x
);
13332 const char *name
= XSTR (x
, 0);
13334 /* Mark the decl as referenced so that cgraph will
13335 output the function. */
13336 if (SYMBOL_REF_DECL (x
))
13337 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13340 if (MACHOPIC_INDIRECT
13341 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13342 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13344 assemble_name (file
, name
);
13346 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13347 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13348 fputs ("@PLT", file
);
13355 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13356 assemble_name (asm_out_file
, buf
);
13360 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13364 /* This used to output parentheses around the expression,
13365 but that does not work on the 386 (either ATT or BSD assembler). */
13366 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13370 if (GET_MODE (x
) == VOIDmode
)
13372 /* We can use %d if the number is <32 bits and positive. */
13373 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13374 fprintf (file
, "0x%lx%08lx",
13375 (unsigned long) CONST_DOUBLE_HIGH (x
),
13376 (unsigned long) CONST_DOUBLE_LOW (x
));
13378 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13381 /* We can't handle floating point constants;
13382 TARGET_PRINT_OPERAND must handle them. */
13383 output_operand_lossage ("floating constant misused");
13387 /* Some assemblers need integer constants to appear first. */
13388 if (CONST_INT_P (XEXP (x
, 0)))
13390 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13392 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13396 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13397 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13399 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13405 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13406 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13408 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13410 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13414 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13416 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13421 gcc_assert (XVECLEN (x
, 0) == 1);
13422 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13423 switch (XINT (x
, 1))
13426 fputs ("@GOT", file
);
13428 case UNSPEC_GOTOFF
:
13429 fputs ("@GOTOFF", file
);
13431 case UNSPEC_PLTOFF
:
13432 fputs ("@PLTOFF", file
);
13435 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13436 "(%rip)" : "[rip]", file
);
13438 case UNSPEC_GOTPCREL
:
13439 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13440 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13442 case UNSPEC_GOTTPOFF
:
13443 /* FIXME: This might be @TPOFF in Sun ld too. */
13444 fputs ("@gottpoff", file
);
13447 fputs ("@tpoff", file
);
13449 case UNSPEC_NTPOFF
:
13451 fputs ("@tpoff", file
);
13453 fputs ("@ntpoff", file
);
13455 case UNSPEC_DTPOFF
:
13456 fputs ("@dtpoff", file
);
13458 case UNSPEC_GOTNTPOFF
:
13460 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13461 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13463 fputs ("@gotntpoff", file
);
13465 case UNSPEC_INDNTPOFF
:
13466 fputs ("@indntpoff", file
);
13469 case UNSPEC_MACHOPIC_OFFSET
:
13471 machopic_output_function_base_name (file
);
13475 output_operand_lossage ("invalid UNSPEC as operand");
13481 output_operand_lossage ("invalid expression as operand");
13485 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13486 We need to emit DTP-relative relocations. */
13488 static void ATTRIBUTE_UNUSED
13489 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13491 fputs (ASM_LONG
, file
);
13492 output_addr_const (file
, x
);
13493 fputs ("@dtpoff", file
);
13499 fputs (", 0", file
);
13502 gcc_unreachable ();
13506 /* Return true if X is a representation of the PIC register. This copes
13507 with calls from ix86_find_base_term, where the register might have
13508 been replaced by a cselib value. */
13511 ix86_pic_register_p (rtx x
)
13513 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13514 return (pic_offset_table_rtx
13515 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13517 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13520 /* Helper function for ix86_delegitimize_address.
13521 Attempt to delegitimize TLS local-exec accesses. */
13524 ix86_delegitimize_tls_address (rtx orig_x
)
13526 rtx x
= orig_x
, unspec
;
13527 struct ix86_address addr
;
13529 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13533 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13535 if (ix86_decompose_address (x
, &addr
) == 0
13536 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13537 || addr
.disp
== NULL_RTX
13538 || GET_CODE (addr
.disp
) != CONST
)
13540 unspec
= XEXP (addr
.disp
, 0);
13541 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13542 unspec
= XEXP (unspec
, 0);
13543 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13545 x
= XVECEXP (unspec
, 0, 0);
13546 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13547 if (unspec
!= XEXP (addr
.disp
, 0))
13548 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13551 rtx idx
= addr
.index
;
13552 if (addr
.scale
!= 1)
13553 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13554 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13557 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13558 if (MEM_P (orig_x
))
13559 x
= replace_equiv_address_nv (orig_x
, x
);
13563 /* In the name of slightly smaller debug output, and to cater to
13564 general assembler lossage, recognize PIC+GOTOFF and turn it back
13565 into a direct symbol reference.
13567 On Darwin, this is necessary to avoid a crash, because Darwin
13568 has a different PIC label for each routine but the DWARF debugging
13569 information is not associated with any particular routine, so it's
13570 necessary to remove references to the PIC label from RTL stored by
13571 the DWARF output code. */
13574 ix86_delegitimize_address (rtx x
)
13576 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13577 /* addend is NULL or some rtx if x is something+GOTOFF where
13578 something doesn't include the PIC register. */
13579 rtx addend
= NULL_RTX
;
13580 /* reg_addend is NULL or a multiple of some register. */
13581 rtx reg_addend
= NULL_RTX
;
13582 /* const_addend is NULL or a const_int. */
13583 rtx const_addend
= NULL_RTX
;
13584 /* This is the result, or NULL. */
13585 rtx result
= NULL_RTX
;
13594 if (GET_CODE (x
) == CONST
13595 && GET_CODE (XEXP (x
, 0)) == PLUS
13596 && GET_MODE (XEXP (x
, 0)) == Pmode
13597 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13598 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13599 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13601 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13602 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13603 if (MEM_P (orig_x
))
13604 x
= replace_equiv_address_nv (orig_x
, x
);
13607 if (GET_CODE (x
) != CONST
13608 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13609 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13610 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13611 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13612 return ix86_delegitimize_tls_address (orig_x
);
13613 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13614 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13616 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13624 if (GET_CODE (x
) != PLUS
13625 || GET_CODE (XEXP (x
, 1)) != CONST
)
13626 return ix86_delegitimize_tls_address (orig_x
);
13628 if (ix86_pic_register_p (XEXP (x
, 0)))
13629 /* %ebx + GOT/GOTOFF */
13631 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13633 /* %ebx + %reg * scale + GOT/GOTOFF */
13634 reg_addend
= XEXP (x
, 0);
13635 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13636 reg_addend
= XEXP (reg_addend
, 1);
13637 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13638 reg_addend
= XEXP (reg_addend
, 0);
13641 reg_addend
= NULL_RTX
;
13642 addend
= XEXP (x
, 0);
13646 addend
= XEXP (x
, 0);
13648 x
= XEXP (XEXP (x
, 1), 0);
13649 if (GET_CODE (x
) == PLUS
13650 && CONST_INT_P (XEXP (x
, 1)))
13652 const_addend
= XEXP (x
, 1);
13656 if (GET_CODE (x
) == UNSPEC
13657 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13658 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13659 result
= XVECEXP (x
, 0, 0);
13661 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13662 && !MEM_P (orig_x
))
13663 result
= XVECEXP (x
, 0, 0);
13666 return ix86_delegitimize_tls_address (orig_x
);
13669 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13671 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13674 /* If the rest of original X doesn't involve the PIC register, add
13675 addend and subtract pic_offset_table_rtx. This can happen e.g.
13677 leal (%ebx, %ecx, 4), %ecx
13679 movl foo@GOTOFF(%ecx), %edx
13680 in which case we return (%ecx - %ebx) + foo. */
13681 if (pic_offset_table_rtx
)
13682 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13683 pic_offset_table_rtx
),
13688 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13690 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13691 if (result
== NULL_RTX
)
13697 /* If X is a machine specific address (i.e. a symbol or label being
13698 referenced as a displacement from the GOT implemented using an
13699 UNSPEC), then return the base term. Otherwise return X. */
13702 ix86_find_base_term (rtx x
)
13708 if (GET_CODE (x
) != CONST
)
13710 term
= XEXP (x
, 0);
13711 if (GET_CODE (term
) == PLUS
13712 && (CONST_INT_P (XEXP (term
, 1))
13713 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13714 term
= XEXP (term
, 0);
13715 if (GET_CODE (term
) != UNSPEC
13716 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13717 && XINT (term
, 1) != UNSPEC_PCREL
))
13720 return XVECEXP (term
, 0, 0);
13723 return ix86_delegitimize_address (x
);
13727 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13728 bool fp
, FILE *file
)
13730 const char *suffix
;
13732 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13734 code
= ix86_fp_compare_code_to_integer (code
);
13738 code
= reverse_condition (code
);
13789 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13793 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13794 Those same assemblers have the same but opposite lossage on cmov. */
13795 if (mode
== CCmode
)
13796 suffix
= fp
? "nbe" : "a";
13797 else if (mode
== CCCmode
)
13800 gcc_unreachable ();
13816 gcc_unreachable ();
13820 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13837 gcc_unreachable ();
13841 /* ??? As above. */
13842 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
13843 suffix
= fp
? "nb" : "ae";
13846 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
13850 /* ??? As above. */
13851 if (mode
== CCmode
)
13853 else if (mode
== CCCmode
)
13854 suffix
= fp
? "nb" : "ae";
13856 gcc_unreachable ();
13859 suffix
= fp
? "u" : "p";
13862 suffix
= fp
? "nu" : "np";
13865 gcc_unreachable ();
13867 fputs (suffix
, file
);
13870 /* Print the name of register X to FILE based on its machine mode and number.
13871 If CODE is 'w', pretend the mode is HImode.
13872 If CODE is 'b', pretend the mode is QImode.
13873 If CODE is 'k', pretend the mode is SImode.
13874 If CODE is 'q', pretend the mode is DImode.
13875 If CODE is 'x', pretend the mode is V4SFmode.
13876 If CODE is 't', pretend the mode is V8SFmode.
13877 If CODE is 'h', pretend the reg is the 'high' byte register.
13878 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
13879 If CODE is 'd', duplicate the operand for AVX instruction.
13883 print_reg (rtx x
, int code
, FILE *file
)
13886 unsigned int regno
;
13887 bool duplicated
= code
== 'd' && TARGET_AVX
;
13889 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13894 gcc_assert (TARGET_64BIT
);
13895 fputs ("rip", file
);
13899 regno
= true_regnum (x
);
13900 gcc_assert (regno
!= ARG_POINTER_REGNUM
13901 && regno
!= FRAME_POINTER_REGNUM
13902 && regno
!= FLAGS_REG
13903 && regno
!= FPSR_REG
13904 && regno
!= FPCR_REG
);
13906 if (code
== 'w' || MMX_REG_P (x
))
13908 else if (code
== 'b')
13910 else if (code
== 'k')
13912 else if (code
== 'q')
13914 else if (code
== 'y')
13916 else if (code
== 'h')
13918 else if (code
== 'x')
13920 else if (code
== 't')
13923 code
= GET_MODE_SIZE (GET_MODE (x
));
13925 /* Irritatingly, AMD extended registers use different naming convention
13926 from the normal registers: "r%d[bwd]" */
13927 if (REX_INT_REGNO_P (regno
))
13929 gcc_assert (TARGET_64BIT
);
13931 fprint_ul (file
, regno
- FIRST_REX_INT_REG
+ 8);
13935 error ("extended registers have no high halves");
13950 error ("unsupported operand size for extended register");
13960 if (STACK_TOP_P (x
))
13969 if (! ANY_FP_REG_P (x
))
13970 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
13975 reg
= hi_reg_name
[regno
];
13978 if (regno
>= ARRAY_SIZE (qi_reg_name
))
13980 reg
= qi_reg_name
[regno
];
13983 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
13985 reg
= qi_high_reg_name
[regno
];
13990 gcc_assert (!duplicated
);
13992 fputs (hi_reg_name
[regno
] + 1, file
);
13997 gcc_unreachable ();
14003 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14004 fprintf (file
, ", %%%s", reg
);
14006 fprintf (file
, ", %s", reg
);
14010 /* Locate some local-dynamic symbol still in use by this function
14011 so that we can print its name in some tls_local_dynamic_base
14015 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14019 if (GET_CODE (x
) == SYMBOL_REF
14020 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14022 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14029 static const char *
14030 get_some_local_dynamic_name (void)
14034 if (cfun
->machine
->some_ld_name
)
14035 return cfun
->machine
->some_ld_name
;
14037 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14038 if (NONDEBUG_INSN_P (insn
)
14039 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14040 return cfun
->machine
->some_ld_name
;
14045 /* Meaning of CODE:
14046 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14047 C -- print opcode suffix for set/cmov insn.
14048 c -- like C, but print reversed condition
14049 F,f -- likewise, but for floating-point.
14050 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14052 R -- print the prefix for register names.
14053 z -- print the opcode suffix for the size of the current operand.
14054 Z -- likewise, with special suffixes for x87 instructions.
14055 * -- print a star (in certain assembler syntax)
14056 A -- print an absolute memory reference.
14057 E -- print address with DImode register names if TARGET_64BIT.
14058 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14059 s -- print a shift double count, followed by the assemblers argument
14061 b -- print the QImode name of the register for the indicated operand.
14062 %b0 would print %al if operands[0] is reg 0.
14063 w -- likewise, print the HImode name of the register.
14064 k -- likewise, print the SImode name of the register.
14065 q -- likewise, print the DImode name of the register.
14066 x -- likewise, print the V4SFmode name of the register.
14067 t -- likewise, print the V8SFmode name of the register.
14068 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14069 y -- print "st(0)" instead of "st" as a register.
14070 d -- print duplicated register operand for AVX instruction.
14071 D -- print condition for SSE cmp instruction.
14072 P -- if PIC, print an @PLT suffix.
14073 p -- print raw symbol name.
14074 X -- don't print any sort of PIC '@' suffix for a symbol.
14075 & -- print some in-use local-dynamic symbol name.
14076 H -- print a memory address offset by 8; used for sse high-parts
14077 Y -- print condition for XOP pcom* instruction.
14078 + -- print a branch hint as 'cs' or 'ds' prefix
14079 ; -- print a semicolon (after prefixes due to bug in older gas).
14080 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14081 @ -- print a segment register of thread base pointer load
14082 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14086 ix86_print_operand (FILE *file
, rtx x
, int code
)
14093 switch (ASSEMBLER_DIALECT
)
14100 /* Intel syntax. For absolute addresses, registers should not
14101 be surrounded by braces. */
14105 ix86_print_operand (file
, x
, 0);
14112 gcc_unreachable ();
14115 ix86_print_operand (file
, x
, 0);
14119 /* Wrap address in an UNSPEC to declare special handling. */
14121 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14123 output_address (x
);
14127 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14132 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14137 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14142 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14147 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14152 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14157 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14158 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14161 switch (GET_MODE_SIZE (GET_MODE (x
)))
14176 output_operand_lossage
14177 ("invalid operand size for operand code 'O'");
14186 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14188 /* Opcodes don't get size suffixes if using Intel opcodes. */
14189 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14192 switch (GET_MODE_SIZE (GET_MODE (x
)))
14211 output_operand_lossage
14212 ("invalid operand size for operand code 'z'");
14217 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14219 (0, "non-integer operand used with operand code 'z'");
14223 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14224 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14227 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14229 switch (GET_MODE_SIZE (GET_MODE (x
)))
14232 #ifdef HAVE_AS_IX86_FILDS
14242 #ifdef HAVE_AS_IX86_FILDQ
14245 fputs ("ll", file
);
14253 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14255 /* 387 opcodes don't get size suffixes
14256 if the operands are registers. */
14257 if (STACK_REG_P (x
))
14260 switch (GET_MODE_SIZE (GET_MODE (x
)))
14281 output_operand_lossage
14282 ("invalid operand type used with operand code 'Z'");
14286 output_operand_lossage
14287 ("invalid operand size for operand code 'Z'");
14305 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14307 ix86_print_operand (file
, x
, 0);
14308 fputs (", ", file
);
14313 switch (GET_CODE (x
))
14316 fputs ("neq", file
);
14319 fputs ("eq", file
);
14323 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14327 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14331 fputs ("le", file
);
14335 fputs ("lt", file
);
14338 fputs ("unord", file
);
14341 fputs ("ord", file
);
14344 fputs ("ueq", file
);
14347 fputs ("nlt", file
);
14350 fputs ("nle", file
);
14353 fputs ("ule", file
);
14356 fputs ("ult", file
);
14359 fputs ("une", file
);
14362 output_operand_lossage ("operand is not a condition code, "
14363 "invalid operand code 'Y'");
14369 /* Little bit of braindamage here. The SSE compare instructions
14370 does use completely different names for the comparisons that the
14371 fp conditional moves. */
14372 switch (GET_CODE (x
))
14377 fputs ("eq_us", file
);
14381 fputs ("eq", file
);
14386 fputs ("nge", file
);
14390 fputs ("lt", file
);
14395 fputs ("ngt", file
);
14399 fputs ("le", file
);
14402 fputs ("unord", file
);
14407 fputs ("neq_oq", file
);
14411 fputs ("neq", file
);
14416 fputs ("ge", file
);
14420 fputs ("nlt", file
);
14425 fputs ("gt", file
);
14429 fputs ("nle", file
);
14432 fputs ("ord", file
);
14435 output_operand_lossage ("operand is not a condition code, "
14436 "invalid operand code 'D'");
14443 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14444 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14450 if (!COMPARISON_P (x
))
14452 output_operand_lossage ("operand is not a condition code, "
14453 "invalid operand code '%c'", code
);
14456 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14457 code
== 'c' || code
== 'f',
14458 code
== 'F' || code
== 'f',
14463 if (!offsettable_memref_p (x
))
14465 output_operand_lossage ("operand is not an offsettable memory "
14466 "reference, invalid operand code 'H'");
14469 /* It doesn't actually matter what mode we use here, as we're
14470 only going to use this for printing. */
14471 x
= adjust_address_nv (x
, DImode
, 8);
14475 gcc_assert (CONST_INT_P (x
));
14477 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14478 #ifdef HAVE_AS_IX86_HLE
14479 fputs ("xacquire ", file
);
14481 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14483 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14484 #ifdef HAVE_AS_IX86_HLE
14485 fputs ("xrelease ", file
);
14487 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14489 /* We do not want to print value of the operand. */
14493 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14499 const char *name
= get_some_local_dynamic_name ();
14501 output_operand_lossage ("'%%&' used without any "
14502 "local dynamic TLS references");
14504 assemble_name (file
, name
);
14513 || optimize_function_for_size_p (cfun
)
14514 || !TARGET_BRANCH_PREDICTION_HINTS
)
14517 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14520 int pred_val
= INTVAL (XEXP (x
, 0));
14522 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14523 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14525 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14527 = final_forward_branch_p (current_output_insn
) == 0;
14529 /* Emit hints only in the case default branch prediction
14530 heuristics would fail. */
14531 if (taken
!= cputaken
)
14533 /* We use 3e (DS) prefix for taken branches and
14534 2e (CS) prefix for not taken branches. */
14536 fputs ("ds ; ", file
);
14538 fputs ("cs ; ", file
);
14546 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14552 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14555 /* The kernel uses a different segment register for performance
14556 reasons; a system call would not have to trash the userspace
14557 segment register, which would be expensive. */
14558 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14559 fputs ("fs", file
);
14561 fputs ("gs", file
);
14565 putc (TARGET_AVX2
? 'i' : 'f', file
);
14569 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14570 fputs ("addr32 ", file
);
14574 output_operand_lossage ("invalid operand code '%c'", code
);
14579 print_reg (x
, code
, file
);
14581 else if (MEM_P (x
))
14583 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14584 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14585 && GET_MODE (x
) != BLKmode
)
14588 switch (GET_MODE_SIZE (GET_MODE (x
)))
14590 case 1: size
= "BYTE"; break;
14591 case 2: size
= "WORD"; break;
14592 case 4: size
= "DWORD"; break;
14593 case 8: size
= "QWORD"; break;
14594 case 12: size
= "TBYTE"; break;
14596 if (GET_MODE (x
) == XFmode
)
14601 case 32: size
= "YMMWORD"; break;
14603 gcc_unreachable ();
14606 /* Check for explicit size override (codes 'b', 'w', 'k',
14610 else if (code
== 'w')
14612 else if (code
== 'k')
14614 else if (code
== 'q')
14616 else if (code
== 'x')
14619 fputs (size
, file
);
14620 fputs (" PTR ", file
);
14624 /* Avoid (%rip) for call operands. */
14625 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14626 && !CONST_INT_P (x
))
14627 output_addr_const (file
, x
);
14628 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14629 output_operand_lossage ("invalid constraints for operand");
14631 output_address (x
);
14634 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14639 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14640 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14642 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14644 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14646 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
14647 (unsigned long long) (int) l
);
14649 fprintf (file
, "0x%08x", (unsigned int) l
);
14652 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14657 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14658 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14660 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14662 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14665 /* These float cases don't actually occur as immediate operands. */
14666 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14670 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14671 fputs (dstr
, file
);
14676 /* We have patterns that allow zero sets of memory, for instance.
14677 In 64-bit mode, we should probably support all 8-byte vectors,
14678 since we can in fact encode that into an immediate. */
14679 if (GET_CODE (x
) == CONST_VECTOR
)
14681 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14685 if (code
!= 'P' && code
!= 'p')
14687 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14689 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14692 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14693 || GET_CODE (x
) == LABEL_REF
)
14695 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14698 fputs ("OFFSET FLAT:", file
);
14701 if (CONST_INT_P (x
))
14702 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14703 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14704 output_pic_addr_const (file
, x
, code
);
14706 output_addr_const (file
, x
);
14711 ix86_print_operand_punct_valid_p (unsigned char code
)
14713 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14714 || code
== ';' || code
== '~' || code
== '^');
14717 /* Print a memory operand whose address is ADDR. */
14720 ix86_print_operand_address (FILE *file
, rtx addr
)
14722 struct ix86_address parts
;
14723 rtx base
, index
, disp
;
14729 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14731 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14732 gcc_assert (parts
.index
== NULL_RTX
);
14733 parts
.index
= XVECEXP (addr
, 0, 1);
14734 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14735 addr
= XVECEXP (addr
, 0, 0);
14738 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14740 gcc_assert (TARGET_64BIT
);
14741 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14745 ok
= ix86_decompose_address (addr
, &parts
);
14750 index
= parts
.index
;
14752 scale
= parts
.scale
;
14760 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14762 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14765 gcc_unreachable ();
14768 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14769 if (TARGET_64BIT
&& !base
&& !index
)
14773 if (GET_CODE (disp
) == CONST
14774 && GET_CODE (XEXP (disp
, 0)) == PLUS
14775 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14776 symbol
= XEXP (XEXP (disp
, 0), 0);
14778 if (GET_CODE (symbol
) == LABEL_REF
14779 || (GET_CODE (symbol
) == SYMBOL_REF
14780 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14783 if (!base
&& !index
)
14785 /* Displacement only requires special attention. */
14787 if (CONST_INT_P (disp
))
14789 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14790 fputs ("ds:", file
);
14791 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14794 output_pic_addr_const (file
, disp
, 0);
14796 output_addr_const (file
, disp
);
14800 /* Print SImode register names to force addr32 prefix. */
14801 if (SImode_address_operand (addr
, VOIDmode
))
14803 #ifdef ENABLE_CHECKING
14804 gcc_assert (TARGET_64BIT
);
14805 switch (GET_CODE (addr
))
14808 gcc_assert (GET_MODE (addr
) == SImode
);
14809 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
14813 gcc_assert (GET_MODE (addr
) == DImode
);
14816 gcc_unreachable ();
14819 gcc_assert (!code
);
14825 && CONST_INT_P (disp
)
14826 && INTVAL (disp
) < -16*1024*1024)
14828 /* X32 runs in 64-bit mode, where displacement, DISP, in
14829 address DISP(%r64), is encoded as 32-bit immediate sign-
14830 extended from 32-bit to 64-bit. For -0x40000300(%r64),
14831 address is %r64 + 0xffffffffbffffd00. When %r64 <
14832 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
14833 which is invalid for x32. The correct address is %r64
14834 - 0x40000300 == 0xf7ffdd64. To properly encode
14835 -0x40000300(%r64) for x32, we zero-extend negative
14836 displacement by forcing addr32 prefix which truncates
14837 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14838 zero-extend all negative displacements, including -1(%rsp).
14839 However, for small negative displacements, sign-extension
14840 won't cause overflow. We only zero-extend negative
14841 displacements if they < -16*1024*1024, which is also used
14842 to check legitimate address displacements for PIC. */
14846 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14851 output_pic_addr_const (file
, disp
, 0);
14852 else if (GET_CODE (disp
) == LABEL_REF
)
14853 output_asm_label (disp
);
14855 output_addr_const (file
, disp
);
14860 print_reg (base
, code
, file
);
14864 print_reg (index
, vsib
? 0 : code
, file
);
14865 if (scale
!= 1 || vsib
)
14866 fprintf (file
, ",%d", scale
);
14872 rtx offset
= NULL_RTX
;
14876 /* Pull out the offset of a symbol; print any symbol itself. */
14877 if (GET_CODE (disp
) == CONST
14878 && GET_CODE (XEXP (disp
, 0)) == PLUS
14879 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14881 offset
= XEXP (XEXP (disp
, 0), 1);
14882 disp
= gen_rtx_CONST (VOIDmode
,
14883 XEXP (XEXP (disp
, 0), 0));
14887 output_pic_addr_const (file
, disp
, 0);
14888 else if (GET_CODE (disp
) == LABEL_REF
)
14889 output_asm_label (disp
);
14890 else if (CONST_INT_P (disp
))
14893 output_addr_const (file
, disp
);
14899 print_reg (base
, code
, file
);
14902 if (INTVAL (offset
) >= 0)
14904 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14908 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
14915 print_reg (index
, vsib
? 0 : code
, file
);
14916 if (scale
!= 1 || vsib
)
14917 fprintf (file
, "*%d", scale
);
14924 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14927 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
14931 if (GET_CODE (x
) != UNSPEC
)
14934 op
= XVECEXP (x
, 0, 0);
14935 switch (XINT (x
, 1))
14937 case UNSPEC_GOTTPOFF
:
14938 output_addr_const (file
, op
);
14939 /* FIXME: This might be @TPOFF in Sun ld. */
14940 fputs ("@gottpoff", file
);
14943 output_addr_const (file
, op
);
14944 fputs ("@tpoff", file
);
14946 case UNSPEC_NTPOFF
:
14947 output_addr_const (file
, op
);
14949 fputs ("@tpoff", file
);
14951 fputs ("@ntpoff", file
);
14953 case UNSPEC_DTPOFF
:
14954 output_addr_const (file
, op
);
14955 fputs ("@dtpoff", file
);
14957 case UNSPEC_GOTNTPOFF
:
14958 output_addr_const (file
, op
);
14960 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
14961 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
14963 fputs ("@gotntpoff", file
);
14965 case UNSPEC_INDNTPOFF
:
14966 output_addr_const (file
, op
);
14967 fputs ("@indntpoff", file
);
14970 case UNSPEC_MACHOPIC_OFFSET
:
14971 output_addr_const (file
, op
);
14973 machopic_output_function_base_name (file
);
14977 case UNSPEC_STACK_CHECK
:
14981 gcc_assert (flag_split_stack
);
14983 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14984 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
14986 gcc_unreachable ();
14989 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15000 /* Split one or more double-mode RTL references into pairs of half-mode
15001 references. The RTL can be REG, offsettable MEM, integer constant, or
15002 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15003 split and "num" is its length. lo_half and hi_half are output arrays
15004 that parallel "operands". */
15007 split_double_mode (enum machine_mode mode
, rtx operands
[],
15008 int num
, rtx lo_half
[], rtx hi_half
[])
15010 enum machine_mode half_mode
;
15016 half_mode
= DImode
;
15019 half_mode
= SImode
;
15022 gcc_unreachable ();
15025 byte
= GET_MODE_SIZE (half_mode
);
15029 rtx op
= operands
[num
];
15031 /* simplify_subreg refuse to split volatile memory addresses,
15032 but we still have to handle it. */
15035 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15036 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15040 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15041 GET_MODE (op
) == VOIDmode
15042 ? mode
: GET_MODE (op
), 0);
15043 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15044 GET_MODE (op
) == VOIDmode
15045 ? mode
: GET_MODE (op
), byte
);
15050 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15051 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15052 is the expression of the binary operation. The output may either be
15053 emitted here, or returned to the caller, like all output_* functions.
15055 There is no guarantee that the operands are the same mode, as they
15056 might be within FLOAT or FLOAT_EXTEND expressions. */
15058 #ifndef SYSV386_COMPAT
15059 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15060 wants to fix the assemblers because that causes incompatibility
15061 with gcc. No-one wants to fix gcc because that causes
15062 incompatibility with assemblers... You can use the option of
15063 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15064 #define SYSV386_COMPAT 1
15068 output_387_binary_op (rtx insn
, rtx
*operands
)
15070 static char buf
[40];
15073 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15075 #ifdef ENABLE_CHECKING
15076 /* Even if we do not want to check the inputs, this documents input
15077 constraints. Which helps in understanding the following code. */
15078 if (STACK_REG_P (operands
[0])
15079 && ((REG_P (operands
[1])
15080 && REGNO (operands
[0]) == REGNO (operands
[1])
15081 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15082 || (REG_P (operands
[2])
15083 && REGNO (operands
[0]) == REGNO (operands
[2])
15084 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15085 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15088 gcc_assert (is_sse
);
15091 switch (GET_CODE (operands
[3]))
15094 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15095 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15103 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15104 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15112 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15113 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15121 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15122 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15130 gcc_unreachable ();
15137 strcpy (buf
, ssep
);
15138 if (GET_MODE (operands
[0]) == SFmode
)
15139 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15141 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15145 strcpy (buf
, ssep
+ 1);
15146 if (GET_MODE (operands
[0]) == SFmode
)
15147 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15149 strcat (buf
, "sd\t{%2, %0|%0, %2}");
15155 switch (GET_CODE (operands
[3]))
15159 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
15161 rtx temp
= operands
[2];
15162 operands
[2] = operands
[1];
15163 operands
[1] = temp
;
15166 /* know operands[0] == operands[1]. */
15168 if (MEM_P (operands
[2]))
15174 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15176 if (STACK_TOP_P (operands
[0]))
15177 /* How is it that we are storing to a dead operand[2]?
15178 Well, presumably operands[1] is dead too. We can't
15179 store the result to st(0) as st(0) gets popped on this
15180 instruction. Instead store to operands[2] (which I
15181 think has to be st(1)). st(1) will be popped later.
15182 gcc <= 2.8.1 didn't have this check and generated
15183 assembly code that the Unixware assembler rejected. */
15184 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15186 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15190 if (STACK_TOP_P (operands
[0]))
15191 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15193 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15198 if (MEM_P (operands
[1]))
15204 if (MEM_P (operands
[2]))
15210 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15213 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15214 derived assemblers, confusingly reverse the direction of
15215 the operation for fsub{r} and fdiv{r} when the
15216 destination register is not st(0). The Intel assembler
15217 doesn't have this brain damage. Read !SYSV386_COMPAT to
15218 figure out what the hardware really does. */
15219 if (STACK_TOP_P (operands
[0]))
15220 p
= "{p\t%0, %2|rp\t%2, %0}";
15222 p
= "{rp\t%2, %0|p\t%0, %2}";
15224 if (STACK_TOP_P (operands
[0]))
15225 /* As above for fmul/fadd, we can't store to st(0). */
15226 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15228 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15233 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15236 if (STACK_TOP_P (operands
[0]))
15237 p
= "{rp\t%0, %1|p\t%1, %0}";
15239 p
= "{p\t%1, %0|rp\t%0, %1}";
15241 if (STACK_TOP_P (operands
[0]))
15242 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15244 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15249 if (STACK_TOP_P (operands
[0]))
15251 if (STACK_TOP_P (operands
[1]))
15252 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15254 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15257 else if (STACK_TOP_P (operands
[1]))
15260 p
= "{\t%1, %0|r\t%0, %1}";
15262 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15268 p
= "{r\t%2, %0|\t%0, %2}";
15270 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15276 gcc_unreachable ();
15283 /* Check if a 256bit AVX register is referenced inside of EXP. */
15286 ix86_check_avx256_register (rtx
*pexp
, void *data ATTRIBUTE_UNUSED
)
15290 if (GET_CODE (exp
) == SUBREG
)
15291 exp
= SUBREG_REG (exp
);
15294 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp
)))
15300 /* Return needed mode for entity in optimize_mode_switching pass. */
15303 ix86_avx_u128_mode_needed (rtx insn
)
15309 /* Needed mode is set to AVX_U128_CLEAN if there are
15310 no 256bit modes used in function arguments. */
15311 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
15313 link
= XEXP (link
, 1))
15315 if (GET_CODE (XEXP (link
, 0)) == USE
)
15317 rtx arg
= XEXP (XEXP (link
, 0), 0);
15319 if (ix86_check_avx256_register (&arg
, NULL
))
15320 return AVX_U128_ANY
;
15324 return AVX_U128_CLEAN
;
15327 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
15328 changes state only when a 256bit register is written to, but we need
15329 to prevent the compiler from moving optimal insertion point above
15330 eventual read from 256bit register. */
15331 if (for_each_rtx (&PATTERN (insn
), ix86_check_avx256_register
, NULL
))
15332 return AVX_U128_DIRTY
;
15334 return AVX_U128_ANY
;
15337 /* Return mode that i387 must be switched into
15338 prior to the execution of insn. */
15341 ix86_i387_mode_needed (int entity
, rtx insn
)
15343 enum attr_i387_cw mode
;
15345 /* The mode UNINITIALIZED is used to store control word after a
15346 function call or ASM pattern. The mode ANY specify that function
15347 has no requirements on the control word and make no changes in the
15348 bits we are interested in. */
15351 || (NONJUMP_INSN_P (insn
)
15352 && (asm_noperands (PATTERN (insn
)) >= 0
15353 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15354 return I387_CW_UNINITIALIZED
;
15356 if (recog_memoized (insn
) < 0)
15357 return I387_CW_ANY
;
15359 mode
= get_attr_i387_cw (insn
);
15364 if (mode
== I387_CW_TRUNC
)
15369 if (mode
== I387_CW_FLOOR
)
15374 if (mode
== I387_CW_CEIL
)
15379 if (mode
== I387_CW_MASK_PM
)
15384 gcc_unreachable ();
15387 return I387_CW_ANY
;
15390 /* Return mode that entity must be switched into
15391 prior to the execution of insn. */
15394 ix86_mode_needed (int entity
, rtx insn
)
15399 return ix86_avx_u128_mode_needed (insn
);
15404 return ix86_i387_mode_needed (entity
, insn
);
15406 gcc_unreachable ();
15411 /* Check if a 256bit AVX register is referenced in stores. */
15414 ix86_check_avx256_stores (rtx dest
, const_rtx set ATTRIBUTE_UNUSED
, void *data
)
15416 if (ix86_check_avx256_register (&dest
, NULL
))
15418 bool *used
= (bool *) data
;
15423 /* Calculate mode of upper 128bit AVX registers after the insn. */
15426 ix86_avx_u128_mode_after (int mode
, rtx insn
)
15428 rtx pat
= PATTERN (insn
);
15430 if (vzeroupper_operation (pat
, VOIDmode
)
15431 || vzeroall_operation (pat
, VOIDmode
))
15432 return AVX_U128_CLEAN
;
15434 /* We know that state is clean after CALL insn if there are no
15435 256bit registers used in the function return register. */
15438 bool avx_reg256_found
= false;
15439 note_stores (pat
, ix86_check_avx256_stores
, &avx_reg256_found
);
15440 if (!avx_reg256_found
)
15441 return AVX_U128_CLEAN
;
15444 /* Otherwise, return current mode. Remember that if insn
15445 references AVX 256bit registers, the mode was already changed
15446 to DIRTY from MODE_NEEDED. */
15450 /* Return the mode that an insn results in. */
15453 ix86_mode_after (int entity
, int mode
, rtx insn
)
15458 return ix86_avx_u128_mode_after (mode
, insn
);
15465 gcc_unreachable ();
15470 ix86_avx_u128_mode_entry (void)
15474 /* Entry mode is set to AVX_U128_DIRTY if there are
15475 256bit modes used in function arguments. */
15476 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
15477 arg
= TREE_CHAIN (arg
))
15479 rtx incoming
= DECL_INCOMING_RTL (arg
);
15481 if (incoming
&& ix86_check_avx256_register (&incoming
, NULL
))
15482 return AVX_U128_DIRTY
;
15485 return AVX_U128_CLEAN
;
15488 /* Return a mode that ENTITY is assumed to be
15489 switched to at function entry. */
15492 ix86_mode_entry (int entity
)
15497 return ix86_avx_u128_mode_entry ();
15502 return I387_CW_ANY
;
15504 gcc_unreachable ();
15509 ix86_avx_u128_mode_exit (void)
15511 rtx reg
= crtl
->return_rtx
;
15513 /* Exit mode is set to AVX_U128_DIRTY if there are
15514 256bit modes used in the function return register. */
15515 if (reg
&& ix86_check_avx256_register (®
, NULL
))
15516 return AVX_U128_DIRTY
;
15518 return AVX_U128_CLEAN
;
15521 /* Return a mode that ENTITY is assumed to be
15522 switched to at function exit. */
15525 ix86_mode_exit (int entity
)
15530 return ix86_avx_u128_mode_exit ();
15535 return I387_CW_ANY
;
15537 gcc_unreachable ();
15541 /* Output code to initialize control word copies used by trunc?f?i and
15542 rounding patterns. CURRENT_MODE is set to current control word,
15543 while NEW_MODE is set to new control word. */
15546 emit_i387_cw_initialization (int mode
)
15548 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15551 enum ix86_stack_slot slot
;
15553 rtx reg
= gen_reg_rtx (HImode
);
15555 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15556 emit_move_insn (reg
, copy_rtx (stored_mode
));
15558 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15559 || optimize_function_for_size_p (cfun
))
15563 case I387_CW_TRUNC
:
15564 /* round toward zero (truncate) */
15565 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15566 slot
= SLOT_CW_TRUNC
;
15569 case I387_CW_FLOOR
:
15570 /* round down toward -oo */
15571 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15572 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15573 slot
= SLOT_CW_FLOOR
;
15577 /* round up toward +oo */
15578 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15579 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15580 slot
= SLOT_CW_CEIL
;
15583 case I387_CW_MASK_PM
:
15584 /* mask precision exception for nearbyint() */
15585 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15586 slot
= SLOT_CW_MASK_PM
;
15590 gcc_unreachable ();
15597 case I387_CW_TRUNC
:
15598 /* round toward zero (truncate) */
15599 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15600 slot
= SLOT_CW_TRUNC
;
15603 case I387_CW_FLOOR
:
15604 /* round down toward -oo */
15605 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15606 slot
= SLOT_CW_FLOOR
;
15610 /* round up toward +oo */
15611 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15612 slot
= SLOT_CW_CEIL
;
15615 case I387_CW_MASK_PM
:
15616 /* mask precision exception for nearbyint() */
15617 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15618 slot
= SLOT_CW_MASK_PM
;
15622 gcc_unreachable ();
15626 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
15628 new_mode
= assign_386_stack_local (HImode
, slot
);
15629 emit_move_insn (new_mode
, reg
);
15632 /* Emit vzeroupper. */
15635 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live
)
15639 /* Cancel automatic vzeroupper insertion if there are
15640 live call-saved SSE registers at the insertion point. */
15642 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
15643 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15647 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
15648 if (TEST_HARD_REG_BIT (regs_live
, i
) && !call_used_regs
[i
])
15651 emit_insn (gen_avx_vzeroupper ());
15654 /* Generate one or more insns to set ENTITY to MODE. */
15657 ix86_emit_mode_set (int entity
, int mode
, HARD_REG_SET regs_live
)
15662 if (mode
== AVX_U128_CLEAN
)
15663 ix86_avx_emit_vzeroupper (regs_live
);
15669 if (mode
!= I387_CW_ANY
15670 && mode
!= I387_CW_UNINITIALIZED
)
15671 emit_i387_cw_initialization (mode
);
15674 gcc_unreachable ();
15678 /* Output code for INSN to convert a float to a signed int. OPERANDS
15679 are the insn operands. The output may be [HSD]Imode and the input
15680 operand may be [SDX]Fmode. */
15683 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15685 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15686 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15687 int round_mode
= get_attr_i387_cw (insn
);
15689 /* Jump through a hoop or two for DImode, since the hardware has no
15690 non-popping instruction. We used to do this a different way, but
15691 that was somewhat fragile and broke with post-reload splitters. */
15692 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15693 output_asm_insn ("fld\t%y1", operands
);
15695 gcc_assert (STACK_TOP_P (operands
[1]));
15696 gcc_assert (MEM_P (operands
[0]));
15697 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
15700 output_asm_insn ("fisttp%Z0\t%0", operands
);
15703 if (round_mode
!= I387_CW_ANY
)
15704 output_asm_insn ("fldcw\t%3", operands
);
15705 if (stack_top_dies
|| dimode_p
)
15706 output_asm_insn ("fistp%Z0\t%0", operands
);
15708 output_asm_insn ("fist%Z0\t%0", operands
);
15709 if (round_mode
!= I387_CW_ANY
)
15710 output_asm_insn ("fldcw\t%2", operands
);
15716 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15717 have the values zero or one, indicates the ffreep insn's operand
15718 from the OPERANDS array. */
15720 static const char *
15721 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15723 if (TARGET_USE_FFREEP
)
15724 #ifdef HAVE_AS_IX86_FFREEP
15725 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
15728 static char retval
[32];
15729 int regno
= REGNO (operands
[opno
]);
15731 gcc_assert (STACK_REGNO_P (regno
));
15733 regno
-= FIRST_STACK_REG
;
15735 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
15740 return opno
? "fstp\t%y1" : "fstp\t%y0";
15744 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15745 should be used. UNORDERED_P is true when fucom should be used. */
15748 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15750 int stack_top_dies
;
15751 rtx cmp_op0
, cmp_op1
;
15752 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15756 cmp_op0
= operands
[0];
15757 cmp_op1
= operands
[1];
15761 cmp_op0
= operands
[1];
15762 cmp_op1
= operands
[2];
15767 if (GET_MODE (operands
[0]) == SFmode
)
15769 return "%vucomiss\t{%1, %0|%0, %1}";
15771 return "%vcomiss\t{%1, %0|%0, %1}";
15774 return "%vucomisd\t{%1, %0|%0, %1}";
15776 return "%vcomisd\t{%1, %0|%0, %1}";
15779 gcc_assert (STACK_TOP_P (cmp_op0
));
15781 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15783 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15785 if (stack_top_dies
)
15787 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15788 return output_387_ffreep (operands
, 1);
15791 return "ftst\n\tfnstsw\t%0";
15794 if (STACK_REG_P (cmp_op1
)
15796 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15797 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15799 /* If both the top of the 387 stack dies, and the other operand
15800 is also a stack register that dies, then this must be a
15801 `fcompp' float compare */
15805 /* There is no double popping fcomi variant. Fortunately,
15806 eflags is immune from the fstp's cc clobbering. */
15808 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15810 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15811 return output_387_ffreep (operands
, 0);
15816 return "fucompp\n\tfnstsw\t%0";
15818 return "fcompp\n\tfnstsw\t%0";
15823 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15825 static const char * const alt
[16] =
15827 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15828 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15829 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15830 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15832 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15833 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15837 "fcomi\t{%y1, %0|%0, %y1}",
15838 "fcomip\t{%y1, %0|%0, %y1}",
15839 "fucomi\t{%y1, %0|%0, %y1}",
15840 "fucomip\t{%y1, %0|%0, %y1}",
15851 mask
= eflags_p
<< 3;
15852 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15853 mask
|= unordered_p
<< 1;
15854 mask
|= stack_top_dies
;
15856 gcc_assert (mask
< 16);
15865 ix86_output_addr_vec_elt (FILE *file
, int value
)
15867 const char *directive
= ASM_LONG
;
15871 directive
= ASM_QUAD
;
15873 gcc_assert (!TARGET_64BIT
);
15876 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
15880 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15882 const char *directive
= ASM_LONG
;
15885 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15886 directive
= ASM_QUAD
;
15888 gcc_assert (!TARGET_64BIT
);
15890 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15891 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15892 fprintf (file
, "%s%s%d-%s%d\n",
15893 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15894 else if (HAVE_AS_GOTOFF_IN_DATA
)
15895 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15897 else if (TARGET_MACHO
)
15899 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15900 machopic_output_function_base_name (file
);
15905 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15906 GOT_SYMBOL_NAME
, LPREFIX
, value
);
15909 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15913 ix86_expand_clear (rtx dest
)
15917 /* We play register width games, which are only valid after reload. */
15918 gcc_assert (reload_completed
);
15920 /* Avoid HImode and its attendant prefix byte. */
15921 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15922 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15923 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15925 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15926 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15928 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15929 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
15935 /* X is an unchanging MEM. If it is a constant pool reference, return
15936 the constant pool rtx, else NULL. */
15939 maybe_get_pool_constant (rtx x
)
15941 x
= ix86_delegitimize_address (XEXP (x
, 0));
15943 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15944 return get_pool_constant (x
);
15950 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15953 enum tls_model model
;
15958 if (GET_CODE (op1
) == SYMBOL_REF
)
15960 model
= SYMBOL_REF_TLS_MODEL (op1
);
15963 op1
= legitimize_tls_address (op1
, model
, true);
15964 op1
= force_operand (op1
, op0
);
15967 op1
= convert_to_mode (mode
, op1
, 1);
15969 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15970 && SYMBOL_REF_DLLIMPORT_P (op1
))
15971 op1
= legitimize_dllimport_symbol (op1
, false);
15973 else if (GET_CODE (op1
) == CONST
15974 && GET_CODE (XEXP (op1
, 0)) == PLUS
15975 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15977 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15978 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15981 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15983 tmp
= legitimize_tls_address (symbol
, model
, true);
15984 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15985 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15986 tmp
= legitimize_dllimport_symbol (symbol
, true);
15990 tmp
= force_operand (tmp
, NULL
);
15991 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15992 op0
, 1, OPTAB_DIRECT
);
15995 op1
= convert_to_mode (mode
, tmp
, 1);
15999 if ((flag_pic
|| MACHOPIC_INDIRECT
)
16000 && symbolic_operand (op1
, mode
))
16002 if (TARGET_MACHO
&& !TARGET_64BIT
)
16005 /* dynamic-no-pic */
16006 if (MACHOPIC_INDIRECT
)
16008 rtx temp
= ((reload_in_progress
16009 || ((op0
&& REG_P (op0
))
16011 ? op0
: gen_reg_rtx (Pmode
));
16012 op1
= machopic_indirect_data_reference (op1
, temp
);
16014 op1
= machopic_legitimize_pic_address (op1
, mode
,
16015 temp
== op1
? 0 : temp
);
16017 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
16019 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
16023 if (GET_CODE (op0
) == MEM
)
16024 op1
= force_reg (Pmode
, op1
);
16028 if (GET_CODE (temp
) != REG
)
16029 temp
= gen_reg_rtx (Pmode
);
16030 temp
= legitimize_pic_address (op1
, temp
);
16035 /* dynamic-no-pic */
16041 op1
= force_reg (mode
, op1
);
16042 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
16044 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
16045 op1
= legitimize_pic_address (op1
, reg
);
16048 op1
= convert_to_mode (mode
, op1
, 1);
16055 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
16056 || !push_operand (op0
, mode
))
16058 op1
= force_reg (mode
, op1
);
16060 if (push_operand (op0
, mode
)
16061 && ! general_no_elim_operand (op1
, mode
))
16062 op1
= copy_to_mode_reg (mode
, op1
);
16064 /* Force large constants in 64bit compilation into register
16065 to get them CSEed. */
16066 if (can_create_pseudo_p ()
16067 && (mode
== DImode
) && TARGET_64BIT
16068 && immediate_operand (op1
, mode
)
16069 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
16070 && !register_operand (op0
, mode
)
16072 op1
= copy_to_mode_reg (mode
, op1
);
16074 if (can_create_pseudo_p ()
16075 && FLOAT_MODE_P (mode
)
16076 && GET_CODE (op1
) == CONST_DOUBLE
)
16078 /* If we are loading a floating point constant to a register,
16079 force the value to memory now, since we'll get better code
16080 out the back end. */
16082 op1
= validize_mem (force_const_mem (mode
, op1
));
16083 if (!register_operand (op0
, mode
))
16085 rtx temp
= gen_reg_rtx (mode
);
16086 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16087 emit_move_insn (op0
, temp
);
16093 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16097 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16099 rtx op0
= operands
[0], op1
= operands
[1];
16100 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16102 /* Force constants other than zero into memory. We do not know how
16103 the instructions used to build constants modify the upper 64 bits
16104 of the register, once we have that information we may be able
16105 to handle some of them more efficiently. */
16106 if (can_create_pseudo_p ()
16107 && register_operand (op0
, mode
)
16108 && (CONSTANT_P (op1
)
16109 || (GET_CODE (op1
) == SUBREG
16110 && CONSTANT_P (SUBREG_REG (op1
))))
16111 && !standard_sse_constant_p (op1
))
16112 op1
= validize_mem (force_const_mem (mode
, op1
));
16114 /* We need to check memory alignment for SSE mode since attribute
16115 can make operands unaligned. */
16116 if (can_create_pseudo_p ()
16117 && SSE_REG_MODE_P (mode
)
16118 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16119 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16123 /* ix86_expand_vector_move_misalign() does not like constants ... */
16124 if (CONSTANT_P (op1
)
16125 || (GET_CODE (op1
) == SUBREG
16126 && CONSTANT_P (SUBREG_REG (op1
))))
16127 op1
= validize_mem (force_const_mem (mode
, op1
));
16129 /* ... nor both arguments in memory. */
16130 if (!register_operand (op0
, mode
)
16131 && !register_operand (op1
, mode
))
16132 op1
= force_reg (mode
, op1
);
16134 tmp
[0] = op0
; tmp
[1] = op1
;
16135 ix86_expand_vector_move_misalign (mode
, tmp
);
16139 /* Make operand1 a register if it isn't already. */
16140 if (can_create_pseudo_p ()
16141 && !register_operand (op0
, mode
)
16142 && !register_operand (op1
, mode
))
16144 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16148 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
16151 /* Split 32-byte AVX unaligned load and store if needed. */
16154 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16157 rtx (*extract
) (rtx
, rtx
, rtx
);
16158 rtx (*load_unaligned
) (rtx
, rtx
);
16159 rtx (*store_unaligned
) (rtx
, rtx
);
16160 enum machine_mode mode
;
16162 switch (GET_MODE (op0
))
16165 gcc_unreachable ();
16167 extract
= gen_avx_vextractf128v32qi
;
16168 load_unaligned
= gen_avx_loaddqu256
;
16169 store_unaligned
= gen_avx_storedqu256
;
16173 extract
= gen_avx_vextractf128v8sf
;
16174 load_unaligned
= gen_avx_loadups256
;
16175 store_unaligned
= gen_avx_storeups256
;
16179 extract
= gen_avx_vextractf128v4df
;
16180 load_unaligned
= gen_avx_loadupd256
;
16181 store_unaligned
= gen_avx_storeupd256
;
16188 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16190 rtx r
= gen_reg_rtx (mode
);
16191 m
= adjust_address (op1
, mode
, 0);
16192 emit_move_insn (r
, m
);
16193 m
= adjust_address (op1
, mode
, 16);
16194 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16195 emit_move_insn (op0
, r
);
16198 emit_insn (load_unaligned (op0
, op1
));
16200 else if (MEM_P (op0
))
16202 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
16204 m
= adjust_address (op0
, mode
, 0);
16205 emit_insn (extract (m
, op1
, const0_rtx
));
16206 m
= adjust_address (op0
, mode
, 16);
16207 emit_insn (extract (m
, op1
, const1_rtx
));
16210 emit_insn (store_unaligned (op0
, op1
));
16213 gcc_unreachable ();
16216 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16217 straight to ix86_expand_vector_move. */
16218 /* Code generation for scalar reg-reg moves of single and double precision data:
16219 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16223 if (x86_sse_partial_reg_dependency == true)
16228 Code generation for scalar loads of double precision data:
16229 if (x86_sse_split_regs == true)
16230 movlpd mem, reg (gas syntax)
16234 Code generation for unaligned packed loads of single precision data
16235 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16236 if (x86_sse_unaligned_move_optimal)
16239 if (x86_sse_partial_reg_dependency == true)
16251 Code generation for unaligned packed loads of double precision data
16252 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16253 if (x86_sse_unaligned_move_optimal)
16256 if (x86_sse_split_regs == true)
16269 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
16277 && GET_MODE_SIZE (mode
) == 32)
16279 switch (GET_MODE_CLASS (mode
))
16281 case MODE_VECTOR_INT
:
16283 op0
= gen_lowpart (V32QImode
, op0
);
16284 op1
= gen_lowpart (V32QImode
, op1
);
16287 case MODE_VECTOR_FLOAT
:
16288 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16292 gcc_unreachable ();
16300 /* ??? If we have typed data, then it would appear that using
16301 movdqu is the only way to get unaligned data loaded with
16303 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16305 op0
= gen_lowpart (V16QImode
, op0
);
16306 op1
= gen_lowpart (V16QImode
, op1
);
16307 /* We will eventually emit movups based on insn attributes. */
16308 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16310 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16315 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16316 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16317 || optimize_function_for_size_p (cfun
))
16319 /* We will eventually emit movups based on insn attributes. */
16320 emit_insn (gen_sse2_loadupd (op0
, op1
));
16324 /* When SSE registers are split into halves, we can avoid
16325 writing to the top half twice. */
16326 if (TARGET_SSE_SPLIT_REGS
)
16328 emit_clobber (op0
);
16333 /* ??? Not sure about the best option for the Intel chips.
16334 The following would seem to satisfy; the register is
16335 entirely cleared, breaking the dependency chain. We
16336 then store to the upper half, with a dependency depth
16337 of one. A rumor has it that Intel recommends two movsd
16338 followed by an unpacklpd, but this is unconfirmed. And
16339 given that the dependency depth of the unpacklpd would
16340 still be one, I'm not sure why this would be better. */
16341 zero
= CONST0_RTX (V2DFmode
);
16344 m
= adjust_address (op1
, DFmode
, 0);
16345 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16346 m
= adjust_address (op1
, DFmode
, 8);
16347 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
16352 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16353 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16354 || optimize_function_for_size_p (cfun
))
16356 op0
= gen_lowpart (V4SFmode
, op0
);
16357 op1
= gen_lowpart (V4SFmode
, op1
);
16358 emit_insn (gen_sse_loadups (op0
, op1
));
16362 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16363 emit_move_insn (op0
, CONST0_RTX (mode
));
16365 emit_clobber (op0
);
16367 if (mode
!= V4SFmode
)
16368 op0
= gen_lowpart (V4SFmode
, op0
);
16370 m
= adjust_address (op1
, V2SFmode
, 0);
16371 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16372 m
= adjust_address (op1
, V2SFmode
, 8);
16373 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
16376 else if (MEM_P (op0
))
16378 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16380 op0
= gen_lowpart (V16QImode
, op0
);
16381 op1
= gen_lowpart (V16QImode
, op1
);
16382 /* We will eventually emit movups based on insn attributes. */
16383 emit_insn (gen_sse2_storedqu (op0
, op1
));
16385 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16388 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16389 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16390 || optimize_function_for_size_p (cfun
))
16391 /* We will eventually emit movups based on insn attributes. */
16392 emit_insn (gen_sse2_storeupd (op0
, op1
));
16395 m
= adjust_address (op0
, DFmode
, 0);
16396 emit_insn (gen_sse2_storelpd (m
, op1
));
16397 m
= adjust_address (op0
, DFmode
, 8);
16398 emit_insn (gen_sse2_storehpd (m
, op1
));
16403 if (mode
!= V4SFmode
)
16404 op1
= gen_lowpart (V4SFmode
, op1
);
16407 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16408 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16409 || optimize_function_for_size_p (cfun
))
16411 op0
= gen_lowpart (V4SFmode
, op0
);
16412 emit_insn (gen_sse_storeups (op0
, op1
));
16416 m
= adjust_address (op0
, V2SFmode
, 0);
16417 emit_insn (gen_sse_storelps (m
, op1
));
16418 m
= adjust_address (op0
, V2SFmode
, 8);
16419 emit_insn (gen_sse_storehps (m
, op1
));
16424 gcc_unreachable ();
16427 /* Expand a push in MODE. This is some mode for which we do not support
16428 proper push instructions, at least from the registers that we expect
16429 the value to live in. */
16432 ix86_expand_push (enum machine_mode mode
, rtx x
)
16436 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16437 GEN_INT (-GET_MODE_SIZE (mode
)),
16438 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16439 if (tmp
!= stack_pointer_rtx
)
16440 emit_move_insn (stack_pointer_rtx
, tmp
);
16442 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16444 /* When we push an operand onto stack, it has to be aligned at least
16445 at the function argument boundary. However since we don't have
16446 the argument type, we can't determine the actual argument
16448 emit_move_insn (tmp
, x
);
16451 /* Helper function of ix86_fixup_binary_operands to canonicalize
16452 operand order. Returns true if the operands should be swapped. */
16455 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16458 rtx dst
= operands
[0];
16459 rtx src1
= operands
[1];
16460 rtx src2
= operands
[2];
16462 /* If the operation is not commutative, we can't do anything. */
16463 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16466 /* Highest priority is that src1 should match dst. */
16467 if (rtx_equal_p (dst
, src1
))
16469 if (rtx_equal_p (dst
, src2
))
16472 /* Next highest priority is that immediate constants come second. */
16473 if (immediate_operand (src2
, mode
))
16475 if (immediate_operand (src1
, mode
))
16478 /* Lowest priority is that memory references should come second. */
16488 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16489 destination to use for the operation. If different from the true
16490 destination in operands[0], a copy operation will be required. */
16493 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16496 rtx dst
= operands
[0];
16497 rtx src1
= operands
[1];
16498 rtx src2
= operands
[2];
16500 /* Canonicalize operand order. */
16501 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16505 /* It is invalid to swap operands of different modes. */
16506 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16513 /* Both source operands cannot be in memory. */
16514 if (MEM_P (src1
) && MEM_P (src2
))
16516 /* Optimization: Only read from memory once. */
16517 if (rtx_equal_p (src1
, src2
))
16519 src2
= force_reg (mode
, src2
);
16523 src2
= force_reg (mode
, src2
);
16526 /* If the destination is memory, and we do not have matching source
16527 operands, do things in registers. */
16528 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16529 dst
= gen_reg_rtx (mode
);
16531 /* Source 1 cannot be a constant. */
16532 if (CONSTANT_P (src1
))
16533 src1
= force_reg (mode
, src1
);
16535 /* Source 1 cannot be a non-matching memory. */
16536 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16537 src1
= force_reg (mode
, src1
);
16539 /* Improve address combine. */
16541 && GET_MODE_CLASS (mode
) == MODE_INT
16543 src2
= force_reg (mode
, src2
);
16545 operands
[1] = src1
;
16546 operands
[2] = src2
;
16550 /* Similarly, but assume that the destination has already been
16551 set up properly. */
16554 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16555 enum machine_mode mode
, rtx operands
[])
16557 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16558 gcc_assert (dst
== operands
[0]);
16561 /* Attempt to expand a binary operator. Make the expansion closer to the
16562 actual machine, then just general_operand, which will allow 3 separate
16563 memory references (one output, two input) in a single insn. */
16566 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16569 rtx src1
, src2
, dst
, op
, clob
;
16571 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16572 src1
= operands
[1];
16573 src2
= operands
[2];
16575 /* Emit the instruction. */
16577 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16578 if (reload_in_progress
)
16580 /* Reload doesn't know about the flags register, and doesn't know that
16581 it doesn't want to clobber it. We can only do this with PLUS. */
16582 gcc_assert (code
== PLUS
);
16585 else if (reload_completed
16587 && !rtx_equal_p (dst
, src1
))
16589 /* This is going to be an LEA; avoid splitting it later. */
16594 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16595 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16598 /* Fix up the destination if needed. */
16599 if (dst
!= operands
[0])
16600 emit_move_insn (operands
[0], dst
);
16603 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16604 the given OPERANDS. */
16607 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16610 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16611 if (GET_CODE (operands
[1]) == SUBREG
)
16616 else if (GET_CODE (operands
[2]) == SUBREG
)
16621 /* Optimize (__m128i) d | (__m128i) e and similar code
16622 when d and e are float vectors into float vector logical
16623 insn. In C/C++ without using intrinsics there is no other way
16624 to express vector logical operation on float vectors than
16625 to cast them temporarily to integer vectors. */
16627 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16628 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16629 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16630 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16631 && SUBREG_BYTE (op1
) == 0
16632 && (GET_CODE (op2
) == CONST_VECTOR
16633 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16634 && SUBREG_BYTE (op2
) == 0))
16635 && can_create_pseudo_p ())
16638 switch (GET_MODE (SUBREG_REG (op1
)))
16644 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16645 if (GET_CODE (op2
) == CONST_VECTOR
)
16647 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16648 op2
= force_reg (GET_MODE (dst
), op2
);
16653 op2
= SUBREG_REG (operands
[2]);
16654 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16655 op2
= force_reg (GET_MODE (dst
), op2
);
16657 op1
= SUBREG_REG (op1
);
16658 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16659 op1
= force_reg (GET_MODE (dst
), op1
);
16660 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16661 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16663 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
16669 if (!nonimmediate_operand (operands
[1], mode
))
16670 operands
[1] = force_reg (mode
, operands
[1]);
16671 if (!nonimmediate_operand (operands
[2], mode
))
16672 operands
[2] = force_reg (mode
, operands
[2]);
16673 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16674 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16675 gen_rtx_fmt_ee (code
, mode
, operands
[1],
16679 /* Return TRUE or FALSE depending on whether the binary operator meets the
16680 appropriate constraints. */
16683 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16686 rtx dst
= operands
[0];
16687 rtx src1
= operands
[1];
16688 rtx src2
= operands
[2];
16690 /* Both source operands cannot be in memory. */
16691 if (MEM_P (src1
) && MEM_P (src2
))
16694 /* Canonicalize operand order for commutative operators. */
16695 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16702 /* If the destination is memory, we must have a matching source operand. */
16703 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16706 /* Source 1 cannot be a constant. */
16707 if (CONSTANT_P (src1
))
16710 /* Source 1 cannot be a non-matching memory. */
16711 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16712 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16713 return (code
== AND
16716 || (TARGET_64BIT
&& mode
== DImode
))
16717 && satisfies_constraint_L (src2
));
16722 /* Attempt to expand a unary operator. Make the expansion closer to the
16723 actual machine, then just general_operand, which will allow 2 separate
16724 memory references (one output, one input) in a single insn. */
16727 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16730 int matching_memory
;
16731 rtx src
, dst
, op
, clob
;
16736 /* If the destination is memory, and we do not have matching source
16737 operands, do things in registers. */
16738 matching_memory
= 0;
16741 if (rtx_equal_p (dst
, src
))
16742 matching_memory
= 1;
16744 dst
= gen_reg_rtx (mode
);
16747 /* When source operand is memory, destination must match. */
16748 if (MEM_P (src
) && !matching_memory
)
16749 src
= force_reg (mode
, src
);
16751 /* Emit the instruction. */
16753 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16754 if (reload_in_progress
|| code
== NOT
)
16756 /* Reload doesn't know about the flags register, and doesn't know that
16757 it doesn't want to clobber it. */
16758 gcc_assert (code
== NOT
);
16763 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16764 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16767 /* Fix up the destination if needed. */
16768 if (dst
!= operands
[0])
16769 emit_move_insn (operands
[0], dst
);
16772 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16773 divisor are within the range [0-255]. */
16776 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16779 rtx end_label
, qimode_label
;
16780 rtx insn
, div
, mod
;
16781 rtx scratch
, tmp0
, tmp1
, tmp2
;
16782 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16783 rtx (*gen_zero_extend
) (rtx
, rtx
);
16784 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16789 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16790 gen_test_ccno_1
= gen_testsi_ccno_1
;
16791 gen_zero_extend
= gen_zero_extendqisi2
;
16794 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16795 gen_test_ccno_1
= gen_testdi_ccno_1
;
16796 gen_zero_extend
= gen_zero_extendqidi2
;
16799 gcc_unreachable ();
16802 end_label
= gen_label_rtx ();
16803 qimode_label
= gen_label_rtx ();
16805 scratch
= gen_reg_rtx (mode
);
16807 /* Use 8bit unsigned divimod if dividend and divisor are within
16808 the range [0-255]. */
16809 emit_move_insn (scratch
, operands
[2]);
16810 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16811 scratch
, 1, OPTAB_DIRECT
);
16812 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16813 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16814 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16815 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16816 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16818 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16819 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16820 JUMP_LABEL (insn
) = qimode_label
;
16822 /* Generate original signed/unsigned divimod. */
16823 div
= gen_divmod4_1 (operands
[0], operands
[1],
16824 operands
[2], operands
[3]);
16827 /* Branch to the end. */
16828 emit_jump_insn (gen_jump (end_label
));
16831 /* Generate 8bit unsigned divide. */
16832 emit_label (qimode_label
);
16833 /* Don't use operands[0] for result of 8bit divide since not all
16834 registers support QImode ZERO_EXTRACT. */
16835 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16836 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16837 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16838 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16842 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16843 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16847 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16848 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16851 /* Extract remainder from AH. */
16852 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16853 if (REG_P (operands
[1]))
16854 insn
= emit_move_insn (operands
[1], tmp1
);
16857 /* Need a new scratch register since the old one has result
16859 scratch
= gen_reg_rtx (mode
);
16860 emit_move_insn (scratch
, tmp1
);
16861 insn
= emit_move_insn (operands
[1], scratch
);
16863 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16865 /* Zero extend quotient from AL. */
16866 tmp1
= gen_lowpart (QImode
, tmp0
);
16867 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16868 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16870 emit_label (end_label
);
16873 #define LEA_MAX_STALL (3)
16874 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16876 /* Increase given DISTANCE in half-cycles according to
16877 dependencies between PREV and NEXT instructions.
16878 Add 1 half-cycle if there is no dependency and
16879 go to next cycle if there is some dependecy. */
16881 static unsigned int
16882 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16887 if (!prev
|| !next
)
16888 return distance
+ (distance
& 1) + 2;
16890 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16891 return distance
+ 1;
16893 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16894 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16895 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16896 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16897 return distance
+ (distance
& 1) + 2;
16899 return distance
+ 1;
16902 /* Function checks if instruction INSN defines register number
16903 REGNO1 or REGNO2. */
16906 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16911 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16912 if (DF_REF_REG_DEF_P (*def_rec
)
16913 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16914 && (regno1
== DF_REF_REGNO (*def_rec
)
16915 || regno2
== DF_REF_REGNO (*def_rec
)))
16923 /* Function checks if instruction INSN uses register number
16924 REGNO as a part of address expression. */
16927 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16931 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16932 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16938 /* Search backward for non-agu definition of register number REGNO1
16939 or register number REGNO2 in basic block starting from instruction
16940 START up to head of basic block or instruction INSN.
16942 Function puts true value into *FOUND var if definition was found
16943 and false otherwise.
16945 Distance in half-cycles between START and found instruction or head
16946 of BB is added to DISTANCE and returned. */
16949 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16950 rtx insn
, int distance
,
16951 rtx start
, bool *found
)
16953 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16961 && distance
< LEA_SEARCH_THRESHOLD
)
16963 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16965 distance
= increase_distance (prev
, next
, distance
);
16966 if (insn_defines_reg (regno1
, regno2
, prev
))
16968 if (recog_memoized (prev
) < 0
16969 || get_attr_type (prev
) != TYPE_LEA
)
16978 if (prev
== BB_HEAD (bb
))
16981 prev
= PREV_INSN (prev
);
16987 /* Search backward for non-agu definition of register number REGNO1
16988 or register number REGNO2 in INSN's basic block until
16989 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16990 2. Reach neighbour BBs boundary, or
16991 3. Reach agu definition.
16992 Returns the distance between the non-agu definition point and INSN.
16993 If no definition point, returns -1. */
16996 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16999 basic_block bb
= BLOCK_FOR_INSN (insn
);
17001 bool found
= false;
17003 if (insn
!= BB_HEAD (bb
))
17004 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
17005 distance
, PREV_INSN (insn
),
17008 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
17012 bool simple_loop
= false;
17014 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17017 simple_loop
= true;
17022 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
17024 BB_END (bb
), &found
);
17027 int shortest_dist
= -1;
17028 bool found_in_bb
= false;
17030 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
17033 = distance_non_agu_define_in_bb (regno1
, regno2
,
17039 if (shortest_dist
< 0)
17040 shortest_dist
= bb_dist
;
17041 else if (bb_dist
> 0)
17042 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17048 distance
= shortest_dist
;
17052 /* get_attr_type may modify recog data. We want to make sure
17053 that recog data is valid for instruction INSN, on which
17054 distance_non_agu_define is called. INSN is unchanged here. */
17055 extract_insn_cached (insn
);
17060 return distance
>> 1;
17063 /* Return the distance in half-cycles between INSN and the next
17064 insn that uses register number REGNO in memory address added
17065 to DISTANCE. Return -1 if REGNO0 is set.
17067 Put true value into *FOUND if register usage was found and
17069 Put true value into *REDEFINED if register redefinition was
17070 found and false otherwise. */
17073 distance_agu_use_in_bb (unsigned int regno
,
17074 rtx insn
, int distance
, rtx start
,
17075 bool *found
, bool *redefined
)
17077 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17082 *redefined
= false;
17086 && distance
< LEA_SEARCH_THRESHOLD
)
17088 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17090 distance
= increase_distance(prev
, next
, distance
);
17091 if (insn_uses_reg_mem (regno
, next
))
17093 /* Return DISTANCE if OP0 is used in memory
17094 address in NEXT. */
17099 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17101 /* Return -1 if OP0 is set in NEXT. */
17109 if (next
== BB_END (bb
))
17112 next
= NEXT_INSN (next
);
17118 /* Return the distance between INSN and the next insn that uses
17119 register number REGNO0 in memory address. Return -1 if no such
17120 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17123 distance_agu_use (unsigned int regno0
, rtx insn
)
17125 basic_block bb
= BLOCK_FOR_INSN (insn
);
17127 bool found
= false;
17128 bool redefined
= false;
17130 if (insn
!= BB_END (bb
))
17131 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17133 &found
, &redefined
);
17135 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17139 bool simple_loop
= false;
17141 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17144 simple_loop
= true;
17149 distance
= distance_agu_use_in_bb (regno0
, insn
,
17150 distance
, BB_HEAD (bb
),
17151 &found
, &redefined
);
17154 int shortest_dist
= -1;
17155 bool found_in_bb
= false;
17156 bool redefined_in_bb
= false;
17158 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17161 = distance_agu_use_in_bb (regno0
, insn
,
17162 distance
, BB_HEAD (e
->dest
),
17163 &found_in_bb
, &redefined_in_bb
);
17166 if (shortest_dist
< 0)
17167 shortest_dist
= bb_dist
;
17168 else if (bb_dist
> 0)
17169 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17175 distance
= shortest_dist
;
17179 if (!found
|| redefined
)
17182 return distance
>> 1;
17185 /* Define this macro to tune LEA priority vs ADD, it take effect when
17186 there is a dilemma of choicing LEA or ADD
17187 Negative value: ADD is more preferred than LEA
17189 Positive value: LEA is more preferred than ADD*/
17190 #define IX86_LEA_PRIORITY 0
17192 /* Return true if usage of lea INSN has performance advantage
17193 over a sequence of instructions. Instructions sequence has
17194 SPLIT_COST cycles higher latency than lea latency. */
17197 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17198 unsigned int regno2
, int split_cost
)
17200 int dist_define
, dist_use
;
17202 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17203 dist_use
= distance_agu_use (regno0
, insn
);
17205 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17207 /* If there is no non AGU operand definition, no AGU
17208 operand usage and split cost is 0 then both lea
17209 and non lea variants have same priority. Currently
17210 we prefer lea for 64 bit code and non lea on 32 bit
17212 if (dist_use
< 0 && split_cost
== 0)
17213 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17218 /* With longer definitions distance lea is more preferable.
17219 Here we change it to take into account splitting cost and
17221 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17223 /* If there is no use in memory addess then we just check
17224 that split cost exceeds AGU stall. */
17226 return dist_define
> LEA_MAX_STALL
;
17228 /* If this insn has both backward non-agu dependence and forward
17229 agu dependence, the one with short distance takes effect. */
17230 return dist_define
>= dist_use
;
17233 /* Return true if it is legal to clobber flags by INSN and
17234 false otherwise. */
17237 ix86_ok_to_clobber_flags (rtx insn
)
17239 basic_block bb
= BLOCK_FOR_INSN (insn
);
17245 if (NONDEBUG_INSN_P (insn
))
17247 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17248 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17251 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17255 if (insn
== BB_END (bb
))
17258 insn
= NEXT_INSN (insn
);
17261 live
= df_get_live_out(bb
);
17262 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17265 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17266 move and add to avoid AGU stalls. */
17269 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17271 unsigned int regno0
, regno1
, regno2
;
17273 /* Check if we need to optimize. */
17274 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17277 /* Check it is correct to split here. */
17278 if (!ix86_ok_to_clobber_flags(insn
))
17281 regno0
= true_regnum (operands
[0]);
17282 regno1
= true_regnum (operands
[1]);
17283 regno2
= true_regnum (operands
[2]);
17285 /* We need to split only adds with non destructive
17286 destination operand. */
17287 if (regno0
== regno1
|| regno0
== regno2
)
17290 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17293 /* Return true if we should emit lea instruction instead of mov
17297 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17299 unsigned int regno0
, regno1
;
17301 /* Check if we need to optimize. */
17302 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17305 /* Use lea for reg to reg moves only. */
17306 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17309 regno0
= true_regnum (operands
[0]);
17310 regno1
= true_regnum (operands
[1]);
17312 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17315 /* Return true if we need to split lea into a sequence of
17316 instructions to avoid AGU stalls. */
17319 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17321 unsigned int regno0
, regno1
, regno2
;
17323 struct ix86_address parts
;
17326 /* Check we need to optimize. */
17327 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17330 /* Check it is correct to split here. */
17331 if (!ix86_ok_to_clobber_flags(insn
))
17334 ok
= ix86_decompose_address (operands
[1], &parts
);
17337 /* There should be at least two components in the address. */
17338 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17339 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17342 /* We should not split into add if non legitimate pic
17343 operand is used as displacement. */
17344 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17347 regno0
= true_regnum (operands
[0]) ;
17348 regno1
= INVALID_REGNUM
;
17349 regno2
= INVALID_REGNUM
;
17352 regno1
= true_regnum (parts
.base
);
17354 regno2
= true_regnum (parts
.index
);
17358 /* Compute how many cycles we will add to execution time
17359 if split lea into a sequence of instructions. */
17360 if (parts
.base
|| parts
.index
)
17362 /* Have to use mov instruction if non desctructive
17363 destination form is used. */
17364 if (regno1
!= regno0
&& regno2
!= regno0
)
17367 /* Have to add index to base if both exist. */
17368 if (parts
.base
&& parts
.index
)
17371 /* Have to use shift and adds if scale is 2 or greater. */
17372 if (parts
.scale
> 1)
17374 if (regno0
!= regno1
)
17376 else if (regno2
== regno0
)
17379 split_cost
+= parts
.scale
;
17382 /* Have to use add instruction with immediate if
17383 disp is non zero. */
17384 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17387 /* Subtract the price of lea. */
17391 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17394 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17395 matches destination. RTX includes clobber of FLAGS_REG. */
17398 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17403 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17404 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17406 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17409 /* Return true if regno1 def is nearest to the insn. */
17412 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17415 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17419 while (prev
&& prev
!= start
)
17421 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17423 prev
= PREV_INSN (prev
);
17426 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17428 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17430 prev
= PREV_INSN (prev
);
17433 /* None of the regs is defined in the bb. */
17437 /* Split lea instructions into a sequence of instructions
17438 which are executed on ALU to avoid AGU stalls.
17439 It is assumed that it is allowed to clobber flags register
17440 at lea position. */
17443 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17445 unsigned int regno0
, regno1
, regno2
;
17446 struct ix86_address parts
;
17450 ok
= ix86_decompose_address (operands
[1], &parts
);
17453 target
= gen_lowpart (mode
, operands
[0]);
17455 regno0
= true_regnum (target
);
17456 regno1
= INVALID_REGNUM
;
17457 regno2
= INVALID_REGNUM
;
17461 parts
.base
= gen_lowpart (mode
, parts
.base
);
17462 regno1
= true_regnum (parts
.base
);
17467 parts
.index
= gen_lowpart (mode
, parts
.index
);
17468 regno2
= true_regnum (parts
.index
);
17472 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17474 if (parts
.scale
> 1)
17476 /* Case r1 = r1 + ... */
17477 if (regno1
== regno0
)
17479 /* If we have a case r1 = r1 + C * r1 then we
17480 should use multiplication which is very
17481 expensive. Assume cost model is wrong if we
17482 have such case here. */
17483 gcc_assert (regno2
!= regno0
);
17485 for (adds
= parts
.scale
; adds
> 0; adds
--)
17486 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17490 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17491 if (regno0
!= regno2
)
17492 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17494 /* Use shift for scaling. */
17495 ix86_emit_binop (ASHIFT
, mode
, target
,
17496 GEN_INT (exact_log2 (parts
.scale
)));
17499 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17501 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17502 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17505 else if (!parts
.base
&& !parts
.index
)
17507 gcc_assert(parts
.disp
);
17508 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17514 if (regno0
!= regno2
)
17515 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17517 else if (!parts
.index
)
17519 if (regno0
!= regno1
)
17520 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17524 if (regno0
== regno1
)
17526 else if (regno0
== regno2
)
17532 /* Find better operand for SET instruction, depending
17533 on which definition is farther from the insn. */
17534 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17535 tmp
= parts
.index
, tmp1
= parts
.base
;
17537 tmp
= parts
.base
, tmp1
= parts
.index
;
17539 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17541 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17542 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17544 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17548 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17551 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17552 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17556 /* Return true if it is ok to optimize an ADD operation to LEA
17557 operation to avoid flag register consumation. For most processors,
17558 ADD is faster than LEA. For the processors like ATOM, if the
17559 destination register of LEA holds an actual address which will be
17560 used soon, LEA is better and otherwise ADD is better. */
17563 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17565 unsigned int regno0
= true_regnum (operands
[0]);
17566 unsigned int regno1
= true_regnum (operands
[1]);
17567 unsigned int regno2
= true_regnum (operands
[2]);
17569 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17570 if (regno0
!= regno1
&& regno0
!= regno2
)
17573 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17576 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17579 /* Return true if destination reg of SET_BODY is shift count of
17583 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17589 /* Retrieve destination of SET_BODY. */
17590 switch (GET_CODE (set_body
))
17593 set_dest
= SET_DEST (set_body
);
17594 if (!set_dest
|| !REG_P (set_dest
))
17598 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17599 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17607 /* Retrieve shift count of USE_BODY. */
17608 switch (GET_CODE (use_body
))
17611 shift_rtx
= XEXP (use_body
, 1);
17614 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17615 if (ix86_dep_by_shift_count_body (set_body
,
17616 XVECEXP (use_body
, 0, i
)))
17624 && (GET_CODE (shift_rtx
) == ASHIFT
17625 || GET_CODE (shift_rtx
) == LSHIFTRT
17626 || GET_CODE (shift_rtx
) == ASHIFTRT
17627 || GET_CODE (shift_rtx
) == ROTATE
17628 || GET_CODE (shift_rtx
) == ROTATERT
))
17630 rtx shift_count
= XEXP (shift_rtx
, 1);
17632 /* Return true if shift count is dest of SET_BODY. */
17633 if (REG_P (shift_count
))
17635 /* Add check since it can be invoked before register
17636 allocation in pre-reload schedule. */
17637 if (reload_completed
17638 && true_regnum (set_dest
) == true_regnum (shift_count
))
17640 else if (REGNO(set_dest
) == REGNO(shift_count
))
17648 /* Return true if destination reg of SET_INSN is shift count of
17652 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17654 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17655 PATTERN (use_insn
));
17658 /* Return TRUE or FALSE depending on whether the unary operator meets the
17659 appropriate constraints. */
17662 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17663 enum machine_mode mode ATTRIBUTE_UNUSED
,
17664 rtx operands
[2] ATTRIBUTE_UNUSED
)
17666 /* If one of operands is memory, source and destination must match. */
17667 if ((MEM_P (operands
[0])
17668 || MEM_P (operands
[1]))
17669 && ! rtx_equal_p (operands
[0], operands
[1]))
17674 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17675 are ok, keeping in mind the possible movddup alternative. */
17678 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17680 if (MEM_P (operands
[0]))
17681 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17682 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17683 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17687 /* Post-reload splitter for converting an SF or DFmode value in an
17688 SSE register into an unsigned SImode. */
17691 ix86_split_convert_uns_si_sse (rtx operands
[])
17693 enum machine_mode vecmode
;
17694 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17696 large
= operands
[1];
17697 zero_or_two31
= operands
[2];
17698 input
= operands
[3];
17699 two31
= operands
[4];
17700 vecmode
= GET_MODE (large
);
17701 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17703 /* Load up the value into the low element. We must ensure that the other
17704 elements are valid floats -- zero is the easiest such value. */
17707 if (vecmode
== V4SFmode
)
17708 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17710 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17714 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17715 emit_move_insn (value
, CONST0_RTX (vecmode
));
17716 if (vecmode
== V4SFmode
)
17717 emit_insn (gen_sse_movss (value
, value
, input
));
17719 emit_insn (gen_sse2_movsd (value
, value
, input
));
17722 emit_move_insn (large
, two31
);
17723 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17725 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17726 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17728 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17729 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17731 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17732 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17734 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17735 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17737 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17738 if (vecmode
== V4SFmode
)
17739 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17741 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17744 emit_insn (gen_xorv4si3 (value
, value
, large
));
17747 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17748 Expects the 64-bit DImode to be supplied in a pair of integral
17749 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17750 -mfpmath=sse, !optimize_size only. */
17753 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17755 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17756 rtx int_xmm
, fp_xmm
;
17757 rtx biases
, exponents
;
17760 int_xmm
= gen_reg_rtx (V4SImode
);
17761 if (TARGET_INTER_UNIT_MOVES
)
17762 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17763 else if (TARGET_SSE_SPLIT_REGS
)
17765 emit_clobber (int_xmm
);
17766 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17770 x
= gen_reg_rtx (V2DImode
);
17771 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17772 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17775 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17776 gen_rtvec (4, GEN_INT (0x43300000UL
),
17777 GEN_INT (0x45300000UL
),
17778 const0_rtx
, const0_rtx
));
17779 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17781 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17782 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17784 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17785 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17786 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17787 (0x1.0p84 + double(fp_value_hi_xmm)).
17788 Note these exponents differ by 32. */
17790 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17792 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17793 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17794 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17795 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17796 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17797 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17798 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17799 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17800 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17802 /* Add the upper and lower DFmode values together. */
17804 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17807 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17808 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17809 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17812 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17815 /* Not used, but eases macroization of patterns. */
17817 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17818 rtx input ATTRIBUTE_UNUSED
)
17820 gcc_unreachable ();
17823 /* Convert an unsigned SImode value into a DFmode. Only currently used
17824 for SSE, but applicable anywhere. */
17827 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17829 REAL_VALUE_TYPE TWO31r
;
17832 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17833 NULL
, 1, OPTAB_DIRECT
);
17835 fp
= gen_reg_rtx (DFmode
);
17836 emit_insn (gen_floatsidf2 (fp
, x
));
17838 real_ldexp (&TWO31r
, &dconst1
, 31);
17839 x
= const_double_from_real_value (TWO31r
, DFmode
);
17841 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17843 emit_move_insn (target
, x
);
17846 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17847 32-bit mode; otherwise we have a direct convert instruction. */
17850 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17852 REAL_VALUE_TYPE TWO32r
;
17853 rtx fp_lo
, fp_hi
, x
;
17855 fp_lo
= gen_reg_rtx (DFmode
);
17856 fp_hi
= gen_reg_rtx (DFmode
);
17858 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17860 real_ldexp (&TWO32r
, &dconst1
, 32);
17861 x
= const_double_from_real_value (TWO32r
, DFmode
);
17862 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17864 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17866 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17869 emit_move_insn (target
, x
);
17872 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17873 For x86_32, -mfpmath=sse, !optimize_size only. */
17875 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17877 REAL_VALUE_TYPE ONE16r
;
17878 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17880 real_ldexp (&ONE16r
, &dconst1
, 16);
17881 x
= const_double_from_real_value (ONE16r
, SFmode
);
17882 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17883 NULL
, 0, OPTAB_DIRECT
);
17884 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17885 NULL
, 0, OPTAB_DIRECT
);
17886 fp_hi
= gen_reg_rtx (SFmode
);
17887 fp_lo
= gen_reg_rtx (SFmode
);
17888 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17889 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17890 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17892 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17894 if (!rtx_equal_p (target
, fp_hi
))
17895 emit_move_insn (target
, fp_hi
);
17898 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17899 a vector of unsigned ints VAL to vector of floats TARGET. */
17902 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17905 REAL_VALUE_TYPE TWO16r
;
17906 enum machine_mode intmode
= GET_MODE (val
);
17907 enum machine_mode fltmode
= GET_MODE (target
);
17908 rtx (*cvt
) (rtx
, rtx
);
17910 if (intmode
== V4SImode
)
17911 cvt
= gen_floatv4siv4sf2
;
17913 cvt
= gen_floatv8siv8sf2
;
17914 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17915 tmp
[0] = force_reg (intmode
, tmp
[0]);
17916 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17918 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17919 NULL_RTX
, 1, OPTAB_DIRECT
);
17920 tmp
[3] = gen_reg_rtx (fltmode
);
17921 emit_insn (cvt (tmp
[3], tmp
[1]));
17922 tmp
[4] = gen_reg_rtx (fltmode
);
17923 emit_insn (cvt (tmp
[4], tmp
[2]));
17924 real_ldexp (&TWO16r
, &dconst1
, 16);
17925 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17926 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17927 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17929 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17931 if (tmp
[7] != target
)
17932 emit_move_insn (target
, tmp
[7]);
17935 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17936 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17937 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17938 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17941 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17943 REAL_VALUE_TYPE TWO31r
;
17944 rtx two31r
, tmp
[4];
17945 enum machine_mode mode
= GET_MODE (val
);
17946 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17947 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17948 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17951 for (i
= 0; i
< 3; i
++)
17952 tmp
[i
] = gen_reg_rtx (mode
);
17953 real_ldexp (&TWO31r
, &dconst1
, 31);
17954 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17955 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17956 two31r
= force_reg (mode
, two31r
);
17959 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17960 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17961 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17962 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17963 default: gcc_unreachable ();
17965 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17966 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17967 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17969 if (intmode
== V4SImode
|| TARGET_AVX2
)
17970 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17971 gen_lowpart (intmode
, tmp
[0]),
17972 GEN_INT (31), NULL_RTX
, 0,
17976 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17977 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17978 *xorp
= expand_simple_binop (intmode
, AND
,
17979 gen_lowpart (intmode
, tmp
[0]),
17980 two31
, NULL_RTX
, 0,
17983 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17987 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17988 then replicate the value for all elements of the vector
17992 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17996 enum machine_mode scalar_mode
;
18013 n_elt
= GET_MODE_NUNITS (mode
);
18014 v
= rtvec_alloc (n_elt
);
18015 scalar_mode
= GET_MODE_INNER (mode
);
18017 RTVEC_ELT (v
, 0) = value
;
18019 for (i
= 1; i
< n_elt
; ++i
)
18020 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
18022 return gen_rtx_CONST_VECTOR (mode
, v
);
18025 gcc_unreachable ();
18029 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18030 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18031 for an SSE register. If VECT is true, then replicate the mask for
18032 all elements of the vector register. If INVERT is true, then create
18033 a mask excluding the sign bit. */
18036 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
18038 enum machine_mode vec_mode
, imode
;
18039 HOST_WIDE_INT hi
, lo
;
18044 /* Find the sign bit, sign extended to 2*HWI. */
18052 mode
= GET_MODE_INNER (mode
);
18054 lo
= 0x80000000, hi
= lo
< 0;
18062 mode
= GET_MODE_INNER (mode
);
18064 if (HOST_BITS_PER_WIDE_INT
>= 64)
18065 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
18067 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18072 vec_mode
= VOIDmode
;
18073 if (HOST_BITS_PER_WIDE_INT
>= 64)
18076 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18083 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18087 lo
= ~lo
, hi
= ~hi
;
18093 mask
= immed_double_const (lo
, hi
, imode
);
18095 vec
= gen_rtvec (2, v
, mask
);
18096 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18097 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18104 gcc_unreachable ();
18108 lo
= ~lo
, hi
= ~hi
;
18110 /* Force this value into the low part of a fp vector constant. */
18111 mask
= immed_double_const (lo
, hi
, imode
);
18112 mask
= gen_lowpart (mode
, mask
);
18114 if (vec_mode
== VOIDmode
)
18115 return force_reg (mode
, mask
);
18117 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18118 return force_reg (vec_mode
, v
);
18121 /* Generate code for floating point ABS or NEG. */
18124 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18127 rtx mask
, set
, dst
, src
;
18128 bool use_sse
= false;
18129 bool vector_mode
= VECTOR_MODE_P (mode
);
18130 enum machine_mode vmode
= mode
;
18134 else if (mode
== TFmode
)
18136 else if (TARGET_SSE_MATH
)
18138 use_sse
= SSE_FLOAT_MODE_P (mode
);
18139 if (mode
== SFmode
)
18141 else if (mode
== DFmode
)
18145 /* NEG and ABS performed with SSE use bitwise mask operations.
18146 Create the appropriate mask now. */
18148 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18155 set
= gen_rtx_fmt_e (code
, mode
, src
);
18156 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18163 use
= gen_rtx_USE (VOIDmode
, mask
);
18165 par
= gen_rtvec (2, set
, use
);
18168 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18169 par
= gen_rtvec (3, set
, use
, clob
);
18171 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18177 /* Expand a copysign operation. Special case operand 0 being a constant. */
18180 ix86_expand_copysign (rtx operands
[])
18182 enum machine_mode mode
, vmode
;
18183 rtx dest
, op0
, op1
, mask
, nmask
;
18185 dest
= operands
[0];
18189 mode
= GET_MODE (dest
);
18191 if (mode
== SFmode
)
18193 else if (mode
== DFmode
)
18198 if (GET_CODE (op0
) == CONST_DOUBLE
)
18200 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18202 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18203 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18205 if (mode
== SFmode
|| mode
== DFmode
)
18207 if (op0
== CONST0_RTX (mode
))
18208 op0
= CONST0_RTX (vmode
);
18211 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18213 op0
= force_reg (vmode
, v
);
18216 else if (op0
!= CONST0_RTX (mode
))
18217 op0
= force_reg (mode
, op0
);
18219 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18221 if (mode
== SFmode
)
18222 copysign_insn
= gen_copysignsf3_const
;
18223 else if (mode
== DFmode
)
18224 copysign_insn
= gen_copysigndf3_const
;
18226 copysign_insn
= gen_copysigntf3_const
;
18228 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18232 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18234 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18235 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18237 if (mode
== SFmode
)
18238 copysign_insn
= gen_copysignsf3_var
;
18239 else if (mode
== DFmode
)
18240 copysign_insn
= gen_copysigndf3_var
;
18242 copysign_insn
= gen_copysigntf3_var
;
18244 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18248 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18249 be a constant, and so has already been expanded into a vector constant. */
18252 ix86_split_copysign_const (rtx operands
[])
18254 enum machine_mode mode
, vmode
;
18255 rtx dest
, op0
, mask
, x
;
18257 dest
= operands
[0];
18259 mask
= operands
[3];
18261 mode
= GET_MODE (dest
);
18262 vmode
= GET_MODE (mask
);
18264 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18265 x
= gen_rtx_AND (vmode
, dest
, mask
);
18266 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18268 if (op0
!= CONST0_RTX (vmode
))
18270 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18271 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18275 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18276 so we have to do two masks. */
18279 ix86_split_copysign_var (rtx operands
[])
18281 enum machine_mode mode
, vmode
;
18282 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18284 dest
= operands
[0];
18285 scratch
= operands
[1];
18288 nmask
= operands
[4];
18289 mask
= operands
[5];
18291 mode
= GET_MODE (dest
);
18292 vmode
= GET_MODE (mask
);
18294 if (rtx_equal_p (op0
, op1
))
18296 /* Shouldn't happen often (it's useless, obviously), but when it does
18297 we'd generate incorrect code if we continue below. */
18298 emit_move_insn (dest
, op0
);
18302 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18304 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18306 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18307 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18310 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18311 x
= gen_rtx_NOT (vmode
, dest
);
18312 x
= gen_rtx_AND (vmode
, x
, op0
);
18313 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18317 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18319 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18321 else /* alternative 2,4 */
18323 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18324 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18325 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18327 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18329 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18331 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18332 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18334 else /* alternative 3,4 */
18336 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18338 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18339 x
= gen_rtx_AND (vmode
, dest
, op0
);
18341 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18344 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18345 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18348 /* Return TRUE or FALSE depending on whether the first SET in INSN
18349 has source and destination with matching CC modes, and that the
18350 CC mode is at least as constrained as REQ_MODE. */
18353 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18356 enum machine_mode set_mode
;
18358 set
= PATTERN (insn
);
18359 if (GET_CODE (set
) == PARALLEL
)
18360 set
= XVECEXP (set
, 0, 0);
18361 gcc_assert (GET_CODE (set
) == SET
);
18362 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18364 set_mode
= GET_MODE (SET_DEST (set
));
18368 if (req_mode
!= CCNOmode
18369 && (req_mode
!= CCmode
18370 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18374 if (req_mode
== CCGCmode
)
18378 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18382 if (req_mode
== CCZmode
)
18392 if (set_mode
!= req_mode
)
18397 gcc_unreachable ();
18400 return GET_MODE (SET_SRC (set
)) == set_mode
;
18403 /* Generate insn patterns to do an integer compare of OPERANDS. */
18406 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18408 enum machine_mode cmpmode
;
18411 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18412 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18414 /* This is very simple, but making the interface the same as in the
18415 FP case makes the rest of the code easier. */
18416 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18417 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18419 /* Return the test that should be put into the flags user, i.e.
18420 the bcc, scc, or cmov instruction. */
18421 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18424 /* Figure out whether to use ordered or unordered fp comparisons.
18425 Return the appropriate mode to use. */
18428 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18430 /* ??? In order to make all comparisons reversible, we do all comparisons
18431 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18432 all forms trapping and nontrapping comparisons, we can make inequality
18433 comparisons trapping again, since it results in better code when using
18434 FCOM based compares. */
18435 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18439 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18441 enum machine_mode mode
= GET_MODE (op0
);
18443 if (SCALAR_FLOAT_MODE_P (mode
))
18445 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18446 return ix86_fp_compare_mode (code
);
18451 /* Only zero flag is needed. */
18452 case EQ
: /* ZF=0 */
18453 case NE
: /* ZF!=0 */
18455 /* Codes needing carry flag. */
18456 case GEU
: /* CF=0 */
18457 case LTU
: /* CF=1 */
18458 /* Detect overflow checks. They need just the carry flag. */
18459 if (GET_CODE (op0
) == PLUS
18460 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18464 case GTU
: /* CF=0 & ZF=0 */
18465 case LEU
: /* CF=1 | ZF=1 */
18466 /* Detect overflow checks. They need just the carry flag. */
18467 if (GET_CODE (op0
) == MINUS
18468 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18472 /* Codes possibly doable only with sign flag when
18473 comparing against zero. */
18474 case GE
: /* SF=OF or SF=0 */
18475 case LT
: /* SF<>OF or SF=1 */
18476 if (op1
== const0_rtx
)
18479 /* For other cases Carry flag is not required. */
18481 /* Codes doable only with sign flag when comparing
18482 against zero, but we miss jump instruction for it
18483 so we need to use relational tests against overflow
18484 that thus needs to be zero. */
18485 case GT
: /* ZF=0 & SF=OF */
18486 case LE
: /* ZF=1 | SF<>OF */
18487 if (op1
== const0_rtx
)
18491 /* strcmp pattern do (use flags) and combine may ask us for proper
18496 gcc_unreachable ();
18500 /* Return the fixed registers used for condition codes. */
18503 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18510 /* If two condition code modes are compatible, return a condition code
18511 mode which is compatible with both. Otherwise, return
18514 static enum machine_mode
18515 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18520 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18523 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18524 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18527 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18529 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18535 gcc_unreachable ();
18565 /* These are only compatible with themselves, which we already
18572 /* Return a comparison we can do and that it is equivalent to
18573 swap_condition (code) apart possibly from orderedness.
18574 But, never change orderedness if TARGET_IEEE_FP, returning
18575 UNKNOWN in that case if necessary. */
18577 static enum rtx_code
18578 ix86_fp_swap_condition (enum rtx_code code
)
18582 case GT
: /* GTU - CF=0 & ZF=0 */
18583 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18584 case GE
: /* GEU - CF=0 */
18585 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18586 case UNLT
: /* LTU - CF=1 */
18587 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18588 case UNLE
: /* LEU - CF=1 | ZF=1 */
18589 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18591 return swap_condition (code
);
18595 /* Return cost of comparison CODE using the best strategy for performance.
18596 All following functions do use number of instructions as a cost metrics.
18597 In future this should be tweaked to compute bytes for optimize_size and
18598 take into account performance of various instructions on various CPUs. */
18601 ix86_fp_comparison_cost (enum rtx_code code
)
18605 /* The cost of code using bit-twiddling on %ah. */
18622 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18626 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18629 gcc_unreachable ();
18632 switch (ix86_fp_comparison_strategy (code
))
18634 case IX86_FPCMP_COMI
:
18635 return arith_cost
> 4 ? 3 : 2;
18636 case IX86_FPCMP_SAHF
:
18637 return arith_cost
> 4 ? 4 : 3;
18643 /* Return strategy to use for floating-point. We assume that fcomi is always
18644 preferrable where available, since that is also true when looking at size
18645 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18647 enum ix86_fpcmp_strategy
18648 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18650 /* Do fcomi/sahf based test when profitable. */
18653 return IX86_FPCMP_COMI
;
18655 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18656 return IX86_FPCMP_SAHF
;
18658 return IX86_FPCMP_ARITH
;
18661 /* Swap, force into registers, or otherwise massage the two operands
18662 to a fp comparison. The operands are updated in place; the new
18663 comparison code is returned. */
18665 static enum rtx_code
18666 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18668 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18669 rtx op0
= *pop0
, op1
= *pop1
;
18670 enum machine_mode op_mode
= GET_MODE (op0
);
18671 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18673 /* All of the unordered compare instructions only work on registers.
18674 The same is true of the fcomi compare instructions. The XFmode
18675 compare instructions require registers except when comparing
18676 against zero or when converting operand 1 from fixed point to
18680 && (fpcmp_mode
== CCFPUmode
18681 || (op_mode
== XFmode
18682 && ! (standard_80387_constant_p (op0
) == 1
18683 || standard_80387_constant_p (op1
) == 1)
18684 && GET_CODE (op1
) != FLOAT
)
18685 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18687 op0
= force_reg (op_mode
, op0
);
18688 op1
= force_reg (op_mode
, op1
);
18692 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18693 things around if they appear profitable, otherwise force op0
18694 into a register. */
18696 if (standard_80387_constant_p (op0
) == 0
18698 && ! (standard_80387_constant_p (op1
) == 0
18701 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18702 if (new_code
!= UNKNOWN
)
18705 tmp
= op0
, op0
= op1
, op1
= tmp
;
18711 op0
= force_reg (op_mode
, op0
);
18713 if (CONSTANT_P (op1
))
18715 int tmp
= standard_80387_constant_p (op1
);
18717 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18721 op1
= force_reg (op_mode
, op1
);
18724 op1
= force_reg (op_mode
, op1
);
18728 /* Try to rearrange the comparison to make it cheaper. */
18729 if (ix86_fp_comparison_cost (code
)
18730 > ix86_fp_comparison_cost (swap_condition (code
))
18731 && (REG_P (op1
) || can_create_pseudo_p ()))
18734 tmp
= op0
, op0
= op1
, op1
= tmp
;
18735 code
= swap_condition (code
);
18737 op0
= force_reg (op_mode
, op0
);
18745 /* Convert comparison codes we use to represent FP comparison to integer
18746 code that will result in proper branch. Return UNKNOWN if no such code
18750 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18779 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18782 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18784 enum machine_mode fpcmp_mode
, intcmp_mode
;
18787 fpcmp_mode
= ix86_fp_compare_mode (code
);
18788 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18790 /* Do fcomi/sahf based test when profitable. */
18791 switch (ix86_fp_comparison_strategy (code
))
18793 case IX86_FPCMP_COMI
:
18794 intcmp_mode
= fpcmp_mode
;
18795 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18796 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18801 case IX86_FPCMP_SAHF
:
18802 intcmp_mode
= fpcmp_mode
;
18803 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18804 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18808 scratch
= gen_reg_rtx (HImode
);
18809 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18810 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18813 case IX86_FPCMP_ARITH
:
18814 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18815 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18816 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18818 scratch
= gen_reg_rtx (HImode
);
18819 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18821 /* In the unordered case, we have to check C2 for NaN's, which
18822 doesn't happen to work out to anything nice combination-wise.
18823 So do some bit twiddling on the value we've got in AH to come
18824 up with an appropriate set of condition codes. */
18826 intcmp_mode
= CCNOmode
;
18831 if (code
== GT
|| !TARGET_IEEE_FP
)
18833 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18838 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18839 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18840 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18841 intcmp_mode
= CCmode
;
18847 if (code
== LT
&& TARGET_IEEE_FP
)
18849 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18850 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18851 intcmp_mode
= CCmode
;
18856 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18862 if (code
== GE
|| !TARGET_IEEE_FP
)
18864 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18869 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18870 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18876 if (code
== LE
&& TARGET_IEEE_FP
)
18878 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18879 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18880 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18881 intcmp_mode
= CCmode
;
18886 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18892 if (code
== EQ
&& TARGET_IEEE_FP
)
18894 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18895 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18896 intcmp_mode
= CCmode
;
18901 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18907 if (code
== NE
&& TARGET_IEEE_FP
)
18909 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18910 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18916 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18922 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18926 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18931 gcc_unreachable ();
18939 /* Return the test that should be put into the flags user, i.e.
18940 the bcc, scc, or cmov instruction. */
18941 return gen_rtx_fmt_ee (code
, VOIDmode
,
18942 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18947 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18951 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18952 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18954 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18956 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18957 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18960 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18966 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18968 enum machine_mode mode
= GET_MODE (op0
);
18980 tmp
= ix86_expand_compare (code
, op0
, op1
);
18981 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18982 gen_rtx_LABEL_REF (VOIDmode
, label
),
18984 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18991 /* Expand DImode branch into multiple compare+branch. */
18993 rtx lo
[2], hi
[2], label2
;
18994 enum rtx_code code1
, code2
, code3
;
18995 enum machine_mode submode
;
18997 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18999 tmp
= op0
, op0
= op1
, op1
= tmp
;
19000 code
= swap_condition (code
);
19003 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
19004 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
19006 submode
= mode
== DImode
? SImode
: DImode
;
19008 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19009 avoid two branches. This costs one extra insn, so disable when
19010 optimizing for size. */
19012 if ((code
== EQ
|| code
== NE
)
19013 && (!optimize_insn_for_size_p ()
19014 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
19019 if (hi
[1] != const0_rtx
)
19020 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
19021 NULL_RTX
, 0, OPTAB_WIDEN
);
19024 if (lo
[1] != const0_rtx
)
19025 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
19026 NULL_RTX
, 0, OPTAB_WIDEN
);
19028 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
19029 NULL_RTX
, 0, OPTAB_WIDEN
);
19031 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
19035 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19036 op1 is a constant and the low word is zero, then we can just
19037 examine the high word. Similarly for low word -1 and
19038 less-or-equal-than or greater-than. */
19040 if (CONST_INT_P (hi
[1]))
19043 case LT
: case LTU
: case GE
: case GEU
:
19044 if (lo
[1] == const0_rtx
)
19046 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19050 case LE
: case LEU
: case GT
: case GTU
:
19051 if (lo
[1] == constm1_rtx
)
19053 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
19061 /* Otherwise, we need two or three jumps. */
19063 label2
= gen_label_rtx ();
19066 code2
= swap_condition (code
);
19067 code3
= unsigned_condition (code
);
19071 case LT
: case GT
: case LTU
: case GTU
:
19074 case LE
: code1
= LT
; code2
= GT
; break;
19075 case GE
: code1
= GT
; code2
= LT
; break;
19076 case LEU
: code1
= LTU
; code2
= GTU
; break;
19077 case GEU
: code1
= GTU
; code2
= LTU
; break;
19079 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19080 case NE
: code2
= UNKNOWN
; break;
19083 gcc_unreachable ();
19088 * if (hi(a) < hi(b)) goto true;
19089 * if (hi(a) > hi(b)) goto false;
19090 * if (lo(a) < lo(b)) goto true;
19094 if (code1
!= UNKNOWN
)
19095 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19096 if (code2
!= UNKNOWN
)
19097 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19099 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19101 if (code2
!= UNKNOWN
)
19102 emit_label (label2
);
19107 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19112 /* Split branch based on floating point condition. */
19114 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19115 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19120 if (target2
!= pc_rtx
)
19123 code
= reverse_condition_maybe_unordered (code
);
19128 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19131 /* Remove pushed operand from stack. */
19133 ix86_free_from_memory (GET_MODE (pushed
));
19135 i
= emit_jump_insn (gen_rtx_SET
19137 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19138 condition
, target1
, target2
)));
19139 if (split_branch_probability
>= 0)
19140 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19144 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19148 gcc_assert (GET_MODE (dest
) == QImode
);
19150 ret
= ix86_expand_compare (code
, op0
, op1
);
19151 PUT_MODE (ret
, QImode
);
19152 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
/* [review] NOTE: partial extraction -- embedded line numbers jump, so
   many statements are elided and tokens are split across lines.  Code
   text is preserved byte-for-byte; only comments are added.  */
19155 /* Expand comparison setting or clearing carry flag. Return true when
19156 successful and set pop for the operation. */
19158 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
/* [review] MODE is taken from whichever operand has a non-VOID mode.  */
19160 enum machine_mode mode
=
19161 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19163 /* Do not handle double-mode compares that go through special path. */
19164 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
/* [review] Floating-point path: only certain codes can become carry-flag
   compares; decimal FP is asserted impossible here.  */
19167 if (SCALAR_FLOAT_MODE_P (mode
))
19169 rtx compare_op
, compare_seq
;
19171 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19173 /* Shortcut: following common codes never translate
19174 into carry flag compares. */
19175 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19176 || code
== ORDERED
|| code
== UNORDERED
)
19179 /* These comparisons require zero flag; swap operands so they won't. */
19180 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19181 && !TARGET_IEEE_FP
)
19186 code
= swap_condition (code
);
19189 /* Try to expand the comparison and verify that we end up with
19190 carry flag based comparison. This fails to be true only when
19191 we decide to expand comparison using arithmetic that is not
19192 too common scenario. */
19194 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19195 compare_seq
= get_insns ();
/* [review] Map FP flag modes back to an integer comparison code so the
   LTU/GEU test below is meaningful.  */
19198 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19199 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19200 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19202 code
= GET_CODE (compare_op
);
/* [review] Only LTU/GEU use the carry flag; anything else is a failure
   for this helper.  */
19204 if (code
!= LTU
&& code
!= GEU
)
19207 emit_insn (compare_seq
);
19212 if (!INTEGRAL_MODE_P (mode
))
19221 /* Convert a==0 into (unsigned)a<1. */
19224 if (op1
!= const0_rtx
)
19227 code
= (code
== EQ
? LTU
: GEU
);
19230 /* Convert a>b into b<a or a>=b-1. */
19233 if (CONST_INT_P (op1
))
19235 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19236 /* Bail out on overflow. We still can swap operands but that
19237 would force loading of the constant into register. */
19238 if (op1
== const0_rtx
19239 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19241 code
= (code
== GTU
? GEU
: LTU
);
19248 code
= (code
== GTU
? LTU
: GEU
);
19252 /* Convert a>=0 into (unsigned)a<0x80000000. */
19255 if (mode
== DImode
|| op1
!= const0_rtx
)
19257 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19258 code
= (code
== LT
? GEU
: LTU
);
19262 if (mode
== DImode
|| op1
!= constm1_rtx
)
19264 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19265 code
= (code
== LE
? GEU
: LTU
);
19271 /* Swapping operands may cause constant to appear as first operand. */
19272 if (!nonimmediate_operand (op0
, VOIDmode
))
19274 if (!can_create_pseudo_p ())
19276 op0
= force_reg (mode
, op0
);
/* [review] Success: *POP receives the carry-flag comparison, asserted to
   be LTU or GEU.  */
19278 *pop
= ix86_expand_compare (code
, op0
, op1
);
19279 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
/* [review] Expand an integer conditional move:
   operands[0] = (operands[1] comparison) ? operands[2] : operands[3].
   Visible strategy: for constant arms, try branchless sequences built
   from store-flag / sbb (x86_mov?icc_0_m1), NOT/PLUS/AND/IOR arithmetic,
   and lea-addressable differences; otherwise fall through to emitting an
   IF_THEN_ELSE (cmov) at the end.  The function recurses on itself for
   the "load constant then mask in variable" case (19744).
   NOTE: partial extraction -- embedded line numbers jump, so many
   statements (brace structure, several assignments and returns) are
   elided and tokens are split across lines.  Code text is preserved
   byte-for-byte; only comments are added.  */
19284 ix86_expand_int_movcc (rtx operands
[])
19286 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19287 rtx compare_seq
, compare_op
;
19288 enum machine_mode mode
= GET_MODE (operands
[0]);
19289 bool sign_bit_compare_p
= false;
19290 rtx op0
= XEXP (operands
[1], 0);
19291 rtx op1
= XEXP (operands
[1], 1);
19293 if (GET_MODE (op0
) == TImode
19294 || (GET_MODE (op0
) == DImode
19299 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19300 compare_seq
= get_insns ();
19303 compare_code
= GET_CODE (compare_op
);
/* [review] Compares against 0 / -1 with GE/LT/GT/LE are sign-bit tests,
   which get a cheaper shift-based expansion below.  */
19305 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19306 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19307 sign_bit_compare_p
= true;
19309 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19310 HImode insns, we'd be swallowed in word prefix ops. */
19312 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19313 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19314 && CONST_INT_P (operands
[2])
19315 && CONST_INT_P (operands
[3]))
19317 rtx out
= operands
[0];
19318 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19319 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19320 HOST_WIDE_INT diff
;
19323 /* Sign bit compares are better done using shifts than we do by using
19325 if (sign_bit_compare_p
19326 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19328 /* Detect overlap between destination and compare sources. */
19331 if (!sign_bit_compare_p
)
19334 bool fpcmp
= false;
19336 compare_code
= GET_CODE (compare_op
);
19338 flags
= XEXP (compare_op
, 0);
19340 if (GET_MODE (flags
) == CCFPmode
19341 || GET_MODE (flags
) == CCFPUmode
)
19345 = ix86_fp_compare_code_to_integer (compare_code
);
19348 /* To simplify rest of code, restrict to the GEU case. */
19349 if (compare_code
== LTU
)
19351 HOST_WIDE_INT tmp
= ct
;
19354 compare_code
= reverse_condition (compare_code
);
19355 code
= reverse_condition (code
);
19360 PUT_CODE (compare_op
,
19361 reverse_condition_maybe_unordered
19362 (GET_CODE (compare_op
)));
19364 PUT_CODE (compare_op
,
19365 reverse_condition (GET_CODE (compare_op
)));
/* [review] If the destination overlaps a compare source, compute into a
   fresh pseudo and copy at the end.  */
19369 if (reg_overlap_mentioned_p (out
, op0
)
19370 || reg_overlap_mentioned_p (out
, op1
))
19371 tmp
= gen_reg_rtx (mode
);
/* [review] Materialize 0 / -1 from the carry flag via the sbb-style
   movcc_0_m1 patterns (DImode directly, else through an SImode lowpart).  */
19373 if (mode
== DImode
)
19374 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19376 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19377 flags
, compare_op
));
19381 if (code
== GT
|| code
== GE
)
19382 code
= reverse_condition (code
);
19385 HOST_WIDE_INT tmp
= ct
;
19390 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19403 tmp
= expand_simple_binop (mode
, PLUS
,
19405 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19416 tmp
= expand_simple_binop (mode
, IOR
,
19418 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19420 else if (diff
== -1 && ct
)
19430 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19432 tmp
= expand_simple_binop (mode
, PLUS
,
19433 copy_rtx (tmp
), GEN_INT (cf
),
19434 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19442 * andl cf - ct, dest
19452 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19455 tmp
= expand_simple_binop (mode
, AND
,
19457 gen_int_mode (cf
- ct
, mode
),
19458 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19460 tmp
= expand_simple_binop (mode
, PLUS
,
19461 copy_rtx (tmp
), GEN_INT (ct
),
19462 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19465 if (!rtx_equal_p (tmp
, out
))
19466 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19473 enum machine_mode cmp_mode
= GET_MODE (op0
);
19476 tmp
= ct
, ct
= cf
, cf
= tmp
;
19479 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19481 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19483 /* We may be reversing unordered compare to normal compare, that
19484 is not valid in general (we may convert non-trapping condition
19485 to trapping one), however on i386 we currently emit all
19486 comparisons unordered. */
19487 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19488 code
= reverse_condition_maybe_unordered (code
);
19492 compare_code
= reverse_condition (compare_code
);
19493 code
= reverse_condition (code
);
19497 compare_code
= UNKNOWN
;
19498 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19499 && CONST_INT_P (op1
))
19501 if (op1
== const0_rtx
19502 && (code
== LT
|| code
== GE
))
19503 compare_code
= code
;
19504 else if (op1
== constm1_rtx
)
19508 else if (code
== GT
)
19513 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19514 if (compare_code
!= UNKNOWN
19515 && GET_MODE (op0
) == GET_MODE (out
)
19516 && (cf
== -1 || ct
== -1))
19518 /* If lea code below could be used, only optimize
19519 if it results in a 2 insn sequence. */
19521 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19522 || diff
== 3 || diff
== 5 || diff
== 9)
19523 || (compare_code
== LT
&& ct
== -1)
19524 || (compare_code
== GE
&& cf
== -1))
19527 * notl op1 (if necessary)
19535 code
= reverse_condition (code
);
19538 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19540 out
= expand_simple_binop (mode
, IOR
,
19542 out
, 1, OPTAB_DIRECT
);
19543 if (out
!= operands
[0])
19544 emit_move_insn (operands
[0], out
);
/* [review] lea path: DIFF in {1,2,3,4,5,8,9} can be encoded as a single
   lea scale/add; guarded against partial-register stalls on QI/HI.  */
19551 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19552 || diff
== 3 || diff
== 5 || diff
== 9)
19553 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19555 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19561 * lea cf(dest*(ct-cf)),dest
19565 * This also catches the degenerate setcc-only case.
19571 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19574 /* On x86_64 the lea instruction operates on Pmode, so we need
19575 to get arithmetics done in proper mode to match. */
19577 tmp
= copy_rtx (out
);
19581 out1
= copy_rtx (out
);
19582 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19586 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19592 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19595 if (!rtx_equal_p (tmp
, out
))
19598 out
= force_operand (tmp
, copy_rtx (out
));
19600 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19602 if (!rtx_equal_p (out
, operands
[0]))
19603 emit_move_insn (operands
[0], copy_rtx (out
));
19609 * General case: Jumpful:
19610 * xorl dest,dest cmpl op1, op2
19611 * cmpl op1, op2 movl ct, dest
19612 * setcc dest jcc 1f
19613 * decl dest movl cf, dest
19614 * andl (cf-ct),dest 1:
19617 * Size 20. Size 14.
19619 * This is reasonably steep, but branch mispredict costs are
19620 * high on modern cpus, so consider failing only if optimizing
19624 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19625 && BRANCH_COST (optimize_insn_for_speed_p (),
19630 enum machine_mode cmp_mode
= GET_MODE (op0
);
19635 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19637 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19639 /* We may be reversing unordered compare to normal compare,
19640 that is not valid in general (we may convert non-trapping
19641 condition to trapping one), however on i386 we currently
19642 emit all comparisons unordered. */
19643 code
= reverse_condition_maybe_unordered (code
);
19647 code
= reverse_condition (code
);
19648 if (compare_code
!= UNKNOWN
)
19649 compare_code
= reverse_condition (compare_code
);
19653 if (compare_code
!= UNKNOWN
)
19655 /* notl op1 (if needed)
19660 For x < 0 (resp. x <= -1) there will be no notl,
19661 so if possible swap the constants to get rid of the
19663 True/false will be -1/0 while code below (store flag
19664 followed by decrement) is 0/-1, so the constants need
19665 to be exchanged once more. */
19667 if (compare_code
== GE
|| !cf
)
19669 code
= reverse_condition (code
);
19674 HOST_WIDE_INT tmp
= cf
;
19679 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19683 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19685 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19687 copy_rtx (out
), 1, OPTAB_DIRECT
);
19690 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19691 gen_int_mode (cf
- ct
, mode
),
19692 copy_rtx (out
), 1, OPTAB_DIRECT
);
19694 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19695 copy_rtx (out
), 1, OPTAB_DIRECT
);
19696 if (!rtx_equal_p (out
, operands
[0]))
19697 emit_move_insn (operands
[0], copy_rtx (out
));
19703 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19705 /* Try a few things more with specific constants and a variable. */
19708 rtx var
, orig_out
, out
, tmp
;
19710 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19713 /* If one of the two operands is an interesting constant, load a
19714 constant with the above and mask it in with a logical operation. */
19716 if (CONST_INT_P (operands
[2]))
19719 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19720 operands
[3] = constm1_rtx
, op
= and_optab
;
19721 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19722 operands
[3] = const0_rtx
, op
= ior_optab
;
19726 else if (CONST_INT_P (operands
[3]))
19729 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19730 operands
[2] = constm1_rtx
, op
= and_optab
;
19731 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19732 operands
[2] = const0_rtx
, op
= ior_optab
;
19739 orig_out
= operands
[0];
19740 tmp
= gen_reg_rtx (mode
);
19743 /* Recurse to get the constant loaded. */
19744 if (ix86_expand_int_movcc (operands
) == 0)
19747 /* Mask in the interesting variable. */
19748 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19750 if (!rtx_equal_p (out
, orig_out
))
19751 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19757 * For comparison with above,
/* [review] Fallback cmov path: force the two value arms into acceptable
   operands, replay the compare sequence, and emit the IF_THEN_ELSE.  */
19767 if (! nonimmediate_operand (operands
[2], mode
))
19768 operands
[2] = force_reg (mode
, operands
[2]);
19769 if (! nonimmediate_operand (operands
[3], mode
))
19770 operands
[3] = force_reg (mode
, operands
[3]);
19772 if (! register_operand (operands
[2], VOIDmode
)
19774 || ! register_operand (operands
[3], VOIDmode
)))
19775 operands
[2] = force_reg (mode
, operands
[2]);
19778 && ! register_operand (operands
[3], VOIDmode
))
19779 operands
[3] = force_reg (mode
, operands
[3]);
19781 emit_insn (compare_seq
);
19782 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19783 gen_rtx_IF_THEN_ELSE (mode
,
19784 compare_op
, operands
[2],
19789 /* Swap, force into registers, or otherwise massage the two operands
19790 to an sse comparison with a mask result. Thus we differ a bit from
19791 ix86_prepare_fp_compare_args which expects to produce a flags result.
19793 The DEST operand exists to help determine whether to commute commutative
19794 operators. The POP0/POP1 operands are updated in place. The new
19795 comparison code is returned, or UNKNOWN if not implementable. */
/* [review] NOTE: partial extraction -- the switch skeleton and most case
   labels/returns are elided (line-number gaps); code text preserved
   byte-for-byte, only comments added.  */
19797 static enum rtx_code
19798 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19799 rtx
*pop0
, rtx
*pop1
)
19807 /* AVX supports all the needed comparisons. */
19810 /* We have no LTGT as an operator. We could implement it with
19811 NE & ORDERED, but this requires an extra temporary. It's
19812 not clear that it's worth it. */
19819 /* These are supported directly. */
19826 /* AVX has 3 operand comparisons, no need to swap anything. */
19829 /* For commutative operators, try to canonicalize the destination
19830 operand to be first in the comparison - this helps reload to
19831 avoid extra moves. */
19832 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19840 /* These are not supported directly before AVX, and furthermore
19841 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19842 comparison operands to transform into something that is
19847 code
= swap_condition (code
);
19851 gcc_unreachable ();
19857 /* Detect conditional moves that exactly match min/max operational
19858 semantics. Note that this is IEEE safe, as long as we don't
19859 interchange the operands.
19861 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19862 and TRUE if the operation is successful and instructions are emitted. */
/* [review] NOTE: partial extraction -- several statements (including the
   is_min computation and early returns) are elided; code text preserved
   byte-for-byte, only comments added.  */
19865 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19866 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19868 enum machine_mode mode
;
19874 else if (code
== UNGE
)
19877 if_true
= if_false
;
/* [review] The cmove matches min/max only when the compared values are
   exactly the selected values, in either order.  */
19883 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19885 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19890 mode
= GET_MODE (dest
);
19892 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19893 but MODE may be a vector mode and thus not appropriate. */
19894 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
/* [review] Strict-IEEE path: wrap in an UNSPEC so the operand order
   (and thus NaN/signed-zero behavior) cannot be altered later.  */
19896 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19899 if_true
= force_reg (mode
, if_true
);
19900 v
= gen_rtvec (2, if_true
, if_false
);
19901 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
/* [review] Relaxed-math path: a plain SMIN/SMAX rtx suffices.  */
19905 code
= is_min
? SMIN
: SMAX
;
19906 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19909 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19913 /* Expand an sse vector comparison. Return the register with the result. */
/* [review] NOTE: partial extraction -- some lines (brace structure, the
   condition at 19927, returns) are elided; code text preserved
   byte-for-byte, only comments added.  */
19916 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19917 rtx op_true
, rtx op_false
)
19919 enum machine_mode mode
= GET_MODE (dest
);
19920 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19923 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19924 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19925 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
/* [review] Use a fresh pseudo when DEST overlaps either select arm, so
   the comparison result does not clobber an input.  */
19928 || reg_overlap_mentioned_p (dest
, op_true
)
19929 || reg_overlap_mentioned_p (dest
, op_false
))
19930 dest
= gen_reg_rtx (mode
);
19932 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
/* [review] When compare and destination modes differ, materialize the
   comparison in CMP_MODE and convert; otherwise emit the SET directly.  */
19933 if (cmp_mode
!= mode
)
19935 x
= force_reg (cmp_mode
, x
);
19936 convert_move (dest
, x
, false);
19939 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19944 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19945 operations. This is used for both scalar and vector conditional moves. */
/* [review] NOTE: partial extraction -- the SSE4.1/AVX mode-switch
   skeleton and several statements are elided (line-number gaps); code
   text preserved byte-for-byte, only comments added.
   Visible structure: special-case all-ones/zero arms (plain move,
   AND, ANDN, IOR), then XOP's native IF_THEN_ELSE, then SSE4.1/AVX
   blendv patterns, finally the generic (true & cmp) | (false & ~cmp)
   three-insn fallback.  */
19948 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19950 enum machine_mode mode
= GET_MODE (dest
);
/* [review] cmp is a full mask: TRUE = all-ones, FALSE = zero lets the
   mask itself be the result.  */
19953 if (vector_all_ones_operand (op_true
, mode
)
19954 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19956 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19958 else if (op_false
== CONST0_RTX (mode
))
19960 op_true
= force_reg (mode
, op_true
);
19961 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19962 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19964 else if (op_true
== CONST0_RTX (mode
))
19966 op_false
= force_reg (mode
, op_false
);
19967 x
= gen_rtx_NOT (mode
, cmp
);
19968 x
= gen_rtx_AND (mode
, x
, op_false
);
19969 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19971 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19973 op_false
= force_reg (mode
, op_false
);
19974 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19975 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
/* [review] XOP provides a native vector conditional move.  */
19977 else if (TARGET_XOP
)
19979 op_true
= force_reg (mode
, op_true
);
19981 if (!nonimmediate_operand (op_false
, mode
))
19982 op_false
= force_reg (mode
, op_false
);
19984 emit_insn (gen_rtx_SET (mode
, dest
,
19985 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
/* [review] Select an SSE4.1/AVX/AVX2 blendv generator by mode; integer
   modes are punned to V16QI/V32QI for pblendvb.  */
19991 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19993 if (!nonimmediate_operand (op_true
, mode
))
19994 op_true
= force_reg (mode
, op_true
);
19996 op_false
= force_reg (mode
, op_false
);
20002 gen
= gen_sse4_1_blendvps
;
20006 gen
= gen_sse4_1_blendvpd
;
20014 gen
= gen_sse4_1_pblendvb
;
20015 dest
= gen_lowpart (V16QImode
, dest
);
20016 op_false
= gen_lowpart (V16QImode
, op_false
);
20017 op_true
= gen_lowpart (V16QImode
, op_true
);
20018 cmp
= gen_lowpart (V16QImode
, cmp
);
20023 gen
= gen_avx_blendvps256
;
20027 gen
= gen_avx_blendvpd256
;
20035 gen
= gen_avx2_pblendvb
;
20036 dest
= gen_lowpart (V32QImode
, dest
);
20037 op_false
= gen_lowpart (V32QImode
, op_false
);
20038 op_true
= gen_lowpart (V32QImode
, op_true
);
20039 cmp
= gen_lowpart (V32QImode
, cmp
);
20047 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
/* [review] Generic fallback: dest = (op_true & cmp) | (op_false & ~cmp).  */
20050 op_true
= force_reg (mode
, op_true
);
20052 t2
= gen_reg_rtx (mode
);
20054 t3
= gen_reg_rtx (mode
);
20058 x
= gen_rtx_AND (mode
, op_true
, cmp
);
20059 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
20061 x
= gen_rtx_NOT (mode
, cmp
);
20062 x
= gen_rtx_AND (mode
, x
, op_false
);
20063 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
20065 x
= gen_rtx_IOR (mode
, t3
, t2
);
20066 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20071 /* Expand a floating-point conditional move. Return true if successful. */
/* [review] NOTE: partial extraction -- several lines (returns, brace
   structure) are elided; code text preserved byte-for-byte, only
   comments added.  */
20074 ix86_expand_fp_movcc (rtx operands
[])
20076 enum machine_mode mode
= GET_MODE (operands
[0]);
20077 enum rtx_code code
= GET_CODE (operands
[1]);
20078 rtx tmp
, compare_op
;
20079 rtx op0
= XEXP (operands
[1], 0);
20080 rtx op1
= XEXP (operands
[1], 1);
/* [review] SSE path: canonicalize the compare, try a min/max match,
   otherwise build a mask compare + logical select.  */
20082 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20084 enum machine_mode cmode
;
20086 /* Since we've no cmove for sse registers, don't force bad register
20087 allocation just to gain access to it. Deny movcc when the
20088 comparison mode doesn't match the move mode. */
20089 cmode
= GET_MODE (op0
);
20090 if (cmode
== VOIDmode
)
20091 cmode
= GET_MODE (op1
);
20095 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20096 if (code
== UNKNOWN
)
20099 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20100 operands
[2], operands
[3]))
20103 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20104 operands
[2], operands
[3]);
20105 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20109 if (GET_MODE (op0
) == TImode
20110 || (GET_MODE (op0
) == DImode
20114 /* The floating point conditional move instructions don't directly
20115 support conditions resulting from a signed integer comparison. */
20117 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20118 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
/* [review] Rewrite an fcmov-incompatible condition as setcc into a
   QImode temp, then compare that temp against zero.  */
20120 tmp
= gen_reg_rtx (QImode
);
20121 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20123 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20126 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20127 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20128 operands
[2], operands
[3])));
20133 /* Expand a floating-point vector conditional move; a vcond operation
20134 rather than a movcc operation. */
/* [review] NOTE: partial extraction -- the switch's case labels and
   several returns are elided; code text preserved byte-for-byte, only
   comments added.
   Visible structure: LTGT/UNEQ are synthesized by combining an
   ORDERED/UNORDERED mask with an NE/EQ mask; other codes first try the
   min/max shortcut and then a single mask compare + logical select.  */
20137 ix86_expand_fp_vcond (rtx operands
[])
20139 enum rtx_code code
= GET_CODE (operands
[3]);
20142 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20143 &operands
[4], &operands
[5]);
20144 if (code
== UNKNOWN
)
20147 switch (GET_CODE (operands
[3]))
20150 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20151 operands
[5], operands
[0], operands
[0]);
20152 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20153 operands
[5], operands
[1], operands
[2]);
20157 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20158 operands
[5], operands
[0], operands
[0]);
20159 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20160 operands
[5], operands
[1], operands
[2]);
20164 gcc_unreachable ();
20166 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20168 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20172 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20173 operands
[5], operands
[1], operands
[2]))
20176 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20177 operands
[1], operands
[2]);
20178 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20182 /* Expand a signed/unsigned integral vector conditional move. */
/* [review] NOTE: partial extraction -- switch skeletons, case labels
   and several statements are elided (line-number gaps); code text
   preserved byte-for-byte, only comments added.
   Visible structure: first a shift-based shortcut for x<0 selects of
   0/1/-1; then operand legitimization; canonicalization of the code to
   EQ/GT/GTU (with XOP/SSE4 special cases); signed-bias or saturating-
   subtract tricks for unsigned compares; finally a mask compare plus
   ix86_expand_sse_movcc, allowing compare and data modes to differ.  */
20185 ix86_expand_int_vcond (rtx operands
[])
20187 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20188 enum machine_mode mode
= GET_MODE (operands
[4]);
20189 enum rtx_code code
= GET_CODE (operands
[3]);
20190 bool negate
= false;
20193 cop0
= operands
[4];
20194 cop1
= operands
[5];
20196 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20197 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20198 if ((code
== LT
|| code
== GE
)
20199 && data_mode
== mode
20200 && cop1
== CONST0_RTX (mode
)
20201 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20202 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20203 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20204 && (GET_MODE_SIZE (data_mode
) == 16
20205 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20207 rtx negop
= operands
[2 - (code
== LT
)];
20208 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
/* [review] x<0 ? 1 : 0 becomes a logical right shift of the sign bit.  */
20209 if (negop
== CONST1_RTX (data_mode
))
20211 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20212 operands
[0], 1, OPTAB_DIRECT
);
20213 if (res
!= operands
[0])
20214 emit_move_insn (operands
[0], res
);
/* [review] x<0 ? -1 : 0 becomes an arithmetic right shift (not for
   DImode inner elements).  */
20217 else if (GET_MODE_INNER (data_mode
) != DImode
20218 && vector_all_ones_operand (negop
, data_mode
))
20220 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20221 operands
[0], 0, OPTAB_DIRECT
);
20222 if (res
!= operands
[0])
20223 emit_move_insn (operands
[0], res
);
20228 if (!nonimmediate_operand (cop1
, mode
))
20229 cop1
= force_reg (mode
, cop1
);
20230 if (!general_operand (operands
[1], data_mode
))
20231 operands
[1] = force_reg (data_mode
, operands
[1]);
20232 if (!general_operand (operands
[2], data_mode
))
20233 operands
[2] = force_reg (data_mode
, operands
[2]);
20235 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20237 && (mode
== V16QImode
|| mode
== V8HImode
20238 || mode
== V4SImode
|| mode
== V2DImode
))
20242 /* Canonicalize the comparison to EQ, GT, GTU. */
20253 code
= reverse_condition (code
);
20259 code
= reverse_condition (code
);
20265 code
= swap_condition (code
);
20266 x
= cop0
, cop0
= cop1
, cop1
= x
;
20270 gcc_unreachable ();
20273 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20274 if (mode
== V2DImode
)
20279 /* SSE4.1 supports EQ. */
20280 if (!TARGET_SSE4_1
)
20286 /* SSE4.2 supports GT/GTU. */
20287 if (!TARGET_SSE4_2
)
20292 gcc_unreachable ();
20296 /* Unsigned parallel compare is not supported by the hardware.
20297 Play some tricks to turn this into a signed comparison
20301 cop0
= force_reg (mode
, cop0
);
/* [review] Bias both operands by the sign-bit mask so an unsigned
   compare can be done with the signed compare instruction.  */
20311 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20315 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20316 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20317 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20318 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20320 gcc_unreachable ();
20322 /* Subtract (-(INT MAX) - 1) from both operands to make
20324 mask
= ix86_build_signbit_mask (mode
, true, false);
20325 t1
= gen_reg_rtx (mode
);
20326 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20328 t2
= gen_reg_rtx (mode
);
20329 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20341 /* Perform a parallel unsigned saturating subtraction. */
20342 x
= gen_reg_rtx (mode
);
20343 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20344 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20347 cop1
= CONST0_RTX (mode
);
20353 gcc_unreachable ();
20358 /* Allow the comparison to be done in one mode, but the movcc to
20359 happen in another mode. */
20360 if (data_mode
== mode
)
20362 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20363 operands
[1+negate
], operands
[2-negate
]);
/* [review] Different but equal-sized modes: compare through a lowpart
   pun and view the mask back in DATA_MODE.  */
20367 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20368 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20370 operands
[1+negate
], operands
[2-negate
]);
20371 x
= gen_lowpart (data_mode
, x
);
/* [review] NEGATE swaps the select arms when canonicalization inverted
   the comparison.  */
20374 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20375 operands
[2-negate
]);
20379 /* Expand a variable vector permutation. */
20382 ix86_expand_vec_perm (rtx operands
[])
20384 rtx target
= operands
[0];
20385 rtx op0
= operands
[1];
20386 rtx op1
= operands
[2];
20387 rtx mask
= operands
[3];
20388 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20389 enum machine_mode mode
= GET_MODE (op0
);
20390 enum machine_mode maskmode
= GET_MODE (mask
);
20392 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20394 /* Number of elements in the vector. */
20395 w
= GET_MODE_NUNITS (mode
);
20396 e
= GET_MODE_UNIT_SIZE (mode
);
20397 gcc_assert (w
<= 32);
20401 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20403 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20404 an constant shuffle operand. With a tiny bit of effort we can
20405 use VPERMD instead. A re-interpretation stall for V4DFmode is
20406 unfortunate but there's no avoiding it.
20407 Similarly for V16HImode we don't have instructions for variable
20408 shuffling, while for V32QImode we can use after preparing suitable
20409 masks vpshufb; vpshufb; vpermq; vpor. */
20411 if (mode
== V16HImode
)
20413 maskmode
= mode
= V32QImode
;
20419 maskmode
= mode
= V8SImode
;
20423 t1
= gen_reg_rtx (maskmode
);
20425 /* Replicate the low bits of the V4DImode mask into V8SImode:
20427 t1 = { A A B B C C D D }. */
20428 for (i
= 0; i
< w
/ 2; ++i
)
20429 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20430 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20431 vt
= force_reg (maskmode
, vt
);
20432 mask
= gen_lowpart (maskmode
, mask
);
20433 if (maskmode
== V8SImode
)
20434 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20436 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20438 /* Multiply the shuffle indicies by two. */
20439 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20442 /* Add one to the odd shuffle indicies:
20443 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20444 for (i
= 0; i
< w
/ 2; ++i
)
20446 vec
[i
* 2] = const0_rtx
;
20447 vec
[i
* 2 + 1] = const1_rtx
;
20449 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20450 vt
= force_const_mem (maskmode
, vt
);
20451 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20454 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20455 operands
[3] = mask
= t1
;
20456 target
= gen_lowpart (mode
, target
);
20457 op0
= gen_lowpart (mode
, op0
);
20458 op1
= gen_lowpart (mode
, op1
);
20464 /* The VPERMD and VPERMPS instructions already properly ignore
20465 the high bits of the shuffle elements. No need for us to
20466 perform an AND ourselves. */
20467 if (one_operand_shuffle
)
20468 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20471 t1
= gen_reg_rtx (V8SImode
);
20472 t2
= gen_reg_rtx (V8SImode
);
20473 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20474 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20480 mask
= gen_lowpart (V8SFmode
, mask
);
20481 if (one_operand_shuffle
)
20482 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20485 t1
= gen_reg_rtx (V8SFmode
);
20486 t2
= gen_reg_rtx (V8SFmode
);
20487 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20488 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20494 /* By combining the two 128-bit input vectors into one 256-bit
20495 input vector, we can use VPERMD and VPERMPS for the full
20496 two-operand shuffle. */
20497 t1
= gen_reg_rtx (V8SImode
);
20498 t2
= gen_reg_rtx (V8SImode
);
20499 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20500 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20501 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20502 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20506 t1
= gen_reg_rtx (V8SFmode
);
20507 t2
= gen_reg_rtx (V8SImode
);
20508 mask
= gen_lowpart (V4SImode
, mask
);
20509 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20510 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20511 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20512 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20516 t1
= gen_reg_rtx (V32QImode
);
20517 t2
= gen_reg_rtx (V32QImode
);
20518 t3
= gen_reg_rtx (V32QImode
);
20519 vt2
= GEN_INT (128);
20520 for (i
= 0; i
< 32; i
++)
20522 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20523 vt
= force_reg (V32QImode
, vt
);
20524 for (i
= 0; i
< 32; i
++)
20525 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20526 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20527 vt2
= force_reg (V32QImode
, vt2
);
20528 /* From mask create two adjusted masks, which contain the same
20529 bits as mask in the low 7 bits of each vector element.
20530 The first mask will have the most significant bit clear
20531 if it requests element from the same 128-bit lane
20532 and MSB set if it requests element from the other 128-bit lane.
20533 The second mask will have the opposite values of the MSB,
20534 and additionally will have its 128-bit lanes swapped.
20535 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20536 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20537 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20538 stands for other 12 bytes. */
20539 /* The bit whether element is from the same lane or the other
20540 lane is bit 4, so shift it up by 3 to the MSB position. */
20541 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20542 gen_lowpart (V4DImode
, mask
),
20544 /* Clear MSB bits from the mask just in case it had them set. */
20545 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20546 /* After this t1 will have MSB set for elements from other lane. */
20547 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20548 /* Clear bits other than MSB. */
20549 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20550 /* Or in the lower bits from mask into t3. */
20551 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20552 /* And invert MSB bits in t1, so MSB is set for elements from the same
20554 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20555 /* Swap 128-bit lanes in t3. */
20556 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20557 gen_lowpart (V4DImode
, t3
),
20558 const2_rtx
, GEN_INT (3),
20559 const0_rtx
, const1_rtx
));
20560 /* And or in the lower bits from mask into t1. */
20561 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20562 if (one_operand_shuffle
)
20564 /* Each of these shuffles will put 0s in places where
20565 element from the other 128-bit lane is needed, otherwise
20566 will shuffle in the requested value. */
20567 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20568 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20569 /* For t3 the 128-bit lanes are swapped again. */
20570 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20571 gen_lowpart (V4DImode
, t3
),
20572 const2_rtx
, GEN_INT (3),
20573 const0_rtx
, const1_rtx
));
20574 /* And oring both together leads to the result. */
20575 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20579 t4
= gen_reg_rtx (V32QImode
);
20580 /* Similarly to the above one_operand_shuffle code,
20581 just for repeated twice for each operand. merge_two:
20582 code will merge the two results together. */
20583 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20584 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20585 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20586 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20587 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20588 gen_lowpart (V4DImode
, t4
),
20589 const2_rtx
, GEN_INT (3),
20590 const0_rtx
, const1_rtx
));
20591 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20592 gen_lowpart (V4DImode
, t3
),
20593 const2_rtx
, GEN_INT (3),
20594 const0_rtx
, const1_rtx
));
20595 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20596 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20602 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20609 /* The XOP VPPERM insn supports three inputs. By ignoring the
20610 one_operand_shuffle special case, we avoid creating another
20611 set of constant vectors in memory. */
20612 one_operand_shuffle
= false;
20614 /* mask = mask & {2*w-1, ...} */
20615 vt
= GEN_INT (2*w
- 1);
20619 /* mask = mask & {w-1, ...} */
20620 vt
= GEN_INT (w
- 1);
20623 for (i
= 0; i
< w
; i
++)
20625 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20626 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20627 NULL_RTX
, 0, OPTAB_DIRECT
);
20629 /* For non-QImode operations, convert the word permutation control
20630 into a byte permutation control. */
20631 if (mode
!= V16QImode
)
20633 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20634 GEN_INT (exact_log2 (e
)),
20635 NULL_RTX
, 0, OPTAB_DIRECT
);
20637 /* Convert mask to vector of chars. */
20638 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20640 /* Replicate each of the input bytes into byte positions:
20641 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20642 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20643 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20644 for (i
= 0; i
< 16; ++i
)
20645 vec
[i
] = GEN_INT (i
/e
* e
);
20646 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20647 vt
= force_const_mem (V16QImode
, vt
);
20649 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20651 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20653 /* Convert it into the byte positions by doing
20654 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20655 for (i
= 0; i
< 16; ++i
)
20656 vec
[i
] = GEN_INT (i
% e
);
20657 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20658 vt
= force_const_mem (V16QImode
, vt
);
20659 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20662 /* The actual shuffle operations all operate on V16QImode. */
20663 op0
= gen_lowpart (V16QImode
, op0
);
20664 op1
= gen_lowpart (V16QImode
, op1
);
20665 target
= gen_lowpart (V16QImode
, target
);
20669 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20671 else if (one_operand_shuffle
)
20673 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20680 /* Shuffle the two input vectors independently. */
20681 t1
= gen_reg_rtx (V16QImode
);
20682 t2
= gen_reg_rtx (V16QImode
);
20683 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20684 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20687 /* Then merge them together. The key is whether any given control
20688 element contained a bit set that indicates the second word. */
20689 mask
= operands
[3];
20691 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20693 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20694 more shuffle to convert the V2DI input mask into a V4SI
20695 input mask. At which point the masking that expand_int_vcond
20696 will work as desired. */
20697 rtx t3
= gen_reg_rtx (V4SImode
);
20698 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20699 const0_rtx
, const0_rtx
,
20700 const2_rtx
, const2_rtx
));
20702 maskmode
= V4SImode
;
20706 for (i
= 0; i
< w
; i
++)
20708 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20709 vt
= force_reg (maskmode
, vt
);
20710 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20711 NULL_RTX
, 0, OPTAB_DIRECT
);
20713 xops
[0] = gen_lowpart (mode
, operands
[0]);
20714 xops
[1] = gen_lowpart (mode
, t2
);
20715 xops
[2] = gen_lowpart (mode
, t1
);
20716 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20719 ok
= ix86_expand_int_vcond (xops
);
20724 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20725 true if we should do zero extension, else sign extension. HIGH_P is
20726 true if we want the N/2 high elements, else the low elements. */
20729 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20731 enum machine_mode imode
= GET_MODE (src
);
20736 rtx (*unpack
)(rtx
, rtx
);
20737 rtx (*extract
)(rtx
, rtx
) = NULL
;
20738 enum machine_mode halfmode
= BLKmode
;
20744 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20746 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20747 halfmode
= V16QImode
;
20749 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20753 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20755 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20756 halfmode
= V8HImode
;
20758 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20762 unpack
= gen_avx2_zero_extendv4siv4di2
;
20764 unpack
= gen_avx2_sign_extendv4siv4di2
;
20765 halfmode
= V4SImode
;
20767 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
20771 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20773 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20777 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20779 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20783 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20785 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20788 gcc_unreachable ();
20791 if (GET_MODE_SIZE (imode
) == 32)
20793 tmp
= gen_reg_rtx (halfmode
);
20794 emit_insn (extract (tmp
, src
));
20798 /* Shift higher 8 bytes to lower 8 bytes. */
20799 tmp
= gen_reg_rtx (imode
);
20800 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20801 gen_lowpart (V1TImode
, src
),
20807 emit_insn (unpack (dest
, tmp
));
20811 rtx (*unpack
)(rtx
, rtx
, rtx
);
20817 unpack
= gen_vec_interleave_highv16qi
;
20819 unpack
= gen_vec_interleave_lowv16qi
;
20823 unpack
= gen_vec_interleave_highv8hi
;
20825 unpack
= gen_vec_interleave_lowv8hi
;
20829 unpack
= gen_vec_interleave_highv4si
;
20831 unpack
= gen_vec_interleave_lowv4si
;
20834 gcc_unreachable ();
20838 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20840 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20841 src
, pc_rtx
, pc_rtx
);
20843 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20847 /* Expand conditional increment or decrement using adc/sbb instructions.
20848 The default case using setcc followed by the conditional move can be
20849 done by generic code. */
20851 ix86_expand_int_addcc (rtx operands
[])
20853 enum rtx_code code
= GET_CODE (operands
[1]);
20855 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20857 rtx val
= const0_rtx
;
20858 bool fpcmp
= false;
20859 enum machine_mode mode
;
20860 rtx op0
= XEXP (operands
[1], 0);
20861 rtx op1
= XEXP (operands
[1], 1);
20863 if (operands
[3] != const1_rtx
20864 && operands
[3] != constm1_rtx
)
20866 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20868 code
= GET_CODE (compare_op
);
20870 flags
= XEXP (compare_op
, 0);
20872 if (GET_MODE (flags
) == CCFPmode
20873 || GET_MODE (flags
) == CCFPUmode
)
20876 code
= ix86_fp_compare_code_to_integer (code
);
20883 PUT_CODE (compare_op
,
20884 reverse_condition_maybe_unordered
20885 (GET_CODE (compare_op
)));
20887 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20890 mode
= GET_MODE (operands
[0]);
20892 /* Construct either adc or sbb insn. */
20893 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
20898 insn
= gen_subqi3_carry
;
20901 insn
= gen_subhi3_carry
;
20904 insn
= gen_subsi3_carry
;
20907 insn
= gen_subdi3_carry
;
20910 gcc_unreachable ();
20918 insn
= gen_addqi3_carry
;
20921 insn
= gen_addhi3_carry
;
20924 insn
= gen_addsi3_carry
;
20927 insn
= gen_adddi3_carry
;
20930 gcc_unreachable ();
20933 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20939 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20940 but works for floating point parameters and nonoffsetable memories.
20941 For pushes, it returns just stack offsets; the values will be saved
20942 in the right order. Maximally three parts are generated. */
20945 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
20950 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20952 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20954 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20955 gcc_assert (size
>= 2 && size
<= 4);
20957 /* Optimize constant pool reference to immediates. This is used by fp
20958 moves, that force all constants to memory to allow combining. */
20959 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20961 rtx tmp
= maybe_get_pool_constant (operand
);
20966 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20968 /* The only non-offsetable memories we handle are pushes. */
20969 int ok
= push_operand (operand
, VOIDmode
);
20973 operand
= copy_rtx (operand
);
20974 PUT_MODE (operand
, word_mode
);
20975 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20979 if (GET_CODE (operand
) == CONST_VECTOR
)
20981 enum machine_mode imode
= int_mode_for_mode (mode
);
20982 /* Caution: if we looked through a constant pool memory above,
20983 the operand may actually have a different mode now. That's
20984 ok, since we want to pun this all the way back to an integer. */
20985 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20986 gcc_assert (operand
!= NULL
);
20992 if (mode
== DImode
)
20993 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20998 if (REG_P (operand
))
21000 gcc_assert (reload_completed
);
21001 for (i
= 0; i
< size
; i
++)
21002 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
21004 else if (offsettable_memref_p (operand
))
21006 operand
= adjust_address (operand
, SImode
, 0);
21007 parts
[0] = operand
;
21008 for (i
= 1; i
< size
; i
++)
21009 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
21011 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21016 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21020 real_to_target (l
, &r
, mode
);
21021 parts
[3] = gen_int_mode (l
[3], SImode
);
21022 parts
[2] = gen_int_mode (l
[2], SImode
);
21025 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21026 long double may not be 80-bit. */
21027 real_to_target (l
, &r
, mode
);
21028 parts
[2] = gen_int_mode (l
[2], SImode
);
21031 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
21034 gcc_unreachable ();
21036 parts
[1] = gen_int_mode (l
[1], SImode
);
21037 parts
[0] = gen_int_mode (l
[0], SImode
);
21040 gcc_unreachable ();
21045 if (mode
== TImode
)
21046 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
21047 if (mode
== XFmode
|| mode
== TFmode
)
21049 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
21050 if (REG_P (operand
))
21052 gcc_assert (reload_completed
);
21053 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
21054 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
21056 else if (offsettable_memref_p (operand
))
21058 operand
= adjust_address (operand
, DImode
, 0);
21059 parts
[0] = operand
;
21060 parts
[1] = adjust_address (operand
, upper_mode
, 8);
21062 else if (GET_CODE (operand
) == CONST_DOUBLE
)
21067 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
21068 real_to_target (l
, &r
, mode
);
21070 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21071 if (HOST_BITS_PER_WIDE_INT
>= 64)
21074 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21075 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21078 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21080 if (upper_mode
== SImode
)
21081 parts
[1] = gen_int_mode (l
[2], SImode
);
21082 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21085 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21086 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21089 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21092 gcc_unreachable ();
21099 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21100 Return false when normal moves are needed; true when all required
21101 insns have been emitted. Operands 2-4 contain the input values
21102 int the correct order; operands 5-7 contain the output values. */
21105 ix86_split_long_move (rtx operands
[])
21110 int collisions
= 0;
21111 enum machine_mode mode
= GET_MODE (operands
[0]);
21112 bool collisionparts
[4];
21114 /* The DFmode expanders may ask us to move double.
21115 For 64bit target this is single move. By hiding the fact
21116 here we simplify i386.md splitters. */
21117 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21119 /* Optimize constant pool reference to immediates. This is used by
21120 fp moves, that force all constants to memory to allow combining. */
21122 if (MEM_P (operands
[1])
21123 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21124 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21125 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21126 if (push_operand (operands
[0], VOIDmode
))
21128 operands
[0] = copy_rtx (operands
[0]);
21129 PUT_MODE (operands
[0], word_mode
);
21132 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21133 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21134 emit_move_insn (operands
[0], operands
[1]);
21138 /* The only non-offsettable memory we handle is push. */
21139 if (push_operand (operands
[0], VOIDmode
))
21142 gcc_assert (!MEM_P (operands
[0])
21143 || offsettable_memref_p (operands
[0]));
21145 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21146 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21148 /* When emitting push, take care for source operands on the stack. */
21149 if (push
&& MEM_P (operands
[1])
21150 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21152 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21154 /* Compensate for the stack decrement by 4. */
21155 if (!TARGET_64BIT
&& nparts
== 3
21156 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21157 src_base
= plus_constant (Pmode
, src_base
, 4);
21159 /* src_base refers to the stack pointer and is
21160 automatically decreased by emitted push. */
21161 for (i
= 0; i
< nparts
; i
++)
21162 part
[1][i
] = change_address (part
[1][i
],
21163 GET_MODE (part
[1][i
]), src_base
);
21166 /* We need to do copy in the right order in case an address register
21167 of the source overlaps the destination. */
21168 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21172 for (i
= 0; i
< nparts
; i
++)
21175 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21176 if (collisionparts
[i
])
21180 /* Collision in the middle part can be handled by reordering. */
21181 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21183 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21184 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21186 else if (collisions
== 1
21188 && (collisionparts
[1] || collisionparts
[2]))
21190 if (collisionparts
[1])
21192 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21193 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21197 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21198 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21202 /* If there are more collisions, we can't handle it by reordering.
21203 Do an lea to the last part and use only one colliding move. */
21204 else if (collisions
> 1)
21210 base
= part
[0][nparts
- 1];
21212 /* Handle the case when the last part isn't valid for lea.
21213 Happens in 64-bit mode storing the 12-byte XFmode. */
21214 if (GET_MODE (base
) != Pmode
)
21215 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21217 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21218 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21219 for (i
= 1; i
< nparts
; i
++)
21221 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21222 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
21233 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21234 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21235 stack_pointer_rtx
, GEN_INT (-4)));
21236 emit_move_insn (part
[0][2], part
[1][2]);
21238 else if (nparts
== 4)
21240 emit_move_insn (part
[0][3], part
[1][3]);
21241 emit_move_insn (part
[0][2], part
[1][2]);
21246 /* In 64bit mode we don't have 32bit push available. In case this is
21247 register, it is OK - we will just use larger counterpart. We also
21248 retype memory - these comes from attempt to avoid REX prefix on
21249 moving of second half of TFmode value. */
21250 if (GET_MODE (part
[1][1]) == SImode
)
21252 switch (GET_CODE (part
[1][1]))
21255 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21259 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21263 gcc_unreachable ();
21266 if (GET_MODE (part
[1][0]) == SImode
)
21267 part
[1][0] = part
[1][1];
21270 emit_move_insn (part
[0][1], part
[1][1]);
21271 emit_move_insn (part
[0][0], part
[1][0]);
21275 /* Choose correct order to not overwrite the source before it is copied. */
21276 if ((REG_P (part
[0][0])
21277 && REG_P (part
[1][1])
21278 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21280 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21282 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21284 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
21286 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21288 operands
[2 + i
] = part
[0][j
];
21289 operands
[6 + i
] = part
[1][j
];
21294 for (i
= 0; i
< nparts
; i
++)
21296 operands
[2 + i
] = part
[0][i
];
21297 operands
[6 + i
] = part
[1][i
];
21301 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21302 if (optimize_insn_for_size_p ())
21304 for (j
= 0; j
< nparts
- 1; j
++)
21305 if (CONST_INT_P (operands
[6 + j
])
21306 && operands
[6 + j
] != const0_rtx
21307 && REG_P (operands
[2 + j
]))
21308 for (i
= j
; i
< nparts
- 1; i
++)
21309 if (CONST_INT_P (operands
[7 + i
])
21310 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21311 operands
[7 + i
] = operands
[2 + j
];
21314 for (i
= 0; i
< nparts
; i
++)
21315 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21320 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21321 left shift by a constant, either using a single shift or
21322 a sequence of add instructions. */
21325 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21327 rtx (*insn
)(rtx
, rtx
, rtx
);
21330 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21331 && !optimize_insn_for_size_p ()))
21333 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21334 while (count
-- > 0)
21335 emit_insn (insn (operand
, operand
, operand
));
21339 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21340 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
21345 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21347 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21348 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21349 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21351 rtx low
[2], high
[2];
21354 if (CONST_INT_P (operands
[2]))
21356 split_double_mode (mode
, operands
, 2, low
, high
);
21357 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21359 if (count
>= half_width
)
21361 emit_move_insn (high
[0], low
[1]);
21362 emit_move_insn (low
[0], const0_rtx
);
21364 if (count
> half_width
)
21365 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21369 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21371 if (!rtx_equal_p (operands
[0], operands
[1]))
21372 emit_move_insn (operands
[0], operands
[1]);
21374 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21375 ix86_expand_ashl_const (low
[0], count
, mode
);
21380 split_double_mode (mode
, operands
, 1, low
, high
);
21382 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21384 if (operands
[1] == const1_rtx
)
21386 /* Assuming we've chosen a QImode capable registers, then 1 << N
21387 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21388 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21390 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21392 ix86_expand_clear (low
[0]);
21393 ix86_expand_clear (high
[0]);
21394 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
21396 d
= gen_lowpart (QImode
, low
[0]);
21397 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21398 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21399 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21401 d
= gen_lowpart (QImode
, high
[0]);
21402 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21403 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21404 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21407 /* Otherwise, we can get the same results by manually performing
21408 a bit extract operation on bit 5/6, and then performing the two
21409 shifts. The two methods of getting 0/1 into low/high are exactly
21410 the same size. Avoiding the shift in the bit extract case helps
21411 pentium4 a bit; no one else seems to care much either way. */
21414 enum machine_mode half_mode
;
21415 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21416 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21417 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21418 HOST_WIDE_INT bits
;
21421 if (mode
== DImode
)
21423 half_mode
= SImode
;
21424 gen_lshr3
= gen_lshrsi3
;
21425 gen_and3
= gen_andsi3
;
21426 gen_xor3
= gen_xorsi3
;
21431 half_mode
= DImode
;
21432 gen_lshr3
= gen_lshrdi3
;
21433 gen_and3
= gen_anddi3
;
21434 gen_xor3
= gen_xordi3
;
21438 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21439 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21441 x
= gen_lowpart (half_mode
, operands
[2]);
21442 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21444 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21445 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21446 emit_move_insn (low
[0], high
[0]);
21447 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
21450 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21451 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
21455 if (operands
[1] == constm1_rtx
)
21457 /* For -1 << N, we can avoid the shld instruction, because we
21458 know that we're shifting 0...31/63 ones into a -1. */
21459 emit_move_insn (low
[0], constm1_rtx
);
21460 if (optimize_insn_for_size_p ())
21461 emit_move_insn (high
[0], low
[0]);
21463 emit_move_insn (high
[0], constm1_rtx
);
21467 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21469 if (!rtx_equal_p (operands
[0], operands
[1]))
21470 emit_move_insn (operands
[0], operands
[1]);
21472 split_double_mode (mode
, operands
, 1, low
, high
);
21473 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21476 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21478 if (TARGET_CMOVE
&& scratch
)
21480 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21481 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21483 ix86_expand_clear (scratch
);
21484 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21488 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21489 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21491 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
21496 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21498 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21499 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21500 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21501 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21503 rtx low
[2], high
[2];
21506 if (CONST_INT_P (operands
[2]))
21508 split_double_mode (mode
, operands
, 2, low
, high
);
21509 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21511 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21513 emit_move_insn (high
[0], high
[1]);
21514 emit_insn (gen_ashr3 (high
[0], high
[0],
21515 GEN_INT (half_width
- 1)));
21516 emit_move_insn (low
[0], high
[0]);
21519 else if (count
>= half_width
)
21521 emit_move_insn (low
[0], high
[1]);
21522 emit_move_insn (high
[0], low
[0]);
21523 emit_insn (gen_ashr3 (high
[0], high
[0],
21524 GEN_INT (half_width
- 1)));
21526 if (count
> half_width
)
21527 emit_insn (gen_ashr3 (low
[0], low
[0],
21528 GEN_INT (count
- half_width
)));
21532 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21534 if (!rtx_equal_p (operands
[0], operands
[1]))
21535 emit_move_insn (operands
[0], operands
[1]);
21537 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21538 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
21543 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21545 if (!rtx_equal_p (operands
[0], operands
[1]))
21546 emit_move_insn (operands
[0], operands
[1]);
21548 split_double_mode (mode
, operands
, 1, low
, high
);
21550 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21551 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21553 if (TARGET_CMOVE
&& scratch
)
21555 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21556 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21558 emit_move_insn (scratch
, high
[0]);
21559 emit_insn (gen_ashr3 (scratch
, scratch
,
21560 GEN_INT (half_width
- 1)));
21561 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21566 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21567 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21569 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
21575 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21577 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21578 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21579 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21580 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21582 rtx low
[2], high
[2];
21585 if (CONST_INT_P (operands
[2]))
21587 split_double_mode (mode
, operands
, 2, low
, high
);
21588 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21590 if (count
>= half_width
)
21592 emit_move_insn (low
[0], high
[1]);
21593 ix86_expand_clear (high
[0]);
21595 if (count
> half_width
)
21596 emit_insn (gen_lshr3 (low
[0], low
[0],
21597 GEN_INT (count
- half_width
)));
21601 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21603 if (!rtx_equal_p (operands
[0], operands
[1]))
21604 emit_move_insn (operands
[0], operands
[1]);
21606 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21607 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
21612 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21614 if (!rtx_equal_p (operands
[0], operands
[1]))
21615 emit_move_insn (operands
[0], operands
[1]);
21617 split_double_mode (mode
, operands
, 1, low
, high
);
21619 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21620 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21622 if (TARGET_CMOVE
&& scratch
)
21624 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21625 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21627 ix86_expand_clear (scratch
);
21628 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
21633 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21634 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21636 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21641 /* Predict just emitted jump instruction to be taken with probability PROB. */
21643 predict_jump (int prob
)
21645 rtx insn
= get_last_insn ();
21646 gcc_assert (JUMP_P (insn
));
21647 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21650 /* Helper function for the string operations below. Test VARIABLE whether
21651 it is aligned to VALUE bytes. If true, jump to the label. */
21653 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21655 rtx label
= gen_label_rtx ();
21656 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21657 if (GET_MODE (variable
) == DImode
)
21658 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21660 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21661 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21664 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21666 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21670 /* Adjust COUNTER by the VALUE. */
21672 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21674 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21675 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21677 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21680 /* Zero extend possibly SImode EXP to Pmode register. */
21682 ix86_zero_extend_to_Pmode (rtx exp
)
21684 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
21687 /* Divide COUNTREG by SCALE. */
21689 scale_counter (rtx countreg
, int scale
)
21695 if (CONST_INT_P (countreg
))
21696 return GEN_INT (INTVAL (countreg
) / scale
);
21697 gcc_assert (REG_P (countreg
));
21699 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21700 GEN_INT (exact_log2 (scale
)),
21701 NULL
, 1, OPTAB_DIRECT
);
21705 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21706 DImode for constant loop counts. */
21708 static enum machine_mode
21709 counter_mode (rtx count_exp
)
21711 if (GET_MODE (count_exp
) != VOIDmode
)
21712 return GET_MODE (count_exp
);
21713 if (!CONST_INT_P (count_exp
))
21715 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21720 /* When SRCPTR is non-NULL, output simple loop to move memory
21721 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21722 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21723 equivalent loop to set memory by VALUE (supposed to be in MODE).
21725 The size is rounded down to whole number of chunk size moved at once.
21726 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
21730 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21731 rtx destptr
, rtx srcptr
, rtx value
,
21732 rtx count
, enum machine_mode mode
, int unroll
,
21735 rtx out_label
, top_label
, iter
, tmp
;
21736 enum machine_mode iter_mode
= counter_mode (count
);
21737 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21738 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21744 top_label
= gen_label_rtx ();
21745 out_label
= gen_label_rtx ();
21746 iter
= gen_reg_rtx (iter_mode
);
21748 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21749 NULL
, 1, OPTAB_DIRECT
);
21750 /* Those two should combine. */
21751 if (piece_size
== const1_rtx
)
21753 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21755 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21757 emit_move_insn (iter
, const0_rtx
);
21759 emit_label (top_label
);
21761 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21762 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21763 destmem
= change_address (destmem
, mode
, x_addr
);
21767 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21768 srcmem
= change_address (srcmem
, mode
, y_addr
);
21770 /* When unrolling for chips that reorder memory reads and writes,
21771 we can save registers by using single temporary.
21772 Also using 4 temporaries is overkill in 32bit mode. */
21773 if (!TARGET_64BIT
&& 0)
21775 for (i
= 0; i
< unroll
; i
++)
21780 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21782 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21784 emit_move_insn (destmem
, srcmem
);
21790 gcc_assert (unroll
<= 4);
21791 for (i
= 0; i
< unroll
; i
++)
21793 tmpreg
[i
] = gen_reg_rtx (mode
);
21797 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21799 emit_move_insn (tmpreg
[i
], srcmem
);
21801 for (i
= 0; i
< unroll
; i
++)
21806 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21808 emit_move_insn (destmem
, tmpreg
[i
]);
21813 for (i
= 0; i
< unroll
; i
++)
21817 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21818 emit_move_insn (destmem
, value
);
21821 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21822 true, OPTAB_LIB_WIDEN
);
21824 emit_move_insn (iter
, tmp
);
21826 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
21828 if (expected_size
!= -1)
21830 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21831 if (expected_size
== 0)
21833 else if (expected_size
> REG_BR_PROB_BASE
)
21834 predict_jump (REG_BR_PROB_BASE
- 1);
21836 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21839 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
21840 iter
= ix86_zero_extend_to_Pmode (iter
);
21841 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21842 true, OPTAB_LIB_WIDEN
);
21843 if (tmp
!= destptr
)
21844 emit_move_insn (destptr
, tmp
);
21847 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21848 true, OPTAB_LIB_WIDEN
);
21850 emit_move_insn (srcptr
, tmp
);
21852 emit_label (out_label
);
21855 /* Output "rep; mov" instruction.
21856 Arguments have same meaning as for previous function */
21858 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21859 rtx destptr
, rtx srcptr
,
21861 enum machine_mode mode
)
21866 HOST_WIDE_INT rounded_count
;
21868 /* If the size is known, it is shorter to use rep movs. */
21869 if (mode
== QImode
&& CONST_INT_P (count
)
21870 && !(INTVAL (count
) & 3))
21873 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21874 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21875 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21876 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21877 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21878 if (mode
!= QImode
)
21880 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21881 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21882 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21883 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21884 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21885 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21889 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21890 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21892 if (CONST_INT_P (count
))
21894 rounded_count
= (INTVAL (count
)
21895 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21896 destmem
= shallow_copy_rtx (destmem
);
21897 srcmem
= shallow_copy_rtx (srcmem
);
21898 set_mem_size (destmem
, rounded_count
);
21899 set_mem_size (srcmem
, rounded_count
);
21903 if (MEM_SIZE_KNOWN_P (destmem
))
21904 clear_mem_size (destmem
);
21905 if (MEM_SIZE_KNOWN_P (srcmem
))
21906 clear_mem_size (srcmem
);
21908 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21912 /* Output "rep; stos" instruction.
21913 Arguments have same meaning as for previous function */
21915 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21916 rtx count
, enum machine_mode mode
,
21921 HOST_WIDE_INT rounded_count
;
21923 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21924 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21925 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21926 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21927 if (mode
!= QImode
)
21929 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21930 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21931 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21934 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21935 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21937 rounded_count
= (INTVAL (count
)
21938 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21939 destmem
= shallow_copy_rtx (destmem
);
21940 set_mem_size (destmem
, rounded_count
);
21942 else if (MEM_SIZE_KNOWN_P (destmem
))
21943 clear_mem_size (destmem
);
21944 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21948 emit_strmov (rtx destmem
, rtx srcmem
,
21949 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21951 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21952 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21953 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21956 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21958 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21959 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21962 if (CONST_INT_P (count
))
21964 HOST_WIDE_INT countval
= INTVAL (count
);
21967 if ((countval
& 0x10) && max_size
> 16)
21971 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21972 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21975 gcc_unreachable ();
21978 if ((countval
& 0x08) && max_size
> 8)
21981 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21984 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21985 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21989 if ((countval
& 0x04) && max_size
> 4)
21991 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21994 if ((countval
& 0x02) && max_size
> 2)
21996 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21999 if ((countval
& 0x01) && max_size
> 1)
22001 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
22008 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
22009 count
, 1, OPTAB_DIRECT
);
22010 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
22011 count
, QImode
, 1, 4);
22015 /* When there are stringops, we can cheaply increase dest and src pointers.
22016 Otherwise we save code size by maintaining offset (zero is readily
22017 available from preceding rep operation) and using x86 addressing modes.
22019 if (TARGET_SINGLE_STRINGOP
)
22023 rtx label
= ix86_expand_aligntest (count
, 4, true);
22024 src
= change_address (srcmem
, SImode
, srcptr
);
22025 dest
= change_address (destmem
, SImode
, destptr
);
22026 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22027 emit_label (label
);
22028 LABEL_NUSES (label
) = 1;
22032 rtx label
= ix86_expand_aligntest (count
, 2, true);
22033 src
= change_address (srcmem
, HImode
, srcptr
);
22034 dest
= change_address (destmem
, HImode
, destptr
);
22035 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22036 emit_label (label
);
22037 LABEL_NUSES (label
) = 1;
22041 rtx label
= ix86_expand_aligntest (count
, 1, true);
22042 src
= change_address (srcmem
, QImode
, srcptr
);
22043 dest
= change_address (destmem
, QImode
, destptr
);
22044 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
22045 emit_label (label
);
22046 LABEL_NUSES (label
) = 1;
22051 rtx offset
= force_reg (Pmode
, const0_rtx
);
22056 rtx label
= ix86_expand_aligntest (count
, 4, true);
22057 src
= change_address (srcmem
, SImode
, srcptr
);
22058 dest
= change_address (destmem
, SImode
, destptr
);
22059 emit_move_insn (dest
, src
);
22060 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
22061 true, OPTAB_LIB_WIDEN
);
22063 emit_move_insn (offset
, tmp
);
22064 emit_label (label
);
22065 LABEL_NUSES (label
) = 1;
22069 rtx label
= ix86_expand_aligntest (count
, 2, true);
22070 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22071 src
= change_address (srcmem
, HImode
, tmp
);
22072 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22073 dest
= change_address (destmem
, HImode
, tmp
);
22074 emit_move_insn (dest
, src
);
22075 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22076 true, OPTAB_LIB_WIDEN
);
22078 emit_move_insn (offset
, tmp
);
22079 emit_label (label
);
22080 LABEL_NUSES (label
) = 1;
22084 rtx label
= ix86_expand_aligntest (count
, 1, true);
22085 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22086 src
= change_address (srcmem
, QImode
, tmp
);
22087 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22088 dest
= change_address (destmem
, QImode
, tmp
);
22089 emit_move_insn (dest
, src
);
22090 emit_label (label
);
22091 LABEL_NUSES (label
) = 1;
22096 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22098 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22099 rtx count
, int max_size
)
22102 expand_simple_binop (counter_mode (count
), AND
, count
,
22103 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22104 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22105 gen_lowpart (QImode
, value
), count
, QImode
,
22109 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22111 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22115 if (CONST_INT_P (count
))
22117 HOST_WIDE_INT countval
= INTVAL (count
);
22120 if ((countval
& 0x10) && max_size
> 16)
22124 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22125 emit_insn (gen_strset (destptr
, dest
, value
));
22126 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22127 emit_insn (gen_strset (destptr
, dest
, value
));
22130 gcc_unreachable ();
22133 if ((countval
& 0x08) && max_size
> 8)
22137 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22138 emit_insn (gen_strset (destptr
, dest
, value
));
22142 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22143 emit_insn (gen_strset (destptr
, dest
, value
));
22144 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22145 emit_insn (gen_strset (destptr
, dest
, value
));
22149 if ((countval
& 0x04) && max_size
> 4)
22151 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22152 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22155 if ((countval
& 0x02) && max_size
> 2)
22157 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22158 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22161 if ((countval
& 0x01) && max_size
> 1)
22163 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22164 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22171 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22176 rtx label
= ix86_expand_aligntest (count
, 16, true);
22179 dest
= change_address (destmem
, DImode
, destptr
);
22180 emit_insn (gen_strset (destptr
, dest
, value
));
22181 emit_insn (gen_strset (destptr
, dest
, value
));
22185 dest
= change_address (destmem
, SImode
, destptr
);
22186 emit_insn (gen_strset (destptr
, dest
, value
));
22187 emit_insn (gen_strset (destptr
, dest
, value
));
22188 emit_insn (gen_strset (destptr
, dest
, value
));
22189 emit_insn (gen_strset (destptr
, dest
, value
));
22191 emit_label (label
);
22192 LABEL_NUSES (label
) = 1;
22196 rtx label
= ix86_expand_aligntest (count
, 8, true);
22199 dest
= change_address (destmem
, DImode
, destptr
);
22200 emit_insn (gen_strset (destptr
, dest
, value
));
22204 dest
= change_address (destmem
, SImode
, destptr
);
22205 emit_insn (gen_strset (destptr
, dest
, value
));
22206 emit_insn (gen_strset (destptr
, dest
, value
));
22208 emit_label (label
);
22209 LABEL_NUSES (label
) = 1;
22213 rtx label
= ix86_expand_aligntest (count
, 4, true);
22214 dest
= change_address (destmem
, SImode
, destptr
);
22215 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22216 emit_label (label
);
22217 LABEL_NUSES (label
) = 1;
22221 rtx label
= ix86_expand_aligntest (count
, 2, true);
22222 dest
= change_address (destmem
, HImode
, destptr
);
22223 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22224 emit_label (label
);
22225 LABEL_NUSES (label
) = 1;
22229 rtx label
= ix86_expand_aligntest (count
, 1, true);
22230 dest
= change_address (destmem
, QImode
, destptr
);
22231 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22232 emit_label (label
);
22233 LABEL_NUSES (label
) = 1;
22237 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22238 DESIRED_ALIGNMENT. */
22240 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22241 rtx destptr
, rtx srcptr
, rtx count
,
22242 int align
, int desired_alignment
)
22244 if (align
<= 1 && desired_alignment
> 1)
22246 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22247 srcmem
= change_address (srcmem
, QImode
, srcptr
);
22248 destmem
= change_address (destmem
, QImode
, destptr
);
22249 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22250 ix86_adjust_counter (count
, 1);
22251 emit_label (label
);
22252 LABEL_NUSES (label
) = 1;
22254 if (align
<= 2 && desired_alignment
> 2)
22256 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22257 srcmem
= change_address (srcmem
, HImode
, srcptr
);
22258 destmem
= change_address (destmem
, HImode
, destptr
);
22259 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22260 ix86_adjust_counter (count
, 2);
22261 emit_label (label
);
22262 LABEL_NUSES (label
) = 1;
22264 if (align
<= 4 && desired_alignment
> 4)
22266 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22267 srcmem
= change_address (srcmem
, SImode
, srcptr
);
22268 destmem
= change_address (destmem
, SImode
, destptr
);
22269 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22270 ix86_adjust_counter (count
, 4);
22271 emit_label (label
);
22272 LABEL_NUSES (label
) = 1;
22274 gcc_assert (desired_alignment
<= 8);
22277 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22278 ALIGN_BYTES is how many bytes need to be copied. */
22280 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22281 int desired_align
, int align_bytes
)
22284 rtx orig_dst
= dst
;
22285 rtx orig_src
= src
;
22287 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22288 if (src_align_bytes
>= 0)
22289 src_align_bytes
= desired_align
- src_align_bytes
;
22290 if (align_bytes
& 1)
22292 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22293 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22295 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22297 if (align_bytes
& 2)
22299 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22300 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22301 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22302 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22303 if (src_align_bytes
>= 0
22304 && (src_align_bytes
& 1) == (align_bytes
& 1)
22305 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22306 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22308 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22310 if (align_bytes
& 4)
22312 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22313 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22314 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22315 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22316 if (src_align_bytes
>= 0)
22318 unsigned int src_align
= 0;
22319 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22321 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22323 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22324 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22327 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22329 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22330 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22331 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22332 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22333 if (src_align_bytes
>= 0)
22335 unsigned int src_align
= 0;
22336 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22338 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22340 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22342 if (src_align
> (unsigned int) desired_align
)
22343 src_align
= desired_align
;
22344 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22345 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22347 if (MEM_SIZE_KNOWN_P (orig_dst
))
22348 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22349 if (MEM_SIZE_KNOWN_P (orig_src
))
22350 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22355 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22356 DESIRED_ALIGNMENT. */
22358 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22359 int align
, int desired_alignment
)
22361 if (align
<= 1 && desired_alignment
> 1)
22363 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22364 destmem
= change_address (destmem
, QImode
, destptr
);
22365 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22366 ix86_adjust_counter (count
, 1);
22367 emit_label (label
);
22368 LABEL_NUSES (label
) = 1;
22370 if (align
<= 2 && desired_alignment
> 2)
22372 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22373 destmem
= change_address (destmem
, HImode
, destptr
);
22374 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22375 ix86_adjust_counter (count
, 2);
22376 emit_label (label
);
22377 LABEL_NUSES (label
) = 1;
22379 if (align
<= 4 && desired_alignment
> 4)
22381 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22382 destmem
= change_address (destmem
, SImode
, destptr
);
22383 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22384 ix86_adjust_counter (count
, 4);
22385 emit_label (label
);
22386 LABEL_NUSES (label
) = 1;
22388 gcc_assert (desired_alignment
<= 8);
22391 /* Set enough from DST to align DST known to by aligned by ALIGN to
22392 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22394 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22395 int desired_align
, int align_bytes
)
22398 rtx orig_dst
= dst
;
22399 if (align_bytes
& 1)
22401 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22403 emit_insn (gen_strset (destreg
, dst
,
22404 gen_lowpart (QImode
, value
)));
22406 if (align_bytes
& 2)
22408 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22409 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22410 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22412 emit_insn (gen_strset (destreg
, dst
,
22413 gen_lowpart (HImode
, value
)));
22415 if (align_bytes
& 4)
22417 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22418 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22419 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22421 emit_insn (gen_strset (destreg
, dst
,
22422 gen_lowpart (SImode
, value
)));
22424 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22425 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22426 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22427 if (MEM_SIZE_KNOWN_P (orig_dst
))
22428 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22432 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22433 static enum stringop_alg
22434 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22435 int *dynamic_check
, bool *noalign
)
22437 const struct stringop_algs
* algs
;
22438 bool optimize_for_speed
;
22439 /* Algorithms using the rep prefix want at least edi and ecx;
22440 additionally, memset wants eax and memcpy wants esi. Don't
22441 consider such algorithms if the user has appropriated those
22442 registers for their own purposes. */
22443 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22445 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22448 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22449 || (alg != rep_prefix_1_byte \
22450 && alg != rep_prefix_4_byte \
22451 && alg != rep_prefix_8_byte))
22452 const struct processor_costs
*cost
;
22454 /* Even if the string operation call is cold, we still might spend a lot
22455 of time processing large blocks. */
22456 if (optimize_function_for_size_p (cfun
)
22457 || (optimize_insn_for_size_p ()
22458 && expected_size
!= -1 && expected_size
< 256))
22459 optimize_for_speed
= false;
22461 optimize_for_speed
= true;
22463 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22465 *dynamic_check
= -1;
22467 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22469 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22470 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22471 return ix86_stringop_alg
;
22472 /* rep; movq or rep; movl is the smallest variant. */
22473 else if (!optimize_for_speed
)
22475 if (!count
|| (count
& 3))
22476 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22478 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22480 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22482 else if (expected_size
!= -1 && expected_size
< 4)
22483 return loop_1_byte
;
22484 else if (expected_size
!= -1)
22487 enum stringop_alg alg
= libcall
;
22488 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22490 /* We get here if the algorithms that were not libcall-based
22491 were rep-prefix based and we are unable to use rep prefixes
22492 based on global register usage. Break out of the loop and
22493 use the heuristic below. */
22494 if (algs
->size
[i
].max
== 0)
22496 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22498 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22500 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22502 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22503 last non-libcall inline algorithm. */
22504 if (TARGET_INLINE_ALL_STRINGOPS
)
22506 /* When the current size is best to be copied by a libcall,
22507 but we are still forced to inline, run the heuristic below
22508 that will pick code for medium sized blocks. */
22509 if (alg
!= libcall
)
22513 else if (ALG_USABLE_P (candidate
))
22515 *noalign
= algs
->size
[i
].noalign
;
22520 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22522 /* When asked to inline the call anyway, try to pick meaningful choice.
22523 We look for maximal size of block that is faster to copy by hand and
22524 take blocks of at most of that size guessing that average size will
22525 be roughly half of the block.
22527 If this turns out to be bad, we might simply specify the preferred
22528 choice in ix86_costs. */
22529 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22530 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22533 enum stringop_alg alg
;
22535 bool any_alg_usable_p
= true;
22537 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22539 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22540 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22542 if (candidate
!= libcall
&& candidate
22543 && ALG_USABLE_P (candidate
))
22544 max
= algs
->size
[i
].max
;
22546 /* If there aren't any usable algorithms, then recursing on
22547 smaller sizes isn't going to find anything. Just return the
22548 simple byte-at-a-time copy loop. */
22549 if (!any_alg_usable_p
)
22551 /* Pick something reasonable. */
22552 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22553 *dynamic_check
= 128;
22554 return loop_1_byte
;
22558 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
, noalign
);
22559 gcc_assert (*dynamic_check
== -1);
22560 gcc_assert (alg
!= libcall
);
22561 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22562 *dynamic_check
= max
;
22565 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22566 #undef ALG_USABLE_P
22569 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22570 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22572 decide_alignment (int align
,
22573 enum stringop_alg alg
,
22576 int desired_align
= 0;
22580 gcc_unreachable ();
22582 case unrolled_loop
:
22583 desired_align
= GET_MODE_SIZE (Pmode
);
22585 case rep_prefix_8_byte
:
22588 case rep_prefix_4_byte
:
22589 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22590 copying whole cacheline at once. */
22591 if (TARGET_PENTIUMPRO
)
22596 case rep_prefix_1_byte
:
22597 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22598 copying whole cacheline at once. */
22599 if (TARGET_PENTIUMPRO
)
22613 if (desired_align
< align
)
22614 desired_align
= align
;
22615 if (expected_size
!= -1 && expected_size
< 4)
22616 desired_align
= align
;
22617 return desired_align
;
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  /* The function body was lost in the garbled extraction; this is the
     canonical implementation: double RET until it strictly exceeds VAL.
     For VAL < 1 the result is 1.  */
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
22630 /* Expand string move (memcpy) operation. Use i386 string operations
22631 when profitable. expand_setmem contains similar code. The code
22632 depends upon architecture, block size and alignment, but always has
22633 the same overall structure:
22635 1) Prologue guard: Conditional that jumps up to epilogues for small
22636 blocks that can be handled by epilogue alone. This is faster
22637 but also needed for correctness, since prologue assume the block
22638 is larger than the desired alignment.
22640 Optional dynamic check for size and libcall for large
22641 blocks is emitted here too, with -minline-stringops-dynamically.
22643 2) Prologue: copy first few bytes in order to get destination
22644 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22645 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22646 copied. We emit either a jump tree on power of two sized
22647 blocks, or a byte loop.
22649 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22650 with specified algorithm.
22652 4) Epilogue: code copying tail of the block that is too small to be
22653 handled by main body (or up to size guarded by prologue guard). */
22656 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22657 rtx expected_align_exp
, rtx expected_size_exp
)
22663 rtx jump_around_label
= NULL
;
22664 HOST_WIDE_INT align
= 1;
22665 unsigned HOST_WIDE_INT count
= 0;
22666 HOST_WIDE_INT expected_size
= -1;
22667 int size_needed
= 0, epilogue_size_needed
;
22668 int desired_align
= 0, align_bytes
= 0;
22669 enum stringop_alg alg
;
22671 bool need_zero_guard
= false;
22674 if (CONST_INT_P (align_exp
))
22675 align
= INTVAL (align_exp
);
22676 /* i386 can do misaligned access on reasonably increased cost. */
22677 if (CONST_INT_P (expected_align_exp
)
22678 && INTVAL (expected_align_exp
) > align
)
22679 align
= INTVAL (expected_align_exp
);
22680 /* ALIGN is the minimum of destination and source alignment, but we care here
22681 just about destination alignment. */
22682 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22683 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22685 if (CONST_INT_P (count_exp
))
22686 count
= expected_size
= INTVAL (count_exp
);
22687 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22688 expected_size
= INTVAL (expected_size_exp
);
22690 /* Make sure we don't need to care about overflow later on. */
22691 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22694 /* Step 0: Decide on preferred algorithm, desired alignment and
22695 size of chunks to be copied by main loop. */
22697 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
, &noalign
);
22698 desired_align
= decide_alignment (align
, alg
, expected_size
);
22700 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
22701 align
= desired_align
;
22703 if (alg
== libcall
)
22705 gcc_assert (alg
!= no_stringop
);
22707 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22708 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22709 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22714 gcc_unreachable ();
22716 need_zero_guard
= true;
22717 size_needed
= GET_MODE_SIZE (word_mode
);
22719 case unrolled_loop
:
22720 need_zero_guard
= true;
22721 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22723 case rep_prefix_8_byte
:
22726 case rep_prefix_4_byte
:
22729 case rep_prefix_1_byte
:
22733 need_zero_guard
= true;
22738 epilogue_size_needed
= size_needed
;
22740 /* Step 1: Prologue guard. */
22742 /* Alignment code needs count to be in register. */
22743 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22745 if (INTVAL (count_exp
) > desired_align
22746 && INTVAL (count_exp
) > size_needed
)
22749 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22750 if (align_bytes
<= 0)
22753 align_bytes
= desired_align
- align_bytes
;
22755 if (align_bytes
== 0)
22756 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22758 gcc_assert (desired_align
>= 1 && align
>= 1);
22760 /* Ensure that alignment prologue won't copy past end of block. */
22761 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22763 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22764 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22765 Make sure it is power of 2. */
22766 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22770 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22772 /* If main algorithm works on QImode, no epilogue is needed.
22773 For small sizes just don't align anything. */
22774 if (size_needed
== 1)
22775 desired_align
= align
;
22782 label
= gen_label_rtx ();
22783 emit_cmp_and_jump_insns (count_exp
,
22784 GEN_INT (epilogue_size_needed
),
22785 LTU
, 0, counter_mode (count_exp
), 1, label
);
22786 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22787 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22789 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22793 /* Emit code to decide on runtime whether library call or inline should be
22795 if (dynamic_check
!= -1)
22797 if (CONST_INT_P (count_exp
))
22799 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22801 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22802 count_exp
= const0_rtx
;
22808 rtx hot_label
= gen_label_rtx ();
22809 jump_around_label
= gen_label_rtx ();
22810 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22811 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22812 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22813 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22814 emit_jump (jump_around_label
);
22815 emit_label (hot_label
);
22819 /* Step 2: Alignment prologue. */
22821 if (desired_align
> align
)
22823 if (align_bytes
== 0)
22825 /* Except for the first move in epilogue, we no longer know
22826 constant offset in aliasing info. It don't seems to worth
22827 the pain to maintain it for the first move, so throw away
22829 src
= change_address (src
, BLKmode
, srcreg
);
22830 dst
= change_address (dst
, BLKmode
, destreg
);
22831 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22836 /* If we know how many bytes need to be stored before dst is
22837 sufficiently aligned, maintain aliasing info accurately. */
22838 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22839 desired_align
, align_bytes
);
22840 count_exp
= plus_constant (counter_mode (count_exp
),
22841 count_exp
, -align_bytes
);
22842 count
-= align_bytes
;
22844 if (need_zero_guard
22845 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22846 || (align_bytes
== 0
22847 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22848 + desired_align
- align
))))
22850 /* It is possible that we copied enough so the main loop will not
22852 gcc_assert (size_needed
> 1);
22853 if (label
== NULL_RTX
)
22854 label
= gen_label_rtx ();
22855 emit_cmp_and_jump_insns (count_exp
,
22856 GEN_INT (size_needed
),
22857 LTU
, 0, counter_mode (count_exp
), 1, label
);
22858 if (expected_size
== -1
22859 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22860 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22862 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22865 if (label
&& size_needed
== 1)
22867 emit_label (label
);
22868 LABEL_NUSES (label
) = 1;
22870 epilogue_size_needed
= 1;
22872 else if (label
== NULL_RTX
)
22873 epilogue_size_needed
= size_needed
;
22875 /* Step 3: Main loop. */
22881 gcc_unreachable ();
22883 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22884 count_exp
, QImode
, 1, expected_size
);
22887 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22888 count_exp
, word_mode
, 1, expected_size
);
22890 case unrolled_loop
:
22891 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22892 registers for 4 temporaries anyway. */
22893 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22894 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22897 case rep_prefix_8_byte
:
22898 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22901 case rep_prefix_4_byte
:
22902 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22905 case rep_prefix_1_byte
:
22906 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22910 /* Adjust properly the offset of src and dest memory for aliasing. */
22911 if (CONST_INT_P (count_exp
))
22913 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22914 (count
/ size_needed
) * size_needed
);
22915 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22916 (count
/ size_needed
) * size_needed
);
22920 src
= change_address (src
, BLKmode
, srcreg
);
22921 dst
= change_address (dst
, BLKmode
, destreg
);
22924 /* Step 4: Epilogue to copy the remaining bytes. */
22928 /* When the main loop is done, COUNT_EXP might hold original count,
22929 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22930 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22931 bytes. Compensate if needed. */
22933 if (size_needed
< epilogue_size_needed
)
22936 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22937 GEN_INT (size_needed
- 1), count_exp
, 1,
22939 if (tmp
!= count_exp
)
22940 emit_move_insn (count_exp
, tmp
);
22942 emit_label (label
);
22943 LABEL_NUSES (label
) = 1;
22946 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22947 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22948 epilogue_size_needed
);
22949 if (jump_around_label
)
22950 emit_label (jump_around_label
);
22954 /* Helper function for memcpy. For QImode value 0xXY produce
22955 0xXYXYXYXY of wide specified by MODE. This is essentially
22956 a * 0x10101010, but we can do slightly better than
22957 synth_mult by unwinding the sequence by hand on CPUs with
22960 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22962 enum machine_mode valmode
= GET_MODE (val
);
22964 int nops
= mode
== DImode
? 3 : 2;
22966 gcc_assert (mode
== SImode
|| mode
== DImode
);
22967 if (val
== const0_rtx
)
22968 return copy_to_mode_reg (mode
, const0_rtx
);
22969 if (CONST_INT_P (val
))
22971 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22975 if (mode
== DImode
)
22976 v
|= (v
<< 16) << 16;
22977 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22980 if (valmode
== VOIDmode
)
22982 if (valmode
!= QImode
)
22983 val
= gen_lowpart (QImode
, val
);
22984 if (mode
== QImode
)
22986 if (!TARGET_PARTIAL_REG_STALL
)
22988 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22989 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22990 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22991 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22993 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22994 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22995 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
23000 rtx reg
= convert_modes (mode
, QImode
, val
, true);
23002 if (!TARGET_PARTIAL_REG_STALL
)
23003 if (mode
== SImode
)
23004 emit_insn (gen_movsi_insv_1 (reg
, reg
));
23006 emit_insn (gen_movdi_insv_1 (reg
, reg
));
23009 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
23010 NULL
, 1, OPTAB_DIRECT
);
23012 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23014 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
23015 NULL
, 1, OPTAB_DIRECT
);
23016 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23017 if (mode
== SImode
)
23019 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
23020 NULL
, 1, OPTAB_DIRECT
);
23021 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
23026 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23027 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23028 alignment from ALIGN to DESIRED_ALIGN. */
23030 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
23035 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
23036 promoted_val
= promote_duplicated_reg (DImode
, val
);
23037 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
23038 promoted_val
= promote_duplicated_reg (SImode
, val
);
23039 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
23040 promoted_val
= promote_duplicated_reg (HImode
, val
);
23042 promoted_val
= val
;
23044 return promoted_val
;
23047 /* Expand string clear operation (bzero). Use i386 string operations when
23048 profitable. See expand_movmem comment for explanation of individual
23049 steps performed. */
23051 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
23052 rtx expected_align_exp
, rtx expected_size_exp
)
23057 rtx jump_around_label
= NULL
;
23058 HOST_WIDE_INT align
= 1;
23059 unsigned HOST_WIDE_INT count
= 0;
23060 HOST_WIDE_INT expected_size
= -1;
23061 int size_needed
= 0, epilogue_size_needed
;
23062 int desired_align
= 0, align_bytes
= 0;
23063 enum stringop_alg alg
;
23064 rtx promoted_val
= NULL
;
23065 bool force_loopy_epilogue
= false;
23067 bool need_zero_guard
= false;
23070 if (CONST_INT_P (align_exp
))
23071 align
= INTVAL (align_exp
);
23072 /* i386 can do misaligned access on reasonably increased cost. */
23073 if (CONST_INT_P (expected_align_exp
)
23074 && INTVAL (expected_align_exp
) > align
)
23075 align
= INTVAL (expected_align_exp
);
23076 if (CONST_INT_P (count_exp
))
23077 count
= expected_size
= INTVAL (count_exp
);
23078 if (CONST_INT_P (expected_size_exp
) && count
== 0)
23079 expected_size
= INTVAL (expected_size_exp
);
23081 /* Make sure we don't need to care about overflow later on. */
23082 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23085 /* Step 0: Decide on preferred algorithm, desired alignment and
23086 size of chunks to be copied by main loop. */
23088 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
, &noalign
);
23089 desired_align
= decide_alignment (align
, alg
, expected_size
);
23091 if (!TARGET_ALIGN_STRINGOPS
|| noalign
)
23092 align
= desired_align
;
23094 if (alg
== libcall
)
23096 gcc_assert (alg
!= no_stringop
);
23098 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23099 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23104 gcc_unreachable ();
23106 need_zero_guard
= true;
23107 size_needed
= GET_MODE_SIZE (word_mode
);
23109 case unrolled_loop
:
23110 need_zero_guard
= true;
23111 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
23113 case rep_prefix_8_byte
:
23116 case rep_prefix_4_byte
:
23119 case rep_prefix_1_byte
:
23123 need_zero_guard
= true;
23127 epilogue_size_needed
= size_needed
;
23129 /* Step 1: Prologue guard. */
23131 /* Alignment code needs count to be in register. */
23132 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23134 if (INTVAL (count_exp
) > desired_align
23135 && INTVAL (count_exp
) > size_needed
)
23138 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23139 if (align_bytes
<= 0)
23142 align_bytes
= desired_align
- align_bytes
;
23144 if (align_bytes
== 0)
23146 enum machine_mode mode
= SImode
;
23147 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23149 count_exp
= force_reg (mode
, count_exp
);
23152 /* Do the cheap promotion to allow better CSE across the
23153 main loop and epilogue (ie one load of the big constant in the
23154 front of all code. */
23155 if (CONST_INT_P (val_exp
))
23156 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23157 desired_align
, align
);
23158 /* Ensure that alignment prologue won't copy past end of block. */
23159 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23161 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23162 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23163 Make sure it is power of 2. */
23164 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23166 /* To improve performance of small blocks, we jump around the VAL
23167 promoting mode. This mean that if the promoted VAL is not constant,
23168 we might not use it in the epilogue and have to use byte
23170 if (epilogue_size_needed
> 2 && !promoted_val
)
23171 force_loopy_epilogue
= true;
23174 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23176 /* If main algorithm works on QImode, no epilogue is needed.
23177 For small sizes just don't align anything. */
23178 if (size_needed
== 1)
23179 desired_align
= align
;
23186 label
= gen_label_rtx ();
23187 emit_cmp_and_jump_insns (count_exp
,
23188 GEN_INT (epilogue_size_needed
),
23189 LTU
, 0, counter_mode (count_exp
), 1, label
);
23190 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23191 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23193 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23196 if (dynamic_check
!= -1)
23198 rtx hot_label
= gen_label_rtx ();
23199 jump_around_label
= gen_label_rtx ();
23200 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23201 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23202 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23203 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23204 emit_jump (jump_around_label
);
23205 emit_label (hot_label
);
23208 /* Step 2: Alignment prologue. */
23210 /* Do the expensive promotion once we branched off the small blocks. */
23212 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23213 desired_align
, align
);
23214 gcc_assert (desired_align
>= 1 && align
>= 1);
23216 if (desired_align
> align
)
23218 if (align_bytes
== 0)
23220 /* Except for the first move in epilogue, we no longer know
23221 constant offset in aliasing info. It don't seems to worth
23222 the pain to maintain it for the first move, so throw away
23224 dst
= change_address (dst
, BLKmode
, destreg
);
23225 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23230 /* If we know how many bytes need to be stored before dst is
23231 sufficiently aligned, maintain aliasing info accurately. */
23232 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23233 desired_align
, align_bytes
);
23234 count_exp
= plus_constant (counter_mode (count_exp
),
23235 count_exp
, -align_bytes
);
23236 count
-= align_bytes
;
23238 if (need_zero_guard
23239 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23240 || (align_bytes
== 0
23241 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23242 + desired_align
- align
))))
23244 /* It is possible that we copied enough so the main loop will not
23246 gcc_assert (size_needed
> 1);
23247 if (label
== NULL_RTX
)
23248 label
= gen_label_rtx ();
23249 emit_cmp_and_jump_insns (count_exp
,
23250 GEN_INT (size_needed
),
23251 LTU
, 0, counter_mode (count_exp
), 1, label
);
23252 if (expected_size
== -1
23253 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23254 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23256 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23259 if (label
&& size_needed
== 1)
23261 emit_label (label
);
23262 LABEL_NUSES (label
) = 1;
23264 promoted_val
= val_exp
;
23265 epilogue_size_needed
= 1;
23267 else if (label
== NULL_RTX
)
23268 epilogue_size_needed
= size_needed
;
23270 /* Step 3: Main loop. */
23276 gcc_unreachable ();
23278 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23279 count_exp
, QImode
, 1, expected_size
);
23282 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23283 count_exp
, word_mode
, 1, expected_size
);
23285 case unrolled_loop
:
23286 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23287 count_exp
, word_mode
, 4, expected_size
);
23289 case rep_prefix_8_byte
:
23290 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23293 case rep_prefix_4_byte
:
23294 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23297 case rep_prefix_1_byte
:
23298 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23302 /* Adjust properly the offset of src and dest memory for aliasing. */
23303 if (CONST_INT_P (count_exp
))
23304 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23305 (count
/ size_needed
) * size_needed
);
23307 dst
= change_address (dst
, BLKmode
, destreg
);
23309 /* Step 4: Epilogue to copy the remaining bytes. */
23313 /* When the main loop is done, COUNT_EXP might hold original count,
23314 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23315 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23316 bytes. Compensate if needed. */
23318 if (size_needed
< epilogue_size_needed
)
23321 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23322 GEN_INT (size_needed
- 1), count_exp
, 1,
23324 if (tmp
!= count_exp
)
23325 emit_move_insn (count_exp
, tmp
);
23327 emit_label (label
);
23328 LABEL_NUSES (label
) = 1;
23331 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23333 if (force_loopy_epilogue
)
23334 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23335 epilogue_size_needed
);
23337 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23338 epilogue_size_needed
);
23340 if (jump_around_label
)
23341 emit_label (jump_around_label
);
23345 /* Expand the appropriate insns for doing strlen if not just doing
23348 out = result, initialized with the start address
23349 align_rtx = alignment of the address.
23350 scratch = scratch register, initialized with the startaddress when
23351 not aligned, otherwise undefined
23353 This is just the body. It needs the initializations mentioned above and
23354 some address computing at the end. These things are done in i386.md. */
23357 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23361 rtx align_2_label
= NULL_RTX
;
23362 rtx align_3_label
= NULL_RTX
;
23363 rtx align_4_label
= gen_label_rtx ();
23364 rtx end_0_label
= gen_label_rtx ();
23366 rtx tmpreg
= gen_reg_rtx (SImode
);
23367 rtx scratch
= gen_reg_rtx (SImode
);
23371 if (CONST_INT_P (align_rtx
))
23372 align
= INTVAL (align_rtx
);
23374 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23376 /* Is there a known alignment and is it less than 4? */
23379 rtx scratch1
= gen_reg_rtx (Pmode
);
23380 emit_move_insn (scratch1
, out
);
23381 /* Is there a known alignment and is it not 2? */
23384 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23385 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23387 /* Leave just the 3 lower bits. */
23388 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23389 NULL_RTX
, 0, OPTAB_WIDEN
);
23391 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23392 Pmode
, 1, align_4_label
);
23393 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23394 Pmode
, 1, align_2_label
);
23395 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23396 Pmode
, 1, align_3_label
);
23400 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23401 check if is aligned to 4 - byte. */
23403 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23404 NULL_RTX
, 0, OPTAB_WIDEN
);
23406 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23407 Pmode
, 1, align_4_label
);
23410 mem
= change_address (src
, QImode
, out
);
23412 /* Now compare the bytes. */
23414 /* Compare the first n unaligned byte on a byte per byte basis. */
23415 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23416 QImode
, 1, end_0_label
);
23418 /* Increment the address. */
23419 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23421 /* Not needed with an alignment of 2 */
23424 emit_label (align_2_label
);
23426 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23429 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23431 emit_label (align_3_label
);
23434 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23437 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23440 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23441 align this loop. It gives only huge programs, but does not help to
23443 emit_label (align_4_label
);
23445 mem
= change_address (src
, SImode
, out
);
23446 emit_move_insn (scratch
, mem
);
23447 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23449 /* This formula yields a nonzero result iff one of the bytes is zero.
23450 This saves three branches inside loop and many cycles. */
23452 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23453 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23454 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23455 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23456 gen_int_mode (0x80808080, SImode
)));
23457 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23462 rtx reg
= gen_reg_rtx (SImode
);
23463 rtx reg2
= gen_reg_rtx (Pmode
);
23464 emit_move_insn (reg
, tmpreg
);
23465 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23467 /* If zero is not in the first two bytes, move two bytes forward. */
23468 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23469 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23470 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23471 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23472 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23475 /* Emit lea manually to avoid clobbering of flags. */
23476 emit_insn (gen_rtx_SET (SImode
, reg2
,
23477 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23479 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23480 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23481 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23482 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23488 rtx end_2_label
= gen_label_rtx ();
23489 /* Is zero in the first two bytes? */
23491 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23492 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23493 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23494 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23495 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23497 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23498 JUMP_LABEL (tmp
) = end_2_label
;
23500 /* Not in the first two. Move two bytes forward. */
23501 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23502 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23504 emit_label (end_2_label
);
23508 /* Avoid branch in fixing the byte. */
23509 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23510 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23511 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23512 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23513 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23515 emit_label (end_0_label
);
23518 /* Expand strlen. */
23521 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23523 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23525 /* The generic case of strlen expander is long. Avoid it's
23526 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23528 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23529 && !TARGET_INLINE_ALL_STRINGOPS
23530 && !optimize_insn_for_size_p ()
23531 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23534 addr
= force_reg (Pmode
, XEXP (src
, 0));
23535 scratch1
= gen_reg_rtx (Pmode
);
23537 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23538 && !optimize_insn_for_size_p ())
23540 /* Well it seems that some optimizer does not combine a call like
23541 foo(strlen(bar), strlen(bar));
23542 when the move and the subtraction is done here. It does calculate
23543 the length just once when these instructions are done inside of
23544 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23545 often used and I use one fewer register for the lifetime of
23546 output_strlen_unroll() this is better. */
23548 emit_move_insn (out
, addr
);
23550 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23552 /* strlensi_unroll_1 returns the address of the zero at the end of
23553 the string, like memchr(), so compute the length by subtracting
23554 the start address. */
23555 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23561 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23562 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23565 scratch2
= gen_reg_rtx (Pmode
);
23566 scratch3
= gen_reg_rtx (Pmode
);
23567 scratch4
= force_reg (Pmode
, constm1_rtx
);
23569 emit_move_insn (scratch3
, addr
);
23570 eoschar
= force_reg (QImode
, eoschar
);
23572 src
= replace_equiv_address_nv (src
, scratch3
);
23574 /* If .md starts supporting :P, this can be done in .md. */
23575 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23576 scratch4
), UNSPEC_SCAS
);
23577 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23578 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23579 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23584 /* For given symbol (function) construct code to compute address of it's PLT
23585 entry in large x86-64 PIC model. */
23587 construct_plt_address (rtx symbol
)
23591 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23592 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23593 gcc_assert (Pmode
== DImode
);
23595 tmp
= gen_reg_rtx (Pmode
);
23596 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23598 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23599 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23604 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23606 rtx pop
, bool sibcall
)
23608 /* We need to represent that SI and DI registers are clobbered
23610 static int clobbered_registers
[] = {
23611 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23612 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23613 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23614 XMM15_REG
, SI_REG
, DI_REG
23616 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23617 rtx use
= NULL
, call
;
23618 unsigned int vec_len
;
23620 if (pop
== const0_rtx
)
23622 gcc_assert (!TARGET_64BIT
|| !pop
);
23624 if (TARGET_MACHO
&& !TARGET_64BIT
)
23627 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23628 fnaddr
= machopic_indirect_call_target (fnaddr
);
23633 /* Static functions and indirect calls don't need the pic register. */
23634 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23635 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23636 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23637 use_reg (&use
, pic_offset_table_rtx
);
23640 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23642 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23643 emit_move_insn (al
, callarg2
);
23644 use_reg (&use
, al
);
23647 if (ix86_cmodel
== CM_LARGE_PIC
23649 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23650 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23651 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23653 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23654 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23656 fnaddr
= convert_to_mode (word_mode
, XEXP (fnaddr
, 0), 1);
23657 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23661 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23663 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23664 vec
[vec_len
++] = call
;
23668 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23669 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23670 vec
[vec_len
++] = pop
;
23673 if (TARGET_64BIT_MS_ABI
23674 && (!callarg2
|| INTVAL (callarg2
) != -2))
23678 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23679 UNSPEC_MS_TO_SYSV_CALL
);
23681 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23683 = gen_rtx_CLOBBER (VOIDmode
,
23684 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23686 clobbered_registers
[i
]));
23690 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23691 call
= emit_call_insn (call
);
23693 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23698 /* Output the assembly for a call instruction. */
23701 ix86_output_call_insn (rtx insn
, rtx call_op
)
23703 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23704 bool seh_nop_p
= false;
23707 if (SIBLING_CALL_P (insn
))
23711 /* SEH epilogue detection requires the indirect branch case
23712 to include REX.W. */
23713 else if (TARGET_SEH
)
23714 xasm
= "rex.W jmp %A0";
23718 output_asm_insn (xasm
, &call_op
);
23722 /* SEH unwinding can require an extra nop to be emitted in several
23723 circumstances. Determine if we have one of those. */
23728 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23730 /* If we get to another real insn, we don't need the nop. */
23734 /* If we get to the epilogue note, prevent a catch region from
23735 being adjacent to the standard epilogue sequence. If non-
23736 call-exceptions, we'll have done this during epilogue emission. */
23737 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23738 && !flag_non_call_exceptions
23739 && !can_throw_internal (insn
))
23746 /* If we didn't find a real insn following the call, prevent the
23747 unwinder from looking into the next function. */
23753 xasm
= "call\t%P0";
23755 xasm
= "call\t%A0";
23757 output_asm_insn (xasm
, &call_op
);
23765 /* Clear stack slot assignments remembered from previous functions.
23766 This is called from INIT_EXPANDERS once before RTL is emitted for each
23769 static struct machine_function
*
23770 ix86_init_machine_status (void)
23772 struct machine_function
*f
;
23774 f
= ggc_alloc_cleared_machine_function ();
23775 f
->use_fast_prologue_epilogue_nregs
= -1;
23776 f
->call_abi
= ix86_abi
;
23781 /* Return a MEM corresponding to a stack slot with mode MODE.
23782 Allocate a new slot if necessary.
23784 The RTL for a function can have several slots available: N is
23785 which slot to use. */
23788 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23790 struct stack_local_entry
*s
;
23792 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23794 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23795 if (s
->mode
== mode
&& s
->n
== n
)
23796 return validize_mem (copy_rtx (s
->rtl
));
23798 s
= ggc_alloc_stack_local_entry ();
23801 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23803 s
->next
= ix86_stack_locals
;
23804 ix86_stack_locals
= s
;
23805 return validize_mem (s
->rtl
);
23809 ix86_instantiate_decls (void)
23811 struct stack_local_entry
*s
;
23813 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23814 if (s
->rtl
!= NULL_RTX
)
23815 instantiate_decl_rtl (s
->rtl
);
23818 /* Calculate the length of the memory address in the instruction encoding.
23819 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23820 or other prefixes. We never generate addr32 prefix for LEA insn. */
23823 memory_address_length (rtx addr
, bool lea
)
23825 struct ix86_address parts
;
23826 rtx base
, index
, disp
;
23830 if (GET_CODE (addr
) == PRE_DEC
23831 || GET_CODE (addr
) == POST_INC
23832 || GET_CODE (addr
) == PRE_MODIFY
23833 || GET_CODE (addr
) == POST_MODIFY
)
23836 ok
= ix86_decompose_address (addr
, &parts
);
23839 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23841 /* If this is not LEA instruction, add the length of addr32 prefix. */
23842 if (TARGET_64BIT
&& !lea
23843 && (SImode_address_operand (addr
, VOIDmode
)
23844 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23845 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23849 index
= parts
.index
;
23852 if (base
&& GET_CODE (base
) == SUBREG
)
23853 base
= SUBREG_REG (base
);
23854 if (index
&& GET_CODE (index
) == SUBREG
)
23855 index
= SUBREG_REG (index
);
23857 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23858 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23861 - esp as the base always wants an index,
23862 - ebp as the base always wants a displacement,
23863 - r12 as the base always wants an index,
23864 - r13 as the base always wants a displacement. */
23866 /* Register Indirect. */
23867 if (base
&& !index
&& !disp
)
23869 /* esp (for its index) and ebp (for its displacement) need
23870 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23872 if (base
== arg_pointer_rtx
23873 || base
== frame_pointer_rtx
23874 || REGNO (base
) == SP_REG
23875 || REGNO (base
) == BP_REG
23876 || REGNO (base
) == R12_REG
23877 || REGNO (base
) == R13_REG
)
23881 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23882 is not disp32, but disp32(%rip), so for disp32
23883 SIB byte is needed, unless print_operand_address
23884 optimizes it into disp32(%rip) or (%rip) is implied
23886 else if (disp
&& !base
&& !index
)
23893 if (GET_CODE (disp
) == CONST
)
23894 symbol
= XEXP (disp
, 0);
23895 if (GET_CODE (symbol
) == PLUS
23896 && CONST_INT_P (XEXP (symbol
, 1)))
23897 symbol
= XEXP (symbol
, 0);
23899 if (GET_CODE (symbol
) != LABEL_REF
23900 && (GET_CODE (symbol
) != SYMBOL_REF
23901 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23902 && (GET_CODE (symbol
) != UNSPEC
23903 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23904 && XINT (symbol
, 1) != UNSPEC_PCREL
23905 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23911 /* Find the length of the displacement constant. */
23914 if (base
&& satisfies_constraint_K (disp
))
23919 /* ebp always wants a displacement. Similarly r13. */
23920 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23923 /* An index requires the two-byte modrm form.... */
23925 /* ...like esp (or r12), which always wants an index. */
23926 || base
== arg_pointer_rtx
23927 || base
== frame_pointer_rtx
23928 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23935 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23936 is set, expect that insn have 8bit immediate alternative. */
23938 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23942 extract_insn_cached (insn
);
23943 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23944 if (CONSTANT_P (recog_data
.operand
[i
]))
23946 enum attr_mode mode
= get_attr_mode (insn
);
23949 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23951 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23958 ival
= trunc_int_for_mode (ival
, HImode
);
23961 ival
= trunc_int_for_mode (ival
, SImode
);
23966 if (IN_RANGE (ival
, -128, 127))
23983 /* Immediates for DImode instructions are encoded
23984 as 32bit sign extended values. */
23989 fatal_insn ("unknown insn mode", insn
);
23995 /* Compute default value for "length_address" attribute. */
23997 ix86_attr_length_address_default (rtx insn
)
24001 if (get_attr_type (insn
) == TYPE_LEA
)
24003 rtx set
= PATTERN (insn
), addr
;
24005 if (GET_CODE (set
) == PARALLEL
)
24006 set
= XVECEXP (set
, 0, 0);
24008 gcc_assert (GET_CODE (set
) == SET
);
24010 addr
= SET_SRC (set
);
24012 return memory_address_length (addr
, true);
24015 extract_insn_cached (insn
);
24016 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24017 if (MEM_P (recog_data
.operand
[i
]))
24019 constrain_operands_cached (reload_completed
);
24020 if (which_alternative
!= -1)
24022 const char *constraints
= recog_data
.constraints
[i
];
24023 int alt
= which_alternative
;
24025 while (*constraints
== '=' || *constraints
== '+')
24028 while (*constraints
++ != ',')
24030 /* Skip ignored operands. */
24031 if (*constraints
== 'X')
24034 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
24039 /* Compute default value for "length_vex" attribute. It includes
24040 2 or 3 byte VEX prefix and 1 opcode byte. */
24043 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24047 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24048 byte VEX prefix. */
24049 if (!has_0f_opcode
|| has_vex_w
)
24052 /* We can always use 2 byte VEX prefix in 32bit. */
24056 extract_insn_cached (insn
);
24058 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24059 if (REG_P (recog_data
.operand
[i
]))
24061 /* REX.W bit uses 3 byte VEX prefix. */
24062 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24063 && GENERAL_REG_P (recog_data
.operand
[i
]))
24068 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24069 if (MEM_P (recog_data
.operand
[i
])
24070 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24077 /* Return the maximum number of instructions a cpu can issue. */
24080 ix86_issue_rate (void)
24084 case PROCESSOR_PENTIUM
:
24085 case PROCESSOR_ATOM
:
24087 case PROCESSOR_BTVER2
:
24090 case PROCESSOR_PENTIUMPRO
:
24091 case PROCESSOR_PENTIUM4
:
24092 case PROCESSOR_CORE2
:
24093 case PROCESSOR_COREI7
:
24094 case PROCESSOR_HASWELL
:
24095 case PROCESSOR_ATHLON
:
24097 case PROCESSOR_AMDFAM10
:
24098 case PROCESSOR_NOCONA
:
24099 case PROCESSOR_GENERIC32
:
24100 case PROCESSOR_GENERIC64
:
24101 case PROCESSOR_BDVER1
:
24102 case PROCESSOR_BDVER2
:
24103 case PROCESSOR_BDVER3
:
24104 case PROCESSOR_BTVER1
:
24112 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24113 by DEP_INSN and nothing set by DEP_INSN. */
24116 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24120 /* Simplify the test for uninteresting insns. */
24121 if (insn_type
!= TYPE_SETCC
24122 && insn_type
!= TYPE_ICMOV
24123 && insn_type
!= TYPE_FCMOV
24124 && insn_type
!= TYPE_IBR
)
24127 if ((set
= single_set (dep_insn
)) != 0)
24129 set
= SET_DEST (set
);
24132 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24133 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24134 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24135 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24137 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24138 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24143 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24146 /* This test is true if the dependent insn reads the flags but
24147 not any other potentially set register. */
24148 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24151 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24157 /* Return true iff USE_INSN has a memory address with operands set by
24161 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24164 extract_insn_cached (use_insn
);
24165 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24166 if (MEM_P (recog_data
.operand
[i
]))
24168 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24169 return modified_in_p (addr
, set_insn
) != 0;
24175 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24177 enum attr_type insn_type
, dep_insn_type
;
24178 enum attr_memory memory
;
24180 int dep_insn_code_number
;
24182 /* Anti and output dependencies have zero cost on all CPUs. */
24183 if (REG_NOTE_KIND (link
) != 0)
24186 dep_insn_code_number
= recog_memoized (dep_insn
);
24188 /* If we can't recognize the insns, we can't really do anything. */
24189 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24192 insn_type
= get_attr_type (insn
);
24193 dep_insn_type
= get_attr_type (dep_insn
);
24197 case PROCESSOR_PENTIUM
:
24198 /* Address Generation Interlock adds a cycle of latency. */
24199 if (insn_type
== TYPE_LEA
)
24201 rtx addr
= PATTERN (insn
);
24203 if (GET_CODE (addr
) == PARALLEL
)
24204 addr
= XVECEXP (addr
, 0, 0);
24206 gcc_assert (GET_CODE (addr
) == SET
);
24208 addr
= SET_SRC (addr
);
24209 if (modified_in_p (addr
, dep_insn
))
24212 else if (ix86_agi_dependent (dep_insn
, insn
))
24215 /* ??? Compares pair with jump/setcc. */
24216 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24219 /* Floating point stores require value to be ready one cycle earlier. */
24220 if (insn_type
== TYPE_FMOV
24221 && get_attr_memory (insn
) == MEMORY_STORE
24222 && !ix86_agi_dependent (dep_insn
, insn
))
24226 case PROCESSOR_PENTIUMPRO
:
24227 memory
= get_attr_memory (insn
);
24229 /* INT->FP conversion is expensive. */
24230 if (get_attr_fp_int_src (dep_insn
))
24233 /* There is one cycle extra latency between an FP op and a store. */
24234 if (insn_type
== TYPE_FMOV
24235 && (set
= single_set (dep_insn
)) != NULL_RTX
24236 && (set2
= single_set (insn
)) != NULL_RTX
24237 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24238 && MEM_P (SET_DEST (set2
)))
24241 /* Show ability of reorder buffer to hide latency of load by executing
24242 in parallel with previous instruction in case
24243 previous instruction is not needed to compute the address. */
24244 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24245 && !ix86_agi_dependent (dep_insn
, insn
))
24247 /* Claim moves to take one cycle, as core can issue one load
24248 at time and the next load can start cycle later. */
24249 if (dep_insn_type
== TYPE_IMOV
24250 || dep_insn_type
== TYPE_FMOV
)
24258 memory
= get_attr_memory (insn
);
24260 /* The esp dependency is resolved before the instruction is really
24262 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24263 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24266 /* INT->FP conversion is expensive. */
24267 if (get_attr_fp_int_src (dep_insn
))
24270 /* Show ability of reorder buffer to hide latency of load by executing
24271 in parallel with previous instruction in case
24272 previous instruction is not needed to compute the address. */
24273 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24274 && !ix86_agi_dependent (dep_insn
, insn
))
24276 /* Claim moves to take one cycle, as core can issue one load
24277 at time and the next load can start cycle later. */
24278 if (dep_insn_type
== TYPE_IMOV
24279 || dep_insn_type
== TYPE_FMOV
)
24288 case PROCESSOR_ATHLON
:
24290 case PROCESSOR_AMDFAM10
:
24291 case PROCESSOR_BDVER1
:
24292 case PROCESSOR_BDVER2
:
24293 case PROCESSOR_BDVER3
:
24294 case PROCESSOR_BTVER1
:
24295 case PROCESSOR_BTVER2
:
24296 case PROCESSOR_ATOM
:
24297 case PROCESSOR_GENERIC32
:
24298 case PROCESSOR_GENERIC64
:
24299 memory
= get_attr_memory (insn
);
24301 /* Show ability of reorder buffer to hide latency of load by executing
24302 in parallel with previous instruction in case
24303 previous instruction is not needed to compute the address. */
24304 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24305 && !ix86_agi_dependent (dep_insn
, insn
))
24307 enum attr_unit unit
= get_attr_unit (insn
);
24310 /* Because of the difference between the length of integer and
24311 floating unit pipeline preparation stages, the memory operands
24312 for floating point are cheaper.
24314 ??? For Athlon it the difference is most probably 2. */
24315 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24318 loadcost
= TARGET_ATHLON
? 2 : 0;
24320 if (cost
>= loadcost
)
24333 /* How many alternative schedules to try. This should be as wide as the
24334 scheduling freedom in the DFA, but no wider. Making this value too
24335 large results extra work for the scheduler. */
24338 ia32_multipass_dfa_lookahead (void)
24342 case PROCESSOR_PENTIUM
:
24345 case PROCESSOR_PENTIUMPRO
:
24349 case PROCESSOR_CORE2
:
24350 case PROCESSOR_COREI7
:
24351 case PROCESSOR_HASWELL
:
24352 case PROCESSOR_ATOM
:
24353 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24354 as many instructions can be executed on a cycle, i.e.,
24355 issue_rate. I wonder why tuning for many CPUs does not do this. */
24356 if (reload_completed
)
24357 return ix86_issue_rate ();
24358 /* Don't use lookahead for pre-reload schedule to save compile time. */
24366 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24367 execution. It is applied if
24368 (1) IMUL instruction is on the top of list;
24369 (2) There exists the only producer of independent IMUL instruction in
24371 (3) Put found producer on the top of ready list.
24372 Returns issue rate. */
24375 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24376 int clock_var ATTRIBUTE_UNUSED
)
24378 static int issue_rate
= -1;
24379 int n_ready
= *pn_ready
;
24380 rtx insn
, insn1
, insn2
;
24382 sd_iterator_def sd_it
;
24386 /* Set up issue rate. */
24387 issue_rate
= ix86_issue_rate();
24389 /* Do reodering for Atom only. */
24390 if (ix86_tune
!= PROCESSOR_ATOM
)
24392 /* Do not perform ready list reodering for pre-reload schedule pass. */
24393 if (!reload_completed
)
24395 /* Nothing to do if ready list contains only 1 instruction. */
24399 /* Check that IMUL instruction is on the top of ready list. */
24400 insn
= ready
[n_ready
- 1];
24401 if (!NONDEBUG_INSN_P (insn
))
24403 insn
= PATTERN (insn
);
24404 if (GET_CODE (insn
) == PARALLEL
)
24405 insn
= XVECEXP (insn
, 0, 0);
24406 if (GET_CODE (insn
) != SET
)
24408 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24409 && GET_MODE (SET_SRC (insn
)) == SImode
))
24412 /* Search for producer of independent IMUL instruction. */
24413 for (i
= n_ready
- 2; i
>= 0; i
--)
24416 if (!NONDEBUG_INSN_P (insn
))
24418 /* Skip IMUL instruction. */
24419 insn2
= PATTERN (insn
);
24420 if (GET_CODE (insn2
) == PARALLEL
)
24421 insn2
= XVECEXP (insn2
, 0, 0);
24422 if (GET_CODE (insn2
) == SET
24423 && GET_CODE (SET_SRC (insn2
)) == MULT
24424 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24427 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24430 con
= DEP_CON (dep
);
24431 if (!NONDEBUG_INSN_P (con
))
24433 insn1
= PATTERN (con
);
24434 if (GET_CODE (insn1
) == PARALLEL
)
24435 insn1
= XVECEXP (insn1
, 0, 0);
24437 if (GET_CODE (insn1
) == SET
24438 && GET_CODE (SET_SRC (insn1
)) == MULT
24439 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24441 sd_iterator_def sd_it1
;
24443 /* Check if there is no other dependee for IMUL. */
24445 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24448 pro
= DEP_PRO (dep1
);
24449 if (!NONDEBUG_INSN_P (pro
))
24462 return issue_rate
; /* Didn't find IMUL producer. */
24464 if (sched_verbose
> 1)
24465 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24466 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24468 /* Put IMUL producer (ready[index]) at the top of ready list. */
24469 insn1
= ready
[index
];
24470 for (i
= index
; i
< n_ready
- 1; i
++)
24471 ready
[i
] = ready
[i
+ 1];
24472 ready
[n_ready
- 1] = insn1
;
24478 ix86_class_likely_spilled_p (reg_class_t
);
24480 /* Returns true if lhs of insn is HW function argument register and set up
24481 is_spilled to true if it is likely spilled HW register. */
24483 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24487 if (!NONDEBUG_INSN_P (insn
))
24489 /* Call instructions are not movable, ignore it. */
24492 insn
= PATTERN (insn
);
24493 if (GET_CODE (insn
) == PARALLEL
)
24494 insn
= XVECEXP (insn
, 0, 0);
24495 if (GET_CODE (insn
) != SET
)
24497 dst
= SET_DEST (insn
);
24498 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24499 && ix86_function_arg_regno_p (REGNO (dst
)))
24501 /* Is it likely spilled HW register? */
24502 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24503 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24504 *is_spilled
= true;
24510 /* Add output dependencies for chain of function adjacent arguments if only
24511 there is a move to likely spilled HW register. Return first argument
24512 if at least one dependence was added or NULL otherwise. */
24514 add_parameter_dependencies (rtx call
, rtx head
)
24518 rtx first_arg
= NULL
;
24519 bool is_spilled
= false;
24521 head
= PREV_INSN (head
);
24523 /* Find nearest to call argument passing instruction. */
24526 last
= PREV_INSN (last
);
24529 if (!NONDEBUG_INSN_P (last
))
24531 if (insn_is_function_arg (last
, &is_spilled
))
24539 insn
= PREV_INSN (last
);
24540 if (!INSN_P (insn
))
24544 if (!NONDEBUG_INSN_P (insn
))
24549 if (insn_is_function_arg (insn
, &is_spilled
))
24551 /* Add output depdendence between two function arguments if chain
24552 of output arguments contains likely spilled HW registers. */
24554 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24555 first_arg
= last
= insn
;
24565 /* Add output or anti dependency from insn to first_arg to restrict its code
24568 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24573 set
= single_set (insn
);
24576 tmp
= SET_DEST (set
);
24579 /* Add output dependency to the first function argument. */
24580 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24583 /* Add anti dependency. */
24584 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24587 /* Avoid cross block motion of function argument through adding dependency
24588 from the first non-jump instruction in bb. */
24590 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24592 rtx insn
= BB_END (bb
);
24596 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24598 rtx set
= single_set (insn
);
24601 avoid_func_arg_motion (arg
, insn
);
24605 if (insn
== BB_HEAD (bb
))
24607 insn
= PREV_INSN (insn
);
24611 /* Hook for pre-reload schedule - avoid motion of function arguments
24612 passed in likely spilled HW registers. */
24614 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24617 rtx first_arg
= NULL
;
24618 if (reload_completed
)
24620 while (head
!= tail
&& DEBUG_INSN_P (head
))
24621 head
= NEXT_INSN (head
);
24622 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24623 if (INSN_P (insn
) && CALL_P (insn
))
24625 first_arg
= add_parameter_dependencies (insn
, head
);
24628 /* Add dependee for first argument to predecessors if only
24629 region contains more than one block. */
24630 basic_block bb
= BLOCK_FOR_INSN (insn
);
24631 int rgn
= CONTAINING_RGN (bb
->index
);
24632 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24633 /* Skip trivial regions and region head blocks that can have
24634 predecessors outside of region. */
24635 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24639 /* Assume that region is SCC, i.e. all immediate predecessors
24640 of non-head block are in the same region. */
24641 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24643 /* Avoid creating of loop-carried dependencies through
24644 using topological odering in region. */
24645 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24646 add_dependee_for_func_arg (first_arg
, e
->src
);
24654 else if (first_arg
)
24655 avoid_func_arg_motion (first_arg
, insn
);
24658 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24659 HW registers to maximum, to schedule them at soon as possible. These are
24660 moves from function argument registers at the top of the function entry
24661 and moves from function return value registers after call. */
24663 ix86_adjust_priority (rtx insn
, int priority
)
24667 if (reload_completed
)
24670 if (!NONDEBUG_INSN_P (insn
))
24673 set
= single_set (insn
);
24676 rtx tmp
= SET_SRC (set
);
24678 && HARD_REGISTER_P (tmp
)
24679 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24680 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24681 return current_sched_info
->sched_max_insns_priority
;
24687 /* Model decoder of Core 2/i7.
24688 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24689 track the instruction fetch block boundaries and make sure that long
24690 (9+ bytes) instructions are assigned to D0. */
24692 /* Maximum length of an insn that can be handled by
24693 a secondary decoder unit. '8' for Core 2/i7. */
24694 static int core2i7_secondary_decoder_max_insn_size
;
24696 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24697 '16' for Core 2/i7. */
24698 static int core2i7_ifetch_block_size
;
24700 /* Maximum number of instructions decoder can handle per cycle.
24701 '6' for Core 2/i7. */
24702 static int core2i7_ifetch_block_max_insns
;
24704 typedef struct ix86_first_cycle_multipass_data_
*
24705 ix86_first_cycle_multipass_data_t
;
24706 typedef const struct ix86_first_cycle_multipass_data_
*
24707 const_ix86_first_cycle_multipass_data_t
;
24709 /* A variable to store target state across calls to max_issue within
24711 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24712 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24714 /* Initialize DATA. */
24716 core2i7_first_cycle_multipass_init (void *_data
)
24718 ix86_first_cycle_multipass_data_t data
24719 = (ix86_first_cycle_multipass_data_t
) _data
;
24721 data
->ifetch_block_len
= 0;
24722 data
->ifetch_block_n_insns
= 0;
24723 data
->ready_try_change
= NULL
;
24724 data
->ready_try_change_size
= 0;
24727 /* Advancing the cycle; reset ifetch block counts. */
24729 core2i7_dfa_post_advance_cycle (void)
24731 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24733 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24735 data
->ifetch_block_len
= 0;
24736 data
->ifetch_block_n_insns
= 0;
24739 static int min_insn_size (rtx
);
24741 /* Filter out insns from ready_try that the core will not be able to issue
24742 on current cycle due to decoder. */
24744 core2i7_first_cycle_multipass_filter_ready_try
24745 (const_ix86_first_cycle_multipass_data_t data
,
24746 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24753 if (ready_try
[n_ready
])
24756 insn
= get_ready_element (n_ready
);
24757 insn_size
= min_insn_size (insn
);
24759 if (/* If this is a too long an insn for a secondary decoder ... */
24760 (!first_cycle_insn_p
24761 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24762 /* ... or it would not fit into the ifetch block ... */
24763 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24764 /* ... or the decoder is full already ... */
24765 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24766 /* ... mask the insn out. */
24768 ready_try
[n_ready
] = 1;
24770 if (data
->ready_try_change
)
24771 bitmap_set_bit (data
->ready_try_change
, n_ready
);
24776 /* Prepare for a new round of multipass lookahead scheduling. */
24778 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24779 bool first_cycle_insn_p
)
24781 ix86_first_cycle_multipass_data_t data
24782 = (ix86_first_cycle_multipass_data_t
) _data
;
24783 const_ix86_first_cycle_multipass_data_t prev_data
24784 = ix86_first_cycle_multipass_data
;
24786 /* Restore the state from the end of the previous round. */
24787 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24788 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24790 /* Filter instructions that cannot be issued on current cycle due to
24791 decoder restrictions. */
24792 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24793 first_cycle_insn_p
);
24796 /* INSN is being issued in current solution. Account for its impact on
24797 the decoder model. */
24799 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24800 rtx insn
, const void *_prev_data
)
24802 ix86_first_cycle_multipass_data_t data
24803 = (ix86_first_cycle_multipass_data_t
) _data
;
24804 const_ix86_first_cycle_multipass_data_t prev_data
24805 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24807 int insn_size
= min_insn_size (insn
);
24809 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24810 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24811 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24812 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24814 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24815 if (!data
->ready_try_change
)
24817 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24818 data
->ready_try_change_size
= n_ready
;
24820 else if (data
->ready_try_change_size
< n_ready
)
24822 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24824 data
->ready_try_change_size
= n_ready
;
24826 bitmap_clear (data
->ready_try_change
);
24828 /* Filter out insns from ready_try that the core will not be able to issue
24829 on current cycle due to decoder. */
24830 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24834 /* Revert the effect on ready_try. */
24836 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24838 int n_ready ATTRIBUTE_UNUSED
)
24840 const_ix86_first_cycle_multipass_data_t data
24841 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24842 unsigned int i
= 0;
24843 sbitmap_iterator sbi
;
24845 gcc_assert (bitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24846 EXECUTE_IF_SET_IN_BITMAP (data
->ready_try_change
, 0, i
, sbi
)
24852 /* Save the result of multipass lookahead scheduling for the next round. */
24854 core2i7_first_cycle_multipass_end (const void *_data
)
24856 const_ix86_first_cycle_multipass_data_t data
24857 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24858 ix86_first_cycle_multipass_data_t next_data
24859 = ix86_first_cycle_multipass_data
;
24863 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24864 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24868 /* Deallocate target data. */
24870 core2i7_first_cycle_multipass_fini (void *_data
)
24872 ix86_first_cycle_multipass_data_t data
24873 = (ix86_first_cycle_multipass_data_t
) _data
;
24875 if (data
->ready_try_change
)
24877 sbitmap_free (data
->ready_try_change
);
24878 data
->ready_try_change
= NULL
;
24879 data
->ready_try_change_size
= 0;
24883 /* Prepare for scheduling pass. */
24885 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24886 int verbose ATTRIBUTE_UNUSED
,
24887 int max_uid ATTRIBUTE_UNUSED
)
24889 /* Install scheduling hooks for current CPU. Some of these hooks are used
24890 in time-critical parts of the scheduler, so we only set them up when
24891 they are actually used. */
24894 case PROCESSOR_CORE2
:
24895 case PROCESSOR_COREI7
:
24896 case PROCESSOR_HASWELL
:
24897 /* Do not perform multipass scheduling for pre-reload schedule
24898 to save compile time. */
24899 if (reload_completed
)
24901 targetm
.sched
.dfa_post_advance_cycle
24902 = core2i7_dfa_post_advance_cycle
;
24903 targetm
.sched
.first_cycle_multipass_init
24904 = core2i7_first_cycle_multipass_init
;
24905 targetm
.sched
.first_cycle_multipass_begin
24906 = core2i7_first_cycle_multipass_begin
;
24907 targetm
.sched
.first_cycle_multipass_issue
24908 = core2i7_first_cycle_multipass_issue
;
24909 targetm
.sched
.first_cycle_multipass_backtrack
24910 = core2i7_first_cycle_multipass_backtrack
;
24911 targetm
.sched
.first_cycle_multipass_end
24912 = core2i7_first_cycle_multipass_end
;
24913 targetm
.sched
.first_cycle_multipass_fini
24914 = core2i7_first_cycle_multipass_fini
;
24916 /* Set decoder parameters. */
24917 core2i7_secondary_decoder_max_insn_size
= 8;
24918 core2i7_ifetch_block_size
= 16;
24919 core2i7_ifetch_block_max_insns
= 6;
24922 /* ... Fall through ... */
24924 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24925 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24926 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24927 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24928 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24929 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24930 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24936 /* Compute the alignment given to a constant that is being placed in memory.
24937 EXP is the constant and ALIGN is the alignment that the object would
24939 The value of this function is used instead of that alignment to align
24943 ix86_constant_alignment (tree exp
, int align
)
24945 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24946 || TREE_CODE (exp
) == INTEGER_CST
)
24948 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24950 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24953 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24954 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24955 return BITS_PER_WORD
;
24960 /* Compute the alignment for a static variable.
24961 TYPE is the data type, and ALIGN is the alignment that
24962 the object would ordinarily have. The value of this function is used
24963 instead of that alignment to align the object. */
24966 ix86_data_alignment (tree type
, int align
)
24968 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24970 if (AGGREGATE_TYPE_P (type
)
24971 && TYPE_SIZE (type
)
24972 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24973 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24974 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24975 && align
< max_align
)
24978 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24979 to 16byte boundary. */
24982 if (AGGREGATE_TYPE_P (type
)
24983 && TYPE_SIZE (type
)
24984 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24985 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24986 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24990 if (TREE_CODE (type
) == ARRAY_TYPE
)
24992 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24994 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24997 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25000 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25002 if ((TYPE_MODE (type
) == XCmode
25003 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25006 else if ((TREE_CODE (type
) == RECORD_TYPE
25007 || TREE_CODE (type
) == UNION_TYPE
25008 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25009 && TYPE_FIELDS (type
))
25011 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25013 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25016 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25017 || TREE_CODE (type
) == INTEGER_TYPE
)
25019 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25021 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25028 /* Compute the alignment for a local variable or a stack slot. EXP is
25029 the data type or decl itself, MODE is the widest mode available and
25030 ALIGN is the alignment that the object would ordinarily have. The
25031 value of this macro is used instead of that alignment to align the
25035 ix86_local_alignment (tree exp
, enum machine_mode mode
,
25036 unsigned int align
)
25040 if (exp
&& DECL_P (exp
))
25042 type
= TREE_TYPE (exp
);
25051 /* Don't do dynamic stack realignment for long long objects with
25052 -mpreferred-stack-boundary=2. */
25055 && ix86_preferred_stack_boundary
< 64
25056 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25057 && (!type
|| !TYPE_USER_ALIGN (type
))
25058 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25061 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25062 register in MODE. We will return the largest alignment of XF
25066 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25067 align
= GET_MODE_ALIGNMENT (DFmode
);
25071 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25072 to 16byte boundary. Exact wording is:
25074 An array uses the same alignment as its elements, except that a local or
25075 global array variable of length at least 16 bytes or
25076 a C99 variable-length array variable always has alignment of at least 16 bytes.
25078 This was added to allow use of aligned SSE instructions at arrays. This
25079 rule is meant for static storage (where compiler can not do the analysis
25080 by itself). We follow it for automatic variables only when convenient.
25081 We fully control everything in the function compiled and functions from
25082 other unit can not rely on the alignment.
25084 Exclude va_list type. It is the common case of local array where
25085 we can not benefit from the alignment. */
25086 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25089 if (AGGREGATE_TYPE_P (type
)
25090 && (va_list_type_node
== NULL_TREE
25091 || (TYPE_MAIN_VARIANT (type
)
25092 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25093 && TYPE_SIZE (type
)
25094 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25095 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25096 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25099 if (TREE_CODE (type
) == ARRAY_TYPE
)
25101 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25103 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25106 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25108 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25110 if ((TYPE_MODE (type
) == XCmode
25111 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25114 else if ((TREE_CODE (type
) == RECORD_TYPE
25115 || TREE_CODE (type
) == UNION_TYPE
25116 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25117 && TYPE_FIELDS (type
))
25119 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25121 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25124 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25125 || TREE_CODE (type
) == INTEGER_TYPE
)
25128 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25130 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25136 /* Compute the minimum required alignment for dynamic stack realignment
25137 purposes for a local variable, parameter or a stack slot. EXP is
25138 the data type or decl itself, MODE is its mode and ALIGN is the
25139 alignment that the object would ordinarily have. */
25142 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25143 unsigned int align
)
25147 if (exp
&& DECL_P (exp
))
25149 type
= TREE_TYPE (exp
);
25158 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25161 /* Don't do dynamic stack realignment for long long objects with
25162 -mpreferred-stack-boundary=2. */
25163 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25164 && (!type
|| !TYPE_USER_ALIGN (type
))
25165 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25171 /* Find a location for the static chain incoming to a nested function.
25172 This is a register, unless all free registers are used by arguments. */
25175 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25179 if (!DECL_STATIC_CHAIN (fndecl
))
25184 /* We always use R10 in 64-bit mode. */
25192 /* By default in 32-bit mode we use ECX to pass the static chain. */
25195 fntype
= TREE_TYPE (fndecl
);
25196 ccvt
= ix86_get_callcvt (fntype
);
25197 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
25199 /* Fastcall functions use ecx/edx for arguments, which leaves
25200 us with EAX for the static chain.
25201 Thiscall functions use ecx for arguments, which also
25202 leaves us with EAX for the static chain. */
25205 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
25207 /* Thiscall functions use ecx for arguments, which leaves
25208 us with EAX and EDX for the static chain.
25209 We are using for abi-compatibility EAX. */
25212 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25214 /* For regparm 3, we have no free call-clobbered registers in
25215 which to store the static chain. In order to implement this,
25216 we have the trampoline push the static chain to the stack.
25217 However, we can't push a value below the return address when
25218 we call the nested function directly, so we have to use an
25219 alternate entry point. For this we use ESI, and have the
25220 alternate entry point push ESI, so that things appear the
25221 same once we're executing the nested function. */
25224 if (fndecl
== current_function_decl
)
25225 ix86_static_chain_on_stack
= true;
25226 return gen_frame_mem (SImode
,
25227 plus_constant (Pmode
,
25228 arg_pointer_rtx
, -8));
25234 return gen_rtx_REG (Pmode
, regno
);
25237 /* Emit RTL insns to initialize the variable parts of a trampoline.
25238 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25239 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25240 to be passed to the target function. */
25243 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25249 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25255 /* Load the function address to r11. Try to load address using
25256 the shorter movl instead of movabs. We may want to support
25257 movq for kernel mode, but kernel does not use trampolines at
25258 the moment. FNADDR is a 32bit address and may not be in
25259 DImode when ptr_mode == SImode. Always use movl in this
25261 if (ptr_mode
== SImode
25262 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25264 fnaddr
= copy_addr_to_reg (fnaddr
);
25266 mem
= adjust_address (m_tramp
, HImode
, offset
);
25267 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25269 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25270 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25275 mem
= adjust_address (m_tramp
, HImode
, offset
);
25276 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25278 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25279 emit_move_insn (mem
, fnaddr
);
25283 /* Load static chain using movabs to r10. Use the shorter movl
25284 instead of movabs when ptr_mode == SImode. */
25285 if (ptr_mode
== SImode
)
25296 mem
= adjust_address (m_tramp
, HImode
, offset
);
25297 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25299 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25300 emit_move_insn (mem
, chain_value
);
25303 /* Jump to r11; the last (unused) byte is a nop, only there to
25304 pad the write out to a single 32-bit store. */
25305 mem
= adjust_address (m_tramp
, SImode
, offset
);
25306 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25313 /* Depending on the static chain location, either load a register
25314 with a constant, or push the constant to the stack. All of the
25315 instructions are the same size. */
25316 chain
= ix86_static_chain (fndecl
, true);
25319 switch (REGNO (chain
))
25322 opcode
= 0xb8; break;
25324 opcode
= 0xb9; break;
25326 gcc_unreachable ();
25332 mem
= adjust_address (m_tramp
, QImode
, offset
);
25333 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25335 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25336 emit_move_insn (mem
, chain_value
);
25339 mem
= adjust_address (m_tramp
, QImode
, offset
);
25340 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25342 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25344 /* Compute offset from the end of the jmp to the target function.
25345 In the case in which the trampoline stores the static chain on
25346 the stack, we need to skip the first insn which pushes the
25347 (call-saved) register static chain; this push is 1 byte. */
25349 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25350 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25351 offset
- (MEM_P (chain
) ? 1 : 0)),
25352 NULL_RTX
, 1, OPTAB_DIRECT
);
25353 emit_move_insn (mem
, disp
);
25356 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25358 #ifdef HAVE_ENABLE_EXECUTE_STACK
25359 #ifdef CHECK_EXECUTE_STACK_ENABLED
25360 if (CHECK_EXECUTE_STACK_ENABLED
)
25362 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25363 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25367 /* The following file contains several enumerations and data structures
25368 built from the definitions in i386-builtin-types.def. */
25370 #include "i386-builtin-types.inc"
25372 /* Table for the ix86 builtin non-function types. */
25373 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25375 /* Retrieve an element from the above table, building some of
25376 the types lazily. */
25379 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25381 unsigned int index
;
25384 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
25386 type
= ix86_builtin_type_tab
[(int) tcode
];
25390 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25391 if (tcode
<= IX86_BT_LAST_VECT
)
25393 enum machine_mode mode
;
25395 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25396 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25397 mode
= ix86_builtin_type_vect_mode
[index
];
25399 type
= build_vector_type_for_mode (itype
, mode
);
25405 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25406 if (tcode
<= IX86_BT_LAST_PTR
)
25407 quals
= TYPE_UNQUALIFIED
;
25409 quals
= TYPE_QUAL_CONST
;
25411 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25412 if (quals
!= TYPE_UNQUALIFIED
)
25413 itype
= build_qualified_type (itype
, quals
);
25415 type
= build_pointer_type (itype
);
25418 ix86_builtin_type_tab
[(int) tcode
] = type
;
25422 /* Table for the ix86 builtin function types. */
25423 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25425 /* Retrieve an element from the above table, building some of
25426 the types lazily. */
25429 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
25433 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
25435 type
= ix86_builtin_func_type_tab
[(int) tcode
];
25439 if (tcode
<= IX86_BT_LAST_FUNC
)
25441 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25442 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
25443 tree rtype
, atype
, args
= void_list_node
;
25446 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25447 for (i
= after
- 1; i
> start
; --i
)
25449 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25450 args
= tree_cons (NULL
, atype
, args
);
25453 type
= build_function_type (rtype
, args
);
25457 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25458 enum ix86_builtin_func_type icode
;
25460 icode
= ix86_builtin_func_alias_base
[index
];
25461 type
= ix86_get_builtin_func_type (icode
);
25464 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25469 /* Codes for all the SSE/MMX builtins. */
25472 IX86_BUILTIN_ADDPS
,
25473 IX86_BUILTIN_ADDSS
,
25474 IX86_BUILTIN_DIVPS
,
25475 IX86_BUILTIN_DIVSS
,
25476 IX86_BUILTIN_MULPS
,
25477 IX86_BUILTIN_MULSS
,
25478 IX86_BUILTIN_SUBPS
,
25479 IX86_BUILTIN_SUBSS
,
25481 IX86_BUILTIN_CMPEQPS
,
25482 IX86_BUILTIN_CMPLTPS
,
25483 IX86_BUILTIN_CMPLEPS
,
25484 IX86_BUILTIN_CMPGTPS
,
25485 IX86_BUILTIN_CMPGEPS
,
25486 IX86_BUILTIN_CMPNEQPS
,
25487 IX86_BUILTIN_CMPNLTPS
,
25488 IX86_BUILTIN_CMPNLEPS
,
25489 IX86_BUILTIN_CMPNGTPS
,
25490 IX86_BUILTIN_CMPNGEPS
,
25491 IX86_BUILTIN_CMPORDPS
,
25492 IX86_BUILTIN_CMPUNORDPS
,
25493 IX86_BUILTIN_CMPEQSS
,
25494 IX86_BUILTIN_CMPLTSS
,
25495 IX86_BUILTIN_CMPLESS
,
25496 IX86_BUILTIN_CMPNEQSS
,
25497 IX86_BUILTIN_CMPNLTSS
,
25498 IX86_BUILTIN_CMPNLESS
,
25499 IX86_BUILTIN_CMPNGTSS
,
25500 IX86_BUILTIN_CMPNGESS
,
25501 IX86_BUILTIN_CMPORDSS
,
25502 IX86_BUILTIN_CMPUNORDSS
,
25504 IX86_BUILTIN_COMIEQSS
,
25505 IX86_BUILTIN_COMILTSS
,
25506 IX86_BUILTIN_COMILESS
,
25507 IX86_BUILTIN_COMIGTSS
,
25508 IX86_BUILTIN_COMIGESS
,
25509 IX86_BUILTIN_COMINEQSS
,
25510 IX86_BUILTIN_UCOMIEQSS
,
25511 IX86_BUILTIN_UCOMILTSS
,
25512 IX86_BUILTIN_UCOMILESS
,
25513 IX86_BUILTIN_UCOMIGTSS
,
25514 IX86_BUILTIN_UCOMIGESS
,
25515 IX86_BUILTIN_UCOMINEQSS
,
25517 IX86_BUILTIN_CVTPI2PS
,
25518 IX86_BUILTIN_CVTPS2PI
,
25519 IX86_BUILTIN_CVTSI2SS
,
25520 IX86_BUILTIN_CVTSI642SS
,
25521 IX86_BUILTIN_CVTSS2SI
,
25522 IX86_BUILTIN_CVTSS2SI64
,
25523 IX86_BUILTIN_CVTTPS2PI
,
25524 IX86_BUILTIN_CVTTSS2SI
,
25525 IX86_BUILTIN_CVTTSS2SI64
,
25527 IX86_BUILTIN_MAXPS
,
25528 IX86_BUILTIN_MAXSS
,
25529 IX86_BUILTIN_MINPS
,
25530 IX86_BUILTIN_MINSS
,
25532 IX86_BUILTIN_LOADUPS
,
25533 IX86_BUILTIN_STOREUPS
,
25534 IX86_BUILTIN_MOVSS
,
25536 IX86_BUILTIN_MOVHLPS
,
25537 IX86_BUILTIN_MOVLHPS
,
25538 IX86_BUILTIN_LOADHPS
,
25539 IX86_BUILTIN_LOADLPS
,
25540 IX86_BUILTIN_STOREHPS
,
25541 IX86_BUILTIN_STORELPS
,
25543 IX86_BUILTIN_MASKMOVQ
,
25544 IX86_BUILTIN_MOVMSKPS
,
25545 IX86_BUILTIN_PMOVMSKB
,
25547 IX86_BUILTIN_MOVNTPS
,
25548 IX86_BUILTIN_MOVNTQ
,
25550 IX86_BUILTIN_LOADDQU
,
25551 IX86_BUILTIN_STOREDQU
,
25553 IX86_BUILTIN_PACKSSWB
,
25554 IX86_BUILTIN_PACKSSDW
,
25555 IX86_BUILTIN_PACKUSWB
,
25557 IX86_BUILTIN_PADDB
,
25558 IX86_BUILTIN_PADDW
,
25559 IX86_BUILTIN_PADDD
,
25560 IX86_BUILTIN_PADDQ
,
25561 IX86_BUILTIN_PADDSB
,
25562 IX86_BUILTIN_PADDSW
,
25563 IX86_BUILTIN_PADDUSB
,
25564 IX86_BUILTIN_PADDUSW
,
25565 IX86_BUILTIN_PSUBB
,
25566 IX86_BUILTIN_PSUBW
,
25567 IX86_BUILTIN_PSUBD
,
25568 IX86_BUILTIN_PSUBQ
,
25569 IX86_BUILTIN_PSUBSB
,
25570 IX86_BUILTIN_PSUBSW
,
25571 IX86_BUILTIN_PSUBUSB
,
25572 IX86_BUILTIN_PSUBUSW
,
25575 IX86_BUILTIN_PANDN
,
25579 IX86_BUILTIN_PAVGB
,
25580 IX86_BUILTIN_PAVGW
,
25582 IX86_BUILTIN_PCMPEQB
,
25583 IX86_BUILTIN_PCMPEQW
,
25584 IX86_BUILTIN_PCMPEQD
,
25585 IX86_BUILTIN_PCMPGTB
,
25586 IX86_BUILTIN_PCMPGTW
,
25587 IX86_BUILTIN_PCMPGTD
,
25589 IX86_BUILTIN_PMADDWD
,
25591 IX86_BUILTIN_PMAXSW
,
25592 IX86_BUILTIN_PMAXUB
,
25593 IX86_BUILTIN_PMINSW
,
25594 IX86_BUILTIN_PMINUB
,
25596 IX86_BUILTIN_PMULHUW
,
25597 IX86_BUILTIN_PMULHW
,
25598 IX86_BUILTIN_PMULLW
,
25600 IX86_BUILTIN_PSADBW
,
25601 IX86_BUILTIN_PSHUFW
,
25603 IX86_BUILTIN_PSLLW
,
25604 IX86_BUILTIN_PSLLD
,
25605 IX86_BUILTIN_PSLLQ
,
25606 IX86_BUILTIN_PSRAW
,
25607 IX86_BUILTIN_PSRAD
,
25608 IX86_BUILTIN_PSRLW
,
25609 IX86_BUILTIN_PSRLD
,
25610 IX86_BUILTIN_PSRLQ
,
25611 IX86_BUILTIN_PSLLWI
,
25612 IX86_BUILTIN_PSLLDI
,
25613 IX86_BUILTIN_PSLLQI
,
25614 IX86_BUILTIN_PSRAWI
,
25615 IX86_BUILTIN_PSRADI
,
25616 IX86_BUILTIN_PSRLWI
,
25617 IX86_BUILTIN_PSRLDI
,
25618 IX86_BUILTIN_PSRLQI
,
25620 IX86_BUILTIN_PUNPCKHBW
,
25621 IX86_BUILTIN_PUNPCKHWD
,
25622 IX86_BUILTIN_PUNPCKHDQ
,
25623 IX86_BUILTIN_PUNPCKLBW
,
25624 IX86_BUILTIN_PUNPCKLWD
,
25625 IX86_BUILTIN_PUNPCKLDQ
,
25627 IX86_BUILTIN_SHUFPS
,
25629 IX86_BUILTIN_RCPPS
,
25630 IX86_BUILTIN_RCPSS
,
25631 IX86_BUILTIN_RSQRTPS
,
25632 IX86_BUILTIN_RSQRTPS_NR
,
25633 IX86_BUILTIN_RSQRTSS
,
25634 IX86_BUILTIN_RSQRTF
,
25635 IX86_BUILTIN_SQRTPS
,
25636 IX86_BUILTIN_SQRTPS_NR
,
25637 IX86_BUILTIN_SQRTSS
,
25639 IX86_BUILTIN_UNPCKHPS
,
25640 IX86_BUILTIN_UNPCKLPS
,
25642 IX86_BUILTIN_ANDPS
,
25643 IX86_BUILTIN_ANDNPS
,
25645 IX86_BUILTIN_XORPS
,
25648 IX86_BUILTIN_LDMXCSR
,
25649 IX86_BUILTIN_STMXCSR
,
25650 IX86_BUILTIN_SFENCE
,
25652 IX86_BUILTIN_FXSAVE
,
25653 IX86_BUILTIN_FXRSTOR
,
25654 IX86_BUILTIN_FXSAVE64
,
25655 IX86_BUILTIN_FXRSTOR64
,
25657 IX86_BUILTIN_XSAVE
,
25658 IX86_BUILTIN_XRSTOR
,
25659 IX86_BUILTIN_XSAVE64
,
25660 IX86_BUILTIN_XRSTOR64
,
25662 IX86_BUILTIN_XSAVEOPT
,
25663 IX86_BUILTIN_XSAVEOPT64
,
25665 /* 3DNow! Original */
25666 IX86_BUILTIN_FEMMS
,
25667 IX86_BUILTIN_PAVGUSB
,
25668 IX86_BUILTIN_PF2ID
,
25669 IX86_BUILTIN_PFACC
,
25670 IX86_BUILTIN_PFADD
,
25671 IX86_BUILTIN_PFCMPEQ
,
25672 IX86_BUILTIN_PFCMPGE
,
25673 IX86_BUILTIN_PFCMPGT
,
25674 IX86_BUILTIN_PFMAX
,
25675 IX86_BUILTIN_PFMIN
,
25676 IX86_BUILTIN_PFMUL
,
25677 IX86_BUILTIN_PFRCP
,
25678 IX86_BUILTIN_PFRCPIT1
,
25679 IX86_BUILTIN_PFRCPIT2
,
25680 IX86_BUILTIN_PFRSQIT1
,
25681 IX86_BUILTIN_PFRSQRT
,
25682 IX86_BUILTIN_PFSUB
,
25683 IX86_BUILTIN_PFSUBR
,
25684 IX86_BUILTIN_PI2FD
,
25685 IX86_BUILTIN_PMULHRW
,
25687 /* 3DNow! Athlon Extensions */
25688 IX86_BUILTIN_PF2IW
,
25689 IX86_BUILTIN_PFNACC
,
25690 IX86_BUILTIN_PFPNACC
,
25691 IX86_BUILTIN_PI2FW
,
25692 IX86_BUILTIN_PSWAPDSI
,
25693 IX86_BUILTIN_PSWAPDSF
,
25696 IX86_BUILTIN_ADDPD
,
25697 IX86_BUILTIN_ADDSD
,
25698 IX86_BUILTIN_DIVPD
,
25699 IX86_BUILTIN_DIVSD
,
25700 IX86_BUILTIN_MULPD
,
25701 IX86_BUILTIN_MULSD
,
25702 IX86_BUILTIN_SUBPD
,
25703 IX86_BUILTIN_SUBSD
,
25705 IX86_BUILTIN_CMPEQPD
,
25706 IX86_BUILTIN_CMPLTPD
,
25707 IX86_BUILTIN_CMPLEPD
,
25708 IX86_BUILTIN_CMPGTPD
,
25709 IX86_BUILTIN_CMPGEPD
,
25710 IX86_BUILTIN_CMPNEQPD
,
25711 IX86_BUILTIN_CMPNLTPD
,
25712 IX86_BUILTIN_CMPNLEPD
,
25713 IX86_BUILTIN_CMPNGTPD
,
25714 IX86_BUILTIN_CMPNGEPD
,
25715 IX86_BUILTIN_CMPORDPD
,
25716 IX86_BUILTIN_CMPUNORDPD
,
25717 IX86_BUILTIN_CMPEQSD
,
25718 IX86_BUILTIN_CMPLTSD
,
25719 IX86_BUILTIN_CMPLESD
,
25720 IX86_BUILTIN_CMPNEQSD
,
25721 IX86_BUILTIN_CMPNLTSD
,
25722 IX86_BUILTIN_CMPNLESD
,
25723 IX86_BUILTIN_CMPORDSD
,
25724 IX86_BUILTIN_CMPUNORDSD
,
25726 IX86_BUILTIN_COMIEQSD
,
25727 IX86_BUILTIN_COMILTSD
,
25728 IX86_BUILTIN_COMILESD
,
25729 IX86_BUILTIN_COMIGTSD
,
25730 IX86_BUILTIN_COMIGESD
,
25731 IX86_BUILTIN_COMINEQSD
,
25732 IX86_BUILTIN_UCOMIEQSD
,
25733 IX86_BUILTIN_UCOMILTSD
,
25734 IX86_BUILTIN_UCOMILESD
,
25735 IX86_BUILTIN_UCOMIGTSD
,
25736 IX86_BUILTIN_UCOMIGESD
,
25737 IX86_BUILTIN_UCOMINEQSD
,
25739 IX86_BUILTIN_MAXPD
,
25740 IX86_BUILTIN_MAXSD
,
25741 IX86_BUILTIN_MINPD
,
25742 IX86_BUILTIN_MINSD
,
25744 IX86_BUILTIN_ANDPD
,
25745 IX86_BUILTIN_ANDNPD
,
25747 IX86_BUILTIN_XORPD
,
25749 IX86_BUILTIN_SQRTPD
,
25750 IX86_BUILTIN_SQRTSD
,
25752 IX86_BUILTIN_UNPCKHPD
,
25753 IX86_BUILTIN_UNPCKLPD
,
25755 IX86_BUILTIN_SHUFPD
,
25757 IX86_BUILTIN_LOADUPD
,
25758 IX86_BUILTIN_STOREUPD
,
25759 IX86_BUILTIN_MOVSD
,
25761 IX86_BUILTIN_LOADHPD
,
25762 IX86_BUILTIN_LOADLPD
,
25764 IX86_BUILTIN_CVTDQ2PD
,
25765 IX86_BUILTIN_CVTDQ2PS
,
25767 IX86_BUILTIN_CVTPD2DQ
,
25768 IX86_BUILTIN_CVTPD2PI
,
25769 IX86_BUILTIN_CVTPD2PS
,
25770 IX86_BUILTIN_CVTTPD2DQ
,
25771 IX86_BUILTIN_CVTTPD2PI
,
25773 IX86_BUILTIN_CVTPI2PD
,
25774 IX86_BUILTIN_CVTSI2SD
,
25775 IX86_BUILTIN_CVTSI642SD
,
25777 IX86_BUILTIN_CVTSD2SI
,
25778 IX86_BUILTIN_CVTSD2SI64
,
25779 IX86_BUILTIN_CVTSD2SS
,
25780 IX86_BUILTIN_CVTSS2SD
,
25781 IX86_BUILTIN_CVTTSD2SI
,
25782 IX86_BUILTIN_CVTTSD2SI64
,
25784 IX86_BUILTIN_CVTPS2DQ
,
25785 IX86_BUILTIN_CVTPS2PD
,
25786 IX86_BUILTIN_CVTTPS2DQ
,
25788 IX86_BUILTIN_MOVNTI
,
25789 IX86_BUILTIN_MOVNTI64
,
25790 IX86_BUILTIN_MOVNTPD
,
25791 IX86_BUILTIN_MOVNTDQ
,
25793 IX86_BUILTIN_MOVQ128
,
25796 IX86_BUILTIN_MASKMOVDQU
,
25797 IX86_BUILTIN_MOVMSKPD
,
25798 IX86_BUILTIN_PMOVMSKB128
,
25800 IX86_BUILTIN_PACKSSWB128
,
25801 IX86_BUILTIN_PACKSSDW128
,
25802 IX86_BUILTIN_PACKUSWB128
,
25804 IX86_BUILTIN_PADDB128
,
25805 IX86_BUILTIN_PADDW128
,
25806 IX86_BUILTIN_PADDD128
,
25807 IX86_BUILTIN_PADDQ128
,
25808 IX86_BUILTIN_PADDSB128
,
25809 IX86_BUILTIN_PADDSW128
,
25810 IX86_BUILTIN_PADDUSB128
,
25811 IX86_BUILTIN_PADDUSW128
,
25812 IX86_BUILTIN_PSUBB128
,
25813 IX86_BUILTIN_PSUBW128
,
25814 IX86_BUILTIN_PSUBD128
,
25815 IX86_BUILTIN_PSUBQ128
,
25816 IX86_BUILTIN_PSUBSB128
,
25817 IX86_BUILTIN_PSUBSW128
,
25818 IX86_BUILTIN_PSUBUSB128
,
25819 IX86_BUILTIN_PSUBUSW128
,
25821 IX86_BUILTIN_PAND128
,
25822 IX86_BUILTIN_PANDN128
,
25823 IX86_BUILTIN_POR128
,
25824 IX86_BUILTIN_PXOR128
,
25826 IX86_BUILTIN_PAVGB128
,
25827 IX86_BUILTIN_PAVGW128
,
25829 IX86_BUILTIN_PCMPEQB128
,
25830 IX86_BUILTIN_PCMPEQW128
,
25831 IX86_BUILTIN_PCMPEQD128
,
25832 IX86_BUILTIN_PCMPGTB128
,
25833 IX86_BUILTIN_PCMPGTW128
,
25834 IX86_BUILTIN_PCMPGTD128
,
25836 IX86_BUILTIN_PMADDWD128
,
25838 IX86_BUILTIN_PMAXSW128
,
25839 IX86_BUILTIN_PMAXUB128
,
25840 IX86_BUILTIN_PMINSW128
,
25841 IX86_BUILTIN_PMINUB128
,
25843 IX86_BUILTIN_PMULUDQ
,
25844 IX86_BUILTIN_PMULUDQ128
,
25845 IX86_BUILTIN_PMULHUW128
,
25846 IX86_BUILTIN_PMULHW128
,
25847 IX86_BUILTIN_PMULLW128
,
25849 IX86_BUILTIN_PSADBW128
,
25850 IX86_BUILTIN_PSHUFHW
,
25851 IX86_BUILTIN_PSHUFLW
,
25852 IX86_BUILTIN_PSHUFD
,
25854 IX86_BUILTIN_PSLLDQI128
,
25855 IX86_BUILTIN_PSLLWI128
,
25856 IX86_BUILTIN_PSLLDI128
,
25857 IX86_BUILTIN_PSLLQI128
,
25858 IX86_BUILTIN_PSRAWI128
,
25859 IX86_BUILTIN_PSRADI128
,
25860 IX86_BUILTIN_PSRLDQI128
,
25861 IX86_BUILTIN_PSRLWI128
,
25862 IX86_BUILTIN_PSRLDI128
,
25863 IX86_BUILTIN_PSRLQI128
,
25865 IX86_BUILTIN_PSLLDQ128
,
25866 IX86_BUILTIN_PSLLW128
,
25867 IX86_BUILTIN_PSLLD128
,
25868 IX86_BUILTIN_PSLLQ128
,
25869 IX86_BUILTIN_PSRAW128
,
25870 IX86_BUILTIN_PSRAD128
,
25871 IX86_BUILTIN_PSRLW128
,
25872 IX86_BUILTIN_PSRLD128
,
25873 IX86_BUILTIN_PSRLQ128
,
25875 IX86_BUILTIN_PUNPCKHBW128
,
25876 IX86_BUILTIN_PUNPCKHWD128
,
25877 IX86_BUILTIN_PUNPCKHDQ128
,
25878 IX86_BUILTIN_PUNPCKHQDQ128
,
25879 IX86_BUILTIN_PUNPCKLBW128
,
25880 IX86_BUILTIN_PUNPCKLWD128
,
25881 IX86_BUILTIN_PUNPCKLDQ128
,
25882 IX86_BUILTIN_PUNPCKLQDQ128
,
25884 IX86_BUILTIN_CLFLUSH
,
25885 IX86_BUILTIN_MFENCE
,
25886 IX86_BUILTIN_LFENCE
,
25887 IX86_BUILTIN_PAUSE
,
25889 IX86_BUILTIN_BSRSI
,
25890 IX86_BUILTIN_BSRDI
,
25891 IX86_BUILTIN_RDPMC
,
25892 IX86_BUILTIN_RDTSC
,
25893 IX86_BUILTIN_RDTSCP
,
25894 IX86_BUILTIN_ROLQI
,
25895 IX86_BUILTIN_ROLHI
,
25896 IX86_BUILTIN_RORQI
,
25897 IX86_BUILTIN_RORHI
,
25900 IX86_BUILTIN_ADDSUBPS
,
25901 IX86_BUILTIN_HADDPS
,
25902 IX86_BUILTIN_HSUBPS
,
25903 IX86_BUILTIN_MOVSHDUP
,
25904 IX86_BUILTIN_MOVSLDUP
,
25905 IX86_BUILTIN_ADDSUBPD
,
25906 IX86_BUILTIN_HADDPD
,
25907 IX86_BUILTIN_HSUBPD
,
25908 IX86_BUILTIN_LDDQU
,
25910 IX86_BUILTIN_MONITOR
,
25911 IX86_BUILTIN_MWAIT
,
25914 IX86_BUILTIN_PHADDW
,
25915 IX86_BUILTIN_PHADDD
,
25916 IX86_BUILTIN_PHADDSW
,
25917 IX86_BUILTIN_PHSUBW
,
25918 IX86_BUILTIN_PHSUBD
,
25919 IX86_BUILTIN_PHSUBSW
,
25920 IX86_BUILTIN_PMADDUBSW
,
25921 IX86_BUILTIN_PMULHRSW
,
25922 IX86_BUILTIN_PSHUFB
,
25923 IX86_BUILTIN_PSIGNB
,
25924 IX86_BUILTIN_PSIGNW
,
25925 IX86_BUILTIN_PSIGND
,
25926 IX86_BUILTIN_PALIGNR
,
25927 IX86_BUILTIN_PABSB
,
25928 IX86_BUILTIN_PABSW
,
25929 IX86_BUILTIN_PABSD
,
25931 IX86_BUILTIN_PHADDW128
,
25932 IX86_BUILTIN_PHADDD128
,
25933 IX86_BUILTIN_PHADDSW128
,
25934 IX86_BUILTIN_PHSUBW128
,
25935 IX86_BUILTIN_PHSUBD128
,
25936 IX86_BUILTIN_PHSUBSW128
,
25937 IX86_BUILTIN_PMADDUBSW128
,
25938 IX86_BUILTIN_PMULHRSW128
,
25939 IX86_BUILTIN_PSHUFB128
,
25940 IX86_BUILTIN_PSIGNB128
,
25941 IX86_BUILTIN_PSIGNW128
,
25942 IX86_BUILTIN_PSIGND128
,
25943 IX86_BUILTIN_PALIGNR128
,
25944 IX86_BUILTIN_PABSB128
,
25945 IX86_BUILTIN_PABSW128
,
25946 IX86_BUILTIN_PABSD128
,
25948 /* AMDFAM10 - SSE4A New Instructions. */
25949 IX86_BUILTIN_MOVNTSD
,
25950 IX86_BUILTIN_MOVNTSS
,
25951 IX86_BUILTIN_EXTRQI
,
25952 IX86_BUILTIN_EXTRQ
,
25953 IX86_BUILTIN_INSERTQI
,
25954 IX86_BUILTIN_INSERTQ
,
25957 IX86_BUILTIN_BLENDPD
,
25958 IX86_BUILTIN_BLENDPS
,
25959 IX86_BUILTIN_BLENDVPD
,
25960 IX86_BUILTIN_BLENDVPS
,
25961 IX86_BUILTIN_PBLENDVB128
,
25962 IX86_BUILTIN_PBLENDW128
,
25967 IX86_BUILTIN_INSERTPS128
,
25969 IX86_BUILTIN_MOVNTDQA
,
25970 IX86_BUILTIN_MPSADBW128
,
25971 IX86_BUILTIN_PACKUSDW128
,
25972 IX86_BUILTIN_PCMPEQQ
,
25973 IX86_BUILTIN_PHMINPOSUW128
,
25975 IX86_BUILTIN_PMAXSB128
,
25976 IX86_BUILTIN_PMAXSD128
,
25977 IX86_BUILTIN_PMAXUD128
,
25978 IX86_BUILTIN_PMAXUW128
,
25980 IX86_BUILTIN_PMINSB128
,
25981 IX86_BUILTIN_PMINSD128
,
25982 IX86_BUILTIN_PMINUD128
,
25983 IX86_BUILTIN_PMINUW128
,
25985 IX86_BUILTIN_PMOVSXBW128
,
25986 IX86_BUILTIN_PMOVSXBD128
,
25987 IX86_BUILTIN_PMOVSXBQ128
,
25988 IX86_BUILTIN_PMOVSXWD128
,
25989 IX86_BUILTIN_PMOVSXWQ128
,
25990 IX86_BUILTIN_PMOVSXDQ128
,
25992 IX86_BUILTIN_PMOVZXBW128
,
25993 IX86_BUILTIN_PMOVZXBD128
,
25994 IX86_BUILTIN_PMOVZXBQ128
,
25995 IX86_BUILTIN_PMOVZXWD128
,
25996 IX86_BUILTIN_PMOVZXWQ128
,
25997 IX86_BUILTIN_PMOVZXDQ128
,
25999 IX86_BUILTIN_PMULDQ128
,
26000 IX86_BUILTIN_PMULLD128
,
26002 IX86_BUILTIN_ROUNDSD
,
26003 IX86_BUILTIN_ROUNDSS
,
26005 IX86_BUILTIN_ROUNDPD
,
26006 IX86_BUILTIN_ROUNDPS
,
26008 IX86_BUILTIN_FLOORPD
,
26009 IX86_BUILTIN_CEILPD
,
26010 IX86_BUILTIN_TRUNCPD
,
26011 IX86_BUILTIN_RINTPD
,
26012 IX86_BUILTIN_ROUNDPD_AZ
,
26014 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
26015 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
26016 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
26018 IX86_BUILTIN_FLOORPS
,
26019 IX86_BUILTIN_CEILPS
,
26020 IX86_BUILTIN_TRUNCPS
,
26021 IX86_BUILTIN_RINTPS
,
26022 IX86_BUILTIN_ROUNDPS_AZ
,
26024 IX86_BUILTIN_FLOORPS_SFIX
,
26025 IX86_BUILTIN_CEILPS_SFIX
,
26026 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
26028 IX86_BUILTIN_PTESTZ
,
26029 IX86_BUILTIN_PTESTC
,
26030 IX86_BUILTIN_PTESTNZC
,
26032 IX86_BUILTIN_VEC_INIT_V2SI
,
26033 IX86_BUILTIN_VEC_INIT_V4HI
,
26034 IX86_BUILTIN_VEC_INIT_V8QI
,
26035 IX86_BUILTIN_VEC_EXT_V2DF
,
26036 IX86_BUILTIN_VEC_EXT_V2DI
,
26037 IX86_BUILTIN_VEC_EXT_V4SF
,
26038 IX86_BUILTIN_VEC_EXT_V4SI
,
26039 IX86_BUILTIN_VEC_EXT_V8HI
,
26040 IX86_BUILTIN_VEC_EXT_V2SI
,
26041 IX86_BUILTIN_VEC_EXT_V4HI
,
26042 IX86_BUILTIN_VEC_EXT_V16QI
,
26043 IX86_BUILTIN_VEC_SET_V2DI
,
26044 IX86_BUILTIN_VEC_SET_V4SF
,
26045 IX86_BUILTIN_VEC_SET_V4SI
,
26046 IX86_BUILTIN_VEC_SET_V8HI
,
26047 IX86_BUILTIN_VEC_SET_V4HI
,
26048 IX86_BUILTIN_VEC_SET_V16QI
,
26050 IX86_BUILTIN_VEC_PACK_SFIX
,
26051 IX86_BUILTIN_VEC_PACK_SFIX256
,
26054 IX86_BUILTIN_CRC32QI
,
26055 IX86_BUILTIN_CRC32HI
,
26056 IX86_BUILTIN_CRC32SI
,
26057 IX86_BUILTIN_CRC32DI
,
26059 IX86_BUILTIN_PCMPESTRI128
,
26060 IX86_BUILTIN_PCMPESTRM128
,
26061 IX86_BUILTIN_PCMPESTRA128
,
26062 IX86_BUILTIN_PCMPESTRC128
,
26063 IX86_BUILTIN_PCMPESTRO128
,
26064 IX86_BUILTIN_PCMPESTRS128
,
26065 IX86_BUILTIN_PCMPESTRZ128
,
26066 IX86_BUILTIN_PCMPISTRI128
,
26067 IX86_BUILTIN_PCMPISTRM128
,
26068 IX86_BUILTIN_PCMPISTRA128
,
26069 IX86_BUILTIN_PCMPISTRC128
,
26070 IX86_BUILTIN_PCMPISTRO128
,
26071 IX86_BUILTIN_PCMPISTRS128
,
26072 IX86_BUILTIN_PCMPISTRZ128
,
26074 IX86_BUILTIN_PCMPGTQ
,
26076 /* AES instructions */
26077 IX86_BUILTIN_AESENC128
,
26078 IX86_BUILTIN_AESENCLAST128
,
26079 IX86_BUILTIN_AESDEC128
,
26080 IX86_BUILTIN_AESDECLAST128
,
26081 IX86_BUILTIN_AESIMC128
,
26082 IX86_BUILTIN_AESKEYGENASSIST128
,
26084 /* PCLMUL instruction */
26085 IX86_BUILTIN_PCLMULQDQ128
,
26088 IX86_BUILTIN_ADDPD256
,
26089 IX86_BUILTIN_ADDPS256
,
26090 IX86_BUILTIN_ADDSUBPD256
,
26091 IX86_BUILTIN_ADDSUBPS256
,
26092 IX86_BUILTIN_ANDPD256
,
26093 IX86_BUILTIN_ANDPS256
,
26094 IX86_BUILTIN_ANDNPD256
,
26095 IX86_BUILTIN_ANDNPS256
,
26096 IX86_BUILTIN_BLENDPD256
,
26097 IX86_BUILTIN_BLENDPS256
,
26098 IX86_BUILTIN_BLENDVPD256
,
26099 IX86_BUILTIN_BLENDVPS256
,
26100 IX86_BUILTIN_DIVPD256
,
26101 IX86_BUILTIN_DIVPS256
,
26102 IX86_BUILTIN_DPPS256
,
26103 IX86_BUILTIN_HADDPD256
,
26104 IX86_BUILTIN_HADDPS256
,
26105 IX86_BUILTIN_HSUBPD256
,
26106 IX86_BUILTIN_HSUBPS256
,
26107 IX86_BUILTIN_MAXPD256
,
26108 IX86_BUILTIN_MAXPS256
,
26109 IX86_BUILTIN_MINPD256
,
26110 IX86_BUILTIN_MINPS256
,
26111 IX86_BUILTIN_MULPD256
,
26112 IX86_BUILTIN_MULPS256
,
26113 IX86_BUILTIN_ORPD256
,
26114 IX86_BUILTIN_ORPS256
,
26115 IX86_BUILTIN_SHUFPD256
,
26116 IX86_BUILTIN_SHUFPS256
,
26117 IX86_BUILTIN_SUBPD256
,
26118 IX86_BUILTIN_SUBPS256
,
26119 IX86_BUILTIN_XORPD256
,
26120 IX86_BUILTIN_XORPS256
,
26121 IX86_BUILTIN_CMPSD
,
26122 IX86_BUILTIN_CMPSS
,
26123 IX86_BUILTIN_CMPPD
,
26124 IX86_BUILTIN_CMPPS
,
26125 IX86_BUILTIN_CMPPD256
,
26126 IX86_BUILTIN_CMPPS256
,
26127 IX86_BUILTIN_CVTDQ2PD256
,
26128 IX86_BUILTIN_CVTDQ2PS256
,
26129 IX86_BUILTIN_CVTPD2PS256
,
26130 IX86_BUILTIN_CVTPS2DQ256
,
26131 IX86_BUILTIN_CVTPS2PD256
,
26132 IX86_BUILTIN_CVTTPD2DQ256
,
26133 IX86_BUILTIN_CVTPD2DQ256
,
26134 IX86_BUILTIN_CVTTPS2DQ256
,
26135 IX86_BUILTIN_EXTRACTF128PD256
,
26136 IX86_BUILTIN_EXTRACTF128PS256
,
26137 IX86_BUILTIN_EXTRACTF128SI256
,
26138 IX86_BUILTIN_VZEROALL
,
26139 IX86_BUILTIN_VZEROUPPER
,
26140 IX86_BUILTIN_VPERMILVARPD
,
26141 IX86_BUILTIN_VPERMILVARPS
,
26142 IX86_BUILTIN_VPERMILVARPD256
,
26143 IX86_BUILTIN_VPERMILVARPS256
,
26144 IX86_BUILTIN_VPERMILPD
,
26145 IX86_BUILTIN_VPERMILPS
,
26146 IX86_BUILTIN_VPERMILPD256
,
26147 IX86_BUILTIN_VPERMILPS256
,
26148 IX86_BUILTIN_VPERMIL2PD
,
26149 IX86_BUILTIN_VPERMIL2PS
,
26150 IX86_BUILTIN_VPERMIL2PD256
,
26151 IX86_BUILTIN_VPERMIL2PS256
,
26152 IX86_BUILTIN_VPERM2F128PD256
,
26153 IX86_BUILTIN_VPERM2F128PS256
,
26154 IX86_BUILTIN_VPERM2F128SI256
,
26155 IX86_BUILTIN_VBROADCASTSS
,
26156 IX86_BUILTIN_VBROADCASTSD256
,
26157 IX86_BUILTIN_VBROADCASTSS256
,
26158 IX86_BUILTIN_VBROADCASTPD256
,
26159 IX86_BUILTIN_VBROADCASTPS256
,
26160 IX86_BUILTIN_VINSERTF128PD256
,
26161 IX86_BUILTIN_VINSERTF128PS256
,
26162 IX86_BUILTIN_VINSERTF128SI256
,
26163 IX86_BUILTIN_LOADUPD256
,
26164 IX86_BUILTIN_LOADUPS256
,
26165 IX86_BUILTIN_STOREUPD256
,
26166 IX86_BUILTIN_STOREUPS256
,
26167 IX86_BUILTIN_LDDQU256
,
26168 IX86_BUILTIN_MOVNTDQ256
,
26169 IX86_BUILTIN_MOVNTPD256
,
26170 IX86_BUILTIN_MOVNTPS256
,
26171 IX86_BUILTIN_LOADDQU256
,
26172 IX86_BUILTIN_STOREDQU256
,
26173 IX86_BUILTIN_MASKLOADPD
,
26174 IX86_BUILTIN_MASKLOADPS
,
26175 IX86_BUILTIN_MASKSTOREPD
,
26176 IX86_BUILTIN_MASKSTOREPS
,
26177 IX86_BUILTIN_MASKLOADPD256
,
26178 IX86_BUILTIN_MASKLOADPS256
,
26179 IX86_BUILTIN_MASKSTOREPD256
,
26180 IX86_BUILTIN_MASKSTOREPS256
,
26181 IX86_BUILTIN_MOVSHDUP256
,
26182 IX86_BUILTIN_MOVSLDUP256
,
26183 IX86_BUILTIN_MOVDDUP256
,
26185 IX86_BUILTIN_SQRTPD256
,
26186 IX86_BUILTIN_SQRTPS256
,
26187 IX86_BUILTIN_SQRTPS_NR256
,
26188 IX86_BUILTIN_RSQRTPS256
,
26189 IX86_BUILTIN_RSQRTPS_NR256
,
26191 IX86_BUILTIN_RCPPS256
,
26193 IX86_BUILTIN_ROUNDPD256
,
26194 IX86_BUILTIN_ROUNDPS256
,
26196 IX86_BUILTIN_FLOORPD256
,
26197 IX86_BUILTIN_CEILPD256
,
26198 IX86_BUILTIN_TRUNCPD256
,
26199 IX86_BUILTIN_RINTPD256
,
26200 IX86_BUILTIN_ROUNDPD_AZ256
,
26202 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26203 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26204 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26206 IX86_BUILTIN_FLOORPS256
,
26207 IX86_BUILTIN_CEILPS256
,
26208 IX86_BUILTIN_TRUNCPS256
,
26209 IX86_BUILTIN_RINTPS256
,
26210 IX86_BUILTIN_ROUNDPS_AZ256
,
26212 IX86_BUILTIN_FLOORPS_SFIX256
,
26213 IX86_BUILTIN_CEILPS_SFIX256
,
26214 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26216 IX86_BUILTIN_UNPCKHPD256
,
26217 IX86_BUILTIN_UNPCKLPD256
,
26218 IX86_BUILTIN_UNPCKHPS256
,
26219 IX86_BUILTIN_UNPCKLPS256
,
26221 IX86_BUILTIN_SI256_SI
,
26222 IX86_BUILTIN_PS256_PS
,
26223 IX86_BUILTIN_PD256_PD
,
26224 IX86_BUILTIN_SI_SI256
,
26225 IX86_BUILTIN_PS_PS256
,
26226 IX86_BUILTIN_PD_PD256
,
26228 IX86_BUILTIN_VTESTZPD
,
26229 IX86_BUILTIN_VTESTCPD
,
26230 IX86_BUILTIN_VTESTNZCPD
,
26231 IX86_BUILTIN_VTESTZPS
,
26232 IX86_BUILTIN_VTESTCPS
,
26233 IX86_BUILTIN_VTESTNZCPS
,
26234 IX86_BUILTIN_VTESTZPD256
,
26235 IX86_BUILTIN_VTESTCPD256
,
26236 IX86_BUILTIN_VTESTNZCPD256
,
26237 IX86_BUILTIN_VTESTZPS256
,
26238 IX86_BUILTIN_VTESTCPS256
,
26239 IX86_BUILTIN_VTESTNZCPS256
,
26240 IX86_BUILTIN_PTESTZ256
,
26241 IX86_BUILTIN_PTESTC256
,
26242 IX86_BUILTIN_PTESTNZC256
,
26244 IX86_BUILTIN_MOVMSKPD256
,
26245 IX86_BUILTIN_MOVMSKPS256
,
26248 IX86_BUILTIN_MPSADBW256
,
26249 IX86_BUILTIN_PABSB256
,
26250 IX86_BUILTIN_PABSW256
,
26251 IX86_BUILTIN_PABSD256
,
26252 IX86_BUILTIN_PACKSSDW256
,
26253 IX86_BUILTIN_PACKSSWB256
,
26254 IX86_BUILTIN_PACKUSDW256
,
26255 IX86_BUILTIN_PACKUSWB256
,
26256 IX86_BUILTIN_PADDB256
,
26257 IX86_BUILTIN_PADDW256
,
26258 IX86_BUILTIN_PADDD256
,
26259 IX86_BUILTIN_PADDQ256
,
26260 IX86_BUILTIN_PADDSB256
,
26261 IX86_BUILTIN_PADDSW256
,
26262 IX86_BUILTIN_PADDUSB256
,
26263 IX86_BUILTIN_PADDUSW256
,
26264 IX86_BUILTIN_PALIGNR256
,
26265 IX86_BUILTIN_AND256I
,
26266 IX86_BUILTIN_ANDNOT256I
,
26267 IX86_BUILTIN_PAVGB256
,
26268 IX86_BUILTIN_PAVGW256
,
26269 IX86_BUILTIN_PBLENDVB256
,
26270 IX86_BUILTIN_PBLENDVW256
,
26271 IX86_BUILTIN_PCMPEQB256
,
26272 IX86_BUILTIN_PCMPEQW256
,
26273 IX86_BUILTIN_PCMPEQD256
,
26274 IX86_BUILTIN_PCMPEQQ256
,
26275 IX86_BUILTIN_PCMPGTB256
,
26276 IX86_BUILTIN_PCMPGTW256
,
26277 IX86_BUILTIN_PCMPGTD256
,
26278 IX86_BUILTIN_PCMPGTQ256
,
26279 IX86_BUILTIN_PHADDW256
,
26280 IX86_BUILTIN_PHADDD256
,
26281 IX86_BUILTIN_PHADDSW256
,
26282 IX86_BUILTIN_PHSUBW256
,
26283 IX86_BUILTIN_PHSUBD256
,
26284 IX86_BUILTIN_PHSUBSW256
,
26285 IX86_BUILTIN_PMADDUBSW256
,
26286 IX86_BUILTIN_PMADDWD256
,
26287 IX86_BUILTIN_PMAXSB256
,
26288 IX86_BUILTIN_PMAXSW256
,
26289 IX86_BUILTIN_PMAXSD256
,
26290 IX86_BUILTIN_PMAXUB256
,
26291 IX86_BUILTIN_PMAXUW256
,
26292 IX86_BUILTIN_PMAXUD256
,
26293 IX86_BUILTIN_PMINSB256
,
26294 IX86_BUILTIN_PMINSW256
,
26295 IX86_BUILTIN_PMINSD256
,
26296 IX86_BUILTIN_PMINUB256
,
26297 IX86_BUILTIN_PMINUW256
,
26298 IX86_BUILTIN_PMINUD256
,
26299 IX86_BUILTIN_PMOVMSKB256
,
26300 IX86_BUILTIN_PMOVSXBW256
,
26301 IX86_BUILTIN_PMOVSXBD256
,
26302 IX86_BUILTIN_PMOVSXBQ256
,
26303 IX86_BUILTIN_PMOVSXWD256
,
26304 IX86_BUILTIN_PMOVSXWQ256
,
26305 IX86_BUILTIN_PMOVSXDQ256
,
26306 IX86_BUILTIN_PMOVZXBW256
,
26307 IX86_BUILTIN_PMOVZXBD256
,
26308 IX86_BUILTIN_PMOVZXBQ256
,
26309 IX86_BUILTIN_PMOVZXWD256
,
26310 IX86_BUILTIN_PMOVZXWQ256
,
26311 IX86_BUILTIN_PMOVZXDQ256
,
26312 IX86_BUILTIN_PMULDQ256
,
26313 IX86_BUILTIN_PMULHRSW256
,
26314 IX86_BUILTIN_PMULHUW256
,
26315 IX86_BUILTIN_PMULHW256
,
26316 IX86_BUILTIN_PMULLW256
,
26317 IX86_BUILTIN_PMULLD256
,
26318 IX86_BUILTIN_PMULUDQ256
,
26319 IX86_BUILTIN_POR256
,
26320 IX86_BUILTIN_PSADBW256
,
26321 IX86_BUILTIN_PSHUFB256
,
26322 IX86_BUILTIN_PSHUFD256
,
26323 IX86_BUILTIN_PSHUFHW256
,
26324 IX86_BUILTIN_PSHUFLW256
,
26325 IX86_BUILTIN_PSIGNB256
,
26326 IX86_BUILTIN_PSIGNW256
,
26327 IX86_BUILTIN_PSIGND256
,
26328 IX86_BUILTIN_PSLLDQI256
,
26329 IX86_BUILTIN_PSLLWI256
,
26330 IX86_BUILTIN_PSLLW256
,
26331 IX86_BUILTIN_PSLLDI256
,
26332 IX86_BUILTIN_PSLLD256
,
26333 IX86_BUILTIN_PSLLQI256
,
26334 IX86_BUILTIN_PSLLQ256
,
26335 IX86_BUILTIN_PSRAWI256
,
26336 IX86_BUILTIN_PSRAW256
,
26337 IX86_BUILTIN_PSRADI256
,
26338 IX86_BUILTIN_PSRAD256
,
26339 IX86_BUILTIN_PSRLDQI256
,
26340 IX86_BUILTIN_PSRLWI256
,
26341 IX86_BUILTIN_PSRLW256
,
26342 IX86_BUILTIN_PSRLDI256
,
26343 IX86_BUILTIN_PSRLD256
,
26344 IX86_BUILTIN_PSRLQI256
,
26345 IX86_BUILTIN_PSRLQ256
,
26346 IX86_BUILTIN_PSUBB256
,
26347 IX86_BUILTIN_PSUBW256
,
26348 IX86_BUILTIN_PSUBD256
,
26349 IX86_BUILTIN_PSUBQ256
,
26350 IX86_BUILTIN_PSUBSB256
,
26351 IX86_BUILTIN_PSUBSW256
,
26352 IX86_BUILTIN_PSUBUSB256
,
26353 IX86_BUILTIN_PSUBUSW256
,
26354 IX86_BUILTIN_PUNPCKHBW256
,
26355 IX86_BUILTIN_PUNPCKHWD256
,
26356 IX86_BUILTIN_PUNPCKHDQ256
,
26357 IX86_BUILTIN_PUNPCKHQDQ256
,
26358 IX86_BUILTIN_PUNPCKLBW256
,
26359 IX86_BUILTIN_PUNPCKLWD256
,
26360 IX86_BUILTIN_PUNPCKLDQ256
,
26361 IX86_BUILTIN_PUNPCKLQDQ256
,
26362 IX86_BUILTIN_PXOR256
,
26363 IX86_BUILTIN_MOVNTDQA256
,
26364 IX86_BUILTIN_VBROADCASTSS_PS
,
26365 IX86_BUILTIN_VBROADCASTSS_PS256
,
26366 IX86_BUILTIN_VBROADCASTSD_PD256
,
26367 IX86_BUILTIN_VBROADCASTSI256
,
26368 IX86_BUILTIN_PBLENDD256
,
26369 IX86_BUILTIN_PBLENDD128
,
26370 IX86_BUILTIN_PBROADCASTB256
,
26371 IX86_BUILTIN_PBROADCASTW256
,
26372 IX86_BUILTIN_PBROADCASTD256
,
26373 IX86_BUILTIN_PBROADCASTQ256
,
26374 IX86_BUILTIN_PBROADCASTB128
,
26375 IX86_BUILTIN_PBROADCASTW128
,
26376 IX86_BUILTIN_PBROADCASTD128
,
26377 IX86_BUILTIN_PBROADCASTQ128
,
26378 IX86_BUILTIN_VPERMVARSI256
,
26379 IX86_BUILTIN_VPERMDF256
,
26380 IX86_BUILTIN_VPERMVARSF256
,
26381 IX86_BUILTIN_VPERMDI256
,
26382 IX86_BUILTIN_VPERMTI256
,
26383 IX86_BUILTIN_VEXTRACT128I256
,
26384 IX86_BUILTIN_VINSERT128I256
,
26385 IX86_BUILTIN_MASKLOADD
,
26386 IX86_BUILTIN_MASKLOADQ
,
26387 IX86_BUILTIN_MASKLOADD256
,
26388 IX86_BUILTIN_MASKLOADQ256
,
26389 IX86_BUILTIN_MASKSTORED
,
26390 IX86_BUILTIN_MASKSTOREQ
,
26391 IX86_BUILTIN_MASKSTORED256
,
26392 IX86_BUILTIN_MASKSTOREQ256
,
26393 IX86_BUILTIN_PSLLVV4DI
,
26394 IX86_BUILTIN_PSLLVV2DI
,
26395 IX86_BUILTIN_PSLLVV8SI
,
26396 IX86_BUILTIN_PSLLVV4SI
,
26397 IX86_BUILTIN_PSRAVV8SI
,
26398 IX86_BUILTIN_PSRAVV4SI
,
26399 IX86_BUILTIN_PSRLVV4DI
,
26400 IX86_BUILTIN_PSRLVV2DI
,
26401 IX86_BUILTIN_PSRLVV8SI
,
26402 IX86_BUILTIN_PSRLVV4SI
,
26404 IX86_BUILTIN_GATHERSIV2DF
,
26405 IX86_BUILTIN_GATHERSIV4DF
,
26406 IX86_BUILTIN_GATHERDIV2DF
,
26407 IX86_BUILTIN_GATHERDIV4DF
,
26408 IX86_BUILTIN_GATHERSIV4SF
,
26409 IX86_BUILTIN_GATHERSIV8SF
,
26410 IX86_BUILTIN_GATHERDIV4SF
,
26411 IX86_BUILTIN_GATHERDIV8SF
,
26412 IX86_BUILTIN_GATHERSIV2DI
,
26413 IX86_BUILTIN_GATHERSIV4DI
,
26414 IX86_BUILTIN_GATHERDIV2DI
,
26415 IX86_BUILTIN_GATHERDIV4DI
,
26416 IX86_BUILTIN_GATHERSIV4SI
,
26417 IX86_BUILTIN_GATHERSIV8SI
,
26418 IX86_BUILTIN_GATHERDIV4SI
,
26419 IX86_BUILTIN_GATHERDIV8SI
,
26421 /* Alternate 4 element gather for the vectorizer where
26422 all operands are 32-byte wide. */
26423 IX86_BUILTIN_GATHERALTSIV4DF
,
26424 IX86_BUILTIN_GATHERALTDIV8SF
,
26425 IX86_BUILTIN_GATHERALTSIV4DI
,
26426 IX86_BUILTIN_GATHERALTDIV8SI
,
26428 /* TFmode support builtins. */
26430 IX86_BUILTIN_HUGE_VALQ
,
26431 IX86_BUILTIN_FABSQ
,
26432 IX86_BUILTIN_COPYSIGNQ
,
26434 /* Vectorizer support builtins. */
26435 IX86_BUILTIN_CPYSGNPS
,
26436 IX86_BUILTIN_CPYSGNPD
,
26437 IX86_BUILTIN_CPYSGNPS256
,
26438 IX86_BUILTIN_CPYSGNPD256
,
26440 /* FMA4 instructions. */
26441 IX86_BUILTIN_VFMADDSS
,
26442 IX86_BUILTIN_VFMADDSD
,
26443 IX86_BUILTIN_VFMADDPS
,
26444 IX86_BUILTIN_VFMADDPD
,
26445 IX86_BUILTIN_VFMADDPS256
,
26446 IX86_BUILTIN_VFMADDPD256
,
26447 IX86_BUILTIN_VFMADDSUBPS
,
26448 IX86_BUILTIN_VFMADDSUBPD
,
26449 IX86_BUILTIN_VFMADDSUBPS256
,
26450 IX86_BUILTIN_VFMADDSUBPD256
,
26452 /* FMA3 instructions. */
26453 IX86_BUILTIN_VFMADDSS3
,
26454 IX86_BUILTIN_VFMADDSD3
,
26456 /* XOP instructions. */
26457 IX86_BUILTIN_VPCMOV
,
26458 IX86_BUILTIN_VPCMOV_V2DI
,
26459 IX86_BUILTIN_VPCMOV_V4SI
,
26460 IX86_BUILTIN_VPCMOV_V8HI
,
26461 IX86_BUILTIN_VPCMOV_V16QI
,
26462 IX86_BUILTIN_VPCMOV_V4SF
,
26463 IX86_BUILTIN_VPCMOV_V2DF
,
26464 IX86_BUILTIN_VPCMOV256
,
26465 IX86_BUILTIN_VPCMOV_V4DI256
,
26466 IX86_BUILTIN_VPCMOV_V8SI256
,
26467 IX86_BUILTIN_VPCMOV_V16HI256
,
26468 IX86_BUILTIN_VPCMOV_V32QI256
,
26469 IX86_BUILTIN_VPCMOV_V8SF256
,
26470 IX86_BUILTIN_VPCMOV_V4DF256
,
26472 IX86_BUILTIN_VPPERM
,
26474 IX86_BUILTIN_VPMACSSWW
,
26475 IX86_BUILTIN_VPMACSWW
,
26476 IX86_BUILTIN_VPMACSSWD
,
26477 IX86_BUILTIN_VPMACSWD
,
26478 IX86_BUILTIN_VPMACSSDD
,
26479 IX86_BUILTIN_VPMACSDD
,
26480 IX86_BUILTIN_VPMACSSDQL
,
26481 IX86_BUILTIN_VPMACSSDQH
,
26482 IX86_BUILTIN_VPMACSDQL
,
26483 IX86_BUILTIN_VPMACSDQH
,
26484 IX86_BUILTIN_VPMADCSSWD
,
26485 IX86_BUILTIN_VPMADCSWD
,
26487 IX86_BUILTIN_VPHADDBW
,
26488 IX86_BUILTIN_VPHADDBD
,
26489 IX86_BUILTIN_VPHADDBQ
,
26490 IX86_BUILTIN_VPHADDWD
,
26491 IX86_BUILTIN_VPHADDWQ
,
26492 IX86_BUILTIN_VPHADDDQ
,
26493 IX86_BUILTIN_VPHADDUBW
,
26494 IX86_BUILTIN_VPHADDUBD
,
26495 IX86_BUILTIN_VPHADDUBQ
,
26496 IX86_BUILTIN_VPHADDUWD
,
26497 IX86_BUILTIN_VPHADDUWQ
,
26498 IX86_BUILTIN_VPHADDUDQ
,
26499 IX86_BUILTIN_VPHSUBBW
,
26500 IX86_BUILTIN_VPHSUBWD
,
26501 IX86_BUILTIN_VPHSUBDQ
,
26503 IX86_BUILTIN_VPROTB
,
26504 IX86_BUILTIN_VPROTW
,
26505 IX86_BUILTIN_VPROTD
,
26506 IX86_BUILTIN_VPROTQ
,
26507 IX86_BUILTIN_VPROTB_IMM
,
26508 IX86_BUILTIN_VPROTW_IMM
,
26509 IX86_BUILTIN_VPROTD_IMM
,
26510 IX86_BUILTIN_VPROTQ_IMM
,
26512 IX86_BUILTIN_VPSHLB
,
26513 IX86_BUILTIN_VPSHLW
,
26514 IX86_BUILTIN_VPSHLD
,
26515 IX86_BUILTIN_VPSHLQ
,
26516 IX86_BUILTIN_VPSHAB
,
26517 IX86_BUILTIN_VPSHAW
,
26518 IX86_BUILTIN_VPSHAD
,
26519 IX86_BUILTIN_VPSHAQ
,
26521 IX86_BUILTIN_VFRCZSS
,
26522 IX86_BUILTIN_VFRCZSD
,
26523 IX86_BUILTIN_VFRCZPS
,
26524 IX86_BUILTIN_VFRCZPD
,
26525 IX86_BUILTIN_VFRCZPS256
,
26526 IX86_BUILTIN_VFRCZPD256
,
26528 IX86_BUILTIN_VPCOMEQUB
,
26529 IX86_BUILTIN_VPCOMNEUB
,
26530 IX86_BUILTIN_VPCOMLTUB
,
26531 IX86_BUILTIN_VPCOMLEUB
,
26532 IX86_BUILTIN_VPCOMGTUB
,
26533 IX86_BUILTIN_VPCOMGEUB
,
26534 IX86_BUILTIN_VPCOMFALSEUB
,
26535 IX86_BUILTIN_VPCOMTRUEUB
,
26537 IX86_BUILTIN_VPCOMEQUW
,
26538 IX86_BUILTIN_VPCOMNEUW
,
26539 IX86_BUILTIN_VPCOMLTUW
,
26540 IX86_BUILTIN_VPCOMLEUW
,
26541 IX86_BUILTIN_VPCOMGTUW
,
26542 IX86_BUILTIN_VPCOMGEUW
,
26543 IX86_BUILTIN_VPCOMFALSEUW
,
26544 IX86_BUILTIN_VPCOMTRUEUW
,
26546 IX86_BUILTIN_VPCOMEQUD
,
26547 IX86_BUILTIN_VPCOMNEUD
,
26548 IX86_BUILTIN_VPCOMLTUD
,
26549 IX86_BUILTIN_VPCOMLEUD
,
26550 IX86_BUILTIN_VPCOMGTUD
,
26551 IX86_BUILTIN_VPCOMGEUD
,
26552 IX86_BUILTIN_VPCOMFALSEUD
,
26553 IX86_BUILTIN_VPCOMTRUEUD
,
26555 IX86_BUILTIN_VPCOMEQUQ
,
26556 IX86_BUILTIN_VPCOMNEUQ
,
26557 IX86_BUILTIN_VPCOMLTUQ
,
26558 IX86_BUILTIN_VPCOMLEUQ
,
26559 IX86_BUILTIN_VPCOMGTUQ
,
26560 IX86_BUILTIN_VPCOMGEUQ
,
26561 IX86_BUILTIN_VPCOMFALSEUQ
,
26562 IX86_BUILTIN_VPCOMTRUEUQ
,
26564 IX86_BUILTIN_VPCOMEQB
,
26565 IX86_BUILTIN_VPCOMNEB
,
26566 IX86_BUILTIN_VPCOMLTB
,
26567 IX86_BUILTIN_VPCOMLEB
,
26568 IX86_BUILTIN_VPCOMGTB
,
26569 IX86_BUILTIN_VPCOMGEB
,
26570 IX86_BUILTIN_VPCOMFALSEB
,
26571 IX86_BUILTIN_VPCOMTRUEB
,
26573 IX86_BUILTIN_VPCOMEQW
,
26574 IX86_BUILTIN_VPCOMNEW
,
26575 IX86_BUILTIN_VPCOMLTW
,
26576 IX86_BUILTIN_VPCOMLEW
,
26577 IX86_BUILTIN_VPCOMGTW
,
26578 IX86_BUILTIN_VPCOMGEW
,
26579 IX86_BUILTIN_VPCOMFALSEW
,
26580 IX86_BUILTIN_VPCOMTRUEW
,
26582 IX86_BUILTIN_VPCOMEQD
,
26583 IX86_BUILTIN_VPCOMNED
,
26584 IX86_BUILTIN_VPCOMLTD
,
26585 IX86_BUILTIN_VPCOMLED
,
26586 IX86_BUILTIN_VPCOMGTD
,
26587 IX86_BUILTIN_VPCOMGED
,
26588 IX86_BUILTIN_VPCOMFALSED
,
26589 IX86_BUILTIN_VPCOMTRUED
,
26591 IX86_BUILTIN_VPCOMEQQ
,
26592 IX86_BUILTIN_VPCOMNEQ
,
26593 IX86_BUILTIN_VPCOMLTQ
,
26594 IX86_BUILTIN_VPCOMLEQ
,
26595 IX86_BUILTIN_VPCOMGTQ
,
26596 IX86_BUILTIN_VPCOMGEQ
,
26597 IX86_BUILTIN_VPCOMFALSEQ
,
26598 IX86_BUILTIN_VPCOMTRUEQ
,
26600 /* LWP instructions. */
26601 IX86_BUILTIN_LLWPCB
,
26602 IX86_BUILTIN_SLWPCB
,
26603 IX86_BUILTIN_LWPVAL32
,
26604 IX86_BUILTIN_LWPVAL64
,
26605 IX86_BUILTIN_LWPINS32
,
26606 IX86_BUILTIN_LWPINS64
,
26611 IX86_BUILTIN_XBEGIN
,
26613 IX86_BUILTIN_XABORT
,
26614 IX86_BUILTIN_XTEST
,
26616 /* BMI instructions. */
26617 IX86_BUILTIN_BEXTR32
,
26618 IX86_BUILTIN_BEXTR64
,
26621 /* TBM instructions. */
26622 IX86_BUILTIN_BEXTRI32
,
26623 IX86_BUILTIN_BEXTRI64
,
26625 /* BMI2 instructions. */
26626 IX86_BUILTIN_BZHI32
,
26627 IX86_BUILTIN_BZHI64
,
26628 IX86_BUILTIN_PDEP32
,
26629 IX86_BUILTIN_PDEP64
,
26630 IX86_BUILTIN_PEXT32
,
26631 IX86_BUILTIN_PEXT64
,
26633 /* ADX instructions. */
26634 IX86_BUILTIN_ADDCARRYX32
,
26635 IX86_BUILTIN_ADDCARRYX64
,
26637 /* FSGSBASE instructions. */
26638 IX86_BUILTIN_RDFSBASE32
,
26639 IX86_BUILTIN_RDFSBASE64
,
26640 IX86_BUILTIN_RDGSBASE32
,
26641 IX86_BUILTIN_RDGSBASE64
,
26642 IX86_BUILTIN_WRFSBASE32
,
26643 IX86_BUILTIN_WRFSBASE64
,
26644 IX86_BUILTIN_WRGSBASE32
,
26645 IX86_BUILTIN_WRGSBASE64
,
26647 /* RDRND instructions. */
26648 IX86_BUILTIN_RDRAND16_STEP
,
26649 IX86_BUILTIN_RDRAND32_STEP
,
26650 IX86_BUILTIN_RDRAND64_STEP
,
26652 /* RDSEED instructions. */
26653 IX86_BUILTIN_RDSEED16_STEP
,
26654 IX86_BUILTIN_RDSEED32_STEP
,
26655 IX86_BUILTIN_RDSEED64_STEP
,
26657 /* F16C instructions. */
26658 IX86_BUILTIN_CVTPH2PS
,
26659 IX86_BUILTIN_CVTPH2PS256
,
26660 IX86_BUILTIN_CVTPS2PH
,
26661 IX86_BUILTIN_CVTPS2PH256
,
26663 /* CFString built-in for darwin */
26664 IX86_BUILTIN_CFSTRING
,
26666 /* Builtins to get CPU type and supported features. */
26667 IX86_BUILTIN_CPU_INIT
,
26668 IX86_BUILTIN_CPU_IS
,
26669 IX86_BUILTIN_CPU_SUPPORTS
,
26674 /* Table for the ix86 builtin decls. */
26675 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26677 /* Table of all of the builtin functions that are possible with different ISA's
26678 but are waiting to be built until a function is declared to use that
26680 struct builtin_isa
{
26681 const char *name
; /* function name */
26682 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26683 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26684 bool const_p
; /* true if the declaration is constant */
26685 bool set_and_not_built_p
;
26688 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26691 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26692 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26693 function decl in the ix86_builtins array. Returns the function decl or
26694 NULL_TREE, if the builtin was not added.
26696 If the front end has a special hook for builtin functions, delay adding
26697 builtin functions that aren't in the current ISA until the ISA is changed
26698 with function specific optimization. Doing so, can save about 300K for the
26699 default compiler. When the builtin is expanded, check at that time whether
26702 If the front end doesn't have a special hook, record all builtins, even if
26703 it isn't an instruction set in the current ISA in case the user uses
26704 function specific options for a different ISA, so that we don't get scope
26705 errors if a builtin is added in the middle of a function scope. */
26708 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26709 enum ix86_builtin_func_type tcode
,
26710 enum ix86_builtins code
)
26712 tree decl
= NULL_TREE
;
26714 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
26716 ix86_builtins_isa
[(int) code
].isa
= mask
;
26718 mask
&= ~OPTION_MASK_ISA_64BIT
;
26720 || (mask
& ix86_isa_flags
) != 0
26721 || (lang_hooks
.builtin_function
26722 == lang_hooks
.builtin_function_ext_scope
))
26725 tree type
= ix86_get_builtin_func_type (tcode
);
26726 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
26728 ix86_builtins
[(int) code
] = decl
;
26729 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
26733 ix86_builtins
[(int) code
] = NULL_TREE
;
26734 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26735 ix86_builtins_isa
[(int) code
].name
= name
;
26736 ix86_builtins_isa
[(int) code
].const_p
= false;
26737 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26744 /* Like def_builtin, but also marks the function decl "const". */
26747 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26748 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
26750 tree decl
= def_builtin (mask
, name
, tcode
, code
);
26752 TREE_READONLY (decl
) = 1;
26754 ix86_builtins_isa
[(int) code
].const_p
= true;
26759 /* Add any new builtin functions for a given ISA that may not have been
26760 declared. This saves a bit of space compared to adding all of the
26761 declarations to the tree, even if we didn't use them. */
26764 ix86_add_new_builtins (HOST_WIDE_INT isa
)
26768 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
26770 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26771 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26775 /* Don't define the builtin again. */
26776 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
26778 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26779 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26780 type
, i
, BUILT_IN_MD
, NULL
,
26783 ix86_builtins
[i
] = decl
;
26784 if (ix86_builtins_isa
[i
].const_p
)
26785 TREE_READONLY (decl
) = 1;
26790 /* Bits for builtin_description.flag. */
26792 /* Set when we don't support the comparison natively, and should
26793 swap_comparison in order to support it. */
26794 #define BUILTIN_DESC_SWAP_OPERANDS 1
26796 struct builtin_description
26798 const HOST_WIDE_INT mask
;
26799 const enum insn_code icode
;
26800 const char *const name
;
26801 const enum ix86_builtins code
;
26802 const enum rtx_code comparison
;
26806 static const struct builtin_description bdesc_comi
[] =
26808 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26809 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26810 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26811 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26812 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26813 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26814 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26815 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26816 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26817 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26818 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26819 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26820 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26821 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26822 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26823 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26824 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26825 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26826 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26827 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26828 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26829 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26830 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26831 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26834 static const struct builtin_description bdesc_pcmpestr
[] =
26837 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26838 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26839 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26840 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26841 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26842 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26843 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26846 static const struct builtin_description bdesc_pcmpistr
[] =
26849 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26850 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26851 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26852 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26853 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26854 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26855 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26858 /* Special builtins with variable number of arguments. */
26859 static const struct builtin_description bdesc_special_args
[] =
26861 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26862 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26863 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26866 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26869 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26871 /* FXSR, XSAVE and XSAVEOPT */
26872 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26873 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26874 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26875 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26876 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26878 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26879 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26880 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26881 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26882 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26885 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26886 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26887 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26889 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26890 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26891 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26892 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26894 /* SSE or 3DNow!A */
26895 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26896 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26899 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26900 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26901 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26902 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26903 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26904 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26905 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26906 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26907 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26908 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26910 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26911 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26914 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26917 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26920 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26921 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26924 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26925 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26927 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26928 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26929 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26930 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26931 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26933 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26934 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26935 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26936 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26937 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26938 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26939 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26941 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26942 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26943 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26945 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26946 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26947 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26948 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26949 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26950 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26951 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26952 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26955 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26956 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26957 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26958 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26959 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26960 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26961 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26962 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26963 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26965 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26966 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26967 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26968 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26969 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26970 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26973 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26974 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26975 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26976 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26977 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26978 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26979 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26980 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26983 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26984 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26985 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26988 /* Builtins with variable number of arguments. */
26989 static const struct builtin_description bdesc_args
[] =
26991 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26992 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26993 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26994 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26995 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26996 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26997 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
/* MMX */
27000 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27001 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27002 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27004 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27005 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27007 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27008 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27009 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27010 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27011 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27012 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27013 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27014 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27016 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27017 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27019 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27020 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27021 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27022 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27024 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27025 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27026 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27027 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27028 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27029 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27031 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27032 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27033 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27034 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27035 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27036 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27038 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27039 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
27040 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
27042 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
27044 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27045 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27046 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27047 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27048 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27049 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27051 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27052 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27053 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27054 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27055 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27056 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27058 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27059 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27060 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27061 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
/* 3DNow! */
27064 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27065 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27066 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27067 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27069 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27070 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27071 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27072 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27073 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27074 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27075 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27076 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27077 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27078 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27079 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27080 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27081 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27082 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27083 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
/* 3DNow!A */
27086 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27087 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27088 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27089 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27090 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27091 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
/* SSE */
27094 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27096 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27098 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27099 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27100 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27102 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27103 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27104 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27105 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27109 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27110 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27111 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27112 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27115 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27116 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27118 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27122 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27123 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27124 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27125 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27126 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27127 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27128 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27129 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27130 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27131 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27132 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27133 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27134 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27135 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27136 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27137 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27138 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27139 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27141 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27142 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27143 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27144 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27146 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27147 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27148 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27149 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27151 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27153 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27154 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27155 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27156 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27157 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27159 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27160 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27161 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27163 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27165 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27166 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27167 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27169 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27170 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27172 /* SSE MMX or 3Dnow!A */
27173 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27174 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27175 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27177 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27178 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27179 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27180 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27182 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27183 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27185 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
/* SSE2 */
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27196 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27197 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27198 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27204 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27205 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27206 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27207 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27209 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27211 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27214 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27223 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27232 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27235 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27240 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27243 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27244 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27250 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27255 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27264 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27268 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27271 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27274 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27275 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27276 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27279 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27297 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27302 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27306 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27308 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27309 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27311 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27315 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27316 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27318 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27319 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27321 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27323 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27324 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27325 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27326 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27328 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27329 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27330 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27331 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27332 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27333 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27334 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27336 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27337 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27338 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27339 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27340 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27341 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27342 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27344 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27345 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27346 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27347 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27349 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27350 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27351 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27353 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27355 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27358 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27359 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27362 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27363 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27365 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27366 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27367 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27368 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27369 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27370 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27373 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27374 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27375 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27376 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27377 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27378 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27380 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27381 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27382 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27383 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27384 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27385 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27386 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27387 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27388 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27389 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27390 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27391 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27392 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27393 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27394 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27395 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27396 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27397 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27398 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27399 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27400 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27401 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27402 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27403 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27406 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27407 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27410 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27411 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27412 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27413 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27414 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27415 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27416 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27417 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27418 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27419 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27421 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27422 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27423 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27424 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27425 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27426 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27427 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27428 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27429 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27430 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27431 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27432 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27433 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27435 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27436 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27437 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27438 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27439 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27440 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27441 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27442 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27443 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27444 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27445 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27446 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27449 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27450 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27451 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27452 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27454 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27455 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27456 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27457 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27459 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27460 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27462 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27463 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27465 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27466 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27467 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27468 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27470 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27471 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27473 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27474 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27476 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27477 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27478 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27481 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27482 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27483 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27484 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27485 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27488 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27489 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27490 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27491 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27494 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27495 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27497 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27498 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27499 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27500 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27503 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27506 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27507 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27508 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27509 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27510 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27511 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27512 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27513 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27514 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27515 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27516 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27517 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27518 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27519 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27520 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27521 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27522 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27523 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27524 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27525 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27526 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27527 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27528 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27529 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27530 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27531 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27533 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27535 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27536 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27538 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27540 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27541 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27543 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27546 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27549 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27552 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27557 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27558 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27560 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27562 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27563 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27564 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27566 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27568 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27570 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27573 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27574 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27575 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27577 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27578 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27579 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27580 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27581 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27583 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27585 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27586 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27588 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27589 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27590 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27591 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27593 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27594 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27596 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27597 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27599 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27600 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27601 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27602 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27604 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27605 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27607 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27608 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27610 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27611 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27612 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27613 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27615 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27616 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27617 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27618 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27619 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27620 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27622 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27623 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27624 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27625 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27626 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27627 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27628 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27629 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27630 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27631 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27632 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27633 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27634 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27635 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27636 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27638 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27639 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27641 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27642 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27644 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27711 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27712 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27713 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27714 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27715 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27747 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27748 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27749 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27750 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27751 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27752 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27753 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27754 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27755 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27756 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27757 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27758 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27759 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27760 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27761 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27762 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27763 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27764 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27765 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27766 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27767 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27768 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27769 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27770 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27771 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27772 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27773 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27774 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27775 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27776 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27777 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27778 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27779 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27780 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27781 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27782 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27783 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27784 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27785 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27786 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27787 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27788 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27789 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27790 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27791 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27792 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27794 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27797 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27798 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27799 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
27802 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27803 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27806 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27807 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27808 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27809 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
27812 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27813 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27814 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27815 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27816 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27817 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27820 /* FMA4 and XOP. */
/* Shorthand aliases used by the bdesc_multi_arg table that follows; each
   expands to one of the generic ix86 builtin function-type enumerators
   (<ret>_FTYPE_<arg1>_<arg2>...).
   Naming scheme: MULTI_ARG_<nargs>_<element-mode>[2], where a trailing
   "2" selects the 256-bit (doubled-width) vector variant of the same
   element mode (e.g. SF -> V4SF, SF2 -> V8SF).  Suffix conventions,
   as established by the expansions below:
     _IMM   - second operand is an SI immediate (rotate/shift count)
     _CMP   - integer comparison form; the table entry supplies the
              rtx comparison code as its fourth field
     _TF    - "true/false" comparison form (PCOM_TRUE / PCOM_FALSE)
     _I/_I1 - four-operand permute forms with a trailing INT selector
              (_I = 128-bit vectors, _I1 = 256-bit vectors)
   Mixed-mode names such as MULTI_ARG_1_SI_DI read <src>_<dst>: a
   widening operation from SI elements to DI elements (V2DI_FTYPE_V4SI).  */
27821 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
27822 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
27823 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
27824 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
27825 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
27826 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
27827 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
27828 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
27829 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
27830 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
27831 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
27832 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
27833 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
27834 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
27835 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
27836 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
27837 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
27838 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
27839 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
27840 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
27841 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
27842 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
27843 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
27844 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
/* Immediate-count rotate forms (XOP vprot*i).  */
27845 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
27846 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
27847 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
27848 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
/* Comparison forms (XOP vpcom*); the bdesc entry carries the rtx code.  */
27849 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
27850 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
27851 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
27852 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
/* Always-true/always-false comparison forms (PCOM_TRUE / PCOM_FALSE).  */
27853 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
27854 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
27855 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
27856 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
27857 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
27858 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
27859 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
27860 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
27861 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
27862 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
27863 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
27864 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
27865 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
27866 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
/* Widening horizontal-add/sub forms (XOP vphadd*/vphsub*): <src>_<dst>.  */
27867 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
27868 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
27869 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
27870 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
27871 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
27872 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
27874 static const struct builtin_description bdesc_multi_arg
[] =
27876 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27877 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27878 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27879 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27880 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27881 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27883 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27884 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27885 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27886 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27887 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27888 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27890 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27891 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27892 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27893 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27894 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27895 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27896 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27897 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27898 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27899 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27900 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27901 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27903 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27904 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27905 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27906 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27907 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27908 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27909 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27910 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27911 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27912 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27913 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27914 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27916 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27917 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27918 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27919 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27920 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27921 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27922 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27924 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27925 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27926 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27927 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27928 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27929 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27930 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27932 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27934 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27935 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27936 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27937 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27938 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27939 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27940 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27941 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27942 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27943 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27944 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27945 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27947 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27948 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27949 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27950 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27951 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27952 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27953 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27954 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27955 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27956 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27957 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27958 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27959 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27960 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27961 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27962 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27964 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27965 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27966 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27967 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27968 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27969 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27971 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27972 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27973 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27974 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27975 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27976 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27977 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27978 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27979 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27980 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27984 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27987 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27988 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27992 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27995 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27996 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
28000 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
28003 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28004 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
28008 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
28011 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28012 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28013 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
28016 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
28019 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
28020 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
28022 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
28024 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
28027 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
28028 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28029 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
28030 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
28031 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
28032 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
28033 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
28035 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
28036 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28037 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
28038 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
28039 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
28040 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
28041 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
28043 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
28044 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28045 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28046 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28047 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28048 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28049 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28051 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28052 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28053 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28054 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28055 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28056 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28057 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28058 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28060 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28061 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28062 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28063 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28064 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28065 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28066 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28067 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28069 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28070 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28071 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28072 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
28076 /* TM vector builtins. */
28078 /* Reuse the existing x86-specific `struct builtin_description' cause
28079 we're lazy. Add casts to make them fit. */
28080 static const struct builtin_description bdesc_tm
[] =
28082 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28083 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28084 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28085 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28086 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28087 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28088 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28090 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28091 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28092 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28093 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28094 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28095 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28096 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28098 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28099 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28100 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28101 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28102 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28103 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28104 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28106 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28107 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28108 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28111 /* TM callbacks. */
28113 /* Return the builtin decl needed to load a vector of TYPE. */
28116 ix86_builtin_tm_load (tree type
)
28118 if (TREE_CODE (type
) == VECTOR_TYPE
)
28120 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28123 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28125 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28127 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28133 /* Return the builtin decl needed to store a vector of TYPE. */
28136 ix86_builtin_tm_store (tree type
)
28138 if (TREE_CODE (type
) == VECTOR_TYPE
)
28140 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28143 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28145 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28147 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28153 /* Initialize the transactional memory vector load/store builtins. */
28156 ix86_init_tm_builtins (void)
28158 enum ix86_builtin_func_type ftype
;
28159 const struct builtin_description
*d
;
28162 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28163 tree attrs_log
, attrs_type_log
;
28168 /* If there are no builtins defined, we must be compiling in a
28169 language without trans-mem support. */
28170 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28173 /* Use whatever attributes a normal TM load has. */
28174 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28175 attrs_load
= DECL_ATTRIBUTES (decl
);
28176 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28177 /* Use whatever attributes a normal TM store has. */
28178 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28179 attrs_store
= DECL_ATTRIBUTES (decl
);
28180 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28181 /* Use whatever attributes a normal TM log has. */
28182 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28183 attrs_log
= DECL_ATTRIBUTES (decl
);
28184 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28186 for (i
= 0, d
= bdesc_tm
;
28187 i
< ARRAY_SIZE (bdesc_tm
);
28190 if ((d
->mask
& ix86_isa_flags
) != 0
28191 || (lang_hooks
.builtin_function
28192 == lang_hooks
.builtin_function_ext_scope
))
28194 tree type
, attrs
, attrs_type
;
28195 enum built_in_function code
= (enum built_in_function
) d
->code
;
28197 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28198 type
= ix86_get_builtin_func_type (ftype
);
28200 if (BUILTIN_TM_LOAD_P (code
))
28202 attrs
= attrs_load
;
28203 attrs_type
= attrs_type_load
;
28205 else if (BUILTIN_TM_STORE_P (code
))
28207 attrs
= attrs_store
;
28208 attrs_type
= attrs_type_store
;
28213 attrs_type
= attrs_type_log
;
28215 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28216 /* The builtin without the prefix for
28217 calling it directly. */
28218 d
->name
+ strlen ("__builtin_"),
28220 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28221 set the TYPE_ATTRIBUTES. */
28222 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28224 set_builtin_decl (code
, decl
, false);
28229 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28230 in the current target ISA to allow the user to compile particular modules
28231 with different target specific options that differ from the command line
28234 ix86_init_mmx_sse_builtins (void)
28236 const struct builtin_description
* d
;
28237 enum ix86_builtin_func_type ftype
;
28240 /* Add all special builtins with variable number of operands. */
28241 for (i
= 0, d
= bdesc_special_args
;
28242 i
< ARRAY_SIZE (bdesc_special_args
);
28248 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28249 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28252 /* Add all builtins with variable number of operands. */
28253 for (i
= 0, d
= bdesc_args
;
28254 i
< ARRAY_SIZE (bdesc_args
);
28260 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28261 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28264 /* pcmpestr[im] insns. */
28265 for (i
= 0, d
= bdesc_pcmpestr
;
28266 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28269 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28270 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28272 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28273 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28276 /* pcmpistr[im] insns. */
28277 for (i
= 0, d
= bdesc_pcmpistr
;
28278 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28281 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28282 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28284 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28285 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28288 /* comi/ucomi insns. */
28289 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28291 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28292 ftype
= INT_FTYPE_V2DF_V2DF
;
28294 ftype
= INT_FTYPE_V4SF_V4SF
;
28295 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28299 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28300 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28301 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28302 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28304 /* SSE or 3DNow!A */
28305 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28306 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28307 IX86_BUILTIN_MASKMOVQ
);
28310 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28311 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28313 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28314 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28315 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28316 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28319 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28320 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28321 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28322 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28325 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28326 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28327 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28328 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28329 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28330 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28331 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28332 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28333 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28334 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28335 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28336 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28339 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28340 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28343 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28344 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28345 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28346 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28347 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28348 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28349 IX86_BUILTIN_RDRAND64_STEP
);
28352 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28353 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28354 IX86_BUILTIN_GATHERSIV2DF
);
28356 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28357 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28358 IX86_BUILTIN_GATHERSIV4DF
);
28360 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28361 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28362 IX86_BUILTIN_GATHERDIV2DF
);
28364 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28365 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28366 IX86_BUILTIN_GATHERDIV4DF
);
28368 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28369 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28370 IX86_BUILTIN_GATHERSIV4SF
);
28372 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28373 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28374 IX86_BUILTIN_GATHERSIV8SF
);
28376 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28377 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28378 IX86_BUILTIN_GATHERDIV4SF
);
28380 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28381 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28382 IX86_BUILTIN_GATHERDIV8SF
);
28384 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28385 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28386 IX86_BUILTIN_GATHERSIV2DI
);
28388 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28389 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28390 IX86_BUILTIN_GATHERSIV4DI
);
28392 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28393 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28394 IX86_BUILTIN_GATHERDIV2DI
);
28396 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28397 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28398 IX86_BUILTIN_GATHERDIV4DI
);
28400 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28401 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28402 IX86_BUILTIN_GATHERSIV4SI
);
28404 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28405 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28406 IX86_BUILTIN_GATHERSIV8SI
);
28408 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28409 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28410 IX86_BUILTIN_GATHERDIV4SI
);
28412 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28413 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28414 IX86_BUILTIN_GATHERDIV8SI
);
28416 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28417 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28418 IX86_BUILTIN_GATHERALTSIV4DF
);
28420 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28421 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28422 IX86_BUILTIN_GATHERALTDIV8SF
);
28424 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28425 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28426 IX86_BUILTIN_GATHERALTSIV4DI
);
28428 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28429 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28430 IX86_BUILTIN_GATHERALTDIV8SI
);
28433 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28434 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28436 /* MMX access to the vec_init patterns. */
28437 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28438 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28440 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28441 V4HI_FTYPE_HI_HI_HI_HI
,
28442 IX86_BUILTIN_VEC_INIT_V4HI
);
28444 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28445 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28446 IX86_BUILTIN_VEC_INIT_V8QI
);
28448 /* Access to the vec_extract patterns. */
28449 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28450 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28451 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28452 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28453 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28454 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28455 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28456 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28457 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28458 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28460 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28461 "__builtin_ia32_vec_ext_v4hi",
28462 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28464 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28465 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28467 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28468 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28470 /* Access to the vec_set patterns. */
28471 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28472 "__builtin_ia32_vec_set_v2di",
28473 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28475 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28476 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28478 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28479 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28481 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28482 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28484 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28485 "__builtin_ia32_vec_set_v4hi",
28486 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28488 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28489 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28492 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28493 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28494 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28495 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28496 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28497 "__builtin_ia32_rdseed_di_step",
28498 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28501 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28502 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28503 def_builtin (OPTION_MASK_ISA_64BIT
,
28504 "__builtin_ia32_addcarryx_u64",
28505 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28506 IX86_BUILTIN_ADDCARRYX64
);
28508 /* Add FMA4 multi-arg argument instructions */
28509 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28514 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28515 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28519 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28520 to return a pointer to VERSION_DECL if the outcome of the expression
28521 formed by PREDICATE_CHAIN is true. This function will be called during
28522 version dispatch to decide which function version to execute. It returns
28523 the basic block at the end, to which more conditions can be added. */
28526 add_condition_to_bb (tree function_decl
, tree version_decl
,
28527 tree predicate_chain
, basic_block new_bb
)
28529 gimple return_stmt
;
28530 tree convert_expr
, result_var
;
28531 gimple convert_stmt
;
28532 gimple call_cond_stmt
;
28533 gimple if_else_stmt
;
28535 basic_block bb1
, bb2
, bb3
;
28538 tree cond_var
, and_expr_var
= NULL_TREE
;
28541 tree predicate_decl
, predicate_arg
;
28543 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
28545 gcc_assert (new_bb
!= NULL
);
28546 gseq
= bb_seq (new_bb
);
28549 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
28550 build_fold_addr_expr (version_decl
));
28551 result_var
= create_tmp_var (ptr_type_node
, NULL
);
28552 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
28553 return_stmt
= gimple_build_return (result_var
);
28555 if (predicate_chain
== NULL_TREE
)
28557 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28558 gimple_seq_add_stmt (&gseq
, return_stmt
);
28559 set_bb_seq (new_bb
, gseq
);
28560 gimple_set_bb (convert_stmt
, new_bb
);
28561 gimple_set_bb (return_stmt
, new_bb
);
28566 while (predicate_chain
!= NULL
)
28568 cond_var
= create_tmp_var (integer_type_node
, NULL
);
28569 predicate_decl
= TREE_PURPOSE (predicate_chain
);
28570 predicate_arg
= TREE_VALUE (predicate_chain
);
28571 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
28572 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
28574 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
28575 gimple_set_bb (call_cond_stmt
, new_bb
);
28576 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
28578 predicate_chain
= TREE_CHAIN (predicate_chain
);
28580 if (and_expr_var
== NULL
)
28581 and_expr_var
= cond_var
;
28584 gimple assign_stmt
;
28585 /* Use MIN_EXPR to check if any integer is zero?.
28586 and_expr_var = min_expr <cond_var, and_expr_var> */
28587 assign_stmt
= gimple_build_assign (and_expr_var
,
28588 build2 (MIN_EXPR
, integer_type_node
,
28589 cond_var
, and_expr_var
));
28591 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
28592 gimple_set_bb (assign_stmt
, new_bb
);
28593 gimple_seq_add_stmt (&gseq
, assign_stmt
);
28597 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
28599 NULL_TREE
, NULL_TREE
);
28600 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
28601 gimple_set_bb (if_else_stmt
, new_bb
);
28602 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
28604 gimple_seq_add_stmt (&gseq
, convert_stmt
);
28605 gimple_seq_add_stmt (&gseq
, return_stmt
);
28606 set_bb_seq (new_bb
, gseq
);
28609 e12
= split_block (bb1
, if_else_stmt
);
28611 e12
->flags
&= ~EDGE_FALLTHRU
;
28612 e12
->flags
|= EDGE_TRUE_VALUE
;
28614 e23
= split_block (bb2
, return_stmt
);
28616 gimple_set_bb (convert_stmt
, bb2
);
28617 gimple_set_bb (return_stmt
, bb2
);
28620 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
28623 make_edge (bb2
, EXIT_BLOCK_PTR
, 0);
28630 /* This parses the attribute arguments to target in DECL and determines
28631 the right builtin to use to match the platform specification.
28632 It returns the priority value for this version decl. If PREDICATE_LIST
28633 is not NULL, it stores the list of cpu features that need to be checked
28634 before dispatching this function. */
28636 static unsigned int
28637 get_builtin_code_for_version (tree decl
, tree
*predicate_list
)
28640 struct cl_target_option cur_target
;
28642 struct cl_target_option
*new_target
;
28643 const char *arg_str
= NULL
;
28644 const char *attrs_str
= NULL
;
28645 char *tok_str
= NULL
;
28648 /* Priority of i386 features, greater value is higher priority. This is
28649 used to decide the order in which function dispatch must happen. For
28650 instance, a version specialized for SSE4.2 should be checked for dispatch
28651 before a version for SSE3, as SSE4.2 implies SSE3. */
28652 enum feature_priority
28673 enum feature_priority priority
= P_ZERO
;
28675 /* These are the target attribute strings for which a dispatcher is
28676 available, from fold_builtin_cpu. */
28678 static struct _feature_list
28680 const char *const name
;
28681 const enum feature_priority priority
;
28683 const feature_list
[] =
28689 {"ssse3", P_SSSE3
},
28690 {"sse4.1", P_SSE4_1
},
28691 {"sse4.2", P_SSE4_2
},
28692 {"popcnt", P_POPCNT
},
28698 static unsigned int NUM_FEATURES
28699 = sizeof (feature_list
) / sizeof (struct _feature_list
);
28703 tree predicate_chain
= NULL_TREE
;
28704 tree predicate_decl
, predicate_arg
;
28706 attrs
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
28707 gcc_assert (attrs
!= NULL
);
28709 attrs
= TREE_VALUE (TREE_VALUE (attrs
));
28711 gcc_assert (TREE_CODE (attrs
) == STRING_CST
);
28712 attrs_str
= TREE_STRING_POINTER (attrs
);
28715 /* Handle arch= if specified. For priority, set it to be 1 more than
28716 the best instruction set the processor can handle. For instance, if
28717 there is a version for atom and a version for ssse3 (the highest ISA
28718 priority for atom), the atom version must be checked for dispatch
28719 before the ssse3 version. */
28720 if (strstr (attrs_str
, "arch=") != NULL
)
28722 cl_target_option_save (&cur_target
, &global_options
);
28723 target_node
= ix86_valid_target_attribute_tree (attrs
);
28725 gcc_assert (target_node
);
28726 new_target
= TREE_TARGET_OPTION (target_node
);
28727 gcc_assert (new_target
);
28729 if (new_target
->arch_specified
&& new_target
->arch
> 0)
28731 switch (new_target
->arch
)
28733 case PROCESSOR_CORE2
:
28735 priority
= P_PROC_SSSE3
;
28737 case PROCESSOR_COREI7
:
28738 arg_str
= "corei7";
28739 priority
= P_PROC_SSE4_2
;
28741 case PROCESSOR_ATOM
:
28743 priority
= P_PROC_SSSE3
;
28745 case PROCESSOR_AMDFAM10
:
28746 arg_str
= "amdfam10h";
28747 priority
= P_PROC_SSE4_a
;
28749 case PROCESSOR_BDVER1
:
28750 arg_str
= "bdver1";
28751 priority
= P_PROC_FMA
;
28753 case PROCESSOR_BDVER2
:
28754 arg_str
= "bdver2";
28755 priority
= P_PROC_FMA
;
28760 cl_target_option_restore (&global_options
, &cur_target
);
28762 if (predicate_list
&& arg_str
== NULL
)
28764 error_at (DECL_SOURCE_LOCATION (decl
),
28765 "No dispatcher found for the versioning attributes");
28769 if (predicate_list
)
28771 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_IS
];
28772 /* For a C string literal the length includes the trailing NULL. */
28773 predicate_arg
= build_string_literal (strlen (arg_str
) + 1, arg_str
);
28774 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28779 /* Process feature name. */
28780 tok_str
= (char *) xmalloc (strlen (attrs_str
) + 1);
28781 strcpy (tok_str
, attrs_str
);
28782 token
= strtok (tok_str
, ",");
28783 predicate_decl
= ix86_builtins
[(int) IX86_BUILTIN_CPU_SUPPORTS
];
28785 while (token
!= NULL
)
28787 /* Do not process "arch=" */
28788 if (strncmp (token
, "arch=", 5) == 0)
28790 token
= strtok (NULL
, ",");
28793 for (i
= 0; i
< NUM_FEATURES
; ++i
)
28795 if (strcmp (token
, feature_list
[i
].name
) == 0)
28797 if (predicate_list
)
28799 predicate_arg
= build_string_literal (
28800 strlen (feature_list
[i
].name
) + 1,
28801 feature_list
[i
].name
);
28802 predicate_chain
= tree_cons (predicate_decl
, predicate_arg
,
28805 /* Find the maximum priority feature. */
28806 if (feature_list
[i
].priority
> priority
)
28807 priority
= feature_list
[i
].priority
;
28812 if (predicate_list
&& i
== NUM_FEATURES
)
28814 error_at (DECL_SOURCE_LOCATION (decl
),
28815 "No dispatcher found for %s", token
);
28818 token
= strtok (NULL
, ",");
28822 if (predicate_list
&& predicate_chain
== NULL_TREE
)
28824 error_at (DECL_SOURCE_LOCATION (decl
),
28825 "No dispatcher found for the versioning attributes : %s",
28829 else if (predicate_list
)
28831 predicate_chain
= nreverse (predicate_chain
);
28832 *predicate_list
= predicate_chain
;
28838 /* This compares the priority of target features in function DECL1
28839 and DECL2. It returns positive value if DECL1 is higher priority,
28840 negative value if DECL2 is higher priority and 0 if they are the
28844 ix86_compare_version_priority (tree decl1
, tree decl2
)
28846 unsigned int priority1
= 0;
28847 unsigned int priority2
= 0;
28849 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl1
)) != NULL
)
28850 priority1
= get_builtin_code_for_version (decl1
, NULL
);
28852 if (lookup_attribute ("target", DECL_ATTRIBUTES (decl2
)) != NULL
)
28853 priority2
= get_builtin_code_for_version (decl2
, NULL
);
28855 return (int)priority1
- (int)priority2
;
28858 /* V1 and V2 point to function versions with different priorities
28859 based on the target ISA. This function compares their priorities. */
28862 feature_compare (const void *v1
, const void *v2
)
28864 typedef struct _function_version_info
28867 tree predicate_chain
;
28868 unsigned int dispatch_priority
;
28869 } function_version_info
;
28871 const function_version_info c1
= *(const function_version_info
*)v1
;
28872 const function_version_info c2
= *(const function_version_info
*)v2
;
28873 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
28876 /* This function generates the dispatch function for
28877 multi-versioned functions. DISPATCH_DECL is the function which will
28878 contain the dispatch logic. FNDECLS are the function choices for
28879 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
28880 in DISPATCH_DECL in which the dispatch code is generated. */
28883 dispatch_function_versions (tree dispatch_decl
,
28885 basic_block
*empty_bb
)
28888 gimple ifunc_cpu_init_stmt
;
28892 vec
<tree
> *fndecls
;
28893 unsigned int num_versions
= 0;
28894 unsigned int actual_versions
= 0;
28897 struct _function_version_info
28900 tree predicate_chain
;
28901 unsigned int dispatch_priority
;
28902 }*function_version_info
;
28904 gcc_assert (dispatch_decl
!= NULL
28905 && fndecls_p
!= NULL
28906 && empty_bb
!= NULL
);
28908 /*fndecls_p is actually a vector. */
28909 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
28911 /* At least one more version other than the default. */
28912 num_versions
= fndecls
->length ();
28913 gcc_assert (num_versions
>= 2);
28915 function_version_info
= (struct _function_version_info
*)
28916 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
28918 /* The first version in the vector is the default decl. */
28919 default_decl
= (*fndecls
)[0];
28921 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
28923 gseq
= bb_seq (*empty_bb
);
28924 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
28925 constructors, so explicity call __builtin_cpu_init here. */
28926 ifunc_cpu_init_stmt
= gimple_build_call_vec (
28927 ix86_builtins
[(int) IX86_BUILTIN_CPU_INIT
], vNULL
);
28928 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
28929 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
28930 set_bb_seq (*empty_bb
, gseq
);
28935 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
28937 tree version_decl
= ele
;
28938 tree predicate_chain
= NULL_TREE
;
28939 unsigned int priority
;
28940 /* Get attribute string, parse it and find the right predicate decl.
28941 The predicate function could be a lengthy combination of many
28942 features, like arch-type and various isa-variants. */
28943 priority
= get_builtin_code_for_version (version_decl
,
28946 if (predicate_chain
== NULL_TREE
)
28950 function_version_info
[ix
- 1].version_decl
= version_decl
;
28951 function_version_info
[ix
- 1].predicate_chain
= predicate_chain
;
28952 function_version_info
[ix
- 1].dispatch_priority
= priority
;
28955 /* Sort the versions according to descending order of dispatch priority. The
28956 priority is based on the ISA. This is not a perfect solution. There
28957 could still be ambiguity. If more than one function version is suitable
28958 to execute, which one should be dispatched? In future, allow the user
28959 to specify a dispatch priority next to the version. */
28960 qsort (function_version_info
, actual_versions
,
28961 sizeof (struct _function_version_info
), feature_compare
);
28963 for (i
= 0; i
< actual_versions
; ++i
)
28964 *empty_bb
= add_condition_to_bb (dispatch_decl
,
28965 function_version_info
[i
].version_decl
,
28966 function_version_info
[i
].predicate_chain
,
28969 /* dispatch default version at the end. */
28970 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
28973 free (function_version_info
);
/* Comparator function to be used in qsort routine to sort attribute
   specification strings to "target".  Each element is a char**; compare
   the pointed-to C strings lexicographically.  */

static int
attr_strcmp (const void *v1, const void *v2)
{
  const char *const *s1 = (const char *const *) v1;
  const char *const *s2 = (const char *const *) v2;
  return strcmp (*s1, *s2);
}
28988 /* ARGLIST is the argument to target attribute. This function tokenizes
28989 the comma separated arguments, sorts them and returns a string which
28990 is a unique identifier for the comma separated arguments. It also
28991 replaces non-identifier characters "=,-" with "_". */
28994 sorted_attr_string (tree arglist
)
28997 size_t str_len_sum
= 0;
28998 char **args
= NULL
;
28999 char *attr_str
, *ret_str
;
29001 unsigned int argnum
= 1;
29004 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29006 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29007 size_t len
= strlen (str
);
29008 str_len_sum
+= len
+ 1;
29009 if (arg
!= arglist
)
29011 for (i
= 0; i
< strlen (str
); i
++)
29016 attr_str
= XNEWVEC (char, str_len_sum
);
29018 for (arg
= arglist
; arg
; arg
= TREE_CHAIN (arg
))
29020 const char *str
= TREE_STRING_POINTER (TREE_VALUE (arg
));
29021 size_t len
= strlen (str
);
29022 memcpy (attr_str
+ str_len_sum
, str
, len
);
29023 attr_str
[str_len_sum
+ len
] = TREE_CHAIN (arg
) ? ',' : '\0';
29024 str_len_sum
+= len
+ 1;
29027 /* Replace "=,-" with "_". */
29028 for (i
= 0; i
< strlen (attr_str
); i
++)
29029 if (attr_str
[i
] == '=' || attr_str
[i
]== '-')
29035 args
= XNEWVEC (char *, argnum
);
29038 attr
= strtok (attr_str
, ",");
29039 while (attr
!= NULL
)
29043 attr
= strtok (NULL
, ",");
29046 qsort (args
, argnum
, sizeof (char *), attr_strcmp
);
29048 ret_str
= XNEWVEC (char, str_len_sum
);
29050 for (i
= 0; i
< argnum
; i
++)
29052 size_t len
= strlen (args
[i
]);
29053 memcpy (ret_str
+ str_len_sum
, args
[i
], len
);
29054 ret_str
[str_len_sum
+ len
] = i
< argnum
- 1 ? '_' : '\0';
29055 str_len_sum
+= len
+ 1;
29059 XDELETEVEC (attr_str
);
29063 /* This function changes the assembler name for functions that are
29064 versions. If DECL is a function version and has a "target"
29065 attribute, it appends the attribute string to its assembler name. */
29068 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
29071 const char *orig_name
, *version_string
;
29072 char *attr_str
, *assembler_name
;
29074 if (DECL_DECLARED_INLINE_P (decl
)
29075 && lookup_attribute ("gnu_inline",
29076 DECL_ATTRIBUTES (decl
)))
29077 error_at (DECL_SOURCE_LOCATION (decl
),
29078 "Function versions cannot be marked as gnu_inline,"
29079 " bodies have to be generated");
29081 if (DECL_VIRTUAL_P (decl
)
29082 || DECL_VINDEX (decl
))
29083 error_at (DECL_SOURCE_LOCATION (decl
),
29084 "Virtual function versioning not supported\n");
29086 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29088 /* target attribute string is NULL for default functions. */
29089 if (version_attr
== NULL_TREE
)
29092 orig_name
= IDENTIFIER_POINTER (id
);
29094 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
29096 if (strcmp (version_string
, "default") == 0)
29099 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
29100 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
29102 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
29104 /* Allow assembler name to be modified if already set. */
29105 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
29106 SET_DECL_RTL (decl
, NULL
);
29108 tree ret
= get_identifier (assembler_name
);
29109 XDELETEVEC (attr_str
);
29110 XDELETEVEC (assembler_name
);
29114 /* This function returns true if FN1 and FN2 are versions of the same function,
29115 that is, the target strings of the function decls are different. This assumes
29116 that FN1 and FN2 have the same signature. */
29119 ix86_function_versions (tree fn1
, tree fn2
)
29122 char *target1
, *target2
;
29125 if (TREE_CODE (fn1
) != FUNCTION_DECL
29126 || TREE_CODE (fn2
) != FUNCTION_DECL
)
29129 attr1
= lookup_attribute ("target", DECL_ATTRIBUTES (fn1
));
29130 attr2
= lookup_attribute ("target", DECL_ATTRIBUTES (fn2
));
29132 /* At least one function decl should have the target attribute specified. */
29133 if (attr1
== NULL_TREE
&& attr2
== NULL_TREE
)
29136 /* Diagnose missing target attribute if one of the decls is already
29137 multi-versioned. */
29138 if (attr1
== NULL_TREE
|| attr2
== NULL_TREE
)
29140 if (DECL_FUNCTION_VERSIONED (fn1
) || DECL_FUNCTION_VERSIONED (fn2
))
29142 if (attr2
!= NULL_TREE
)
29149 error_at (DECL_SOURCE_LOCATION (fn2
),
29150 "missing %<target%> attribute for multi-versioned %D",
29152 error_at (DECL_SOURCE_LOCATION (fn1
),
29153 "previous declaration of %D", fn1
);
29154 /* Prevent diagnosing of the same error multiple times. */
29155 DECL_ATTRIBUTES (fn2
)
29156 = tree_cons (get_identifier ("target"),
29157 copy_node (TREE_VALUE (attr1
)),
29158 DECL_ATTRIBUTES (fn2
));
29163 target1
= sorted_attr_string (TREE_VALUE (attr1
));
29164 target2
= sorted_attr_string (TREE_VALUE (attr2
));
29166 /* The sorted target strings must be different for fn1 and fn2
29168 if (strcmp (target1
, target2
) == 0)
29173 XDELETEVEC (target1
);
29174 XDELETEVEC (target2
);
29180 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
29182 /* For function version, add the target suffix to the assembler name. */
29183 if (TREE_CODE (decl
) == FUNCTION_DECL
29184 && DECL_FUNCTION_VERSIONED (decl
))
29185 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
29186 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29187 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
29193 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29194 is true, append the full path name of the source file. */
29197 make_name (tree decl
, const char *suffix
, bool make_unique
)
29199 char *global_var_name
;
29202 const char *unique_name
= NULL
;
29204 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
29206 /* Get a unique name that can be used globally without any chances
29207 of collision at link time. */
29209 unique_name
= IDENTIFIER_POINTER (get_file_function_name ("\0"));
29211 name_len
= strlen (name
) + strlen (suffix
) + 2;
29214 name_len
+= strlen (unique_name
) + 1;
29215 global_var_name
= XNEWVEC (char, name_len
);
29217 /* Use '.' to concatenate names as it is demangler friendly. */
29219 snprintf (global_var_name
, name_len
, "%s.%s.%s", name
, unique_name
,
29222 snprintf (global_var_name
, name_len
, "%s.%s", name
, suffix
);
29224 return global_var_name
;
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */

static tree
make_dispatcher_decl (const tree decl)
{
  tree func_decl;
  char *func_name;
  tree fn_type, func_type;
  bool is_uniq = false;

  /* Non-public versions need a unique dispatcher name, since the IFUNC
     itself must be externally visible.  */
  if (TREE_PUBLIC (decl) == 0)
    is_uniq = true;

  func_name = make_name (decl, "ifunc", is_uniq);

  fn_type = TREE_TYPE (decl);
  func_type = build_function_type (TREE_TYPE (fn_type),
				   TYPE_ARG_TYPES (fn_type));

  func_decl = build_fn_decl (func_name, func_type);
  XDELETEVEC (func_name);
  TREE_USED (func_decl) = 1;
  DECL_CONTEXT (func_decl) = NULL_TREE;
  DECL_INITIAL (func_decl) = error_mark_node;
  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this func as external, the resolver will flip it again if
     it gets generated.  */
  DECL_EXTERNAL (func_decl) = 1;
  /* This will be of type IFUNCs have to be externally visible.  */
  TREE_PUBLIC (func_decl) = 1;

  return func_decl;
}

#endif
29267 /* Returns true if decl is multi-versioned and DECL is the default function,
29268 that is it is not tagged with target specific optimization. */
29271 is_function_default_version (const tree decl
)
29273 if (TREE_CODE (decl
) != FUNCTION_DECL
29274 || !DECL_FUNCTION_VERSIONED (decl
))
29276 tree attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
29278 attr
= TREE_VALUE (TREE_VALUE (attr
));
29279 return (TREE_CODE (attr
) == STRING_CST
29280 && strcmp (TREE_STRING_POINTER (attr
), "default") == 0);
29283 /* Make a dispatcher declaration for the multi-versioned function DECL.
29284 Calls to DECL function will be replaced with calls to the dispatcher
29285 by the front-end. Returns the decl of the dispatcher function. */
29288 ix86_get_function_versions_dispatcher (void *decl
)
29290 tree fn
= (tree
) decl
;
29291 struct cgraph_node
*node
= NULL
;
29292 struct cgraph_node
*default_node
= NULL
;
29293 struct cgraph_function_version_info
*node_v
= NULL
;
29294 struct cgraph_function_version_info
*first_v
= NULL
;
29296 tree dispatch_decl
= NULL
;
29298 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29299 struct cgraph_function_version_info
*it_v
= NULL
;
29300 struct cgraph_node
*dispatcher_node
= NULL
;
29301 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
29304 struct cgraph_function_version_info
*default_version_info
= NULL
;
29306 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
29308 node
= cgraph_get_node (fn
);
29309 gcc_assert (node
!= NULL
);
29311 node_v
= get_cgraph_node_version (node
);
29312 gcc_assert (node_v
!= NULL
);
29314 if (node_v
->dispatcher_resolver
!= NULL
)
29315 return node_v
->dispatcher_resolver
;
29317 /* Find the default version and make it the first node. */
29319 /* Go to the beginnig of the chain. */
29320 while (first_v
->prev
!= NULL
)
29321 first_v
= first_v
->prev
;
29322 default_version_info
= first_v
;
29323 while (default_version_info
!= NULL
)
29325 if (is_function_default_version
29326 (default_version_info
->this_node
->symbol
.decl
))
29328 default_version_info
= default_version_info
->next
;
29331 /* If there is no default node, just return NULL. */
29332 if (default_version_info
== NULL
)
29335 /* Make default info the first node. */
29336 if (first_v
!= default_version_info
)
29338 default_version_info
->prev
->next
= default_version_info
->next
;
29339 if (default_version_info
->next
)
29340 default_version_info
->next
->prev
= default_version_info
->prev
;
29341 first_v
->prev
= default_version_info
;
29342 default_version_info
->next
= first_v
;
29343 default_version_info
->prev
= NULL
;
29346 default_node
= default_version_info
->this_node
;
29348 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE) && HAVE_GNU_INDIRECT_FUNCTION
29349 /* Right now, the dispatching is done via ifunc. */
29350 dispatch_decl
= make_dispatcher_decl (default_node
->symbol
.decl
);
29352 dispatcher_node
= cgraph_get_create_node (dispatch_decl
);
29353 gcc_assert (dispatcher_node
!= NULL
);
29354 dispatcher_node
->dispatcher_function
= 1;
29355 dispatcher_version_info
29356 = insert_new_cgraph_node_version (dispatcher_node
);
29357 dispatcher_version_info
->next
= default_version_info
;
29358 dispatcher_node
->local
.finalized
= 1;
29360 /* Set the dispatcher for all the versions. */
29361 it_v
= default_version_info
;
29362 while (it_v
!= NULL
)
29364 it_v
->dispatcher_resolver
= dispatch_decl
;
29368 error_at (DECL_SOURCE_LOCATION (default_node
->symbol
.decl
),
29369 "multiversioning needs ifunc which is not supported "
29370 "in this configuration");
29372 return dispatch_decl
;
29375 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
29379 make_attribute (const char *name
, const char *arg_name
, tree chain
)
29382 tree attr_arg_name
;
29386 attr_name
= get_identifier (name
);
29387 attr_arg_name
= build_string (strlen (arg_name
), arg_name
);
29388 attr_args
= tree_cons (NULL_TREE
, attr_arg_name
, NULL_TREE
);
29389 attr
= tree_cons (attr_name
, attr_args
, chain
);
29393 /* Make the resolver function decl to dispatch the versions of
29394 a multi-versioned function, DEFAULT_DECL. Create an
29395 empty basic block in the resolver and store the pointer in
29396 EMPTY_BB. Return the decl of the resolver function. */
29399 make_resolver_func (const tree default_decl
,
29400 const tree dispatch_decl
,
29401 basic_block
*empty_bb
)
29403 char *resolver_name
;
29404 tree decl
, type
, decl_name
, t
;
29405 bool is_uniq
= false;
29407 /* IFUNC's have to be globally visible. So, if the default_decl is
29408 not, then the name of the IFUNC should be made unique. */
29409 if (TREE_PUBLIC (default_decl
) == 0)
29412 /* Append the filename to the resolver function if the versions are
29413 not externally visible. This is because the resolver function has
29414 to be externally visible for the loader to find it. So, appending
29415 the filename will prevent conflicts with a resolver function from
29416 another module which is based on the same version name. */
29417 resolver_name
= make_name (default_decl
, "resolver", is_uniq
);
29419 /* The resolver function should return a (void *). */
29420 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
29422 decl
= build_fn_decl (resolver_name
, type
);
29423 decl_name
= get_identifier (resolver_name
);
29424 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
29426 DECL_NAME (decl
) = decl_name
;
29427 TREE_USED (decl
) = 1;
29428 DECL_ARTIFICIAL (decl
) = 1;
29429 DECL_IGNORED_P (decl
) = 0;
29430 /* IFUNC resolvers have to be externally visible. */
29431 TREE_PUBLIC (decl
) = 1;
29432 DECL_UNINLINABLE (decl
) = 0;
29434 /* Resolver is not external, body is generated. */
29435 DECL_EXTERNAL (decl
) = 0;
29436 DECL_EXTERNAL (dispatch_decl
) = 0;
29438 DECL_CONTEXT (decl
) = NULL_TREE
;
29439 DECL_INITIAL (decl
) = make_node (BLOCK
);
29440 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
29442 if (DECL_COMDAT_GROUP (default_decl
)
29443 || TREE_PUBLIC (default_decl
))
29445 /* In this case, each translation unit with a call to this
29446 versioned function will put out a resolver. Ensure it
29447 is comdat to keep just one copy. */
29448 DECL_COMDAT (decl
) = 1;
29449 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
29451 /* Build result decl and add to function_decl. */
29452 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
29453 DECL_ARTIFICIAL (t
) = 1;
29454 DECL_IGNORED_P (t
) = 1;
29455 DECL_RESULT (decl
) = t
;
29457 gimplify_function_tree (decl
);
29458 push_cfun (DECL_STRUCT_FUNCTION (decl
));
29459 *empty_bb
= init_lowered_empty_function (decl
, false);
29461 cgraph_add_new_function (decl
, true);
29462 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl
));
29466 gcc_assert (dispatch_decl
!= NULL
);
29467 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29468 DECL_ATTRIBUTES (dispatch_decl
)
29469 = make_attribute ("ifunc", resolver_name
, DECL_ATTRIBUTES (dispatch_decl
));
29471 /* Create the alias for dispatch to resolver here. */
29472 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29473 cgraph_same_body_alias (NULL
, dispatch_decl
, decl
);
29474 XDELETEVEC (resolver_name
);
29478 /* Generate the dispatching code body to dispatch multi-versioned function
29479 DECL. The target hook is called to process the "target" attributes and
29480 provide the code to dispatch the right function at run-time. NODE points
29481 to the dispatcher decl whose body will be created. */
29484 ix86_generate_version_dispatcher_body (void *node_p
)
29486 tree resolver_decl
;
29487 basic_block empty_bb
;
29488 vec
<tree
> fn_ver_vec
= vNULL
;
29489 tree default_ver_decl
;
29490 struct cgraph_node
*versn
;
29491 struct cgraph_node
*node
;
29493 struct cgraph_function_version_info
*node_version_info
= NULL
;
29494 struct cgraph_function_version_info
*versn_info
= NULL
;
29496 node
= (cgraph_node
*)node_p
;
29498 node_version_info
= get_cgraph_node_version (node
);
29499 gcc_assert (node
->dispatcher_function
29500 && node_version_info
!= NULL
);
29502 if (node_version_info
->dispatcher_resolver
)
29503 return node_version_info
->dispatcher_resolver
;
29505 /* The first version in the chain corresponds to the default version. */
29506 default_ver_decl
= node_version_info
->next
->this_node
->symbol
.decl
;
29508 /* node is going to be an alias, so remove the finalized bit. */
29509 node
->local
.finalized
= false;
29511 resolver_decl
= make_resolver_func (default_ver_decl
,
29512 node
->symbol
.decl
, &empty_bb
);
29514 node_version_info
->dispatcher_resolver
= resolver_decl
;
29516 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
29518 fn_ver_vec
.create (2);
29520 for (versn_info
= node_version_info
->next
; versn_info
;
29521 versn_info
= versn_info
->next
)
29523 versn
= versn_info
->this_node
;
29524 /* Check for virtual functions here again, as by this time it should
29525 have been determined if this function needs a vtable index or
29526 not. This happens for methods in derived classes that override
29527 virtual methods in base classes but are not explicitly marked as
29529 if (DECL_VINDEX (versn
->symbol
.decl
))
29530 error_at (DECL_SOURCE_LOCATION (versn
->symbol
.decl
),
29531 "Virtual function multiversioning not supported");
29532 fn_ver_vec
.safe_push (versn
->symbol
.decl
);
29535 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
29536 fn_ver_vec
.release ();
29537 rebuild_cgraph_edges ();
29539 return resolver_decl
;
29541 /* This builds the processor_model struct type defined in
29542 libgcc/config/i386/cpuinfo.c */
29545 build_processor_model_struct (void)
29547 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29549 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
29551 tree type
= make_node (RECORD_TYPE
);
29553 /* The first 3 fields are unsigned int. */
29554 for (i
= 0; i
< 3; ++i
)
29556 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29557 get_identifier (field_name
[i
]), unsigned_type_node
);
29558 if (field_chain
!= NULL_TREE
)
29559 DECL_CHAIN (field
) = field_chain
;
29560 field_chain
= field
;
29563 /* The last field is an array of unsigned integers of size one. */
29564 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
29565 get_identifier (field_name
[3]),
29566 build_array_type (unsigned_type_node
,
29567 build_index_type (size_one_node
)));
29568 if (field_chain
!= NULL_TREE
)
29569 DECL_CHAIN (field
) = field_chain
;
29570 field_chain
= field
;
29572 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
29576 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
29579 make_var_decl (tree type
, const char *name
)
29583 new_decl
= build_decl (UNKNOWN_LOCATION
,
29585 get_identifier(name
),
29588 DECL_EXTERNAL (new_decl
) = 1;
29589 TREE_STATIC (new_decl
) = 1;
29590 TREE_PUBLIC (new_decl
) = 1;
29591 DECL_INITIAL (new_decl
) = 0;
29592 DECL_ARTIFICIAL (new_decl
) = 0;
29593 DECL_PRESERVE_P (new_decl
) = 1;
29595 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
29596 assemble_variable (new_decl
, 0, 0, 0);
29601 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29602 into an integer defined in libgcc/config/i386/cpuinfo.c */
29605 fold_builtin_cpu (tree fndecl
, tree
*args
)
29608 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29609 DECL_FUNCTION_CODE (fndecl
);
29610 tree param_string_cst
= NULL
;
29612 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29613 enum processor_features
29629 /* These are the values for vendor types and cpu types and subtypes
29630 in cpuinfo.c. Cpu types and subtypes should be subtracted by
29631 the corresponding start value. */
29632 enum processor_model
29642 M_CPU_SUBTYPE_START
,
29643 M_INTEL_COREI7_NEHALEM
,
29644 M_INTEL_COREI7_WESTMERE
,
29645 M_INTEL_COREI7_SANDYBRIDGE
,
29646 M_AMDFAM10H_BARCELONA
,
29647 M_AMDFAM10H_SHANGHAI
,
29648 M_AMDFAM10H_ISTANBUL
,
29649 M_AMDFAM15H_BDVER1
,
29650 M_AMDFAM15H_BDVER2
,
29654 static struct _arch_names_table
29656 const char *const name
;
29657 const enum processor_model model
;
29659 const arch_names_table
[] =
29662 {"intel", M_INTEL
},
29663 {"atom", M_INTEL_ATOM
},
29664 {"core2", M_INTEL_CORE2
},
29665 {"corei7", M_INTEL_COREI7
},
29666 {"nehalem", M_INTEL_COREI7_NEHALEM
},
29667 {"westmere", M_INTEL_COREI7_WESTMERE
},
29668 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
29669 {"amdfam10h", M_AMDFAM10H
},
29670 {"barcelona", M_AMDFAM10H_BARCELONA
},
29671 {"shanghai", M_AMDFAM10H_SHANGHAI
},
29672 {"istanbul", M_AMDFAM10H_ISTANBUL
},
29673 {"amdfam15h", M_AMDFAM15H
},
29674 {"bdver1", M_AMDFAM15H_BDVER1
},
29675 {"bdver2", M_AMDFAM15H_BDVER2
},
29676 {"bdver3", M_AMDFAM15H_BDVER3
},
29679 static struct _isa_names_table
29681 const char *const name
;
29682 const enum processor_features feature
;
29684 const isa_names_table
[] =
29688 {"popcnt", F_POPCNT
},
29692 {"ssse3", F_SSSE3
},
29693 {"sse4.1", F_SSE4_1
},
29694 {"sse4.2", F_SSE4_2
},
29699 tree __processor_model_type
= build_processor_model_struct ();
29700 tree __cpu_model_var
= make_var_decl (__processor_model_type
,
29703 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
29705 param_string_cst
= *args
;
29706 while (param_string_cst
29707 && TREE_CODE (param_string_cst
) != STRING_CST
)
29709 /* *args must be a expr that can contain other EXPRS leading to a
29711 if (!EXPR_P (param_string_cst
))
29713 error ("Parameter to builtin must be a string constant or literal");
29714 return integer_zero_node
;
29716 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
29719 gcc_assert (param_string_cst
);
29721 if (fn_code
== IX86_BUILTIN_CPU_IS
)
29727 unsigned int field_val
= 0;
29728 unsigned int NUM_ARCH_NAMES
29729 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
29731 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
29732 if (strcmp (arch_names_table
[i
].name
,
29733 TREE_STRING_POINTER (param_string_cst
)) == 0)
29736 if (i
== NUM_ARCH_NAMES
)
29738 error ("Parameter to builtin not valid: %s",
29739 TREE_STRING_POINTER (param_string_cst
));
29740 return integer_zero_node
;
29743 field
= TYPE_FIELDS (__processor_model_type
);
29744 field_val
= arch_names_table
[i
].model
;
29746 /* CPU types are stored in the next field. */
29747 if (field_val
> M_CPU_TYPE_START
29748 && field_val
< M_CPU_SUBTYPE_START
)
29750 field
= DECL_CHAIN (field
);
29751 field_val
-= M_CPU_TYPE_START
;
29754 /* CPU subtypes are stored in the next field. */
29755 if (field_val
> M_CPU_SUBTYPE_START
)
29757 field
= DECL_CHAIN ( DECL_CHAIN (field
));
29758 field_val
-= M_CPU_SUBTYPE_START
;
29761 /* Get the appropriate field in __cpu_model. */
29762 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29765 /* Check the value. */
29766 final
= build2 (EQ_EXPR
, unsigned_type_node
, ref
,
29767 build_int_cstu (unsigned_type_node
, field_val
));
29768 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29770 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29777 unsigned int field_val
= 0;
29778 unsigned int NUM_ISA_NAMES
29779 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
29781 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
29782 if (strcmp (isa_names_table
[i
].name
,
29783 TREE_STRING_POINTER (param_string_cst
)) == 0)
29786 if (i
== NUM_ISA_NAMES
)
29788 error ("Parameter to builtin not valid: %s",
29789 TREE_STRING_POINTER (param_string_cst
));
29790 return integer_zero_node
;
29793 field
= TYPE_FIELDS (__processor_model_type
);
29794 /* Get the last field, which is __cpu_features. */
29795 while (DECL_CHAIN (field
))
29796 field
= DECL_CHAIN (field
);
29798 /* Get the appropriate field: __cpu_model.__cpu_features */
29799 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
29802 /* Access the 0th element of __cpu_features array. */
29803 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
29804 integer_zero_node
, NULL_TREE
, NULL_TREE
);
29806 field_val
= (1 << isa_names_table
[i
].feature
);
29807 /* Return __cpu_model.__cpu_features[0] & field_val */
29808 final
= build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
29809 build_int_cstu (unsigned_type_node
, field_val
));
29810 return build1 (CONVERT_EXPR
, integer_type_node
, final
);
29812 gcc_unreachable ();
29816 ix86_fold_builtin (tree fndecl
, int n_args
,
29817 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
29819 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
29821 enum ix86_builtins fn_code
= (enum ix86_builtins
)
29822 DECL_FUNCTION_CODE (fndecl
);
29823 if (fn_code
== IX86_BUILTIN_CPU_IS
29824 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
29826 gcc_assert (n_args
== 1);
29827 return fold_builtin_cpu (fndecl
, args
);
29831 #ifdef SUBTARGET_FOLD_BUILTIN
29832 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
29838 /* Make builtins to detect cpu type and features supported. NAME is
29839 the builtin name, CODE is the builtin code, and FTYPE is the function
29840 type of the builtin. */
29843 make_cpu_type_builtin (const char* name
, int code
,
29844 enum ix86_builtin_func_type ftype
, bool is_const
)
29849 type
= ix86_get_builtin_func_type (ftype
);
29850 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
29852 gcc_assert (decl
!= NULL_TREE
);
29853 ix86_builtins
[(int) code
] = decl
;
29854 TREE_READONLY (decl
) = is_const
;
29857 /* Make builtins to get CPU type and features supported. The created
29860 __builtin_cpu_init (), to detect cpu type and features,
29861 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
29862 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
29866 ix86_init_platform_type_builtins (void)
29868 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
29869 INT_FTYPE_VOID
, false);
29870 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
29871 INT_FTYPE_PCCHAR
, true);
29872 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
29873 INT_FTYPE_PCCHAR
, true);
29876 /* Internal method for ix86_init_builtins. */
29879 ix86_init_builtins_va_builtins_abi (void)
29881 tree ms_va_ref
, sysv_va_ref
;
29882 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
29883 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
29884 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
29885 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
29889 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
29890 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
29891 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
29893 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
29896 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29897 fnvoid_va_start_ms
=
29898 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
29899 fnvoid_va_end_sysv
=
29900 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
29901 fnvoid_va_start_sysv
=
29902 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
29904 fnvoid_va_copy_ms
=
29905 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
29907 fnvoid_va_copy_sysv
=
29908 build_function_type_list (void_type_node
, sysv_va_ref
,
29909 sysv_va_ref
, NULL_TREE
);
29911 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
29912 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29913 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
29914 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29915 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
29916 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
29917 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
29918 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29919 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
29920 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29921 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
29922 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
29926 ix86_init_builtin_types (void)
29928 tree float128_type_node
, float80_type_node
;
29930 /* The __float80 type. */
29931 float80_type_node
= long_double_type_node
;
29932 if (TYPE_MODE (float80_type_node
) != XFmode
)
29934 /* The __float80 type. */
29935 float80_type_node
= make_node (REAL_TYPE
);
29937 TYPE_PRECISION (float80_type_node
) = 80;
29938 layout_type (float80_type_node
);
29940 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
29942 /* The __float128 type. */
29943 float128_type_node
= make_node (REAL_TYPE
);
29944 TYPE_PRECISION (float128_type_node
) = 128;
29945 layout_type (float128_type_node
);
29946 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
29948 /* This macro is built by i386-builtin-types.awk. */
29949 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
29953 ix86_init_builtins (void)
29957 ix86_init_builtin_types ();
29959 /* Builtins to get CPU type and features. */
29960 ix86_init_platform_type_builtins ();
29962 /* TFmode support builtins. */
29963 def_builtin_const (0, "__builtin_infq",
29964 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
29965 def_builtin_const (0, "__builtin_huge_valq",
29966 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
29968 /* We will expand them to normal call if SSE isn't available since
29969 they are used by libgcc. */
29970 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
29971 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
29972 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
29973 TREE_READONLY (t
) = 1;
29974 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
29976 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
29977 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
29978 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
29979 TREE_READONLY (t
) = 1;
29980 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
29982 ix86_init_tm_builtins ();
29983 ix86_init_mmx_sse_builtins ();
29986 ix86_init_builtins_va_builtins_abi ();
29988 #ifdef SUBTARGET_INIT_BUILTINS
29989 SUBTARGET_INIT_BUILTINS
;
29993 /* Return the ix86 builtin for CODE. */
29996 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
29998 if (code
>= IX86_BUILTIN_MAX
)
29999 return error_mark_node
;
30001 return ix86_builtins
[code
];
30004 /* Errors in the source file can cause expand_expr to return const0_rtx
30005 where we expect a vector. To avoid crashing, use one of the vector
30006 clear instructions. */
30008 safe_vector_operand (rtx x
, enum machine_mode mode
)
30010 if (x
== const0_rtx
)
30011 x
= CONST0_RTX (mode
);
30015 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30018 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
30021 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30022 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30023 rtx op0
= expand_normal (arg0
);
30024 rtx op1
= expand_normal (arg1
);
30025 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30026 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30027 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
30029 if (VECTOR_MODE_P (mode0
))
30030 op0
= safe_vector_operand (op0
, mode0
);
30031 if (VECTOR_MODE_P (mode1
))
30032 op1
= safe_vector_operand (op1
, mode1
);
30034 if (optimize
|| !target
30035 || GET_MODE (target
) != tmode
30036 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30037 target
= gen_reg_rtx (tmode
);
30039 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
30041 rtx x
= gen_reg_rtx (V4SImode
);
30042 emit_insn (gen_sse2_loadd (x
, op1
));
30043 op1
= gen_lowpart (TImode
, x
);
30046 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30047 op0
= copy_to_mode_reg (mode0
, op0
);
30048 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
30049 op1
= copy_to_mode_reg (mode1
, op1
);
30051 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30060 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30063 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
30064 enum ix86_builtin_func_type m_type
,
30065 enum rtx_code sub_code
)
30070 bool comparison_p
= false;
30072 bool last_arg_constant
= false;
30073 int num_memory
= 0;
30076 enum machine_mode mode
;
30079 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30083 case MULTI_ARG_4_DF2_DI_I
:
30084 case MULTI_ARG_4_DF2_DI_I1
:
30085 case MULTI_ARG_4_SF2_SI_I
:
30086 case MULTI_ARG_4_SF2_SI_I1
:
30088 last_arg_constant
= true;
30091 case MULTI_ARG_3_SF
:
30092 case MULTI_ARG_3_DF
:
30093 case MULTI_ARG_3_SF2
:
30094 case MULTI_ARG_3_DF2
:
30095 case MULTI_ARG_3_DI
:
30096 case MULTI_ARG_3_SI
:
30097 case MULTI_ARG_3_SI_DI
:
30098 case MULTI_ARG_3_HI
:
30099 case MULTI_ARG_3_HI_SI
:
30100 case MULTI_ARG_3_QI
:
30101 case MULTI_ARG_3_DI2
:
30102 case MULTI_ARG_3_SI2
:
30103 case MULTI_ARG_3_HI2
:
30104 case MULTI_ARG_3_QI2
:
30108 case MULTI_ARG_2_SF
:
30109 case MULTI_ARG_2_DF
:
30110 case MULTI_ARG_2_DI
:
30111 case MULTI_ARG_2_SI
:
30112 case MULTI_ARG_2_HI
:
30113 case MULTI_ARG_2_QI
:
30117 case MULTI_ARG_2_DI_IMM
:
30118 case MULTI_ARG_2_SI_IMM
:
30119 case MULTI_ARG_2_HI_IMM
:
30120 case MULTI_ARG_2_QI_IMM
:
30122 last_arg_constant
= true;
30125 case MULTI_ARG_1_SF
:
30126 case MULTI_ARG_1_DF
:
30127 case MULTI_ARG_1_SF2
:
30128 case MULTI_ARG_1_DF2
:
30129 case MULTI_ARG_1_DI
:
30130 case MULTI_ARG_1_SI
:
30131 case MULTI_ARG_1_HI
:
30132 case MULTI_ARG_1_QI
:
30133 case MULTI_ARG_1_SI_DI
:
30134 case MULTI_ARG_1_HI_DI
:
30135 case MULTI_ARG_1_HI_SI
:
30136 case MULTI_ARG_1_QI_DI
:
30137 case MULTI_ARG_1_QI_SI
:
30138 case MULTI_ARG_1_QI_HI
:
30142 case MULTI_ARG_2_DI_CMP
:
30143 case MULTI_ARG_2_SI_CMP
:
30144 case MULTI_ARG_2_HI_CMP
:
30145 case MULTI_ARG_2_QI_CMP
:
30147 comparison_p
= true;
30150 case MULTI_ARG_2_SF_TF
:
30151 case MULTI_ARG_2_DF_TF
:
30152 case MULTI_ARG_2_DI_TF
:
30153 case MULTI_ARG_2_SI_TF
:
30154 case MULTI_ARG_2_HI_TF
:
30155 case MULTI_ARG_2_QI_TF
:
30161 gcc_unreachable ();
30164 if (optimize
|| !target
30165 || GET_MODE (target
) != tmode
30166 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30167 target
= gen_reg_rtx (tmode
);
30169 gcc_assert (nargs
<= 4);
30171 for (i
= 0; i
< nargs
; i
++)
30173 tree arg
= CALL_EXPR_ARG (exp
, i
);
30174 rtx op
= expand_normal (arg
);
30175 int adjust
= (comparison_p
) ? 1 : 0;
30176 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
30178 if (last_arg_constant
&& i
== nargs
- 1)
30180 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
30182 enum insn_code new_icode
= icode
;
30185 case CODE_FOR_xop_vpermil2v2df3
:
30186 case CODE_FOR_xop_vpermil2v4sf3
:
30187 case CODE_FOR_xop_vpermil2v4df3
:
30188 case CODE_FOR_xop_vpermil2v8sf3
:
30189 error ("the last argument must be a 2-bit immediate");
30190 return gen_reg_rtx (tmode
);
30191 case CODE_FOR_xop_rotlv2di3
:
30192 new_icode
= CODE_FOR_rotlv2di3
;
30194 case CODE_FOR_xop_rotlv4si3
:
30195 new_icode
= CODE_FOR_rotlv4si3
;
30197 case CODE_FOR_xop_rotlv8hi3
:
30198 new_icode
= CODE_FOR_rotlv8hi3
;
30200 case CODE_FOR_xop_rotlv16qi3
:
30201 new_icode
= CODE_FOR_rotlv16qi3
;
30203 if (CONST_INT_P (op
))
30205 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
30206 op
= GEN_INT (INTVAL (op
) & mask
);
30207 gcc_checking_assert
30208 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
30212 gcc_checking_assert
30214 && insn_data
[new_icode
].operand
[0].mode
== tmode
30215 && insn_data
[new_icode
].operand
[1].mode
== tmode
30216 && insn_data
[new_icode
].operand
[2].mode
== mode
30217 && insn_data
[new_icode
].operand
[0].predicate
30218 == insn_data
[icode
].operand
[0].predicate
30219 && insn_data
[new_icode
].operand
[1].predicate
30220 == insn_data
[icode
].operand
[1].predicate
);
30226 gcc_unreachable ();
30233 if (VECTOR_MODE_P (mode
))
30234 op
= safe_vector_operand (op
, mode
);
30236 /* If we aren't optimizing, only allow one memory operand to be
30238 if (memory_operand (op
, mode
))
30241 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
30244 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
30246 op
= force_reg (mode
, op
);
30250 args
[i
].mode
= mode
;
30256 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30261 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
30262 GEN_INT ((int)sub_code
));
30263 else if (! comparison_p
)
30264 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30267 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
30271 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
30276 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30280 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
30284 gcc_unreachable ();
30294 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30295 insns with vec_merge. */
30298 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
30302 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30303 rtx op1
, op0
= expand_normal (arg0
);
30304 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
30305 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
30307 if (optimize
|| !target
30308 || GET_MODE (target
) != tmode
30309 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
30310 target
= gen_reg_rtx (tmode
);
30312 if (VECTOR_MODE_P (mode0
))
30313 op0
= safe_vector_operand (op0
, mode0
);
30315 if ((optimize
&& !register_operand (op0
, mode0
))
30316 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
30317 op0
= copy_to_mode_reg (mode0
, op0
);
30320 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
30321 op1
= copy_to_mode_reg (mode0
, op1
);
30323 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
30330 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30333 ix86_expand_sse_compare (const struct builtin_description
*d
,
30334 tree exp
, rtx target
, bool swap
)
30337 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30338 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30339 rtx op0
= expand_normal (arg0
);
30340 rtx op1
= expand_normal (arg1
);
30342 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30343 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30344 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30345 enum rtx_code comparison
= d
->comparison
;
30347 if (VECTOR_MODE_P (mode0
))
30348 op0
= safe_vector_operand (op0
, mode0
);
30349 if (VECTOR_MODE_P (mode1
))
30350 op1
= safe_vector_operand (op1
, mode1
);
30352 /* Swap operands if we have a comparison that isn't available in
30356 rtx tmp
= gen_reg_rtx (mode1
);
30357 emit_move_insn (tmp
, op1
);
30362 if (optimize
|| !target
30363 || GET_MODE (target
) != tmode
30364 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30365 target
= gen_reg_rtx (tmode
);
30367 if ((optimize
&& !register_operand (op0
, mode0
))
30368 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
30369 op0
= copy_to_mode_reg (mode0
, op0
);
30370 if ((optimize
&& !register_operand (op1
, mode1
))
30371 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
30372 op1
= copy_to_mode_reg (mode1
, op1
);
30374 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
30375 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30382 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30385 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
30389 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30390 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30391 rtx op0
= expand_normal (arg0
);
30392 rtx op1
= expand_normal (arg1
);
30393 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30394 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30395 enum rtx_code comparison
= d
->comparison
;
30397 if (VECTOR_MODE_P (mode0
))
30398 op0
= safe_vector_operand (op0
, mode0
);
30399 if (VECTOR_MODE_P (mode1
))
30400 op1
= safe_vector_operand (op1
, mode1
);
30402 /* Swap operands if we have a comparison that isn't available in
30404 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
30411 target
= gen_reg_rtx (SImode
);
30412 emit_move_insn (target
, const0_rtx
);
30413 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30415 if ((optimize
&& !register_operand (op0
, mode0
))
30416 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30417 op0
= copy_to_mode_reg (mode0
, op0
);
30418 if ((optimize
&& !register_operand (op1
, mode1
))
30419 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30420 op1
= copy_to_mode_reg (mode1
, op1
);
30422 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30426 emit_insn (gen_rtx_SET (VOIDmode
,
30427 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30428 gen_rtx_fmt_ee (comparison
, QImode
,
30432 return SUBREG_REG (target
);
30435 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30438 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
30442 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30443 rtx op1
, op0
= expand_normal (arg0
);
30444 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30445 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30447 if (optimize
|| target
== 0
30448 || GET_MODE (target
) != tmode
30449 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30450 target
= gen_reg_rtx (tmode
);
30452 if (VECTOR_MODE_P (mode0
))
30453 op0
= safe_vector_operand (op0
, mode0
);
30455 if ((optimize
&& !register_operand (op0
, mode0
))
30456 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30457 op0
= copy_to_mode_reg (mode0
, op0
);
30459 op1
= GEN_INT (d
->comparison
);
30461 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
30469 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
30470 tree exp
, rtx target
)
30473 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30474 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30475 rtx op0
= expand_normal (arg0
);
30476 rtx op1
= expand_normal (arg1
);
30478 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
30479 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
30480 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
30482 if (optimize
|| target
== 0
30483 || GET_MODE (target
) != tmode
30484 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
30485 target
= gen_reg_rtx (tmode
);
30487 op0
= safe_vector_operand (op0
, mode0
);
30488 op1
= safe_vector_operand (op1
, mode1
);
30490 if ((optimize
&& !register_operand (op0
, mode0
))
30491 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30492 op0
= copy_to_mode_reg (mode0
, op0
);
30493 if ((optimize
&& !register_operand (op1
, mode1
))
30494 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30495 op1
= copy_to_mode_reg (mode1
, op1
);
30497 op2
= GEN_INT (d
->comparison
);
30499 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
30506 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30509 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
30513 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30514 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30515 rtx op0
= expand_normal (arg0
);
30516 rtx op1
= expand_normal (arg1
);
30517 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
30518 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
30519 enum rtx_code comparison
= d
->comparison
;
30521 if (VECTOR_MODE_P (mode0
))
30522 op0
= safe_vector_operand (op0
, mode0
);
30523 if (VECTOR_MODE_P (mode1
))
30524 op1
= safe_vector_operand (op1
, mode1
);
30526 target
= gen_reg_rtx (SImode
);
30527 emit_move_insn (target
, const0_rtx
);
30528 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30530 if ((optimize
&& !register_operand (op0
, mode0
))
30531 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
30532 op0
= copy_to_mode_reg (mode0
, op0
);
30533 if ((optimize
&& !register_operand (op1
, mode1
))
30534 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
30535 op1
= copy_to_mode_reg (mode1
, op1
);
30537 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
30541 emit_insn (gen_rtx_SET (VOIDmode
,
30542 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30543 gen_rtx_fmt_ee (comparison
, QImode
,
30547 return SUBREG_REG (target
);
30550 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30553 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
30554 tree exp
, rtx target
)
30557 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30558 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30559 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30560 tree arg3
= CALL_EXPR_ARG (exp
, 3);
30561 tree arg4
= CALL_EXPR_ARG (exp
, 4);
30562 rtx scratch0
, scratch1
;
30563 rtx op0
= expand_normal (arg0
);
30564 rtx op1
= expand_normal (arg1
);
30565 rtx op2
= expand_normal (arg2
);
30566 rtx op3
= expand_normal (arg3
);
30567 rtx op4
= expand_normal (arg4
);
30568 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
30570 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30571 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30572 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30573 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
30574 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
30575 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
30576 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
30578 if (VECTOR_MODE_P (modev2
))
30579 op0
= safe_vector_operand (op0
, modev2
);
30580 if (VECTOR_MODE_P (modev4
))
30581 op2
= safe_vector_operand (op2
, modev4
);
30583 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30584 op0
= copy_to_mode_reg (modev2
, op0
);
30585 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
30586 op1
= copy_to_mode_reg (modei3
, op1
);
30587 if ((optimize
&& !register_operand (op2
, modev4
))
30588 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
30589 op2
= copy_to_mode_reg (modev4
, op2
);
30590 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
30591 op3
= copy_to_mode_reg (modei5
, op3
);
30593 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
30595 error ("the fifth argument must be an 8-bit immediate");
30599 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
30601 if (optimize
|| !target
30602 || GET_MODE (target
) != tmode0
30603 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30604 target
= gen_reg_rtx (tmode0
);
30606 scratch1
= gen_reg_rtx (tmode1
);
30608 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30610 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
30612 if (optimize
|| !target
30613 || GET_MODE (target
) != tmode1
30614 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30615 target
= gen_reg_rtx (tmode1
);
30617 scratch0
= gen_reg_rtx (tmode0
);
30619 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
30623 gcc_assert (d
->flag
);
30625 scratch0
= gen_reg_rtx (tmode0
);
30626 scratch1
= gen_reg_rtx (tmode1
);
30628 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
30638 target
= gen_reg_rtx (SImode
);
30639 emit_move_insn (target
, const0_rtx
);
30640 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30643 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30644 gen_rtx_fmt_ee (EQ
, QImode
,
30645 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30648 return SUBREG_REG (target
);
30655 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30658 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
30659 tree exp
, rtx target
)
30662 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30663 tree arg1
= CALL_EXPR_ARG (exp
, 1);
30664 tree arg2
= CALL_EXPR_ARG (exp
, 2);
30665 rtx scratch0
, scratch1
;
30666 rtx op0
= expand_normal (arg0
);
30667 rtx op1
= expand_normal (arg1
);
30668 rtx op2
= expand_normal (arg2
);
30669 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
30671 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
30672 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
30673 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
30674 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
30675 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
30677 if (VECTOR_MODE_P (modev2
))
30678 op0
= safe_vector_operand (op0
, modev2
);
30679 if (VECTOR_MODE_P (modev3
))
30680 op1
= safe_vector_operand (op1
, modev3
);
30682 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
30683 op0
= copy_to_mode_reg (modev2
, op0
);
30684 if ((optimize
&& !register_operand (op1
, modev3
))
30685 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
30686 op1
= copy_to_mode_reg (modev3
, op1
);
30688 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
30690 error ("the third argument must be an 8-bit immediate");
30694 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
30696 if (optimize
|| !target
30697 || GET_MODE (target
) != tmode0
30698 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
30699 target
= gen_reg_rtx (tmode0
);
30701 scratch1
= gen_reg_rtx (tmode1
);
30703 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
30705 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
30707 if (optimize
|| !target
30708 || GET_MODE (target
) != tmode1
30709 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
30710 target
= gen_reg_rtx (tmode1
);
30712 scratch0
= gen_reg_rtx (tmode0
);
30714 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
30718 gcc_assert (d
->flag
);
30720 scratch0
= gen_reg_rtx (tmode0
);
30721 scratch1
= gen_reg_rtx (tmode1
);
30723 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
30733 target
= gen_reg_rtx (SImode
);
30734 emit_move_insn (target
, const0_rtx
);
30735 target
= gen_rtx_SUBREG (QImode
, target
, 0);
30738 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
30739 gen_rtx_fmt_ee (EQ
, QImode
,
30740 gen_rtx_REG ((enum machine_mode
) d
->flag
,
30743 return SUBREG_REG (target
);
30749 /* Subroutine of ix86_expand_builtin to take care of insns with
30750 variable number of operands. */
30753 ix86_expand_args_builtin (const struct builtin_description
*d
,
30754 tree exp
, rtx target
)
30756 rtx pat
, real_target
;
30757 unsigned int i
, nargs
;
30758 unsigned int nargs_constant
= 0;
30759 int num_memory
= 0;
30763 enum machine_mode mode
;
30765 bool last_arg_count
= false;
30766 enum insn_code icode
= d
->icode
;
30767 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30768 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30769 enum machine_mode rmode
= VOIDmode
;
30771 enum rtx_code comparison
= d
->comparison
;
30773 switch ((enum ix86_builtin_func_type
) d
->flag
)
30775 case V2DF_FTYPE_V2DF_ROUND
:
30776 case V4DF_FTYPE_V4DF_ROUND
:
30777 case V4SF_FTYPE_V4SF_ROUND
:
30778 case V8SF_FTYPE_V8SF_ROUND
:
30779 case V4SI_FTYPE_V4SF_ROUND
:
30780 case V8SI_FTYPE_V8SF_ROUND
:
30781 return ix86_expand_sse_round (d
, exp
, target
);
30782 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
30783 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
30784 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
30785 case INT_FTYPE_V8SF_V8SF_PTEST
:
30786 case INT_FTYPE_V4DI_V4DI_PTEST
:
30787 case INT_FTYPE_V4DF_V4DF_PTEST
:
30788 case INT_FTYPE_V4SF_V4SF_PTEST
:
30789 case INT_FTYPE_V2DI_V2DI_PTEST
:
30790 case INT_FTYPE_V2DF_V2DF_PTEST
:
30791 return ix86_expand_sse_ptest (d
, exp
, target
);
30792 case FLOAT128_FTYPE_FLOAT128
:
30793 case FLOAT_FTYPE_FLOAT
:
30794 case INT_FTYPE_INT
:
30795 case UINT64_FTYPE_INT
:
30796 case UINT16_FTYPE_UINT16
:
30797 case INT64_FTYPE_INT64
:
30798 case INT64_FTYPE_V4SF
:
30799 case INT64_FTYPE_V2DF
:
30800 case INT_FTYPE_V16QI
:
30801 case INT_FTYPE_V8QI
:
30802 case INT_FTYPE_V8SF
:
30803 case INT_FTYPE_V4DF
:
30804 case INT_FTYPE_V4SF
:
30805 case INT_FTYPE_V2DF
:
30806 case INT_FTYPE_V32QI
:
30807 case V16QI_FTYPE_V16QI
:
30808 case V8SI_FTYPE_V8SF
:
30809 case V8SI_FTYPE_V4SI
:
30810 case V8HI_FTYPE_V8HI
:
30811 case V8HI_FTYPE_V16QI
:
30812 case V8QI_FTYPE_V8QI
:
30813 case V8SF_FTYPE_V8SF
:
30814 case V8SF_FTYPE_V8SI
:
30815 case V8SF_FTYPE_V4SF
:
30816 case V8SF_FTYPE_V8HI
:
30817 case V4SI_FTYPE_V4SI
:
30818 case V4SI_FTYPE_V16QI
:
30819 case V4SI_FTYPE_V4SF
:
30820 case V4SI_FTYPE_V8SI
:
30821 case V4SI_FTYPE_V8HI
:
30822 case V4SI_FTYPE_V4DF
:
30823 case V4SI_FTYPE_V2DF
:
30824 case V4HI_FTYPE_V4HI
:
30825 case V4DF_FTYPE_V4DF
:
30826 case V4DF_FTYPE_V4SI
:
30827 case V4DF_FTYPE_V4SF
:
30828 case V4DF_FTYPE_V2DF
:
30829 case V4SF_FTYPE_V4SF
:
30830 case V4SF_FTYPE_V4SI
:
30831 case V4SF_FTYPE_V8SF
:
30832 case V4SF_FTYPE_V4DF
:
30833 case V4SF_FTYPE_V8HI
:
30834 case V4SF_FTYPE_V2DF
:
30835 case V2DI_FTYPE_V2DI
:
30836 case V2DI_FTYPE_V16QI
:
30837 case V2DI_FTYPE_V8HI
:
30838 case V2DI_FTYPE_V4SI
:
30839 case V2DF_FTYPE_V2DF
:
30840 case V2DF_FTYPE_V4SI
:
30841 case V2DF_FTYPE_V4DF
:
30842 case V2DF_FTYPE_V4SF
:
30843 case V2DF_FTYPE_V2SI
:
30844 case V2SI_FTYPE_V2SI
:
30845 case V2SI_FTYPE_V4SF
:
30846 case V2SI_FTYPE_V2SF
:
30847 case V2SI_FTYPE_V2DF
:
30848 case V2SF_FTYPE_V2SF
:
30849 case V2SF_FTYPE_V2SI
:
30850 case V32QI_FTYPE_V32QI
:
30851 case V32QI_FTYPE_V16QI
:
30852 case V16HI_FTYPE_V16HI
:
30853 case V16HI_FTYPE_V8HI
:
30854 case V8SI_FTYPE_V8SI
:
30855 case V16HI_FTYPE_V16QI
:
30856 case V8SI_FTYPE_V16QI
:
30857 case V4DI_FTYPE_V16QI
:
30858 case V8SI_FTYPE_V8HI
:
30859 case V4DI_FTYPE_V8HI
:
30860 case V4DI_FTYPE_V4SI
:
30861 case V4DI_FTYPE_V2DI
:
30864 case V4SF_FTYPE_V4SF_VEC_MERGE
:
30865 case V2DF_FTYPE_V2DF_VEC_MERGE
:
30866 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
30867 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
30868 case V16QI_FTYPE_V16QI_V16QI
:
30869 case V16QI_FTYPE_V8HI_V8HI
:
30870 case V8QI_FTYPE_V8QI_V8QI
:
30871 case V8QI_FTYPE_V4HI_V4HI
:
30872 case V8HI_FTYPE_V8HI_V8HI
:
30873 case V8HI_FTYPE_V16QI_V16QI
:
30874 case V8HI_FTYPE_V4SI_V4SI
:
30875 case V8SF_FTYPE_V8SF_V8SF
:
30876 case V8SF_FTYPE_V8SF_V8SI
:
30877 case V4SI_FTYPE_V4SI_V4SI
:
30878 case V4SI_FTYPE_V8HI_V8HI
:
30879 case V4SI_FTYPE_V4SF_V4SF
:
30880 case V4SI_FTYPE_V2DF_V2DF
:
30881 case V4HI_FTYPE_V4HI_V4HI
:
30882 case V4HI_FTYPE_V8QI_V8QI
:
30883 case V4HI_FTYPE_V2SI_V2SI
:
30884 case V4DF_FTYPE_V4DF_V4DF
:
30885 case V4DF_FTYPE_V4DF_V4DI
:
30886 case V4SF_FTYPE_V4SF_V4SF
:
30887 case V4SF_FTYPE_V4SF_V4SI
:
30888 case V4SF_FTYPE_V4SF_V2SI
:
30889 case V4SF_FTYPE_V4SF_V2DF
:
30890 case V4SF_FTYPE_V4SF_DI
:
30891 case V4SF_FTYPE_V4SF_SI
:
30892 case V2DI_FTYPE_V2DI_V2DI
:
30893 case V2DI_FTYPE_V16QI_V16QI
:
30894 case V2DI_FTYPE_V4SI_V4SI
:
30895 case V2UDI_FTYPE_V4USI_V4USI
:
30896 case V2DI_FTYPE_V2DI_V16QI
:
30897 case V2DI_FTYPE_V2DF_V2DF
:
30898 case V2SI_FTYPE_V2SI_V2SI
:
30899 case V2SI_FTYPE_V4HI_V4HI
:
30900 case V2SI_FTYPE_V2SF_V2SF
:
30901 case V2DF_FTYPE_V2DF_V2DF
:
30902 case V2DF_FTYPE_V2DF_V4SF
:
30903 case V2DF_FTYPE_V2DF_V2DI
:
30904 case V2DF_FTYPE_V2DF_DI
:
30905 case V2DF_FTYPE_V2DF_SI
:
30906 case V2SF_FTYPE_V2SF_V2SF
:
30907 case V1DI_FTYPE_V1DI_V1DI
:
30908 case V1DI_FTYPE_V8QI_V8QI
:
30909 case V1DI_FTYPE_V2SI_V2SI
:
30910 case V32QI_FTYPE_V16HI_V16HI
:
30911 case V16HI_FTYPE_V8SI_V8SI
:
30912 case V32QI_FTYPE_V32QI_V32QI
:
30913 case V16HI_FTYPE_V32QI_V32QI
:
30914 case V16HI_FTYPE_V16HI_V16HI
:
30915 case V8SI_FTYPE_V4DF_V4DF
:
30916 case V8SI_FTYPE_V8SI_V8SI
:
30917 case V8SI_FTYPE_V16HI_V16HI
:
30918 case V4DI_FTYPE_V4DI_V4DI
:
30919 case V4DI_FTYPE_V8SI_V8SI
:
30920 case V4UDI_FTYPE_V8USI_V8USI
:
30921 if (comparison
== UNKNOWN
)
30922 return ix86_expand_binop_builtin (icode
, exp
, target
);
30925 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
30926 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
30927 gcc_assert (comparison
!= UNKNOWN
);
30931 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
30932 case V16HI_FTYPE_V16HI_SI_COUNT
:
30933 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
30934 case V8SI_FTYPE_V8SI_SI_COUNT
:
30935 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
30936 case V4DI_FTYPE_V4DI_INT_COUNT
:
30937 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
30938 case V8HI_FTYPE_V8HI_SI_COUNT
:
30939 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
30940 case V4SI_FTYPE_V4SI_SI_COUNT
:
30941 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
30942 case V4HI_FTYPE_V4HI_SI_COUNT
:
30943 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
30944 case V2DI_FTYPE_V2DI_SI_COUNT
:
30945 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
30946 case V2SI_FTYPE_V2SI_SI_COUNT
:
30947 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
30948 case V1DI_FTYPE_V1DI_SI_COUNT
:
30950 last_arg_count
= true;
30952 case UINT64_FTYPE_UINT64_UINT64
:
30953 case UINT_FTYPE_UINT_UINT
:
30954 case UINT_FTYPE_UINT_USHORT
:
30955 case UINT_FTYPE_UINT_UCHAR
:
30956 case UINT16_FTYPE_UINT16_INT
:
30957 case UINT8_FTYPE_UINT8_INT
:
30960 case V2DI_FTYPE_V2DI_INT_CONVERT
:
30963 nargs_constant
= 1;
30965 case V4DI_FTYPE_V4DI_INT_CONVERT
:
30968 nargs_constant
= 1;
30970 case V8HI_FTYPE_V8HI_INT
:
30971 case V8HI_FTYPE_V8SF_INT
:
30972 case V8HI_FTYPE_V4SF_INT
:
30973 case V8SF_FTYPE_V8SF_INT
:
30974 case V4SI_FTYPE_V4SI_INT
:
30975 case V4SI_FTYPE_V8SI_INT
:
30976 case V4HI_FTYPE_V4HI_INT
:
30977 case V4DF_FTYPE_V4DF_INT
:
30978 case V4SF_FTYPE_V4SF_INT
:
30979 case V4SF_FTYPE_V8SF_INT
:
30980 case V2DI_FTYPE_V2DI_INT
:
30981 case V2DF_FTYPE_V2DF_INT
:
30982 case V2DF_FTYPE_V4DF_INT
:
30983 case V16HI_FTYPE_V16HI_INT
:
30984 case V8SI_FTYPE_V8SI_INT
:
30985 case V4DI_FTYPE_V4DI_INT
:
30986 case V2DI_FTYPE_V4DI_INT
:
30988 nargs_constant
= 1;
30990 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
30991 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
30992 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
30993 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
30994 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
30995 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
30998 case V32QI_FTYPE_V32QI_V32QI_INT
:
30999 case V16HI_FTYPE_V16HI_V16HI_INT
:
31000 case V16QI_FTYPE_V16QI_V16QI_INT
:
31001 case V4DI_FTYPE_V4DI_V4DI_INT
:
31002 case V8HI_FTYPE_V8HI_V8HI_INT
:
31003 case V8SI_FTYPE_V8SI_V8SI_INT
:
31004 case V8SI_FTYPE_V8SI_V4SI_INT
:
31005 case V8SF_FTYPE_V8SF_V8SF_INT
:
31006 case V8SF_FTYPE_V8SF_V4SF_INT
:
31007 case V4SI_FTYPE_V4SI_V4SI_INT
:
31008 case V4DF_FTYPE_V4DF_V4DF_INT
:
31009 case V4DF_FTYPE_V4DF_V2DF_INT
:
31010 case V4SF_FTYPE_V4SF_V4SF_INT
:
31011 case V2DI_FTYPE_V2DI_V2DI_INT
:
31012 case V4DI_FTYPE_V4DI_V2DI_INT
:
31013 case V2DF_FTYPE_V2DF_V2DF_INT
:
31015 nargs_constant
= 1;
31017 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
31020 nargs_constant
= 1;
31022 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
31025 nargs_constant
= 1;
31027 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
31030 nargs_constant
= 1;
31032 case V2DI_FTYPE_V2DI_UINT_UINT
:
31034 nargs_constant
= 2;
31036 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
31037 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
31038 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
31039 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
31041 nargs_constant
= 1;
31043 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
31045 nargs_constant
= 2;
31047 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
31048 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
31052 gcc_unreachable ();
31055 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31057 if (comparison
!= UNKNOWN
)
31059 gcc_assert (nargs
== 2);
31060 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
31063 if (rmode
== VOIDmode
|| rmode
== tmode
)
31067 || GET_MODE (target
) != tmode
31068 || !insn_p
->operand
[0].predicate (target
, tmode
))
31069 target
= gen_reg_rtx (tmode
);
31070 real_target
= target
;
31074 target
= gen_reg_rtx (rmode
);
31075 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
31078 for (i
= 0; i
< nargs
; i
++)
31080 tree arg
= CALL_EXPR_ARG (exp
, i
);
31081 rtx op
= expand_normal (arg
);
31082 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31083 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31085 if (last_arg_count
&& (i
+ 1) == nargs
)
31087 /* SIMD shift insns take either an 8-bit immediate or
31088 register as count. But builtin functions take int as
31089 count. If count doesn't match, we put it in register. */
31092 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
31093 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
31094 op
= copy_to_reg (op
);
31097 else if ((nargs
- i
) <= nargs_constant
)
31102 case CODE_FOR_avx2_inserti128
:
31103 case CODE_FOR_avx2_extracti128
:
31104 error ("the last argument must be an 1-bit immediate");
31107 case CODE_FOR_sse4_1_roundsd
:
31108 case CODE_FOR_sse4_1_roundss
:
31110 case CODE_FOR_sse4_1_roundpd
:
31111 case CODE_FOR_sse4_1_roundps
:
31112 case CODE_FOR_avx_roundpd256
:
31113 case CODE_FOR_avx_roundps256
:
31115 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
31116 case CODE_FOR_sse4_1_roundps_sfix
:
31117 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
31118 case CODE_FOR_avx_roundps_sfix256
:
31120 case CODE_FOR_sse4_1_blendps
:
31121 case CODE_FOR_avx_blendpd256
:
31122 case CODE_FOR_avx_vpermilv4df
:
31123 error ("the last argument must be a 4-bit immediate");
31126 case CODE_FOR_sse4_1_blendpd
:
31127 case CODE_FOR_avx_vpermilv2df
:
31128 case CODE_FOR_xop_vpermil2v2df3
:
31129 case CODE_FOR_xop_vpermil2v4sf3
:
31130 case CODE_FOR_xop_vpermil2v4df3
:
31131 case CODE_FOR_xop_vpermil2v8sf3
:
31132 error ("the last argument must be a 2-bit immediate");
31135 case CODE_FOR_avx_vextractf128v4df
:
31136 case CODE_FOR_avx_vextractf128v8sf
:
31137 case CODE_FOR_avx_vextractf128v8si
:
31138 case CODE_FOR_avx_vinsertf128v4df
:
31139 case CODE_FOR_avx_vinsertf128v8sf
:
31140 case CODE_FOR_avx_vinsertf128v8si
:
31141 error ("the last argument must be a 1-bit immediate");
31144 case CODE_FOR_avx_vmcmpv2df3
:
31145 case CODE_FOR_avx_vmcmpv4sf3
:
31146 case CODE_FOR_avx_cmpv2df3
:
31147 case CODE_FOR_avx_cmpv4sf3
:
31148 case CODE_FOR_avx_cmpv4df3
:
31149 case CODE_FOR_avx_cmpv8sf3
:
31150 error ("the last argument must be a 5-bit immediate");
31154 switch (nargs_constant
)
31157 if ((nargs
- i
) == nargs_constant
)
31159 error ("the next to last argument must be an 8-bit immediate");
31163 error ("the last argument must be an 8-bit immediate");
31166 gcc_unreachable ();
31173 if (VECTOR_MODE_P (mode
))
31174 op
= safe_vector_operand (op
, mode
);
31176 /* If we aren't optimizing, only allow one memory operand to
31178 if (memory_operand (op
, mode
))
31181 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
31183 if (optimize
|| !match
|| num_memory
> 1)
31184 op
= copy_to_mode_reg (mode
, op
);
31188 op
= copy_to_reg (op
);
31189 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
31194 args
[i
].mode
= mode
;
31200 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
31203 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
31206 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31210 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
31211 args
[2].op
, args
[3].op
);
31214 gcc_unreachable ();
31224 /* Subroutine of ix86_expand_builtin to take care of special insns
31225 with variable number of operands. */
31228 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
31229 tree exp
, rtx target
)
31233 unsigned int i
, nargs
, arg_adjust
, memory
;
31237 enum machine_mode mode
;
31239 enum insn_code icode
= d
->icode
;
31240 bool last_arg_constant
= false;
31241 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
31242 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
31243 enum { load
, store
} klass
;
31245 switch ((enum ix86_builtin_func_type
) d
->flag
)
31247 case VOID_FTYPE_VOID
:
31248 emit_insn (GEN_FCN (icode
) (target
));
31250 case VOID_FTYPE_UINT64
:
31251 case VOID_FTYPE_UNSIGNED
:
31257 case INT_FTYPE_VOID
:
31258 case UINT64_FTYPE_VOID
:
31259 case UNSIGNED_FTYPE_VOID
:
31264 case UINT64_FTYPE_PUNSIGNED
:
31265 case V2DI_FTYPE_PV2DI
:
31266 case V4DI_FTYPE_PV4DI
:
31267 case V32QI_FTYPE_PCCHAR
:
31268 case V16QI_FTYPE_PCCHAR
:
31269 case V8SF_FTYPE_PCV4SF
:
31270 case V8SF_FTYPE_PCFLOAT
:
31271 case V4SF_FTYPE_PCFLOAT
:
31272 case V4DF_FTYPE_PCV2DF
:
31273 case V4DF_FTYPE_PCDOUBLE
:
31274 case V2DF_FTYPE_PCDOUBLE
:
31275 case VOID_FTYPE_PVOID
:
31280 case VOID_FTYPE_PV2SF_V4SF
:
31281 case VOID_FTYPE_PV4DI_V4DI
:
31282 case VOID_FTYPE_PV2DI_V2DI
:
31283 case VOID_FTYPE_PCHAR_V32QI
:
31284 case VOID_FTYPE_PCHAR_V16QI
:
31285 case VOID_FTYPE_PFLOAT_V8SF
:
31286 case VOID_FTYPE_PFLOAT_V4SF
:
31287 case VOID_FTYPE_PDOUBLE_V4DF
:
31288 case VOID_FTYPE_PDOUBLE_V2DF
:
31289 case VOID_FTYPE_PLONGLONG_LONGLONG
:
31290 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
31291 case VOID_FTYPE_PINT_INT
:
31294 /* Reserve memory operand for target. */
31295 memory
= ARRAY_SIZE (args
);
31297 case V4SF_FTYPE_V4SF_PCV2SF
:
31298 case V2DF_FTYPE_V2DF_PCDOUBLE
:
31303 case V8SF_FTYPE_PCV8SF_V8SI
:
31304 case V4DF_FTYPE_PCV4DF_V4DI
:
31305 case V4SF_FTYPE_PCV4SF_V4SI
:
31306 case V2DF_FTYPE_PCV2DF_V2DI
:
31307 case V8SI_FTYPE_PCV8SI_V8SI
:
31308 case V4DI_FTYPE_PCV4DI_V4DI
:
31309 case V4SI_FTYPE_PCV4SI_V4SI
:
31310 case V2DI_FTYPE_PCV2DI_V2DI
:
31315 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
31316 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
31317 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
31318 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
31319 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
31320 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
31321 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
31322 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
31325 /* Reserve memory operand for target. */
31326 memory
= ARRAY_SIZE (args
);
31328 case VOID_FTYPE_UINT_UINT_UINT
:
31329 case VOID_FTYPE_UINT64_UINT_UINT
:
31330 case UCHAR_FTYPE_UINT_UINT_UINT
:
31331 case UCHAR_FTYPE_UINT64_UINT_UINT
:
31334 memory
= ARRAY_SIZE (args
);
31335 last_arg_constant
= true;
31338 gcc_unreachable ();
31341 gcc_assert (nargs
<= ARRAY_SIZE (args
));
31343 if (klass
== store
)
31345 arg
= CALL_EXPR_ARG (exp
, 0);
31346 op
= expand_normal (arg
);
31347 gcc_assert (target
== 0);
31350 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31351 target
= gen_rtx_MEM (tmode
, op
);
31354 target
= force_reg (tmode
, op
);
31362 || !register_operand (target
, tmode
)
31363 || GET_MODE (target
) != tmode
)
31364 target
= gen_reg_rtx (tmode
);
31367 for (i
= 0; i
< nargs
; i
++)
31369 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
31372 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
31373 op
= expand_normal (arg
);
31374 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
31376 if (last_arg_constant
&& (i
+ 1) == nargs
)
31380 if (icode
== CODE_FOR_lwp_lwpvalsi3
31381 || icode
== CODE_FOR_lwp_lwpinssi3
31382 || icode
== CODE_FOR_lwp_lwpvaldi3
31383 || icode
== CODE_FOR_lwp_lwpinsdi3
)
31384 error ("the last argument must be a 32-bit immediate");
31386 error ("the last argument must be an 8-bit immediate");
31394 /* This must be the memory operand. */
31395 op
= force_reg (Pmode
, convert_to_mode (Pmode
, op
, 1));
31396 op
= gen_rtx_MEM (mode
, op
);
31397 gcc_assert (GET_MODE (op
) == mode
31398 || GET_MODE (op
) == VOIDmode
);
31402 /* This must be register. */
31403 if (VECTOR_MODE_P (mode
))
31404 op
= safe_vector_operand (op
, mode
);
31406 gcc_assert (GET_MODE (op
) == mode
31407 || GET_MODE (op
) == VOIDmode
);
31408 op
= copy_to_mode_reg (mode
, op
);
31413 args
[i
].mode
= mode
;
31419 pat
= GEN_FCN (icode
) (target
);
31422 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
31425 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
31428 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
31431 gcc_unreachable ();
31437 return klass
== store
? 0 : target
;
31440 /* Return the integer constant in ARG. Constrain it to be in the range
31441 of the subparts of VEC_TYPE; issue an error if not. */
31444 get_element_number (tree vec_type
, tree arg
)
31446 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
31448 if (!host_integerp (arg
, 1)
31449 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
31451 error ("selector must be an integer constant in the range 0..%wi", max
);
31458 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31459 ix86_expand_vector_init. We DO have language-level syntax for this, in
31460 the form of (type){ init-list }. Except that since we can't place emms
31461 instructions from inside the compiler, we can't allow the use of MMX
31462 registers unless the user explicitly asks for it. So we do *not* define
31463 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31464 we have builtins invoked by mmintrin.h that gives us license to emit
31465 these sorts of instructions. */
31468 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
31470 enum machine_mode tmode
= TYPE_MODE (type
);
31471 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
31472 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
31473 rtvec v
= rtvec_alloc (n_elt
);
31475 gcc_assert (VECTOR_MODE_P (tmode
));
31476 gcc_assert (call_expr_nargs (exp
) == n_elt
);
31478 for (i
= 0; i
< n_elt
; ++i
)
31480 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
31481 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
31484 if (!target
|| !register_operand (target
, tmode
))
31485 target
= gen_reg_rtx (tmode
);
31487 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
31491 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31492 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31493 had a language-level syntax for referencing vector elements. */
31496 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
31498 enum machine_mode tmode
, mode0
;
31503 arg0
= CALL_EXPR_ARG (exp
, 0);
31504 arg1
= CALL_EXPR_ARG (exp
, 1);
31506 op0
= expand_normal (arg0
);
31507 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
31509 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31510 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
31511 gcc_assert (VECTOR_MODE_P (mode0
));
31513 op0
= force_reg (mode0
, op0
);
31515 if (optimize
|| !target
|| !register_operand (target
, tmode
))
31516 target
= gen_reg_rtx (tmode
);
31518 ix86_expand_vector_extract (true, target
, op0
, elt
);
31523 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31524 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31525 a language-level syntax for referencing vector elements. */
31528 ix86_expand_vec_set_builtin (tree exp
)
31530 enum machine_mode tmode
, mode1
;
31531 tree arg0
, arg1
, arg2
;
31533 rtx op0
, op1
, target
;
31535 arg0
= CALL_EXPR_ARG (exp
, 0);
31536 arg1
= CALL_EXPR_ARG (exp
, 1);
31537 arg2
= CALL_EXPR_ARG (exp
, 2);
31539 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
31540 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
31541 gcc_assert (VECTOR_MODE_P (tmode
));
31543 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
31544 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
31545 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
31547 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
31548 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
31550 op0
= force_reg (tmode
, op0
);
31551 op1
= force_reg (mode1
, op1
);
31553 /* OP0 is the source of these builtin functions and shouldn't be
31554 modified. Create a copy, use it and return it as target. */
31555 target
= gen_reg_rtx (tmode
);
31556 emit_move_insn (target
, op0
);
31557 ix86_expand_vector_set (true, target
, op1
, elt
);
31562 /* Expand an expression EXP that calls a built-in function,
31563 with result going to TARGET if that's convenient
31564 (and in mode MODE if that's convenient).
31565 SUBTARGET may be used as the target for computing one of EXP's operands.
31566 IGNORE is nonzero if the value is to be ignored. */
31569 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
31570 enum machine_mode mode ATTRIBUTE_UNUSED
,
31571 int ignore ATTRIBUTE_UNUSED
)
31573 const struct builtin_description
*d
;
31575 enum insn_code icode
;
31576 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
31577 tree arg0
, arg1
, arg2
, arg3
, arg4
;
31578 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
31579 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
31580 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
31582 /* For CPU builtins that can be folded, fold first and expand the fold. */
31585 case IX86_BUILTIN_CPU_INIT
:
31587 /* Make it call __cpu_indicator_init in libgcc. */
31588 tree call_expr
, fndecl
, type
;
31589 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
31590 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
31591 call_expr
= build_call_expr (fndecl
, 0);
31592 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
31594 case IX86_BUILTIN_CPU_IS
:
31595 case IX86_BUILTIN_CPU_SUPPORTS
:
31597 tree arg0
= CALL_EXPR_ARG (exp
, 0);
31598 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
31599 gcc_assert (fold_expr
!= NULL_TREE
);
31600 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
31604 /* Determine whether the builtin function is available under the current ISA.
31605 Originally the builtin was not created if it wasn't applicable to the
31606 current ISA based on the command line switches. With function specific
31607 options, we need to check in the context of the function making the call
31608 whether it is supported. */
31609 if (ix86_builtins_isa
[fcode
].isa
31610 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
31612 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
31613 NULL
, (enum fpmath_unit
) 0, false);
31616 error ("%qE needs unknown isa option", fndecl
);
31619 gcc_assert (opts
!= NULL
);
31620 error ("%qE needs isa option %s", fndecl
, opts
);
31628 case IX86_BUILTIN_MASKMOVQ
:
31629 case IX86_BUILTIN_MASKMOVDQU
:
31630 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
31631 ? CODE_FOR_mmx_maskmovq
31632 : CODE_FOR_sse2_maskmovdqu
);
31633 /* Note the arg order is different from the operand order. */
31634 arg1
= CALL_EXPR_ARG (exp
, 0);
31635 arg2
= CALL_EXPR_ARG (exp
, 1);
31636 arg0
= CALL_EXPR_ARG (exp
, 2);
31637 op0
= expand_normal (arg0
);
31638 op1
= expand_normal (arg1
);
31639 op2
= expand_normal (arg2
);
31640 mode0
= insn_data
[icode
].operand
[0].mode
;
31641 mode1
= insn_data
[icode
].operand
[1].mode
;
31642 mode2
= insn_data
[icode
].operand
[2].mode
;
31644 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31645 op0
= gen_rtx_MEM (mode1
, op0
);
31647 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31648 op0
= copy_to_mode_reg (mode0
, op0
);
31649 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
31650 op1
= copy_to_mode_reg (mode1
, op1
);
31651 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
31652 op2
= copy_to_mode_reg (mode2
, op2
);
31653 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31659 case IX86_BUILTIN_LDMXCSR
:
31660 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
31661 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31662 emit_move_insn (target
, op0
);
31663 emit_insn (gen_sse_ldmxcsr (target
));
31666 case IX86_BUILTIN_STMXCSR
:
31667 target
= assign_386_stack_local (SImode
, SLOT_TEMP
);
31668 emit_insn (gen_sse_stmxcsr (target
));
31669 return copy_to_mode_reg (SImode
, target
);
31671 case IX86_BUILTIN_CLFLUSH
:
31672 arg0
= CALL_EXPR_ARG (exp
, 0);
31673 op0
= expand_normal (arg0
);
31674 icode
= CODE_FOR_sse2_clflush
;
31675 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31676 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31678 emit_insn (gen_sse2_clflush (op0
));
31681 case IX86_BUILTIN_MONITOR
:
31682 arg0
= CALL_EXPR_ARG (exp
, 0);
31683 arg1
= CALL_EXPR_ARG (exp
, 1);
31684 arg2
= CALL_EXPR_ARG (exp
, 2);
31685 op0
= expand_normal (arg0
);
31686 op1
= expand_normal (arg1
);
31687 op2
= expand_normal (arg2
);
31689 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31691 op1
= copy_to_mode_reg (SImode
, op1
);
31693 op2
= copy_to_mode_reg (SImode
, op2
);
31694 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
31697 case IX86_BUILTIN_MWAIT
:
31698 arg0
= CALL_EXPR_ARG (exp
, 0);
31699 arg1
= CALL_EXPR_ARG (exp
, 1);
31700 op0
= expand_normal (arg0
);
31701 op1
= expand_normal (arg1
);
31703 op0
= copy_to_mode_reg (SImode
, op0
);
31705 op1
= copy_to_mode_reg (SImode
, op1
);
31706 emit_insn (gen_sse3_mwait (op0
, op1
));
31709 case IX86_BUILTIN_VEC_INIT_V2SI
:
31710 case IX86_BUILTIN_VEC_INIT_V4HI
:
31711 case IX86_BUILTIN_VEC_INIT_V8QI
:
31712 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
31714 case IX86_BUILTIN_VEC_EXT_V2DF
:
31715 case IX86_BUILTIN_VEC_EXT_V2DI
:
31716 case IX86_BUILTIN_VEC_EXT_V4SF
:
31717 case IX86_BUILTIN_VEC_EXT_V4SI
:
31718 case IX86_BUILTIN_VEC_EXT_V8HI
:
31719 case IX86_BUILTIN_VEC_EXT_V2SI
:
31720 case IX86_BUILTIN_VEC_EXT_V4HI
:
31721 case IX86_BUILTIN_VEC_EXT_V16QI
:
31722 return ix86_expand_vec_ext_builtin (exp
, target
);
31724 case IX86_BUILTIN_VEC_SET_V2DI
:
31725 case IX86_BUILTIN_VEC_SET_V4SF
:
31726 case IX86_BUILTIN_VEC_SET_V4SI
:
31727 case IX86_BUILTIN_VEC_SET_V8HI
:
31728 case IX86_BUILTIN_VEC_SET_V4HI
:
31729 case IX86_BUILTIN_VEC_SET_V16QI
:
31730 return ix86_expand_vec_set_builtin (exp
);
31732 case IX86_BUILTIN_INFQ
:
31733 case IX86_BUILTIN_HUGE_VALQ
:
31735 REAL_VALUE_TYPE inf
;
31739 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
31741 tmp
= validize_mem (force_const_mem (mode
, tmp
));
31744 target
= gen_reg_rtx (mode
);
31746 emit_move_insn (target
, tmp
);
31750 case IX86_BUILTIN_RDPMC
:
31751 case IX86_BUILTIN_RDTSC
:
31752 case IX86_BUILTIN_RDTSCP
:
31754 op0
= gen_reg_rtx (DImode
);
31755 op1
= gen_reg_rtx (DImode
);
31757 if (fcode
== IX86_BUILTIN_RDPMC
)
31759 arg0
= CALL_EXPR_ARG (exp
, 0);
31760 op2
= expand_normal (arg0
);
31761 if (!register_operand (op2
, SImode
))
31762 op2
= copy_to_mode_reg (SImode
, op2
);
31764 insn
= (TARGET_64BIT
31765 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
31766 : gen_rdpmc (op0
, op2
));
31769 else if (fcode
== IX86_BUILTIN_RDTSC
)
31771 insn
= (TARGET_64BIT
31772 ? gen_rdtsc_rex64 (op0
, op1
)
31773 : gen_rdtsc (op0
));
31778 op2
= gen_reg_rtx (SImode
);
31780 insn
= (TARGET_64BIT
31781 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
31782 : gen_rdtscp (op0
, op2
));
31785 arg0
= CALL_EXPR_ARG (exp
, 0);
31786 op4
= expand_normal (arg0
);
31787 if (!address_operand (op4
, VOIDmode
))
31789 op4
= convert_memory_address (Pmode
, op4
);
31790 op4
= copy_addr_to_reg (op4
);
31792 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
31796 target
= gen_reg_rtx (mode
);
31800 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
31801 op1
, 1, OPTAB_DIRECT
);
31802 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
31803 op0
, 1, OPTAB_DIRECT
);
31806 emit_move_insn (target
, op0
);
31809 case IX86_BUILTIN_FXSAVE
:
31810 case IX86_BUILTIN_FXRSTOR
:
31811 case IX86_BUILTIN_FXSAVE64
:
31812 case IX86_BUILTIN_FXRSTOR64
:
31815 case IX86_BUILTIN_FXSAVE
:
31816 icode
= CODE_FOR_fxsave
;
31818 case IX86_BUILTIN_FXRSTOR
:
31819 icode
= CODE_FOR_fxrstor
;
31821 case IX86_BUILTIN_FXSAVE64
:
31822 icode
= CODE_FOR_fxsave64
;
31824 case IX86_BUILTIN_FXRSTOR64
:
31825 icode
= CODE_FOR_fxrstor64
;
31828 gcc_unreachable ();
31831 arg0
= CALL_EXPR_ARG (exp
, 0);
31832 op0
= expand_normal (arg0
);
31834 if (!address_operand (op0
, VOIDmode
))
31836 op0
= convert_memory_address (Pmode
, op0
);
31837 op0
= copy_addr_to_reg (op0
);
31839 op0
= gen_rtx_MEM (BLKmode
, op0
);
31841 pat
= GEN_FCN (icode
) (op0
);
31846 case IX86_BUILTIN_XSAVE
:
31847 case IX86_BUILTIN_XRSTOR
:
31848 case IX86_BUILTIN_XSAVE64
:
31849 case IX86_BUILTIN_XRSTOR64
:
31850 case IX86_BUILTIN_XSAVEOPT
:
31851 case IX86_BUILTIN_XSAVEOPT64
:
31852 arg0
= CALL_EXPR_ARG (exp
, 0);
31853 arg1
= CALL_EXPR_ARG (exp
, 1);
31854 op0
= expand_normal (arg0
);
31855 op1
= expand_normal (arg1
);
31857 if (!address_operand (op0
, VOIDmode
))
31859 op0
= convert_memory_address (Pmode
, op0
);
31860 op0
= copy_addr_to_reg (op0
);
31862 op0
= gen_rtx_MEM (BLKmode
, op0
);
31864 op1
= force_reg (DImode
, op1
);
31868 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
31869 NULL
, 1, OPTAB_DIRECT
);
31872 case IX86_BUILTIN_XSAVE
:
31873 icode
= CODE_FOR_xsave_rex64
;
31875 case IX86_BUILTIN_XRSTOR
:
31876 icode
= CODE_FOR_xrstor_rex64
;
31878 case IX86_BUILTIN_XSAVE64
:
31879 icode
= CODE_FOR_xsave64
;
31881 case IX86_BUILTIN_XRSTOR64
:
31882 icode
= CODE_FOR_xrstor64
;
31884 case IX86_BUILTIN_XSAVEOPT
:
31885 icode
= CODE_FOR_xsaveopt_rex64
;
31887 case IX86_BUILTIN_XSAVEOPT64
:
31888 icode
= CODE_FOR_xsaveopt64
;
31891 gcc_unreachable ();
31894 op2
= gen_lowpart (SImode
, op2
);
31895 op1
= gen_lowpart (SImode
, op1
);
31896 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
31902 case IX86_BUILTIN_XSAVE
:
31903 icode
= CODE_FOR_xsave
;
31905 case IX86_BUILTIN_XRSTOR
:
31906 icode
= CODE_FOR_xrstor
;
31908 case IX86_BUILTIN_XSAVEOPT
:
31909 icode
= CODE_FOR_xsaveopt
;
31912 gcc_unreachable ();
31914 pat
= GEN_FCN (icode
) (op0
, op1
);
31921 case IX86_BUILTIN_LLWPCB
:
31922 arg0
= CALL_EXPR_ARG (exp
, 0);
31923 op0
= expand_normal (arg0
);
31924 icode
= CODE_FOR_lwp_llwpcb
;
31925 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
31926 op0
= force_reg (Pmode
, convert_to_mode (Pmode
, op0
, 1));
31927 emit_insn (gen_lwp_llwpcb (op0
));
31930 case IX86_BUILTIN_SLWPCB
:
31931 icode
= CODE_FOR_lwp_slwpcb
;
31933 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
31934 target
= gen_reg_rtx (Pmode
);
31935 emit_insn (gen_lwp_slwpcb (target
));
31938 case IX86_BUILTIN_BEXTRI32
:
31939 case IX86_BUILTIN_BEXTRI64
:
31940 arg0
= CALL_EXPR_ARG (exp
, 0);
31941 arg1
= CALL_EXPR_ARG (exp
, 1);
31942 op0
= expand_normal (arg0
);
31943 op1
= expand_normal (arg1
);
31944 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
31945 ? CODE_FOR_tbm_bextri_si
31946 : CODE_FOR_tbm_bextri_di
);
31947 if (!CONST_INT_P (op1
))
31949 error ("last argument must be an immediate");
31954 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
31955 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
31956 op1
= GEN_INT (length
);
31957 op2
= GEN_INT (lsb_index
);
31958 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
31964 case IX86_BUILTIN_RDRAND16_STEP
:
31965 icode
= CODE_FOR_rdrandhi_1
;
31969 case IX86_BUILTIN_RDRAND32_STEP
:
31970 icode
= CODE_FOR_rdrandsi_1
;
31974 case IX86_BUILTIN_RDRAND64_STEP
:
31975 icode
= CODE_FOR_rdranddi_1
;
31979 op0
= gen_reg_rtx (mode0
);
31980 emit_insn (GEN_FCN (icode
) (op0
));
31982 arg0
= CALL_EXPR_ARG (exp
, 0);
31983 op1
= expand_normal (arg0
);
31984 if (!address_operand (op1
, VOIDmode
))
31986 op1
= convert_memory_address (Pmode
, op1
);
31987 op1
= copy_addr_to_reg (op1
);
31989 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
31991 op1
= gen_reg_rtx (SImode
);
31992 emit_move_insn (op1
, CONST1_RTX (SImode
));
31994 /* Emit SImode conditional move. */
31995 if (mode0
== HImode
)
31997 op2
= gen_reg_rtx (SImode
);
31998 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
32000 else if (mode0
== SImode
)
32003 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
32006 target
= gen_reg_rtx (SImode
);
32008 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32010 emit_insn (gen_rtx_SET (VOIDmode
, target
,
32011 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
32014 case IX86_BUILTIN_RDSEED16_STEP
:
32015 icode
= CODE_FOR_rdseedhi_1
;
32019 case IX86_BUILTIN_RDSEED32_STEP
:
32020 icode
= CODE_FOR_rdseedsi_1
;
32024 case IX86_BUILTIN_RDSEED64_STEP
:
32025 icode
= CODE_FOR_rdseeddi_1
;
32029 op0
= gen_reg_rtx (mode0
);
32030 emit_insn (GEN_FCN (icode
) (op0
));
32032 arg0
= CALL_EXPR_ARG (exp
, 0);
32033 op1
= expand_normal (arg0
);
32034 if (!address_operand (op1
, VOIDmode
))
32036 op1
= convert_memory_address (Pmode
, op1
);
32037 op1
= copy_addr_to_reg (op1
);
32039 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
32041 op2
= gen_reg_rtx (QImode
);
32043 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
32045 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
32048 target
= gen_reg_rtx (SImode
);
32050 emit_insn (gen_zero_extendqisi2 (target
, op2
));
32053 case IX86_BUILTIN_ADDCARRYX32
:
32054 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
32058 case IX86_BUILTIN_ADDCARRYX64
:
32059 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
32063 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
32064 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
32065 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
32066 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
32068 op0
= gen_reg_rtx (QImode
);
32070 /* Generate CF from input operand. */
32071 op1
= expand_normal (arg0
);
32072 op1
= copy_to_mode_reg (QImode
, convert_to_mode (QImode
, op1
, 1));
32073 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
32075 /* Gen ADCX instruction to compute X+Y+CF. */
32076 op2
= expand_normal (arg1
);
32077 op3
= expand_normal (arg2
);
32080 op2
= copy_to_mode_reg (mode0
, op2
);
32082 op3
= copy_to_mode_reg (mode0
, op3
);
32084 op0
= gen_reg_rtx (mode0
);
32086 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
32087 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
32088 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
32090 /* Store the result. */
32091 op4
= expand_normal (arg3
);
32092 if (!address_operand (op4
, VOIDmode
))
32094 op4
= convert_memory_address (Pmode
, op4
);
32095 op4
= copy_addr_to_reg (op4
);
32097 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
32099 /* Return current CF value. */
32101 target
= gen_reg_rtx (QImode
);
32103 PUT_MODE (pat
, QImode
);
32104 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
32107 case IX86_BUILTIN_GATHERSIV2DF
:
32108 icode
= CODE_FOR_avx2_gathersiv2df
;
32110 case IX86_BUILTIN_GATHERSIV4DF
:
32111 icode
= CODE_FOR_avx2_gathersiv4df
;
32113 case IX86_BUILTIN_GATHERDIV2DF
:
32114 icode
= CODE_FOR_avx2_gatherdiv2df
;
32116 case IX86_BUILTIN_GATHERDIV4DF
:
32117 icode
= CODE_FOR_avx2_gatherdiv4df
;
32119 case IX86_BUILTIN_GATHERSIV4SF
:
32120 icode
= CODE_FOR_avx2_gathersiv4sf
;
32122 case IX86_BUILTIN_GATHERSIV8SF
:
32123 icode
= CODE_FOR_avx2_gathersiv8sf
;
32125 case IX86_BUILTIN_GATHERDIV4SF
:
32126 icode
= CODE_FOR_avx2_gatherdiv4sf
;
32128 case IX86_BUILTIN_GATHERDIV8SF
:
32129 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32131 case IX86_BUILTIN_GATHERSIV2DI
:
32132 icode
= CODE_FOR_avx2_gathersiv2di
;
32134 case IX86_BUILTIN_GATHERSIV4DI
:
32135 icode
= CODE_FOR_avx2_gathersiv4di
;
32137 case IX86_BUILTIN_GATHERDIV2DI
:
32138 icode
= CODE_FOR_avx2_gatherdiv2di
;
32140 case IX86_BUILTIN_GATHERDIV4DI
:
32141 icode
= CODE_FOR_avx2_gatherdiv4di
;
32143 case IX86_BUILTIN_GATHERSIV4SI
:
32144 icode
= CODE_FOR_avx2_gathersiv4si
;
32146 case IX86_BUILTIN_GATHERSIV8SI
:
32147 icode
= CODE_FOR_avx2_gathersiv8si
;
32149 case IX86_BUILTIN_GATHERDIV4SI
:
32150 icode
= CODE_FOR_avx2_gatherdiv4si
;
32152 case IX86_BUILTIN_GATHERDIV8SI
:
32153 icode
= CODE_FOR_avx2_gatherdiv8si
;
32155 case IX86_BUILTIN_GATHERALTSIV4DF
:
32156 icode
= CODE_FOR_avx2_gathersiv4df
;
32158 case IX86_BUILTIN_GATHERALTDIV8SF
:
32159 icode
= CODE_FOR_avx2_gatherdiv8sf
;
32161 case IX86_BUILTIN_GATHERALTSIV4DI
:
32162 icode
= CODE_FOR_avx2_gathersiv4di
;
32164 case IX86_BUILTIN_GATHERALTDIV8SI
:
32165 icode
= CODE_FOR_avx2_gatherdiv8si
;
32169 arg0
= CALL_EXPR_ARG (exp
, 0);
32170 arg1
= CALL_EXPR_ARG (exp
, 1);
32171 arg2
= CALL_EXPR_ARG (exp
, 2);
32172 arg3
= CALL_EXPR_ARG (exp
, 3);
32173 arg4
= CALL_EXPR_ARG (exp
, 4);
32174 op0
= expand_normal (arg0
);
32175 op1
= expand_normal (arg1
);
32176 op2
= expand_normal (arg2
);
32177 op3
= expand_normal (arg3
);
32178 op4
= expand_normal (arg4
);
32179 /* Note the arg order is different from the operand order. */
32180 mode0
= insn_data
[icode
].operand
[1].mode
;
32181 mode2
= insn_data
[icode
].operand
[3].mode
;
32182 mode3
= insn_data
[icode
].operand
[4].mode
;
32183 mode4
= insn_data
[icode
].operand
[5].mode
;
32185 if (target
== NULL_RTX
32186 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
32187 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
32189 subtarget
= target
;
32191 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
32192 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
32194 rtx half
= gen_reg_rtx (V4SImode
);
32195 if (!nonimmediate_operand (op2
, V8SImode
))
32196 op2
= copy_to_mode_reg (V8SImode
, op2
);
32197 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
32200 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
32201 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
32203 rtx (*gen
) (rtx
, rtx
);
32204 rtx half
= gen_reg_rtx (mode0
);
32205 if (mode0
== V4SFmode
)
32206 gen
= gen_vec_extract_lo_v8sf
;
32208 gen
= gen_vec_extract_lo_v8si
;
32209 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
32210 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
32211 emit_insn (gen (half
, op0
));
32213 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
32214 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
32215 emit_insn (gen (half
, op3
));
32219 /* Force memory operand only with base register here. But we
32220 don't want to do it on memory operand for other builtin
32222 op1
= force_reg (Pmode
, convert_to_mode (Pmode
, op1
, 1));
32224 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
32225 op0
= copy_to_mode_reg (mode0
, op0
);
32226 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
32227 op1
= copy_to_mode_reg (Pmode
, op1
);
32228 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
32229 op2
= copy_to_mode_reg (mode2
, op2
);
32230 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
32231 op3
= copy_to_mode_reg (mode3
, op3
);
32232 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
32234 error ("last argument must be scale 1, 2, 4, 8");
32238 /* Optimize. If mask is known to have all high bits set,
32239 replace op0 with pc_rtx to signal that the instruction
32240 overwrites the whole destination and doesn't use its
32241 previous contents. */
32244 if (TREE_CODE (arg3
) == VECTOR_CST
)
32246 unsigned int negative
= 0;
32247 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
32249 tree cst
= VECTOR_CST_ELT (arg3
, i
);
32250 if (TREE_CODE (cst
) == INTEGER_CST
32251 && tree_int_cst_sign_bit (cst
))
32253 else if (TREE_CODE (cst
) == REAL_CST
32254 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
32257 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
32260 else if (TREE_CODE (arg3
) == SSA_NAME
)
32262 /* Recognize also when mask is like:
32263 __v2df src = _mm_setzero_pd ();
32264 __v2df mask = _mm_cmpeq_pd (src, src);
32266 __v8sf src = _mm256_setzero_ps ();
32267 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32268 as that is a cheaper way to load all ones into
32269 a register than having to load a constant from
32271 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
32272 if (is_gimple_call (def_stmt
))
32274 tree fndecl
= gimple_call_fndecl (def_stmt
);
32276 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
32277 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
32279 case IX86_BUILTIN_CMPPD
:
32280 case IX86_BUILTIN_CMPPS
:
32281 case IX86_BUILTIN_CMPPD256
:
32282 case IX86_BUILTIN_CMPPS256
:
32283 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
32286 case IX86_BUILTIN_CMPEQPD
:
32287 case IX86_BUILTIN_CMPEQPS
:
32288 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
32289 && initializer_zerop (gimple_call_arg (def_stmt
,
32300 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
32305 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
32306 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
32308 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
32309 ? V4SFmode
: V4SImode
;
32310 if (target
== NULL_RTX
)
32311 target
= gen_reg_rtx (tmode
);
32312 if (tmode
== V4SFmode
)
32313 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
32315 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
32318 target
= subtarget
;
32322 case IX86_BUILTIN_XABORT
:
32323 icode
= CODE_FOR_xabort
;
32324 arg0
= CALL_EXPR_ARG (exp
, 0);
32325 op0
= expand_normal (arg0
);
32326 mode0
= insn_data
[icode
].operand
[0].mode
;
32327 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
32329 error ("the xabort's argument must be an 8-bit immediate");
32332 emit_insn (gen_xabort (op0
));
32339 for (i
= 0, d
= bdesc_special_args
;
32340 i
< ARRAY_SIZE (bdesc_special_args
);
32342 if (d
->code
== fcode
)
32343 return ix86_expand_special_args_builtin (d
, exp
, target
);
32345 for (i
= 0, d
= bdesc_args
;
32346 i
< ARRAY_SIZE (bdesc_args
);
32348 if (d
->code
== fcode
)
32351 case IX86_BUILTIN_FABSQ
:
32352 case IX86_BUILTIN_COPYSIGNQ
:
32354 /* Emit a normal call if SSE isn't available. */
32355 return expand_call (exp
, target
, ignore
);
32357 return ix86_expand_args_builtin (d
, exp
, target
);
32360 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
32361 if (d
->code
== fcode
)
32362 return ix86_expand_sse_comi (d
, exp
, target
);
32364 for (i
= 0, d
= bdesc_pcmpestr
;
32365 i
< ARRAY_SIZE (bdesc_pcmpestr
);
32367 if (d
->code
== fcode
)
32368 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
32370 for (i
= 0, d
= bdesc_pcmpistr
;
32371 i
< ARRAY_SIZE (bdesc_pcmpistr
);
32373 if (d
->code
== fcode
)
32374 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
32376 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
32377 if (d
->code
== fcode
)
32378 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
32379 (enum ix86_builtin_func_type
)
32380 d
->flag
, d
->comparison
);
32382 gcc_unreachable ();
32385 /* Returns a function decl for a vectorized version of the builtin function
32386 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32387 if it is not available. */
32390 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
32393 enum machine_mode in_mode
, out_mode
;
32395 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
32397 if (TREE_CODE (type_out
) != VECTOR_TYPE
32398 || TREE_CODE (type_in
) != VECTOR_TYPE
32399 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
32402 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32403 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
32404 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32405 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32409 case BUILT_IN_SQRT
:
32410 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32412 if (out_n
== 2 && in_n
== 2)
32413 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
32414 else if (out_n
== 4 && in_n
== 4)
32415 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
32419 case BUILT_IN_SQRTF
:
32420 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32422 if (out_n
== 4 && in_n
== 4)
32423 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
32424 else if (out_n
== 8 && in_n
== 8)
32425 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
32429 case BUILT_IN_IFLOOR
:
32430 case BUILT_IN_LFLOOR
:
32431 case BUILT_IN_LLFLOOR
:
32432 /* The round insn does not trap on denormals. */
32433 if (flag_trapping_math
|| !TARGET_ROUND
)
32436 if (out_mode
== SImode
&& in_mode
== DFmode
)
32438 if (out_n
== 4 && in_n
== 2)
32439 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
32440 else if (out_n
== 8 && in_n
== 4)
32441 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
32445 case BUILT_IN_IFLOORF
:
32446 case BUILT_IN_LFLOORF
:
32447 case BUILT_IN_LLFLOORF
:
32448 /* The round insn does not trap on denormals. */
32449 if (flag_trapping_math
|| !TARGET_ROUND
)
32452 if (out_mode
== SImode
&& in_mode
== SFmode
)
32454 if (out_n
== 4 && in_n
== 4)
32455 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
32456 else if (out_n
== 8 && in_n
== 8)
32457 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
32461 case BUILT_IN_ICEIL
:
32462 case BUILT_IN_LCEIL
:
32463 case BUILT_IN_LLCEIL
:
32464 /* The round insn does not trap on denormals. */
32465 if (flag_trapping_math
|| !TARGET_ROUND
)
32468 if (out_mode
== SImode
&& in_mode
== DFmode
)
32470 if (out_n
== 4 && in_n
== 2)
32471 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
32472 else if (out_n
== 8 && in_n
== 4)
32473 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
32477 case BUILT_IN_ICEILF
:
32478 case BUILT_IN_LCEILF
:
32479 case BUILT_IN_LLCEILF
:
32480 /* The round insn does not trap on denormals. */
32481 if (flag_trapping_math
|| !TARGET_ROUND
)
32484 if (out_mode
== SImode
&& in_mode
== SFmode
)
32486 if (out_n
== 4 && in_n
== 4)
32487 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
32488 else if (out_n
== 8 && in_n
== 8)
32489 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
32493 case BUILT_IN_IRINT
:
32494 case BUILT_IN_LRINT
:
32495 case BUILT_IN_LLRINT
:
32496 if (out_mode
== SImode
&& in_mode
== DFmode
)
32498 if (out_n
== 4 && in_n
== 2)
32499 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
32500 else if (out_n
== 8 && in_n
== 4)
32501 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
32505 case BUILT_IN_IRINTF
:
32506 case BUILT_IN_LRINTF
:
32507 case BUILT_IN_LLRINTF
:
32508 if (out_mode
== SImode
&& in_mode
== SFmode
)
32510 if (out_n
== 4 && in_n
== 4)
32511 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
32512 else if (out_n
== 8 && in_n
== 8)
32513 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
32517 case BUILT_IN_IROUND
:
32518 case BUILT_IN_LROUND
:
32519 case BUILT_IN_LLROUND
:
32520 /* The round insn does not trap on denormals. */
32521 if (flag_trapping_math
|| !TARGET_ROUND
)
32524 if (out_mode
== SImode
&& in_mode
== DFmode
)
32526 if (out_n
== 4 && in_n
== 2)
32527 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
32528 else if (out_n
== 8 && in_n
== 4)
32529 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
32533 case BUILT_IN_IROUNDF
:
32534 case BUILT_IN_LROUNDF
:
32535 case BUILT_IN_LLROUNDF
:
32536 /* The round insn does not trap on denormals. */
32537 if (flag_trapping_math
|| !TARGET_ROUND
)
32540 if (out_mode
== SImode
&& in_mode
== SFmode
)
32542 if (out_n
== 4 && in_n
== 4)
32543 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
32544 else if (out_n
== 8 && in_n
== 8)
32545 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
32549 case BUILT_IN_COPYSIGN
:
32550 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32552 if (out_n
== 2 && in_n
== 2)
32553 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
32554 else if (out_n
== 4 && in_n
== 4)
32555 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
32559 case BUILT_IN_COPYSIGNF
:
32560 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32562 if (out_n
== 4 && in_n
== 4)
32563 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
32564 else if (out_n
== 8 && in_n
== 8)
32565 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
32569 case BUILT_IN_FLOOR
:
32570 /* The round insn does not trap on denormals. */
32571 if (flag_trapping_math
|| !TARGET_ROUND
)
32574 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32576 if (out_n
== 2 && in_n
== 2)
32577 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
32578 else if (out_n
== 4 && in_n
== 4)
32579 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
32583 case BUILT_IN_FLOORF
:
32584 /* The round insn does not trap on denormals. */
32585 if (flag_trapping_math
|| !TARGET_ROUND
)
32588 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32590 if (out_n
== 4 && in_n
== 4)
32591 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
32592 else if (out_n
== 8 && in_n
== 8)
32593 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
32597 case BUILT_IN_CEIL
:
32598 /* The round insn does not trap on denormals. */
32599 if (flag_trapping_math
|| !TARGET_ROUND
)
32602 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32604 if (out_n
== 2 && in_n
== 2)
32605 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
32606 else if (out_n
== 4 && in_n
== 4)
32607 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
32611 case BUILT_IN_CEILF
:
32612 /* The round insn does not trap on denormals. */
32613 if (flag_trapping_math
|| !TARGET_ROUND
)
32616 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32618 if (out_n
== 4 && in_n
== 4)
32619 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
32620 else if (out_n
== 8 && in_n
== 8)
32621 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
32625 case BUILT_IN_TRUNC
:
32626 /* The round insn does not trap on denormals. */
32627 if (flag_trapping_math
|| !TARGET_ROUND
)
32630 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32632 if (out_n
== 2 && in_n
== 2)
32633 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
32634 else if (out_n
== 4 && in_n
== 4)
32635 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
32639 case BUILT_IN_TRUNCF
:
32640 /* The round insn does not trap on denormals. */
32641 if (flag_trapping_math
|| !TARGET_ROUND
)
32644 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32646 if (out_n
== 4 && in_n
== 4)
32647 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
32648 else if (out_n
== 8 && in_n
== 8)
32649 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
32653 case BUILT_IN_RINT
:
32654 /* The round insn does not trap on denormals. */
32655 if (flag_trapping_math
|| !TARGET_ROUND
)
32658 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32660 if (out_n
== 2 && in_n
== 2)
32661 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
32662 else if (out_n
== 4 && in_n
== 4)
32663 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
32667 case BUILT_IN_RINTF
:
32668 /* The round insn does not trap on denormals. */
32669 if (flag_trapping_math
|| !TARGET_ROUND
)
32672 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32674 if (out_n
== 4 && in_n
== 4)
32675 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
32676 else if (out_n
== 8 && in_n
== 8)
32677 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
32681 case BUILT_IN_ROUND
:
32682 /* The round insn does not trap on denormals. */
32683 if (flag_trapping_math
|| !TARGET_ROUND
)
32686 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32688 if (out_n
== 2 && in_n
== 2)
32689 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
32690 else if (out_n
== 4 && in_n
== 4)
32691 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
32695 case BUILT_IN_ROUNDF
:
32696 /* The round insn does not trap on denormals. */
32697 if (flag_trapping_math
|| !TARGET_ROUND
)
32700 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32702 if (out_n
== 4 && in_n
== 4)
32703 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
32704 else if (out_n
== 8 && in_n
== 8)
32705 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
32710 if (out_mode
== DFmode
&& in_mode
== DFmode
)
32712 if (out_n
== 2 && in_n
== 2)
32713 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
32714 if (out_n
== 4 && in_n
== 4)
32715 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
32719 case BUILT_IN_FMAF
:
32720 if (out_mode
== SFmode
&& in_mode
== SFmode
)
32722 if (out_n
== 4 && in_n
== 4)
32723 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
32724 if (out_n
== 8 && in_n
== 8)
32725 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
32733 /* Dispatch to a handler for a vectorization library. */
32734 if (ix86_veclib_handler
)
32735 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
32741 /* Handler for an SVML-style interface to
32742 a library with vectorized intrinsics. */
32745 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
32748 tree fntype
, new_fndecl
, args
;
32751 enum machine_mode el_mode
, in_mode
;
32754 /* The SVML is suitable for unsafe math only. */
32755 if (!flag_unsafe_math_optimizations
)
32758 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32759 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32760 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32761 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32762 if (el_mode
!= in_mode
32770 case BUILT_IN_LOG10
:
32772 case BUILT_IN_TANH
:
32774 case BUILT_IN_ATAN
:
32775 case BUILT_IN_ATAN2
:
32776 case BUILT_IN_ATANH
:
32777 case BUILT_IN_CBRT
:
32778 case BUILT_IN_SINH
:
32780 case BUILT_IN_ASINH
:
32781 case BUILT_IN_ASIN
:
32782 case BUILT_IN_COSH
:
32784 case BUILT_IN_ACOSH
:
32785 case BUILT_IN_ACOS
:
32786 if (el_mode
!= DFmode
|| n
!= 2)
32790 case BUILT_IN_EXPF
:
32791 case BUILT_IN_LOGF
:
32792 case BUILT_IN_LOG10F
:
32793 case BUILT_IN_POWF
:
32794 case BUILT_IN_TANHF
:
32795 case BUILT_IN_TANF
:
32796 case BUILT_IN_ATANF
:
32797 case BUILT_IN_ATAN2F
:
32798 case BUILT_IN_ATANHF
:
32799 case BUILT_IN_CBRTF
:
32800 case BUILT_IN_SINHF
:
32801 case BUILT_IN_SINF
:
32802 case BUILT_IN_ASINHF
:
32803 case BUILT_IN_ASINF
:
32804 case BUILT_IN_COSHF
:
32805 case BUILT_IN_COSF
:
32806 case BUILT_IN_ACOSHF
:
32807 case BUILT_IN_ACOSF
:
32808 if (el_mode
!= SFmode
|| n
!= 4)
32816 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32818 if (fn
== BUILT_IN_LOGF
)
32819 strcpy (name
, "vmlsLn4");
32820 else if (fn
== BUILT_IN_LOG
)
32821 strcpy (name
, "vmldLn2");
32824 sprintf (name
, "vmls%s", bname
+10);
32825 name
[strlen (name
)-1] = '4';
32828 sprintf (name
, "vmld%s2", bname
+10);
32830 /* Convert to uppercase. */
32834 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32836 args
= TREE_CHAIN (args
))
32840 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32842 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32844 /* Build a function declaration for the vectorized function. */
32845 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32846 FUNCTION_DECL
, get_identifier (name
), fntype
);
32847 TREE_PUBLIC (new_fndecl
) = 1;
32848 DECL_EXTERNAL (new_fndecl
) = 1;
32849 DECL_IS_NOVOPS (new_fndecl
) = 1;
32850 TREE_READONLY (new_fndecl
) = 1;
32855 /* Handler for an ACML-style interface to
32856 a library with vectorized intrinsics. */
32859 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
32861 char name
[20] = "__vr.._";
32862 tree fntype
, new_fndecl
, args
;
32865 enum machine_mode el_mode
, in_mode
;
32868 /* The ACML is 64bits only and suitable for unsafe math only as
32869 it does not correctly support parts of IEEE with the required
32870 precision such as denormals. */
32872 || !flag_unsafe_math_optimizations
)
32875 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
32876 n
= TYPE_VECTOR_SUBPARTS (type_out
);
32877 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
32878 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
32879 if (el_mode
!= in_mode
32889 case BUILT_IN_LOG2
:
32890 case BUILT_IN_LOG10
:
32893 if (el_mode
!= DFmode
32898 case BUILT_IN_SINF
:
32899 case BUILT_IN_COSF
:
32900 case BUILT_IN_EXPF
:
32901 case BUILT_IN_POWF
:
32902 case BUILT_IN_LOGF
:
32903 case BUILT_IN_LOG2F
:
32904 case BUILT_IN_LOG10F
:
32907 if (el_mode
!= SFmode
32916 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
32917 sprintf (name
+ 7, "%s", bname
+10);
32920 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
32922 args
= TREE_CHAIN (args
))
32926 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
32928 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
32930 /* Build a function declaration for the vectorized function. */
32931 new_fndecl
= build_decl (BUILTINS_LOCATION
,
32932 FUNCTION_DECL
, get_identifier (name
), fntype
);
32933 TREE_PUBLIC (new_fndecl
) = 1;
32934 DECL_EXTERNAL (new_fndecl
) = 1;
32935 DECL_IS_NOVOPS (new_fndecl
) = 1;
32936 TREE_READONLY (new_fndecl
) = 1;
32941 /* Returns a decl of a function that implements gather load with
32942 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
32943 Return NULL_TREE if it is not available. */
32946 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
32947 const_tree index_type
, int scale
)
32950 enum ix86_builtins code
;
32955 if ((TREE_CODE (index_type
) != INTEGER_TYPE
32956 && !POINTER_TYPE_P (index_type
))
32957 || (TYPE_MODE (index_type
) != SImode
32958 && TYPE_MODE (index_type
) != DImode
))
32961 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
32964 /* v*gather* insn sign extends index to pointer mode. */
32965 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
32966 && TYPE_UNSIGNED (index_type
))
32971 || (scale
& (scale
- 1)) != 0)
32974 si
= TYPE_MODE (index_type
) == SImode
;
32975 switch (TYPE_MODE (mem_vectype
))
32978 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
32981 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
32984 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
32987 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
32990 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
32993 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
32996 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
32999 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
33005 return ix86_builtins
[code
];
33008 /* Returns a code for a target-specific builtin that implements
33009 reciprocal of the function, or NULL_TREE if not available. */
33012 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
33013 bool sqrt ATTRIBUTE_UNUSED
)
33015 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
33016 && flag_finite_math_only
&& !flag_trapping_math
33017 && flag_unsafe_math_optimizations
))
33021 /* Machine dependent builtins. */
33024 /* Vectorized version of sqrt to rsqrt conversion. */
33025 case IX86_BUILTIN_SQRTPS_NR
:
33026 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
33028 case IX86_BUILTIN_SQRTPS_NR256
:
33029 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
33035 /* Normal builtins. */
33038 /* Sqrt to rsqrt conversion. */
33039 case BUILT_IN_SQRTF
:
33040 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
33047 /* Helper for avx_vpermilps256_operand et al. This is also used by
33048 the expansion functions to turn the parallel back into a mask.
33049 The return value is 0 for no match and the imm8+1 for a match. */
33052 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
33054 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
33056 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33058 if (XVECLEN (par
, 0) != (int) nelt
)
33061 /* Validate that all of the elements are constants, and not totally
33062 out of range. Copy the data into an integral array to make the
33063 subsequent checks easier. */
33064 for (i
= 0; i
< nelt
; ++i
)
33066 rtx er
= XVECEXP (par
, 0, i
);
33067 unsigned HOST_WIDE_INT ei
;
33069 if (!CONST_INT_P (er
))
33080 /* In the 256-bit DFmode case, we can only move elements within
33082 for (i
= 0; i
< 2; ++i
)
33086 mask
|= ipar
[i
] << i
;
33088 for (i
= 2; i
< 4; ++i
)
33092 mask
|= (ipar
[i
] - 2) << i
;
33097 /* In the 256-bit SFmode case, we have full freedom of movement
33098 within the low 128-bit lane, but the high 128-bit lane must
33099 mirror the exact same pattern. */
33100 for (i
= 0; i
< 4; ++i
)
33101 if (ipar
[i
] + 4 != ipar
[i
+ 4])
33108 /* In the 128-bit case, we've full freedom in the placement of
33109 the elements from the source operand. */
33110 for (i
= 0; i
< nelt
; ++i
)
33111 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
33115 gcc_unreachable ();
33118 /* Make sure success has a non-zero value by adding one. */
33122 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33123 the expansion functions to turn the parallel back into a mask.
33124 The return value is 0 for no match and the imm8+1 for a match. */
33127 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
33129 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
33131 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
33133 if (XVECLEN (par
, 0) != (int) nelt
)
33136 /* Validate that all of the elements are constants, and not totally
33137 out of range. Copy the data into an integral array to make the
33138 subsequent checks easier. */
33139 for (i
= 0; i
< nelt
; ++i
)
33141 rtx er
= XVECEXP (par
, 0, i
);
33142 unsigned HOST_WIDE_INT ei
;
33144 if (!CONST_INT_P (er
))
33147 if (ei
>= 2 * nelt
)
33152 /* Validate that the halves of the permute are halves. */
33153 for (i
= 0; i
< nelt2
- 1; ++i
)
33154 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33156 for (i
= nelt2
; i
< nelt
- 1; ++i
)
33157 if (ipar
[i
] + 1 != ipar
[i
+ 1])
33160 /* Reconstruct the mask. */
33161 for (i
= 0; i
< 2; ++i
)
33163 unsigned e
= ipar
[i
* nelt2
];
33167 mask
|= e
<< (i
* 4);
33170 /* Make sure success has a non-zero value by adding one. */
33174 /* Store OPERAND to the memory after reload is completed. This means
33175 that we can't easily use assign_stack_local. */
33177 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
33181 gcc_assert (reload_completed
);
33182 if (ix86_using_red_zone ())
33184 result
= gen_rtx_MEM (mode
,
33185 gen_rtx_PLUS (Pmode
,
33187 GEN_INT (-RED_ZONE_SIZE
)));
33188 emit_move_insn (result
, operand
);
33190 else if (TARGET_64BIT
)
33196 operand
= gen_lowpart (DImode
, operand
);
33200 gen_rtx_SET (VOIDmode
,
33201 gen_rtx_MEM (DImode
,
33202 gen_rtx_PRE_DEC (DImode
,
33203 stack_pointer_rtx
)),
33207 gcc_unreachable ();
33209 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33218 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
33220 gen_rtx_SET (VOIDmode
,
33221 gen_rtx_MEM (SImode
,
33222 gen_rtx_PRE_DEC (Pmode
,
33223 stack_pointer_rtx
)),
33226 gen_rtx_SET (VOIDmode
,
33227 gen_rtx_MEM (SImode
,
33228 gen_rtx_PRE_DEC (Pmode
,
33229 stack_pointer_rtx
)),
33234 /* Store HImodes as SImodes. */
33235 operand
= gen_lowpart (SImode
, operand
);
33239 gen_rtx_SET (VOIDmode
,
33240 gen_rtx_MEM (GET_MODE (operand
),
33241 gen_rtx_PRE_DEC (SImode
,
33242 stack_pointer_rtx
)),
33246 gcc_unreachable ();
33248 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
33253 /* Free operand from the memory. */
33255 ix86_free_from_memory (enum machine_mode mode
)
33257 if (!ix86_using_red_zone ())
33261 if (mode
== DImode
|| TARGET_64BIT
)
33265 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33266 to pop or add instruction if registers are available. */
33267 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
33268 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
33273 /* Return a register priority for hard reg REGNO. */
33275 ix86_register_priority (int hard_regno
)
33277 /* ebp and r13 as the base always wants a displacement, r12 as the
33278 base always wants an index. So discourage their usage in an
33280 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
33282 if (hard_regno
== BP_REG
)
33284 /* New x86-64 int registers result in bigger code size. Discourage
33286 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
33288 /* New x86-64 SSE registers result in bigger code size. Discourage
33290 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
33292 /* Usage of AX register results in smaller code. Prefer it. */
33293 if (hard_regno
== 0)
33298 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33300 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33301 QImode must go into class Q_REGS.
33302 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33303 movdf to do mem-to-mem moves through integer regs. */
33306 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
33308 enum machine_mode mode
= GET_MODE (x
);
33310 /* We're only allowed to return a subclass of CLASS. Many of the
33311 following checks fail for NO_REGS, so eliminate that early. */
33312 if (regclass
== NO_REGS
)
33315 /* All classes can load zeros. */
33316 if (x
== CONST0_RTX (mode
))
33319 /* Force constants into memory if we are loading a (nonzero) constant into
33320 an MMX or SSE register. This is because there are no MMX/SSE instructions
33321 to load from a constant. */
33323 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
33326 /* Prefer SSE regs only, if we can use them for math. */
33327 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
33328 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33330 /* Floating-point constants need more complex checks. */
33331 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
33333 /* General regs can load everything. */
33334 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
33337 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33338 zero above. We only want to wind up preferring 80387 registers if
33339 we plan on doing computation with them. */
33341 && standard_80387_constant_p (x
) > 0)
33343 /* Limit class to non-sse. */
33344 if (regclass
== FLOAT_SSE_REGS
)
33346 if (regclass
== FP_TOP_SSE_REGS
)
33348 if (regclass
== FP_SECOND_SSE_REGS
)
33349 return FP_SECOND_REG
;
33350 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
33357 /* Generally when we see PLUS here, it's the function invariant
33358 (plus soft-fp const_int). Which can only be computed into general
33360 if (GET_CODE (x
) == PLUS
)
33361 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
33363 /* QImode constants are easy to load, but non-constant QImode data
33364 must go into Q_REGS. */
33365 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
33367 if (reg_class_subset_p (regclass
, Q_REGS
))
33369 if (reg_class_subset_p (Q_REGS
, regclass
))
33377 /* Discourage putting floating-point values in SSE registers unless
33378 SSE math is being used, and likewise for the 387 registers. */
33380 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
33382 enum machine_mode mode
= GET_MODE (x
);
33384 /* Restrict the output reload class to the register bank that we are doing
33385 math on. If we would like not to return a subset of CLASS, reject this
33386 alternative: if reload cannot do this, it will still use its choice. */
33387 mode
= GET_MODE (x
);
33388 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
33389 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
33391 if (X87_FLOAT_MODE_P (mode
))
33393 if (regclass
== FP_TOP_SSE_REGS
)
33395 else if (regclass
== FP_SECOND_SSE_REGS
)
33396 return FP_SECOND_REG
;
33398 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
33405 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
33406 enum machine_mode mode
, secondary_reload_info
*sri
)
33408 /* Double-word spills from general registers to non-offsettable memory
33409 references (zero-extended addresses) require special handling. */
33412 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
33413 && rclass
== GENERAL_REGS
33414 && !offsettable_memref_p (x
))
33417 ? CODE_FOR_reload_noff_load
33418 : CODE_FOR_reload_noff_store
);
33419 /* Add the cost of moving address to a temporary. */
33420 sri
->extra_cost
= 1;
33425 /* QImode spills from non-QI registers require
33426 intermediate register on 32bit targets. */
33428 && !in_p
&& mode
== QImode
33429 && (rclass
== GENERAL_REGS
33430 || rclass
== LEGACY_REGS
33431 || rclass
== NON_Q_REGS
33434 || rclass
== INDEX_REGS
))
33443 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
33444 regno
= true_regnum (x
);
33446 /* Return Q_REGS if the operand is in memory. */
33451 /* This condition handles corner case where an expression involving
33452 pointers gets vectorized. We're trying to use the address of a
33453 stack slot as a vector initializer.
33455 (set (reg:V2DI 74 [ vect_cst_.2 ])
33456 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33458 Eventually frame gets turned into sp+offset like this:
33460 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33461 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33462 (const_int 392 [0x188]))))
33464 That later gets turned into:
33466 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33467 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33468 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33470 We'll have the following reload recorded:
33472 Reload 0: reload_in (DI) =
33473 (plus:DI (reg/f:DI 7 sp)
33474 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33475 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33476 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33477 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33478 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33479 reload_reg_rtx: (reg:V2DI 22 xmm1)
33481 Which isn't going to work since SSE instructions can't handle scalar
33482 additions. Returning GENERAL_REGS forces the addition into integer
33483 register and reload can handle subsequent reloads without problems. */
33485 if (in_p
&& GET_CODE (x
) == PLUS
33486 && SSE_CLASS_P (rclass
)
33487 && SCALAR_INT_MODE_P (mode
))
33488 return GENERAL_REGS
;
33493 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33496 ix86_class_likely_spilled_p (reg_class_t rclass
)
33507 case SSE_FIRST_REG
:
33509 case FP_SECOND_REG
:
33519 /* If we are copying between general and FP registers, we need a memory
33520 location. The same is true for SSE and MMX registers.
33522 To optimize register_move_cost performance, allow inline variant.
33524 The macro can't work reliably when one of the CLASSES is class containing
33525 registers from multiple units (SSE, MMX, integer). We avoid this by never
33526 combining those units in single alternative in the machine description.
33527 Ensure that this constraint holds to avoid unexpected surprises.
33529 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33530 enforce these sanity checks. */
33533 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33534 enum machine_mode mode
, int strict
)
33536 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
33537 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
33538 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
33539 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
33540 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
33541 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
33543 gcc_assert (!strict
|| lra_in_progress
);
33547 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
33550 /* ??? This is a lie. We do have moves between mmx/general, and for
33551 mmx/sse2. But by saying we need secondary memory we discourage the
33552 register allocator from using the mmx registers unless needed. */
33553 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
33556 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33558 /* SSE1 doesn't have any direct moves from other classes. */
33562 /* If the target says that inter-unit moves are more expensive
33563 than moving through memory, then don't generate them. */
33564 if (!TARGET_INTER_UNIT_MOVES
)
33567 /* Between SSE and general, we have moves no larger than word size. */
33568 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33576 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
33577 enum machine_mode mode
, int strict
)
33579 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
33582 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33584 On the 80386, this is the size of MODE in words,
33585 except in the FP regs, where a single reg is always enough. */
33587 static unsigned char
33588 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
33590 if (MAYBE_INTEGER_CLASS_P (rclass
))
33592 if (mode
== XFmode
)
33593 return (TARGET_64BIT
? 2 : 3);
33594 else if (mode
== XCmode
)
33595 return (TARGET_64BIT
? 4 : 6);
33597 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
33601 if (COMPLEX_MODE_P (mode
))
33608 /* Return true if the registers in CLASS cannot represent the change from
33609 modes FROM to TO. */
33612 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
33613 enum reg_class regclass
)
33618 /* x87 registers can't do subreg at all, as all values are reformatted
33619 to extended precision. */
33620 if (MAYBE_FLOAT_CLASS_P (regclass
))
33623 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
33625 /* Vector registers do not support QI or HImode loads. If we don't
33626 disallow a change to these modes, reload will assume it's ok to
33627 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33628 the vec_dupv4hi pattern. */
33629 if (GET_MODE_SIZE (from
) < 4)
33632 /* Vector registers do not support subreg with nonzero offsets, which
33633 are otherwise valid for integer registers. Since we can't see
33634 whether we have a nonzero offset from here, prohibit all
33635 nonparadoxical subregs changing size. */
33636 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
33643 /* Return the cost of moving data of mode M between a
33644 register and memory. A value of 2 is the default; this cost is
33645 relative to those in `REGISTER_MOVE_COST'.
33647 This function is used extensively by register_move_cost that is used to
33648 build tables at startup. Make it inline in this case.
33649 When IN is 2, return maximum of in and out move cost.
33651 If moving between registers and memory is more expensive than
33652 between two registers, you should define this macro to express the
33655 Model also increased moving costs of QImode registers in non
33659 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
33663 if (FLOAT_CLASS_P (regclass
))
33681 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
33682 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
33684 if (SSE_CLASS_P (regclass
))
33687 switch (GET_MODE_SIZE (mode
))
33702 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
33703 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
33705 if (MMX_CLASS_P (regclass
))
33708 switch (GET_MODE_SIZE (mode
))
33720 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
33721 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
33723 switch (GET_MODE_SIZE (mode
))
33726 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
33729 return ix86_cost
->int_store
[0];
33730 if (TARGET_PARTIAL_REG_DEPENDENCY
33731 && optimize_function_for_speed_p (cfun
))
33732 cost
= ix86_cost
->movzbl_load
;
33734 cost
= ix86_cost
->int_load
[0];
33736 return MAX (cost
, ix86_cost
->int_store
[0]);
33742 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
33744 return ix86_cost
->movzbl_load
;
33746 return ix86_cost
->int_store
[0] + 4;
33751 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
33752 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
33754 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
33755 if (mode
== TFmode
)
33758 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
33760 cost
= ix86_cost
->int_load
[2];
33762 cost
= ix86_cost
->int_store
[2];
33763 return (cost
* (((int) GET_MODE_SIZE (mode
)
33764 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
33769 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
33772 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
33776 /* Return the cost of moving data from a register in class CLASS1 to
33777 one in class CLASS2.
33779 It is not required that the cost always equal 2 when FROM is the same as TO;
33780 on some machines it is expensive to move between registers if they are not
33781 general registers. */
33784 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
33785 reg_class_t class2_i
)
33787 enum reg_class class1
= (enum reg_class
) class1_i
;
33788 enum reg_class class2
= (enum reg_class
) class2_i
;
33790 /* In case we require secondary memory, compute cost of the store followed
33791 by load. In order to avoid bad register allocation choices, we need
33792 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33794 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
33798 cost
+= inline_memory_move_cost (mode
, class1
, 2);
33799 cost
+= inline_memory_move_cost (mode
, class2
, 2);
33801 /* In case of copying from general_purpose_register we may emit multiple
33802 stores followed by single load causing memory size mismatch stall.
33803 Count this as arbitrarily high cost of 20. */
33804 if (targetm
.class_max_nregs (class1
, mode
)
33805 > targetm
.class_max_nregs (class2
, mode
))
33808 /* In the case of FP/MMX moves, the registers actually overlap, and we
33809 have to switch modes in order to treat them differently. */
33810 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
33811 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
33817 /* Moves between SSE/MMX and integer unit are expensive. */
33818 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
33819 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
33821 /* ??? By keeping returned value relatively high, we limit the number
33822 of moves between integer and MMX/SSE registers for all targets.
33823 Additionally, high value prevents problem with x86_modes_tieable_p(),
33824 where integer modes in MMX/SSE registers are not tieable
33825 because of missing QImode and HImode moves to, from or between
33826 MMX/SSE registers. */
33827 return MAX (8, ix86_cost
->mmxsse_to_integer
);
33829 if (MAYBE_FLOAT_CLASS_P (class1
))
33830 return ix86_cost
->fp_move
;
33831 if (MAYBE_SSE_CLASS_P (class1
))
33832 return ix86_cost
->sse_move
;
33833 if (MAYBE_MMX_CLASS_P (class1
))
33834 return ix86_cost
->mmx_move
;
33838 /* Return TRUE if hard register REGNO can hold a value of machine-mode
33842 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
33844 /* Flags and only flags can only hold CCmode values. */
33845 if (CC_REGNO_P (regno
))
33846 return GET_MODE_CLASS (mode
) == MODE_CC
;
33847 if (GET_MODE_CLASS (mode
) == MODE_CC
33848 || GET_MODE_CLASS (mode
) == MODE_RANDOM
33849 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
33851 if (STACK_REGNO_P (regno
))
33852 return VALID_FP_MODE_P (mode
);
33853 if (SSE_REGNO_P (regno
))
33855 /* We implement the move patterns for all vector modes into and
33856 out of SSE registers, even when no operation instructions
33857 are available. OImode move is available only when AVX is
33859 return ((TARGET_AVX
&& mode
== OImode
)
33860 || VALID_AVX256_REG_MODE (mode
)
33861 || VALID_SSE_REG_MODE (mode
)
33862 || VALID_SSE2_REG_MODE (mode
)
33863 || VALID_MMX_REG_MODE (mode
)
33864 || VALID_MMX_REG_MODE_3DNOW (mode
));
33866 if (MMX_REGNO_P (regno
))
33868 /* We implement the move patterns for 3DNOW modes even in MMX mode,
33869 so if the register is available at all, then we can move data of
33870 the given mode into or out of it. */
33871 return (VALID_MMX_REG_MODE (mode
)
33872 || VALID_MMX_REG_MODE_3DNOW (mode
));
33875 if (mode
== QImode
)
33877 /* Take care for QImode values - they can be in non-QI regs,
33878 but then they do cause partial register stalls. */
33879 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
33881 if (!TARGET_PARTIAL_REG_STALL
)
33883 return !can_create_pseudo_p ();
33885 /* We handle both integer and floats in the general purpose registers. */
33886 else if (VALID_INT_MODE_P (mode
))
33888 else if (VALID_FP_MODE_P (mode
))
33890 else if (VALID_DFP_MODE_P (mode
))
33892 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
33893 on to use that value in smaller contexts, this can easily force a
33894 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
33895 supporting DImode, allow it. */
33896 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
33902 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
33903 tieable integer mode. */
33906 ix86_tieable_integer_mode_p (enum machine_mode mode
)
33915 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
33918 return TARGET_64BIT
;
33925 /* Return true if MODE1 is accessible in a register that can hold MODE2
33926 without copying. That is, all register classes that can hold MODE2
33927 can also hold MODE1. */
33930 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
33932 if (mode1
== mode2
)
33935 if (ix86_tieable_integer_mode_p (mode1
)
33936 && ix86_tieable_integer_mode_p (mode2
))
33939 /* MODE2 being XFmode implies fp stack or general regs, which means we
33940 can tie any smaller floating point modes to it. Note that we do not
33941 tie this with TFmode. */
33942 if (mode2
== XFmode
)
33943 return mode1
== SFmode
|| mode1
== DFmode
;
33945 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
33946 that we can tie it with SFmode. */
33947 if (mode2
== DFmode
)
33948 return mode1
== SFmode
;
33950 /* If MODE2 is only appropriate for an SSE register, then tie with
33951 any other mode acceptable to SSE registers. */
33952 if (GET_MODE_SIZE (mode2
) == 32
33953 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33954 return (GET_MODE_SIZE (mode1
) == 32
33955 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33956 if (GET_MODE_SIZE (mode2
) == 16
33957 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
33958 return (GET_MODE_SIZE (mode1
) == 16
33959 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
33961 /* If MODE2 is appropriate for an MMX register, then tie
33962 with any other mode acceptable to MMX registers. */
33963 if (GET_MODE_SIZE (mode2
) == 8
33964 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
33965 return (GET_MODE_SIZE (mode1
) == 8
33966 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
33971 /* Return the cost of moving between two registers of mode MODE. */
33974 ix86_set_reg_reg_cost (enum machine_mode mode
)
33976 unsigned int units
= UNITS_PER_WORD
;
33978 switch (GET_MODE_CLASS (mode
))
33984 units
= GET_MODE_SIZE (CCmode
);
33988 if ((TARGET_SSE
&& mode
== TFmode
)
33989 || (TARGET_80387
&& mode
== XFmode
)
33990 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
33991 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
33992 units
= GET_MODE_SIZE (mode
);
33995 case MODE_COMPLEX_FLOAT
:
33996 if ((TARGET_SSE
&& mode
== TCmode
)
33997 || (TARGET_80387
&& mode
== XCmode
)
33998 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
33999 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
34000 units
= GET_MODE_SIZE (mode
);
34003 case MODE_VECTOR_INT
:
34004 case MODE_VECTOR_FLOAT
:
34005 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
34006 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
34007 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
34008 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
34009 units
= GET_MODE_SIZE (mode
);
34012 /* Return the cost of moving between two registers of mode MODE,
34013 assuming that the move will be in pieces of at most UNITS bytes. */
34014 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
34017 /* Compute a (partial) cost for rtx X. Return true if the complete
34018 cost has been computed, and false if subexpressions should be
34019 scanned. In either case, *TOTAL contains the cost result. */
34022 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
34025 enum rtx_code code
= (enum rtx_code
) code_i
;
34026 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
34027 enum machine_mode mode
= GET_MODE (x
);
34028 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
34033 if (register_operand (SET_DEST (x
), VOIDmode
)
34034 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
34036 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
34045 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
34047 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
34049 else if (flag_pic
&& SYMBOLIC_CONST (x
)
34051 || (!GET_CODE (x
) != LABEL_REF
34052 && (GET_CODE (x
) != SYMBOL_REF
34053 || !SYMBOL_REF_LOCAL_P (x
)))))
34060 if (mode
== VOIDmode
)
34065 switch (standard_80387_constant_p (x
))
34070 default: /* Other constants */
34077 if (SSE_FLOAT_MODE_P (mode
))
34080 switch (standard_sse_constant_p (x
))
34084 case 1: /* 0: xor eliminates false dependency */
34087 default: /* -1: cmp contains false dependency */
34092 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34093 it'll probably end up. Add a penalty for size. */
34094 *total
= (COSTS_N_INSNS (1)
34095 + (flag_pic
!= 0 && !TARGET_64BIT
)
34096 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
34100 /* The zero extensions is often completely free on x86_64, so make
34101 it as cheap as possible. */
34102 if (TARGET_64BIT
&& mode
== DImode
34103 && GET_MODE (XEXP (x
, 0)) == SImode
)
34105 else if (TARGET_ZERO_EXTEND_WITH_AND
)
34106 *total
= cost
->add
;
34108 *total
= cost
->movzx
;
34112 *total
= cost
->movsx
;
34116 if (SCALAR_INT_MODE_P (mode
)
34117 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
34118 && CONST_INT_P (XEXP (x
, 1)))
34120 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34123 *total
= cost
->add
;
34126 if ((value
== 2 || value
== 3)
34127 && cost
->lea
<= cost
->shift_const
)
34129 *total
= cost
->lea
;
34139 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34141 /* ??? Should be SSE vector operation cost. */
34142 /* At least for published AMD latencies, this really is the same
34143 as the latency for a simple fpu operation like fabs. */
34144 /* V*QImode is emulated with 1-11 insns. */
34145 if (mode
== V16QImode
|| mode
== V32QImode
)
34148 if (TARGET_XOP
&& mode
== V16QImode
)
34150 /* For XOP we use vpshab, which requires a broadcast of the
34151 value to the variable shift insn. For constants this
34152 means a V16Q const in mem; even when we can perform the
34153 shift with one insn set the cost to prefer paddb. */
34154 if (CONSTANT_P (XEXP (x
, 1)))
34156 *total
= (cost
->fabs
34157 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
34158 + (speed
? 2 : COSTS_N_BYTES (16)));
34163 else if (TARGET_SSSE3
)
34165 *total
= cost
->fabs
* count
;
34168 *total
= cost
->fabs
;
34170 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34172 if (CONST_INT_P (XEXP (x
, 1)))
34174 if (INTVAL (XEXP (x
, 1)) > 32)
34175 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
34177 *total
= cost
->shift_const
* 2;
34181 if (GET_CODE (XEXP (x
, 1)) == AND
)
34182 *total
= cost
->shift_var
* 2;
34184 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
34189 if (CONST_INT_P (XEXP (x
, 1)))
34190 *total
= cost
->shift_const
;
34192 *total
= cost
->shift_var
;
34200 gcc_assert (FLOAT_MODE_P (mode
));
34201 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
34203 /* ??? SSE scalar/vector cost should be used here. */
34204 /* ??? Bald assumption that fma has the same cost as fmul. */
34205 *total
= cost
->fmul
;
34206 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
34208 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34210 if (GET_CODE (sub
) == NEG
)
34211 sub
= XEXP (sub
, 0);
34212 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
34215 if (GET_CODE (sub
) == NEG
)
34216 sub
= XEXP (sub
, 0);
34217 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
34222 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34224 /* ??? SSE scalar cost should be used here. */
34225 *total
= cost
->fmul
;
34228 else if (X87_FLOAT_MODE_P (mode
))
34230 *total
= cost
->fmul
;
34233 else if (FLOAT_MODE_P (mode
))
34235 /* ??? SSE vector cost should be used here. */
34236 *total
= cost
->fmul
;
34239 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34241 /* V*QImode is emulated with 7-13 insns. */
34242 if (mode
== V16QImode
|| mode
== V32QImode
)
34245 if (TARGET_XOP
&& mode
== V16QImode
)
34247 else if (TARGET_SSSE3
)
34249 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
34251 /* V*DImode is emulated with 5-8 insns. */
34252 else if (mode
== V2DImode
|| mode
== V4DImode
)
34254 if (TARGET_XOP
&& mode
== V2DImode
)
34255 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
34257 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
34259 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34260 insns, including two PMULUDQ. */
34261 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
34262 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
34264 *total
= cost
->fmul
;
34269 rtx op0
= XEXP (x
, 0);
34270 rtx op1
= XEXP (x
, 1);
34272 if (CONST_INT_P (XEXP (x
, 1)))
34274 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
34275 for (nbits
= 0; value
!= 0; value
&= value
- 1)
34279 /* This is arbitrary. */
34282 /* Compute costs correctly for widening multiplication. */
34283 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
34284 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
34285 == GET_MODE_SIZE (mode
))
34287 int is_mulwiden
= 0;
34288 enum machine_mode inner_mode
= GET_MODE (op0
);
34290 if (GET_CODE (op0
) == GET_CODE (op1
))
34291 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
34292 else if (CONST_INT_P (op1
))
34294 if (GET_CODE (op0
) == SIGN_EXTEND
)
34295 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
34298 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
34302 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
34305 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
34306 + nbits
* cost
->mult_bit
34307 + rtx_cost (op0
, outer_code
, opno
, speed
)
34308 + rtx_cost (op1
, outer_code
, opno
, speed
));
34317 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34318 /* ??? SSE cost should be used here. */
34319 *total
= cost
->fdiv
;
34320 else if (X87_FLOAT_MODE_P (mode
))
34321 *total
= cost
->fdiv
;
34322 else if (FLOAT_MODE_P (mode
))
34323 /* ??? SSE vector cost should be used here. */
34324 *total
= cost
->fdiv
;
34326 *total
= cost
->divide
[MODE_INDEX (mode
)];
34330 if (GET_MODE_CLASS (mode
) == MODE_INT
34331 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
34333 if (GET_CODE (XEXP (x
, 0)) == PLUS
34334 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
34335 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
34336 && CONSTANT_P (XEXP (x
, 1)))
34338 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
34339 if (val
== 2 || val
== 4 || val
== 8)
34341 *total
= cost
->lea
;
34342 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34343 outer_code
, opno
, speed
);
34344 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
34345 outer_code
, opno
, speed
);
34346 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34350 else if (GET_CODE (XEXP (x
, 0)) == MULT
34351 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
34353 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
34354 if (val
== 2 || val
== 4 || val
== 8)
34356 *total
= cost
->lea
;
34357 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34358 outer_code
, opno
, speed
);
34359 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34363 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
34365 *total
= cost
->lea
;
34366 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
34367 outer_code
, opno
, speed
);
34368 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
34369 outer_code
, opno
, speed
);
34370 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
34377 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34379 /* ??? SSE cost should be used here. */
34380 *total
= cost
->fadd
;
34383 else if (X87_FLOAT_MODE_P (mode
))
34385 *total
= cost
->fadd
;
34388 else if (FLOAT_MODE_P (mode
))
34390 /* ??? SSE vector cost should be used here. */
34391 *total
= cost
->fadd
;
34399 if (GET_MODE_CLASS (mode
) == MODE_INT
34400 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34402 *total
= (cost
->add
* 2
34403 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
34404 << (GET_MODE (XEXP (x
, 0)) != DImode
))
34405 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
34406 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
34412 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34414 /* ??? SSE cost should be used here. */
34415 *total
= cost
->fchs
;
34418 else if (X87_FLOAT_MODE_P (mode
))
34420 *total
= cost
->fchs
;
34423 else if (FLOAT_MODE_P (mode
))
34425 /* ??? SSE vector cost should be used here. */
34426 *total
= cost
->fchs
;
34432 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
34434 /* ??? Should be SSE vector operation cost. */
34435 /* At least for published AMD latencies, this really is the same
34436 as the latency for a simple fpu operation like fabs. */
34437 *total
= cost
->fabs
;
34439 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
34440 *total
= cost
->add
* 2;
34442 *total
= cost
->add
;
34446 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
34447 && XEXP (XEXP (x
, 0), 1) == const1_rtx
34448 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
34449 && XEXP (x
, 1) == const0_rtx
)
34451 /* This kind of construct is implemented using test[bwl].
34452 Treat it as if we had an AND. */
34453 *total
= (cost
->add
34454 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
34455 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
34461 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
34466 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34467 /* ??? SSE cost should be used here. */
34468 *total
= cost
->fabs
;
34469 else if (X87_FLOAT_MODE_P (mode
))
34470 *total
= cost
->fabs
;
34471 else if (FLOAT_MODE_P (mode
))
34472 /* ??? SSE vector cost should be used here. */
34473 *total
= cost
->fabs
;
34477 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
34478 /* ??? SSE cost should be used here. */
34479 *total
= cost
->fsqrt
;
34480 else if (X87_FLOAT_MODE_P (mode
))
34481 *total
= cost
->fsqrt
;
34482 else if (FLOAT_MODE_P (mode
))
34483 /* ??? SSE vector cost should be used here. */
34484 *total
= cost
->fsqrt
;
34488 if (XINT (x
, 1) == UNSPEC_TP
)
34495 case VEC_DUPLICATE
:
34496 /* ??? Assume all of these vector manipulation patterns are
34497 recognizable. In which case they all pretty much have the
34499 *total
= cost
->fabs
;
34509 static int current_machopic_label_num
;
34511 /* Given a symbol name and its associated stub, write out the
34512 definition of the stub. */
34515 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
34517 unsigned int length
;
34518 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
34519 int label
= ++current_machopic_label_num
;
34521 /* For 64-bit we shouldn't get here. */
34522 gcc_assert (!TARGET_64BIT
);
34524 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34525 symb
= targetm
.strip_name_encoding (symb
);
34527 length
= strlen (stub
);
34528 binder_name
= XALLOCAVEC (char, length
+ 32);
34529 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
34531 length
= strlen (symb
);
34532 symbol_name
= XALLOCAVEC (char, length
+ 32);
34533 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
34535 sprintf (lazy_ptr_name
, "L%d$lz", label
);
34537 if (MACHOPIC_ATT_STUB
)
34538 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
34539 else if (MACHOPIC_PURE
)
34540 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
34542 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
34544 fprintf (file
, "%s:\n", stub
);
34545 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34547 if (MACHOPIC_ATT_STUB
)
34549 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34551 else if (MACHOPIC_PURE
)
34554 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34555 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
34556 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34557 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34558 label
, lazy_ptr_name
, label
);
34559 fprintf (file
, "\tjmp\t*%%ecx\n");
34562 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
34564 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34565 it needs no stub-binding-helper. */
34566 if (MACHOPIC_ATT_STUB
)
34569 fprintf (file
, "%s:\n", binder_name
);
34573 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
34574 fprintf (file
, "\tpushl\t%%ecx\n");
34577 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
34579 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
34581 /* N.B. Keep the correspondence of these
34582 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34583 old-pic/new-pic/non-pic stubs; altering this will break
34584 compatibility with existing dylibs. */
34587 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34588 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
34591 /* 16-byte -mdynamic-no-pic stub. */
34592 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
34594 fprintf (file
, "%s:\n", lazy_ptr_name
);
34595 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
34596 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
34598 #endif /* TARGET_MACHO */
34600 /* Order the registers for register allocator. */
34603 x86_order_regs_for_local_alloc (void)
34608 /* First allocate the local general purpose registers. */
34609 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34610 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
34611 reg_alloc_order
[pos
++] = i
;
34613 /* Global general purpose registers. */
34614 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
34615 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
34616 reg_alloc_order
[pos
++] = i
;
34618 /* x87 registers come first in case we are doing FP math
34620 if (!TARGET_SSE_MATH
)
34621 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34622 reg_alloc_order
[pos
++] = i
;
34624 /* SSE registers. */
34625 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
34626 reg_alloc_order
[pos
++] = i
;
34627 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
34628 reg_alloc_order
[pos
++] = i
;
34630 /* x87 registers. */
34631 if (TARGET_SSE_MATH
)
34632 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
34633 reg_alloc_order
[pos
++] = i
;
34635 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
34636 reg_alloc_order
[pos
++] = i
;
34638 /* Initialize the rest of array as we do not allocate some registers
34640 while (pos
< FIRST_PSEUDO_REGISTER
)
34641 reg_alloc_order
[pos
++] = 0;
34644 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34645 in struct attribute_spec handler. */
34647 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
34649 int flags ATTRIBUTE_UNUSED
,
34650 bool *no_add_attrs
)
34652 if (TREE_CODE (*node
) != FUNCTION_TYPE
34653 && TREE_CODE (*node
) != METHOD_TYPE
34654 && TREE_CODE (*node
) != FIELD_DECL
34655 && TREE_CODE (*node
) != TYPE_DECL
)
34657 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34659 *no_add_attrs
= true;
34664 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
34666 *no_add_attrs
= true;
34669 if (is_attribute_p ("callee_pop_aggregate_return", name
))
34673 cst
= TREE_VALUE (args
);
34674 if (TREE_CODE (cst
) != INTEGER_CST
)
34676 warning (OPT_Wattributes
,
34677 "%qE attribute requires an integer constant argument",
34679 *no_add_attrs
= true;
34681 else if (compare_tree_int (cst
, 0) != 0
34682 && compare_tree_int (cst
, 1) != 0)
34684 warning (OPT_Wattributes
,
34685 "argument to %qE attribute is neither zero, nor one",
34687 *no_add_attrs
= true;
34696 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
34697 struct attribute_spec.handler. */
34699 ix86_handle_abi_attribute (tree
*node
, tree name
,
34700 tree args ATTRIBUTE_UNUSED
,
34701 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34703 if (TREE_CODE (*node
) != FUNCTION_TYPE
34704 && TREE_CODE (*node
) != METHOD_TYPE
34705 && TREE_CODE (*node
) != FIELD_DECL
34706 && TREE_CODE (*node
) != TYPE_DECL
)
34708 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34710 *no_add_attrs
= true;
34714 /* Can combine regparm with all attributes but fastcall. */
34715 if (is_attribute_p ("ms_abi", name
))
34717 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
34719 error ("ms_abi and sysv_abi attributes are not compatible");
34724 else if (is_attribute_p ("sysv_abi", name
))
34726 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
34728 error ("ms_abi and sysv_abi attributes are not compatible");
34737 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34738 struct attribute_spec.handler. */
34740 ix86_handle_struct_attribute (tree
*node
, tree name
,
34741 tree args ATTRIBUTE_UNUSED
,
34742 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34745 if (DECL_P (*node
))
34747 if (TREE_CODE (*node
) == TYPE_DECL
)
34748 type
= &TREE_TYPE (*node
);
34753 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
34755 warning (OPT_Wattributes
, "%qE attribute ignored",
34757 *no_add_attrs
= true;
34760 else if ((is_attribute_p ("ms_struct", name
)
34761 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
34762 || ((is_attribute_p ("gcc_struct", name
)
34763 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
34765 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
34767 *no_add_attrs
= true;
34774 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
34775 tree args ATTRIBUTE_UNUSED
,
34776 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
34778 if (TREE_CODE (*node
) != FUNCTION_DECL
)
34780 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
34782 *no_add_attrs
= true;
34788 ix86_ms_bitfield_layout_p (const_tree record_type
)
34790 return ((TARGET_MS_BITFIELD_LAYOUT
34791 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
34792 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
34795 /* Returns an expression indicating where the this parameter is
34796 located on entry to the FUNCTION. */
34799 x86_this_parameter (tree function
)
34801 tree type
= TREE_TYPE (function
);
34802 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
34807 const int *parm_regs
;
34809 if (ix86_function_type_abi (type
) == MS_ABI
)
34810 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
34812 parm_regs
= x86_64_int_parameter_registers
;
34813 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
34816 nregs
= ix86_function_regparm (type
, function
);
34818 if (nregs
> 0 && !stdarg_p (type
))
34821 unsigned int ccvt
= ix86_get_callcvt (type
);
34823 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34824 regno
= aggr
? DX_REG
: CX_REG
;
34825 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34829 return gen_rtx_MEM (SImode
,
34830 plus_constant (Pmode
, stack_pointer_rtx
, 4));
34839 return gen_rtx_MEM (SImode
,
34840 plus_constant (Pmode
,
34841 stack_pointer_rtx
, 4));
34844 return gen_rtx_REG (SImode
, regno
);
34847 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
34851 /* Determine whether x86_output_mi_thunk can succeed. */
34854 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
34855 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
34856 HOST_WIDE_INT vcall_offset
, const_tree function
)
34858 /* 64-bit can handle anything. */
34862 /* For 32-bit, everything's fine if we have one free register. */
34863 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
34866 /* Need a free register for vcall_offset. */
34870 /* Need a free register for GOT references. */
34871 if (flag_pic
&& !targetm
.binds_local_p (function
))
34874 /* Otherwise ok. */
34878 /* Output the assembler code for a thunk function. THUNK_DECL is the
34879 declaration for the thunk function itself, FUNCTION is the decl for
34880 the target function. DELTA is an immediate constant offset to be
34881 added to THIS. If VCALL_OFFSET is nonzero, the word at
34882 *(*this + vcall_offset) should be added to THIS. */
34885 x86_output_mi_thunk (FILE *file
,
34886 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
34887 HOST_WIDE_INT vcall_offset
, tree function
)
34889 rtx this_param
= x86_this_parameter (function
);
34890 rtx this_reg
, tmp
, fnaddr
;
34891 unsigned int tmp_regno
;
34894 tmp_regno
= R10_REG
;
34897 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
34898 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
34899 tmp_regno
= AX_REG
;
34900 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
34901 tmp_regno
= DX_REG
;
34903 tmp_regno
= CX_REG
;
34906 emit_note (NOTE_INSN_PROLOGUE_END
);
34908 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
34909 pull it in now and let DELTA benefit. */
34910 if (REG_P (this_param
))
34911 this_reg
= this_param
;
34912 else if (vcall_offset
)
34914 /* Put the this parameter into %eax. */
34915 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
34916 emit_move_insn (this_reg
, this_param
);
34919 this_reg
= NULL_RTX
;
34921 /* Adjust the this parameter by a fixed constant. */
34924 rtx delta_rtx
= GEN_INT (delta
);
34925 rtx delta_dst
= this_reg
? this_reg
: this_param
;
34929 if (!x86_64_general_operand (delta_rtx
, Pmode
))
34931 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34932 emit_move_insn (tmp
, delta_rtx
);
34937 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
34940 /* Adjust the this parameter by a value stored in the vtable. */
34943 rtx vcall_addr
, vcall_mem
, this_mem
;
34945 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
34947 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
34948 if (Pmode
!= ptr_mode
)
34949 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
34950 emit_move_insn (tmp
, this_mem
);
34952 /* Adjust the this parameter. */
34953 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
34955 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
34957 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
34958 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
34959 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
34962 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
34963 if (Pmode
!= ptr_mode
)
34964 emit_insn (gen_addsi_1_zext (this_reg
,
34965 gen_rtx_REG (ptr_mode
,
34969 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
34972 /* If necessary, drop THIS back to its stack slot. */
34973 if (this_reg
&& this_reg
!= this_param
)
34974 emit_move_insn (this_param
, this_reg
);
34976 fnaddr
= XEXP (DECL_RTL (function
), 0);
34979 if (!flag_pic
|| targetm
.binds_local_p (function
)
34980 || cfun
->machine
->call_abi
== MS_ABI
)
34984 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
34985 tmp
= gen_rtx_CONST (Pmode
, tmp
);
34986 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
34991 if (!flag_pic
|| targetm
.binds_local_p (function
))
34994 else if (TARGET_MACHO
)
34996 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
34997 fnaddr
= XEXP (fnaddr
, 0);
34999 #endif /* TARGET_MACHO */
35002 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
35003 output_set_got (tmp
, NULL_RTX
);
35005 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
35006 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
35007 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
35011 /* Our sibling call patterns do not allow memories, because we have no
35012 predicate that can distinguish between frame and non-frame memory.
35013 For our purposes here, we can get away with (ab)using a jump pattern,
35014 because we're going to do no optimization. */
35015 if (MEM_P (fnaddr
))
35016 emit_jump_insn (gen_indirect_jump (fnaddr
));
35019 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
35020 fnaddr
= legitimize_pic_address (fnaddr
,
35021 gen_rtx_REG (Pmode
, tmp_regno
));
35023 if (!sibcall_insn_operand (fnaddr
, word_mode
))
35025 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
35026 if (GET_MODE (fnaddr
) != word_mode
)
35027 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
35028 emit_move_insn (tmp
, fnaddr
);
35032 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
35033 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
35034 tmp
= emit_call_insn (tmp
);
35035 SIBLING_CALL_P (tmp
) = 1;
35039 /* Emit just enough of rest_of_compilation to get the insns emitted.
35040 Note that use_thunk calls assemble_start_function et al. */
35041 tmp
= get_insns ();
35042 shorten_branches (tmp
);
35043 final_start_function (tmp
, file
, 1);
35044 final (tmp
, file
, 1);
35045 final_end_function ();
/* Emit target-specific directives at the start of the assembly output:
   an optional ".version" directive, the "__fltused" global, and the
   Intel-syntax prefix, each gated by a target macro.
   NOTE(review): fragmented extraction — the "static void" line, braces and
   the #if TARGET_MACHO guard around darwin_file_start were dropped;
   restore from the original i386.c before compiling.  */
35049 x86_file_start (void)
35051 default_file_start ();
35053 darwin_file_start ();
35055 if (X86_FILE_START_VERSION_DIRECTIVE
)
35056 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
35057 if (X86_FILE_START_FLTUSED
)
35058 fputs ("\t.global\t__fltused\n", asm_out_file
);
35059 if (ix86_asm_dialect
== ASM_INTEL
)
35060 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
/* Compute the alignment (in bits) for FIELD given the alignment COMPUTED
   by the front end.  On 32-bit !TARGET_ALIGN_DOUBLE targets, cap the
   alignment of DFmode/DCmode and integer-class fields at 32 bits.
   NOTE(review): fragmented extraction — the return type line, braces and
   the trailing "return computed;" were dropped.  */
35064 x86_field_alignment (tree field
, int computed
)
35066 enum machine_mode mode
;
35067 tree type
= TREE_TYPE (field
);
35069 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
35071 mode
= TYPE_MODE (strip_array_types (type
));
35072 if (mode
== DFmode
|| mode
== DCmode
35073 || GET_MODE_CLASS (mode
) == MODE_INT
35074 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
35075 return MIN (32, computed
);
/* NOTE(review): fragmented extraction — the #if/#else structure separating
   the 64-bit, PIC 32-bit and non-PIC 32-bit cases was partially dropped,
   which is why three call-emitting sequences appear back to back below.  */
35079 /* Output assembler code to FILE to increment profiler label # LABELNO
35080 for profiling a function entry. */
35082 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
35084 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
/* 64-bit: load the per-label counter address into %r11, then call mcount
   (indirect through the GOT when SYSV and PIC).  */
35089 #ifndef NO_PROFILE_COUNTERS
35090 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
35093 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
35094 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
35096 fprintf (file
, "\tcall\t%s\n", mcount_name
);
/* 32-bit PIC: counter address via @GOTOFF(%ebx), call via @GOT(%ebx).  */
35100 #ifndef NO_PROFILE_COUNTERS
35101 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
35104 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
/* 32-bit non-PIC: absolute counter address and a direct call.  */
35108 #ifndef NO_PROFILE_COUNTERS
35109 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
35112 fprintf (file
, "\tcall\t%s\n", mcount_name
);
/* Return a conservative lower bound (in bytes) on the size of INSN,
   used by the jump-mispredict padding pass below.
   NOTE(review): fragmented extraction — several early "return N;"
   statements and the function's return type were dropped.  */
35116 /* We don't have exact information about the insn sizes, but we may assume
35117 quite safely that we are informed about all 1 byte insns and memory
35118 address sizes. This is enough to eliminate unnecessary padding in
35122 min_insn_size (rtx insn
)
35126 if (!INSN_P (insn
) || !active_insn_p (insn
))
35129 /* Discard alignments we've emit and jump instructions. */
35130 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
35131 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
35133 if (JUMP_TABLE_DATA_P (insn
))
35136 /* Important case - calls are always 5 bytes.
35137 It is common to have many calls in the row. */
35139 && symbolic_reference_mentioned_p (PATTERN (insn
))
35140 && !SIBLING_CALL_P (insn
))
35142 len
= get_attr_length (insn
);
35146 /* For normal instructions we rely on get_attr_length being exact,
35147 with a few exceptions. */
35148 if (!JUMP_P (insn
))
35150 enum attr_type type
= get_attr_type (insn
);
35155 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
35156 || asm_noperands (PATTERN (insn
)) >= 0)
35163 /* Otherwise trust get_attr_length. */
/* Jumps: fall back to the address-length attribute, bumped for short
   symbolic references (presumably to allow for relaxation — confirm).  */
35167 l
= get_attr_length_address (insn
);
35168 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
/* Sliding-window pass: scan the insn stream keeping a window [START, INSN]
   of at most 16 estimated bytes; when the window would contain 4 jumps,
   emit a "pad" insn so no 16-byte-aligned page holds more than 3 jumps.
   NOTE(review): fragmented extraction — loop braces, the label fast-forward
   path and some jump-counting lines were dropped; code kept verbatim.  */
35177 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35179 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
35183 ix86_avoid_jump_mispredicts (void)
35185 rtx insn
, start
= get_insns ();
35186 int nbytes
= 0, njumps
= 0;
35189 /* Look for all minimal intervals of instructions containing 4 jumps.
35190 The intervals are bounded by START and INSN. NBYTES is the total
35191 size of instructions in the interval including INSN and not including
35192 START. When the NBYTES is smaller than 16 bytes, it is possible
35193 that the end of START and INSN ends up in the same 16byte page.
35195 The smallest offset in the page INSN can start is the case where START
35196 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
35197 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
35199 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
35203 if (LABEL_P (insn
))
35205 int align
= label_to_alignment (insn
);
35206 int max_skip
= label_to_max_skip (insn
);
35210 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35211 already in the current 16 byte page, because otherwise
35212 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35213 bytes to reach 16 byte boundary. */
35215 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
35218 fprintf (dump_file
, "Label %i with max_skip %i\n",
35219 INSN_UID (insn
), max_skip
);
/* A label with alignment resets part of the window: shrink it from the
   front until it fits in the remaining bytes of the 16-byte page.  */
35222 while (nbytes
+ max_skip
>= 16)
35224 start
= NEXT_INSN (start
);
35225 if ((JUMP_P (start
)
35226 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35227 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35229 njumps
--, isjump
= 1;
35232 nbytes
-= min_insn_size (start
);
35238 min_size
= min_insn_size (insn
);
35239 nbytes
+= min_size
;
35241 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
35242 INSN_UID (insn
), min_size
);
35244 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
35245 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
/* More than 3 jumps in the window: advance START past a jump so the
   window again holds at most 3, subtracting the dropped insn sizes.  */
35253 start
= NEXT_INSN (start
);
35254 if ((JUMP_P (start
)
35255 && GET_CODE (PATTERN (start
)) != ADDR_VEC
35256 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
35258 njumps
--, isjump
= 1;
35261 nbytes
-= min_insn_size (start
);
35263 gcc_assert (njumps
>= 0);
35265 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
35266 INSN_UID (start
), INSN_UID (insn
), nbytes
);
35268 if (njumps
== 3 && isjump
&& nbytes
< 16)
35270 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
35273 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
35274 INSN_UID (insn
), padsize
);
35275 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
/* NOTE(review): fragmented extraction — variable declarations (prev, e, ei),
   several braces and "continue;"/replace-assignment lines were dropped;
   code kept verbatim.  */
35281 /* AMD Athlon works faster
35282 when RET is not destination of conditional jump or directly preceded
35283 by other jump instruction. We avoid the penalty by inserting NOP just
35284 before the RET instructions in such cases. */
35286 ix86_pad_returns (void)
/* Walk every predecessor of the exit block, i.e. every block ending in
   a return.  */
35291 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35293 basic_block bb
= e
->src
;
35294 rtx ret
= BB_END (bb
);
35296 bool replace
= false;
35298 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
35299 || optimize_bb_for_size_p (bb
))
/* Find the nearest active insn or label before the return.  */
35301 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
35302 if (active_insn_p (prev
) || LABEL_P (prev
))
35304 if (prev
&& LABEL_P (prev
))
/* Return preceded by a label: pad if any predecessor reaches the label
   by a (non-fallthru) jump.  */
35309 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35310 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
35311 && !(e
->flags
& EDGE_FALLTHRU
))
35316 prev
= prev_active_insn (ret
);
35318 && ((JUMP_P (prev
) && any_condjump_p (prev
))
35321 /* Empty functions get branch mispredict even when
35322 the jump destination is not visible to us. */
35323 if (!prev
&& !optimize_function_for_size_p (cfun
))
35328 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
/* NOTE(review): fragmented extraction — the return type, braces,
   insn_count++ and the early/final "return" lines were dropped;
   code kept verbatim.  */
35334 /* Count the minimum number of instructions in BB. Return 4 if the
35335 number of instructions >= 4. */
35338 ix86_count_insn_bb (basic_block bb
)
35341 int insn_count
= 0;
35343 /* Count number of instructions in this block. Return 4 if the number
35344 of instructions >= 4. */
35345 FOR_BB_INSNS (bb
, insn
)
35347 /* Only happen in exit blocks. */
35349 && ANY_RETURN_P (PATTERN (insn
)))
/* Count only real insns: skip debug insns and USE/CLOBBER markers.  */
35352 if (NONDEBUG_INSN_P (insn
)
35353 && GET_CODE (PATTERN (insn
)) != USE
35354 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
35357 if (insn_count
>= 4)
/* NOTE(review): fragmented extraction — edge/edge_iterator declarations
   and braces were dropped; code kept verbatim.  */
35366 /* Count the minimum number of instructions in code path in BB.
35367 Return 4 if the number of instructions >= 4. */
35370 ix86_count_insn (basic_block bb
)
35374 int min_prev_count
;
35376 /* Only bother counting instructions along paths with no
35377 more than 2 basic blocks between entry and exit. Given
35378 that BB has an edge to exit, determine if a predecessor
35379 of BB has an edge from entry. If so, compute the number
35380 of instructions in the predecessor block. If there
35381 happen to be multiple such blocks, compute the minimum. */
35382 min_prev_count
= 4;
35383 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
35386 edge_iterator prev_ei
;
35388 if (e
->src
== ENTRY_BLOCK_PTR
)
/* BB is reached directly from entry: no predecessor insns to count.  */
35390 min_prev_count
= 0;
35393 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
35395 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
35397 int count
= ix86_count_insn_bb (e
->src
);
35398 if (count
< min_prev_count
)
35399 min_prev_count
= count
;
35405 if (min_prev_count
< 4)
35406 min_prev_count
+= ix86_count_insn_bb (bb
);
35408 return min_prev_count
;
/* NOTE(review): fragmented extraction — edge iterator declarations, braces
   and the NOTE_P half of the epilogue-search condition were dropped;
   code kept verbatim.  */
35411 /* Pad short function to 4 instructions. */
35414 ix86_pad_short_function (void)
/* Look at every return path; a path shorter than 4 insns gets NOPs
   inserted before the epilogue note.  */
35419 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
35421 rtx ret
= BB_END (e
->src
);
35422 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
35424 int insn_count
= ix86_count_insn (e
->src
);
35426 /* Pad short function. */
35427 if (insn_count
< 4)
35431 /* Find epilogue. */
35434 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
35435 insn
= PREV_INSN (insn
);
35440 /* Two NOPs count as one instruction. */
35441 insn_count
= 2 * (4 - insn_count
);
35442 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
/* NOTE(review): fragmented extraction — the function signature line
   (presumably "static void ix86_reorg (void)" — confirm against the
   original) and surrounding braces were dropped; body kept verbatim.  */
35448 /* Implement machine specific optimizations. We implement padding of returns
35449 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
35453 /* We are freeing block_for_insn in the toplev to keep compatibility
35454 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35455 compute_bb_for_insn ();
/* All three passes run only when optimizing for speed, each gated by
   its own tuning flag.  */
35457 if (optimize
&& optimize_function_for_speed_p (cfun
))
35459 if (TARGET_PAD_SHORT_FUNCTION
)
35460 ix86_pad_short_function ();
35461 else if (TARGET_PAD_RETURNS
)
35462 ix86_pad_returns ();
35463 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35464 if (TARGET_FOUR_JUMP_LIMIT
)
35465 ix86_avoid_jump_mispredicts ();
/* Scan the cached recog operands of INSN for a general register whose
   number is not a legacy QImode register (i.e. needs a REX prefix to be
   addressed as a byte register).
   NOTE(review): fragmented extraction — the return type, loop variable
   declaration and the "return true/false" lines were dropped.  */
35470 /* Return nonzero when QImode register that must be represented via REX prefix
35473 x86_extended_QIreg_mentioned_p (rtx insn
)
35476 extract_insn_cached (insn
);
35477 for (i
= 0; i
< recog_data
.n_operands
; i
++)
35478 if (GENERAL_REG_P (recog_data
.operand
[i
])
35479 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
/* for_each_rtx callback: nonzero when *P is a hard register that requires
   a REX prefix (extended integer or SSE register).
   NOTE(review): fragmented extraction — the return type and the REG_P
   guard before reading REGNO were dropped.  */
35484 /* Return nonzero when P points to register encoded via REX prefix.
35485 Called via for_each_rtx. */
35487 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
35489 unsigned int regno
;
35492 regno
= REGNO (*p
);
35493 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
/* Walk INSN (its pattern if it is a real insn, else the rtx itself) with
   extended_reg_mentioned_1, returning true when any REX-encoded register
   is mentioned.  NOTE(review): return type line lost in extraction.  */
35496 /* Return true when INSN mentions register that must be encoded using REX
35499 x86_extended_reg_mentioned_p (rtx insn
)
35501 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
35502 extended_reg_mentioned_1
, NULL
);
/* NOTE(review): fragmented extraction — the switch over MODE (whose
   default reaches the gcc_unreachable below), the declaration of VAL and
   the return statements were dropped; code kept verbatim.  */
35505 /* If profitable, negate (without causing overflow) integer constant
35506 of mode MODE at location LOC. Return true in this case. */
35508 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
35512 if (!CONST_INT_P (*loc
))
35518 /* DImode x86_64 constants must fit in 32 bits. */
35519 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
35530 gcc_unreachable ();
35533 /* Avoid overflows. */
/* The sign-bit value has no representable negation in MODE.  */
35534 if (mode_signbit_p (mode
, *loc
))
35537 val
= INTVAL (*loc
);
35539 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35540 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
35541 if ((val
< 0 && val
!= -128)
35544 *loc
= GEN_INT (-val
);
/* NOTE(review): fragmented extraction — the assignment of OUT from
   operands[0] and some expand_simple_binop trailing arguments were
   dropped; code kept verbatim.  */
35551 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35552 optabs would emit if we didn't have TFmode patterns. */
35555 x86_emit_floatuns (rtx operands
[2])
35557 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
35558 enum machine_mode mode
, inmode
;
35560 inmode
= GET_MODE (operands
[1]);
35561 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
35564 in
= force_reg (inmode
, operands
[1]);
35565 mode
= GET_MODE (out
);
35566 neglab
= gen_label_rtx ();
35567 donelab
= gen_label_rtx ();
35568 f0
= gen_reg_rtx (mode
);
/* Non-negative input: a plain signed conversion is already correct.  */
35570 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
35572 expand_float (out
, in
, 0);
35574 emit_jump_insn (gen_jump (donelab
));
35577 emit_label (neglab
);
/* Negative (high-bit-set) input: halve while preserving the low bit
   ((in >> 1) | (in & 1)), convert signed, then double the result.  */
35579 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
35581 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
35583 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
35585 expand_float (f0
, i0
, 0);
35587 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
35589 emit_label (donelab
);
/* Descriptor for a constant vector permutation being expanded:
   TARGET receives the permutation of OP0/OP1 (OP0 == OP1 when
   ONE_OPERAND_P), PERM holds the NELT element indices, VMODE is the
   vector mode.  The forward-declared helpers below try successively
   more general expansion strategies.  */
35592 /* AVX2 does support 32-byte integer vector operations,
35593 thus the longest vector we are faced with is V32QImode. */
35594 #define MAX_VECT_LEN 32
35596 struct expand_vec_perm_d
35598 rtx target
, op0
, op1
;
35599 unsigned char perm
[MAX_VECT_LEN
];
35600 enum machine_mode vmode
;
35601 unsigned char nelt
;
35602 bool one_operand_p
;
35606 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
35607 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
35608 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
/* NOTE(review): fragmented extraction — the trailing "return n;" was
   dropped; code kept verbatim.  The asserts check N has half the unit
   count but the same byte size as O.  */
35610 /* Get a vector mode of the same size as the original but with elements
35611 twice as wide. This is only guaranteed to apply to integral vectors. */
35613 static inline enum machine_mode
35614 get_mode_wider_vector (enum machine_mode o
)
35616 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35617 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
35618 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
35619 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
/* NOTE(review): fragmented extraction — the switch over MODE that selects
   among the strategies below, many braces and the "return ok;"/"return
   true;" lines were dropped; code kept verbatim.  */
35623 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35624 with all elements equal to VAR. Return true if successful. */
35627 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
35628 rtx target
, rtx val
)
/* Strategy 1: emit (set target (vec_duplicate val)) and let recog decide;
   retry with VAL forced into a register if the first form fails.  */
35651 /* First attempt to recognize VAL as-is. */
35652 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35653 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
35654 if (recog_memoized (insn
) < 0)
35657 /* If that fails, force VAL into a register. */
35660 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
35661 seq
= get_insns ();
35664 emit_insn_before (seq
, insn
);
35666 ok
= recog_memoized (insn
) >= 0;
35675 if (TARGET_SSE
|| TARGET_3DNOW_A
)
/* Strategy 2: duplicate a truncated HImode value.  */
35679 val
= gen_lowpart (SImode
, val
);
35680 x
= gen_rtx_TRUNCATE (HImode
, val
);
35681 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
35682 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* Strategy 3: build an all-zero permutation selecting element 0, i.e. a
   broadcast, via the vec_perm expanders.  */
35695 struct expand_vec_perm_d dperm
;
35699 memset (&dperm
, 0, sizeof (dperm
));
35700 dperm
.target
= target
;
35701 dperm
.vmode
= mode
;
35702 dperm
.nelt
= GET_MODE_NUNITS (mode
);
35703 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
35704 dperm
.one_operand_p
= true;
35706 /* Extend to SImode using a paradoxical SUBREG. */
35707 tmp1
= gen_reg_rtx (SImode
);
35708 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
35710 /* Insert the SImode value as low element of a V4SImode vector. */
35711 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
35712 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
35714 ok
= (expand_vec_perm_1 (&dperm
)
35715 || expand_vec_perm_broadcast_1 (&dperm
));
35727 /* Replicate the value once into the next wider mode and recurse. */
35729 enum machine_mode smode
, wsmode
, wvmode
;
35732 smode
= GET_MODE_INNER (mode
);
35733 wvmode
= get_mode_wider_vector (mode
);
35734 wsmode
= GET_MODE_INNER (wvmode
);
/* Pack two copies of VAL into one element of the wider mode:
   (val << bits) | val.  */
35736 val
= convert_modes (wsmode
, smode
, val
, true);
35737 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
35738 GEN_INT (GET_MODE_BITSIZE (smode
)),
35739 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35740 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
35742 x
= gen_lowpart (wvmode
, target
);
35743 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
/* 256-bit integer modes: broadcast into the 128-bit half mode, then
   concatenate the half with itself.  */
35751 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
35752 rtx x
= gen_reg_rtx (hvmode
);
35754 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
35757 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
35758 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* NOTE(review): fragmented extraction — the switch over MODE selecting
   USE_VECTOR_SET, several case labels, braces and "return true/false"
   lines were dropped; code kept verbatim.  */
35767 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35768 whose ONE_VAR element is VAR, and other elements are zero. Return true
35772 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
35773 rtx target
, rtx var
, int one_var
)
35775 enum machine_mode vsimode
;
35778 bool use_vector_set
= false;
/* Decide per mode whether a direct vector-set insn is available.  */
35783 /* For SSE4.1, we normally use vector set. But if the second
35784 element is zero and inter-unit moves are OK, we use movq
35786 use_vector_set
= (TARGET_64BIT
35788 && !(TARGET_INTER_UNIT_MOVES
35794 use_vector_set
= TARGET_SSE4_1
;
35797 use_vector_set
= TARGET_SSE2
;
35800 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
35807 use_vector_set
= TARGET_AVX
;
35810 /* Use ix86_expand_vector_set in 64bit mode only. */
35811 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
/* Easy path: zero the target, then set the ONE_VAR element.  */
35817 if (use_vector_set
)
35819 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
35820 var
= force_reg (GET_MODE_INNER (mode
), var
);
35821 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* Two-element case: concat VAR with a zero element.  */
35837 var
= force_reg (GET_MODE_INNER (mode
), var
);
35838 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
35839 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* V4SI/V4SF: merge VAR into element 0 of a zero vector, then shuffle it
   into position ONE_VAR.  Work in a pseudo if TARGET is a hard reg.  */
35844 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
35845 new_target
= gen_reg_rtx (mode
);
35847 new_target
= target
;
35848 var
= force_reg (GET_MODE_INNER (mode
), var
);
35849 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
35850 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
35851 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
35854 /* We need to shuffle the value to the correct position, so
35855 create a new pseudo to store the intermediate result. */
35857 /* With SSE2, we can use the integer shuffle insns. */
35858 if (mode
!= V4SFmode
&& TARGET_SSE2
)
35860 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
35862 GEN_INT (one_var
== 1 ? 0 : 1),
35863 GEN_INT (one_var
== 2 ? 0 : 1),
35864 GEN_INT (one_var
== 3 ? 0 : 1)));
35865 if (target
!= new_target
)
35866 emit_move_insn (target
, new_target
);
35870 /* Otherwise convert the intermediate result to V4SFmode and
35871 use the SSE1 shuffle instructions. */
35872 if (mode
!= V4SFmode
)
35874 tmp
= gen_reg_rtx (V4SFmode
);
35875 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
35880 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
35882 GEN_INT (one_var
== 1 ? 0 : 1),
35883 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
35884 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
35886 if (mode
!= V4SFmode
)
35887 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
35888 else if (tmp
!= target
)
35889 emit_move_insn (target
, tmp
);
35891 else if (target
!= new_target
)
35892 emit_move_insn (target
, new_target
);
/* Narrow-element modes: zero-extend VAR to SImode, build the vector in
   the matching SImode vector mode, and copy the bits across.  */
35897 vsimode
= V4SImode
;
35903 vsimode
= V2SImode
;
35909 /* Zero extend the variable element to SImode and recurse. */
35910 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
35912 x
= gen_reg_rtx (vsimode
);
35913 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
35915 gcc_unreachable ();
35917 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* NOTE(review): fragmented extraction — the switch over MODE, the
   "wmode = V*HImode" assignments, braces and return statements were
   dropped; code kept verbatim.  */
35925 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35926 consisting of the values in VALS. It is known that all elements
35927 except ONE_VAR are constants. Return true if successful. */
35930 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
35931 rtx target
, rtx vals
, int one_var
)
35933 rtx var
= XVECEXP (vals
, 0, one_var
);
35934 enum machine_mode wmode
;
/* CONST_VEC is VALS with the variable element replaced by zero, so it can
   be loaded from the constant pool and then patched.  */
35937 const_vec
= copy_rtx (vals
);
35938 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
35939 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
35947 /* For the two element vectors, it's just as easy to use
35948 the general case. */
35952 /* Use ix86_expand_vector_set in 64bit mode only. */
35975 /* There's no way to set one QImode entry easily. Combine
35976 the variable value with its adjacent constant value, and
35977 promote to an HImode set. */
35978 x
= XVECEXP (vals
, 0, one_var
^ 1);
/* Even ONE_VAR: VAR goes in the high byte of the HImode pair...  */
35981 var
= convert_modes (HImode
, QImode
, var
, true);
35982 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
35983 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
35984 x
= GEN_INT (INTVAL (x
) & 0xff);
/* ...odd ONE_VAR: the constant neighbor goes in the high byte.  */
35988 var
= convert_modes (HImode
, QImode
, var
, true);
35989 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
35991 if (x
!= const0_rtx
)
35992 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
35993 1, OPTAB_LIB_WIDEN
);
35995 x
= gen_reg_rtx (wmode
);
35996 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
35997 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
35999 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* Default: load the pool constant, then overwrite the one element.  */
36006 emit_move_insn (target
, const_vec
);
36007 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
/* NOTE(review): fragmented extraction — the two switches over N and MODE
   (choosing CMODE/HMODE), several loop headers and braces were dropped;
   code kept verbatim.  */
36011 /* A subroutine of ix86_expand_vector_init_general. Use vector
36012 concatenate to handle the most general case: all values variable,
36013 and none identical. */
36016 ix86_expand_vector_init_concat (enum machine_mode mode
,
36017 rtx target
, rtx
*ops
, int n
)
36019 enum machine_mode cmode
, hmode
= VOIDmode
;
36020 rtx first
[8], second
[4];
36060 gcc_unreachable ();
/* N == 2: a single VEC_CONCAT of the two (register) operands.  */
36063 if (!register_operand (ops
[1], cmode
))
36064 ops
[1] = force_reg (cmode
, ops
[1]);
36065 if (!register_operand (ops
[0], cmode
))
36066 ops
[0] = force_reg (cmode
, ops
[0]);
36067 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36068 gen_rtx_VEC_CONCAT (mode
, ops
[0],
36088 gcc_unreachable ();
36104 gcc_unreachable ();
36109 /* FIXME: We process inputs backward to help RA. PR 36222. */
/* N >= 4: pair up adjacent operands into half-width vectors (FIRST),
   then recurse on the halves (and on SECOND for N == 8).  */
36112 for (; i
> 0; i
-= 2, j
--)
36114 first
[j
] = gen_reg_rtx (cmode
);
36115 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
36116 ix86_expand_vector_init (false, first
[j
],
36117 gen_rtx_PARALLEL (cmode
, v
));
36123 gcc_assert (hmode
!= VOIDmode
);
36124 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36126 second
[j
] = gen_reg_rtx (hmode
);
36127 ix86_expand_vector_init_concat (hmode
, second
[j
],
36131 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
36134 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
36138 gcc_unreachable ();
/* NOTE(review): fragmented extraction — the switch over MODE, loop
   variable declarations (i, j, op0, op1), braces and some call argument
   tails were dropped; code kept verbatim.  */
36142 /* A subroutine of ix86_expand_vector_init_general. Use vector
36143 interleave to handle the most general case: all values variable,
36144 and none identical. */
36147 ix86_expand_vector_init_interleave (enum machine_mode mode
,
36148 rtx target
, rtx
*ops
, int n
)
36150 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
36153 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
36154 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
36155 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
/* V8HI flavor: HImode elements, interleave through V4SI then V2DI.  */
36160 gen_load_even
= gen_vec_setv8hi
;
36161 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
36162 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36163 inner_mode
= HImode
;
36164 first_imode
= V4SImode
;
36165 second_imode
= V2DImode
;
36166 third_imode
= VOIDmode
;
/* V16QI flavor: QImode elements, one extra interleave level.  */
36169 gen_load_even
= gen_vec_setv16qi
;
36170 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
36171 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
36172 inner_mode
= QImode
;
36173 first_imode
= V8HImode
;
36174 second_imode
= V4SImode
;
36175 third_imode
= V2DImode
;
36178 gcc_unreachable ();
/* Pack each (odd, even) operand pair into one vector of FIRST_IMODE.  */
36181 for (i
= 0; i
< n
; i
++)
36183 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
36184 op0
= gen_reg_rtx (SImode
);
36185 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
36187 /* Insert the SImode value as low element of V4SImode vector. */
36188 op1
= gen_reg_rtx (V4SImode
);
36189 op0
= gen_rtx_VEC_MERGE (V4SImode
,
36190 gen_rtx_VEC_DUPLICATE (V4SImode
,
36192 CONST0_RTX (V4SImode
),
36194 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
36196 /* Cast the V4SImode vector back to a vector in orignal mode. */
36197 op0
= gen_reg_rtx (mode
);
36198 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
36200 /* Load even elements into the second positon. */
36201 emit_insn (gen_load_even (op0
,
36202 force_reg (inner_mode
,
36206 /* Cast vector to FIRST_IMODE vector. */
36207 ops
[i
] = gen_reg_rtx (first_imode
);
36208 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
36211 /* Interleave low FIRST_IMODE vectors. */
36212 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
36214 op0
= gen_reg_rtx (first_imode
);
36215 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
36217 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36218 ops
[j
] = gen_reg_rtx (second_imode
);
36219 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
36222 /* Interleave low SECOND_IMODE vectors. */
36223 switch (second_imode
)
36226 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
36228 op0
= gen_reg_rtx (second_imode
);
36229 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
36232 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
36234 ops
[j
] = gen_reg_rtx (third_imode
);
36235 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
/* V4SI case falls through to the final V2DI interleave.  */
36237 second_imode
= V2DImode
;
36238 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
36242 op0
= gen_reg_rtx (second_imode
);
36243 emit_insn (gen_interleave_second_low (op0
, ops
[0],
36246 /* Cast the SECOND_IMODE vector back to a vector on original
36248 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36249 gen_lowpart (mode
, op0
)));
36253 gcc_unreachable ();
/* NOTE(review): fragmented extraction — the switch over MODE dispatching
   to the strategies below, several braces, case labels and the
   words[i] bookkeeping lines were dropped; code kept verbatim.  */
36257 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36258 all values variable, and none identical. */
36261 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
36262 rtx target
, rtx vals
)
36264 rtx ops
[32], op0
, op1
;
36265 enum machine_mode half_mode
= VOIDmode
;
36272 if (!mmx_ok
&& !TARGET_SSE
)
/* Wide-element modes: build by recursive concatenation.  */
36284 n
= GET_MODE_NUNITS (mode
);
36285 for (i
= 0; i
< n
; i
++)
36286 ops
[i
] = XVECEXP (vals
, 0, i
);
36287 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
/* 256-bit QI/HI modes: interleave each 128-bit half, then concat.  */
36291 half_mode
= V16QImode
;
36295 half_mode
= V8HImode
;
36299 n
= GET_MODE_NUNITS (mode
);
36300 for (i
= 0; i
< n
; i
++)
36301 ops
[i
] = XVECEXP (vals
, 0, i
);
36302 op0
= gen_reg_rtx (half_mode
);
36303 op1
= gen_reg_rtx (half_mode
);
36304 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
36306 ix86_expand_vector_init_interleave (half_mode
, op1
,
36307 &ops
[n
>> 1], n
>> 2);
36308 emit_insn (gen_rtx_SET (VOIDmode
, target
,
36309 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
36313 if (!TARGET_SSE4_1
)
36321 /* Don't use ix86_expand_vector_init_interleave if we can't
36322 move from GPR to SSE register directly. */
36323 if (!TARGET_INTER_UNIT_MOVES
)
36326 n
= GET_MODE_NUNITS (mode
);
36327 for (i
= 0; i
< n
; i
++)
36328 ops
[i
] = XVECEXP (vals
, 0, i
);
36329 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
36337 gcc_unreachable ();
/* Fallback: assemble the vector in integer words — shift/IOR each
   element into a word, then stitch the words into TARGET.  */
36341 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
36342 enum machine_mode inner_mode
;
36343 rtx words
[4], shift
;
36345 inner_mode
= GET_MODE_INNER (mode
);
36346 n_elts
= GET_MODE_NUNITS (mode
);
36347 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
36348 n_elt_per_word
= n_elts
/ n_words
;
36349 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
36351 for (i
= 0; i
< n_words
; ++i
)
36353 rtx word
= NULL_RTX
;
36355 for (j
= 0; j
< n_elt_per_word
; ++j
)
36357 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
36358 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
36364 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
36365 word
, 1, OPTAB_LIB_WIDEN
);
36366 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
36367 word
, 1, OPTAB_LIB_WIDEN
);
36375 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
36376 else if (n_words
== 2)
36378 rtx tmp
= gen_reg_rtx (mode
);
36379 emit_clobber (tmp
);
36380 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
36381 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
36382 emit_move_insn (target
, tmp
);
36384 else if (n_words
== 4)
36386 rtx tmp
= gen_reg_rtx (V4SImode
);
36387 gcc_assert (word_mode
== SImode
);
36388 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
36389 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
36390 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
36393 gcc_unreachable ();
/* NOTE(review): fragmented extraction — declarations of i/x/n_var guards,
   the "all_same = false;" assignment and intermediate "return" lines were
   dropped; code kept verbatim.  */
36397 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36398 instructions unless MMX_OK is true. */
36401 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
36403 enum machine_mode mode
= GET_MODE (target
);
36404 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36405 int n_elts
= GET_MODE_NUNITS (mode
);
36406 int n_var
= 0, one_var
= -1;
36407 bool all_same
= true, all_const_zero
= true;
/* Classify the elements: count non-constants (remembering the last one),
   detect all-zero and all-identical vectors.  */
36411 for (i
= 0; i
< n_elts
; ++i
)
36413 x
= XVECEXP (vals
, 0, i
);
36414 if (!(CONST_INT_P (x
)
36415 || GET_CODE (x
) == CONST_DOUBLE
36416 || GET_CODE (x
) == CONST_FIXED
))
36417 n_var
++, one_var
= i
;
36418 else if (x
!= CONST0_RTX (inner_mode
))
36419 all_const_zero
= false;
36420 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
36424 /* Constants are best loaded from the constant pool. */
36427 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
36431 /* If all values are identical, broadcast the value. */
36433 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
36434 XVECEXP (vals
, 0, 0)))
36437 /* Values where only one field is non-constant are best loaded from
36438 the pool and overwritten via move later. */
36442 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
36443 XVECEXP (vals
, 0, one_var
),
36447 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
/* Fully general fallback.  */
36451 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
36455 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
36457 enum machine_mode mode
= GET_MODE (target
);
36458 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36459 enum machine_mode half_mode
;
36460 bool use_vec_merge
= false;
36462 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
36464 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
36465 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
36466 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
36467 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
36468 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
36469 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
36471 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
36473 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
36474 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
36475 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
36476 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
36477 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
36478 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
36488 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36489 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
36491 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36493 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36494 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36500 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
36504 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
36505 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
36507 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
36509 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
36510 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36517 /* For the two element vectors, we implement a VEC_CONCAT with
36518 the extraction of the other element. */
36520 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
36521 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
36524 op0
= val
, op1
= tmp
;
36526 op0
= tmp
, op1
= val
;
36528 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
36529 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36534 use_vec_merge
= TARGET_SSE4_1
;
36541 use_vec_merge
= true;
36545 /* tmp = target = A B C D */
36546 tmp
= copy_to_reg (target
);
36547 /* target = A A B B */
36548 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
36549 /* target = X A B B */
36550 ix86_expand_vector_set (false, target
, val
, 0);
36551 /* target = A X C D */
36552 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36553 const1_rtx
, const0_rtx
,
36554 GEN_INT (2+4), GEN_INT (3+4)));
36558 /* tmp = target = A B C D */
36559 tmp
= copy_to_reg (target
);
36560 /* tmp = X B C D */
36561 ix86_expand_vector_set (false, tmp
, val
, 0);
36562 /* target = A B X D */
36563 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36564 const0_rtx
, const1_rtx
,
36565 GEN_INT (0+4), GEN_INT (3+4)));
36569 /* tmp = target = A B C D */
36570 tmp
= copy_to_reg (target
);
36571 /* tmp = X B C D */
36572 ix86_expand_vector_set (false, tmp
, val
, 0);
36573 /* target = A B X D */
36574 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
36575 const0_rtx
, const1_rtx
,
36576 GEN_INT (2+4), GEN_INT (0+4)));
36580 gcc_unreachable ();
36585 use_vec_merge
= TARGET_SSE4_1
;
36589 /* Element 0 handled by vec_merge below. */
36592 use_vec_merge
= true;
36598 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36599 store into element 0, then shuffle them back. */
36603 order
[0] = GEN_INT (elt
);
36604 order
[1] = const1_rtx
;
36605 order
[2] = const2_rtx
;
36606 order
[3] = GEN_INT (3);
36607 order
[elt
] = const0_rtx
;
36609 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36610 order
[1], order
[2], order
[3]));
36612 ix86_expand_vector_set (false, target
, val
, 0);
36614 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
36615 order
[1], order
[2], order
[3]));
36619 /* For SSE1, we have to reuse the V4SF code. */
36620 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
36621 gen_lowpart (SFmode
, val
), elt
);
36626 use_vec_merge
= TARGET_SSE2
;
36629 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36633 use_vec_merge
= TARGET_SSE4_1
;
36640 half_mode
= V16QImode
;
36646 half_mode
= V8HImode
;
36652 half_mode
= V4SImode
;
36658 half_mode
= V2DImode
;
36664 half_mode
= V4SFmode
;
36670 half_mode
= V2DFmode
;
36676 /* Compute offset. */
36680 gcc_assert (i
<= 1);
36682 /* Extract the half. */
36683 tmp
= gen_reg_rtx (half_mode
);
36684 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
36686 /* Put val in tmp at elt. */
36687 ix86_expand_vector_set (false, tmp
, val
, elt
);
36690 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
36699 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
36700 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
36701 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36705 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36707 emit_move_insn (mem
, target
);
36709 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36710 emit_move_insn (tmp
, val
);
36712 emit_move_insn (target
, mem
);
36717 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
36719 enum machine_mode mode
= GET_MODE (vec
);
36720 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
36721 bool use_vec_extr
= false;
36734 use_vec_extr
= true;
36738 use_vec_extr
= TARGET_SSE4_1
;
36750 tmp
= gen_reg_rtx (mode
);
36751 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
36752 GEN_INT (elt
), GEN_INT (elt
),
36753 GEN_INT (elt
+4), GEN_INT (elt
+4)));
36757 tmp
= gen_reg_rtx (mode
);
36758 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
36762 gcc_unreachable ();
36765 use_vec_extr
= true;
36770 use_vec_extr
= TARGET_SSE4_1
;
36784 tmp
= gen_reg_rtx (mode
);
36785 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
36786 GEN_INT (elt
), GEN_INT (elt
),
36787 GEN_INT (elt
), GEN_INT (elt
)));
36791 tmp
= gen_reg_rtx (mode
);
36792 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
36796 gcc_unreachable ();
36799 use_vec_extr
= true;
36804 /* For SSE1, we have to reuse the V4SF code. */
36805 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
36806 gen_lowpart (V4SFmode
, vec
), elt
);
36812 use_vec_extr
= TARGET_SSE2
;
36815 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
36819 use_vec_extr
= TARGET_SSE4_1
;
36825 tmp
= gen_reg_rtx (V4SFmode
);
36827 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
36829 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
36830 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36838 tmp
= gen_reg_rtx (V2DFmode
);
36840 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
36842 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
36843 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36851 tmp
= gen_reg_rtx (V16QImode
);
36853 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
36855 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
36856 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
36864 tmp
= gen_reg_rtx (V8HImode
);
36866 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
36868 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
36869 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
36877 tmp
= gen_reg_rtx (V4SImode
);
36879 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
36881 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
36882 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
36890 tmp
= gen_reg_rtx (V2DImode
);
36892 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
36894 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
36895 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
36901 /* ??? Could extract the appropriate HImode element and shift. */
36908 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
36909 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
36911 /* Let the rtl optimizers know about the zero extension performed. */
36912 if (inner_mode
== QImode
|| inner_mode
== HImode
)
36914 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
36915 target
= gen_lowpart (SImode
, target
);
36918 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
36922 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
36924 emit_move_insn (mem
, vec
);
36926 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
36927 emit_move_insn (target
, tmp
);
36931 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
36932 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
36933 The upper bits of DEST are undefined, though they shouldn't cause
36934 exceptions (some bits from src or all zeros are ok). */
36937 emit_reduc_half (rtx dest
, rtx src
, int i
)
36940 switch (GET_MODE (src
))
36944 tem
= gen_sse_movhlps (dest
, src
, src
);
36946 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
36947 GEN_INT (1 + 4), GEN_INT (1 + 4));
36950 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
36956 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
36957 gen_lowpart (V1TImode
, src
),
36962 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
36964 tem
= gen_avx_shufps256 (dest
, src
, src
,
36965 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
36969 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
36971 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
36978 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
36979 gen_lowpart (V4DImode
, src
),
36980 gen_lowpart (V4DImode
, src
),
36983 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
36984 gen_lowpart (V2TImode
, src
),
36988 gcc_unreachable ();
36993 /* Expand a vector reduction. FN is the binary pattern to reduce;
36994 DEST is the destination; IN is the input vector. */
36997 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
36999 rtx half
, dst
, vec
= in
;
37000 enum machine_mode mode
= GET_MODE (in
);
37003 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37005 && mode
== V8HImode
37006 && fn
== gen_uminv8hi3
)
37008 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
37012 for (i
= GET_MODE_BITSIZE (mode
);
37013 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
37016 half
= gen_reg_rtx (mode
);
37017 emit_reduc_half (half
, vec
, i
);
37018 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
37021 dst
= gen_reg_rtx (mode
);
37022 emit_insn (fn (dst
, half
, vec
));
37027 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): this chunk is a damaged extraction -- original source lines
   are wrapped and several are missing entirely (the embedded 370xx numbers
   jump 37029 -> 37031 and 37033 -> 37036, so the opening brace and the body
   of the TFmode branch were dropped).  Code lines below are byte-identical
   to the extraction; only comments are added.  */
/* Decimal-float modes defer to the generic decimal-float support query.  */
37029 ix86_scalar_mode_supported_p (enum machine_mode mode
)
37031 if (DECIMAL_FLOAT_MODE_P (mode
))
37032 return default_decimal_float_supported_p ();
/* TFmode (__float128) gets special handling here; its return value sits on
   the dropped lines 37034-37035 -- presumably "return true;" under some
   target condition.  TODO confirm against the original i386.c.  */
37033 else if (mode
== TFmode
)
/* All remaining scalar modes fall through to the generic default.  */
37036 return default_scalar_mode_supported_p (mode
);
37039 /* Implements target hook vector_mode_supported_p. */
/* NOTE(review): damaged extraction -- the result lines between the tests
   (37044, 37046, 37048, 37050, and anything after 37051) were dropped;
   each "if" presumably returned true, with a final "return false;".
   TODO confirm against the original i386.c.  The visible logic tests
   each ISA feature flag against its matching valid-mode predicate.  */
37041 ix86_vector_mode_supported_p (enum machine_mode mode
)
37043 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
37045 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
37047 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
37049 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
37051 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
37056 /* Target hook for c_mode_for_suffix. */
37057 static enum machine_mode
37058 ix86_c_mode_for_suffix (char suffix
)
37068 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37070 We do this in the new i386 backend to maintain source compatibility
37071 with the old cc0-based compiler. */
37074 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
37075 tree inputs ATTRIBUTE_UNUSED
,
37078 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
37080 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
37085 /* Implements target vector targetm.asm.encode_section_info. */
/* NOTE(review): damaged extraction -- lines wrapped, some dropped (gap
   37088 -> 37090 hides the opening brace).  Code preserved byte-for-byte.
   Behavior visible here: run the default hook, then mark static/external
   variables that live in the large data section with SYMBOL_FLAG_FAR_ADDR
   on their SYMBOL_REF, so later code knows they need far addressing.  */
37087 static void ATTRIBUTE_UNUSED
37088 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
37090 default_encode_section_info (decl
, rtl
, first
);
37092 if (TREE_CODE (decl
) == VAR_DECL
37093 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
37094 && ix86_in_large_data_p (decl
))
37095 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
37098 /* Worker function for REVERSE_CONDITION. */
/* NOTE(review): damaged extraction (opening brace at 37102 dropped);
   code lines preserved byte-for-byte.  For ordinary CC modes this is
   plain reverse_condition; for the x87/SSE floating-point CC modes
   (CCFPmode, CCFPUmode) the reversal must account for unordered
   operands, hence reverse_condition_maybe_unordered.  */
37101 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
37103 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
37104 ? reverse_condition (code
)
37105 : reverse_condition_maybe_unordered (code
));
37108 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* NOTE(review): damaged extraction -- the rest of the header comment
   (37109-37111) and several body lines (e.g. 37115, 37118, 37122,
   37125-37126, 37128, 37132-37133, 37138) were dropped; visible code is
   preserved byte-for-byte.  Returns an assembler template string.
   Visible logic: reg->reg moves emit a popping "fstp" when the source
   register dies (REG_DEAD note), using output_387_ffreep when the
   destination is the top-of-stack register; loads use "fld" when the
   destination is stack top.  Stores to memory pop ("fstp") when the
   source dies; XFmode has no non-popping memory store, so a store that
   must keep the value is followed by a reload ("fstp" then "fld").  */
37112 output_387_reg_move (rtx insn
, rtx
*operands
)
37114 if (REG_P (operands
[0]))
37116 if (REG_P (operands
[1])
37117 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37119 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
37120 return output_387_ffreep (operands
, 0);
37121 return "fstp\t%y0";
37123 if (STACK_TOP_P (operands
[0]))
37124 return "fld%Z1\t%y1";
37127 else if (MEM_P (operands
[0]))
37129 gcc_assert (REG_P (operands
[1]));
37130 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
37131 return "fstp%Z0\t%y0";
37134 /* There is no non-popping store to memory for XFmode.
37135 So if we need one, follow the store with a load. */
37136 if (GET_MODE (operands
[0]) == XFmode
)
37137 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37139 return "fst%Z0\t%y0";
37146 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37147 FP status register is set. */
37150 ix86_emit_fp_unordered_jump (rtx label
)
37152 rtx reg
= gen_reg_rtx (HImode
);
37155 emit_insn (gen_x86_fnstsw_1 (reg
));
37157 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
37159 emit_insn (gen_x86_sahf_1 (reg
));
37161 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
37162 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
37166 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
37168 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37169 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
37172 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
37173 gen_rtx_LABEL_REF (VOIDmode
, label
),
37175 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
37177 emit_jump_insn (temp
);
37178 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
37181 /* Output code to perform a log1p XFmode calculation. */
37183 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
37185 rtx label1
= gen_label_rtx ();
37186 rtx label2
= gen_label_rtx ();
37188 rtx tmp
= gen_reg_rtx (XFmode
);
37189 rtx tmp2
= gen_reg_rtx (XFmode
);
37192 emit_insn (gen_absxf2 (tmp
, op1
));
37193 test
= gen_rtx_GE (VOIDmode
, tmp
,
37194 CONST_DOUBLE_FROM_REAL_VALUE (
37195 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
37197 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
37199 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37200 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
37201 emit_jump (label2
);
37203 emit_label (label1
);
37204 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
37205 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
37206 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
37207 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
37209 emit_label (label2
);
37212 /* Emit code for round calculation. */
37213 void ix86_emit_i387_round (rtx op0
, rtx op1
)
37215 enum machine_mode inmode
= GET_MODE (op1
);
37216 enum machine_mode outmode
= GET_MODE (op0
);
37217 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
37218 rtx scratch
= gen_reg_rtx (HImode
);
37219 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
37220 rtx jump_label
= gen_label_rtx ();
37222 rtx (*gen_abs
) (rtx
, rtx
);
37223 rtx (*gen_neg
) (rtx
, rtx
);
37228 gen_abs
= gen_abssf2
;
37231 gen_abs
= gen_absdf2
;
37234 gen_abs
= gen_absxf2
;
37237 gcc_unreachable ();
37243 gen_neg
= gen_negsf2
;
37246 gen_neg
= gen_negdf2
;
37249 gen_neg
= gen_negxf2
;
37252 gen_neg
= gen_neghi2
;
37255 gen_neg
= gen_negsi2
;
37258 gen_neg
= gen_negdi2
;
37261 gcc_unreachable ();
37264 e1
= gen_reg_rtx (inmode
);
37265 e2
= gen_reg_rtx (inmode
);
37266 res
= gen_reg_rtx (outmode
);
37268 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
37270 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
37272 /* scratch = fxam(op1) */
37273 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
37274 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
37276 /* e1 = fabs(op1) */
37277 emit_insn (gen_abs (e1
, op1
));
37279 /* e2 = e1 + 0.5 */
37280 half
= force_reg (inmode
, half
);
37281 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37282 gen_rtx_PLUS (inmode
, e1
, half
)));
37284 /* res = floor(e2) */
37285 if (inmode
!= XFmode
)
37287 tmp1
= gen_reg_rtx (XFmode
);
37289 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
37290 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
37300 rtx tmp0
= gen_reg_rtx (XFmode
);
37302 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
37304 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37305 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
37306 UNSPEC_TRUNC_NOOP
)));
37310 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
37313 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
37316 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
37319 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
37322 gcc_unreachable ();
37325 /* flags = signbit(a) */
37326 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
37328 /* if (flags) then res = -res */
37329 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
37330 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
37331 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
37333 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37334 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
37335 JUMP_LABEL (insn
) = jump_label
;
37337 emit_insn (gen_neg (res
, res
));
37339 emit_label (jump_label
);
37340 LABEL_NUSES (jump_label
) = 1;
37342 emit_move_insn (op0
, res
);
37345 /* Output code to perform a Newton-Rhapson approximation of a single precision
37346 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37348 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
37350 rtx x0
, x1
, e0
, e1
;
37352 x0
= gen_reg_rtx (mode
);
37353 e0
= gen_reg_rtx (mode
);
37354 e1
= gen_reg_rtx (mode
);
37355 x1
= gen_reg_rtx (mode
);
37357 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
37359 b
= force_reg (mode
, b
);
37361 /* x0 = rcp(b) estimate */
37362 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37363 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
37366 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37367 gen_rtx_MULT (mode
, x0
, b
)));
37370 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37371 gen_rtx_MULT (mode
, x0
, e0
)));
37374 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37375 gen_rtx_PLUS (mode
, x0
, x0
)));
37378 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
37379 gen_rtx_MINUS (mode
, e1
, e0
)));
37382 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37383 gen_rtx_MULT (mode
, a
, x1
)));
37386 /* Output code to perform a Newton-Rhapson approximation of a
37387 single precision floating point [reciprocal] square root. */
37389 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
37392 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
37395 x0
= gen_reg_rtx (mode
);
37396 e0
= gen_reg_rtx (mode
);
37397 e1
= gen_reg_rtx (mode
);
37398 e2
= gen_reg_rtx (mode
);
37399 e3
= gen_reg_rtx (mode
);
37401 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
37402 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37404 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
37405 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
37407 if (VECTOR_MODE_P (mode
))
37409 mthree
= ix86_build_const_vector (mode
, true, mthree
);
37410 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
37413 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37414 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
37416 a
= force_reg (mode
, a
);
37418 /* x0 = rsqrt(a) estimate */
37419 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37420 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
37423 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
37428 zero
= gen_reg_rtx (mode
);
37429 mask
= gen_reg_rtx (mode
);
37431 zero
= force_reg (mode
, CONST0_RTX(mode
));
37432 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37433 gen_rtx_NE (mode
, zero
, a
)));
37435 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
37436 gen_rtx_AND (mode
, x0
, mask
)));
37440 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
37441 gen_rtx_MULT (mode
, x0
, a
)));
37443 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
37444 gen_rtx_MULT (mode
, e0
, x0
)));
37447 mthree
= force_reg (mode
, mthree
);
37448 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
37449 gen_rtx_PLUS (mode
, e1
, mthree
)));
37451 mhalf
= force_reg (mode
, mhalf
);
37453 /* e3 = -.5 * x0 */
37454 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37455 gen_rtx_MULT (mode
, x0
, mhalf
)));
37457 /* e3 = -.5 * e0 */
37458 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
37459 gen_rtx_MULT (mode
, e0
, mhalf
)));
37460 /* ret = e2 * e3 */
37461 emit_insn (gen_rtx_SET (VOIDmode
, res
,
37462 gen_rtx_MULT (mode
, e2
, e3
)));
37465 #ifdef TARGET_SOLARIS
37466 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
37469 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
37472 /* With Binutils 2.15, the "@unwind" marker must be specified on
37473 every occurrence of the ".eh_frame" section, not just the first
37476 && strcmp (name
, ".eh_frame") == 0)
37478 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
37479 flags
& SECTION_WRITE
? "aw" : "a");
37484 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
37486 solaris_elf_asm_comdat_section (name
, flags
, decl
);
37491 default_elf_asm_named_section (name
, flags
, decl
);
37493 #endif /* TARGET_SOLARIS */
37495 /* Return the mangling of TYPE if it is an extended fundamental type. */
37497 static const char *
37498 ix86_mangle_type (const_tree type
)
37500 type
= TYPE_MAIN_VARIANT (type
);
37502 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
37503 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
37506 switch (TYPE_MODE (type
))
37509 /* __float128 is "g". */
37512 /* "long double" or __float80 is "e". */
37519 /* For 32-bit code we can save PIC register setup by using
37520 __stack_chk_fail_local hidden function instead of calling
37521 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
37522 register, so it is better to call __stack_chk_fail directly. */
/* NOTE(review): damaged extraction (opening brace at 37526 dropped);
   code preserved byte-for-byte.  Returns the tree for the stack-protector
   failure callee: the external __stack_chk_fail on 64-bit, the hidden
   local variant on 32-bit, per the rationale in the comment above.  */
37524 static tree ATTRIBUTE_UNUSED
37525 ix86_stack_protect_fail (void)
37527 return TARGET_64BIT
37528 ? default_external_stack_protect_fail ()
37529 : default_hidden_stack_protect_fail ();
37532 /* Select a format to encode pointers in exception handling data. CODE
37533 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37534 true if the symbol may be affected by dynamic relocations.
37536 ??? All x86 object file formats are capable of representing this.
37537 After all, the relocation needed is the same as for the call insn.
37538 Whether or not a particular assembler allows us to enter such, I
37539 guess we'll have to see. */
/* NOTE(review): damaged extraction -- the guard opening the PIC branch
   (original lines 37543-37546, presumably "if (flag_pic ...") and the
   braces were dropped; code preserved byte-for-byte.  Visible logic:
   in the PIC path, default to pc-relative sdata8 but narrow to sdata4
   for the small-PIC model (and medium-PIC for global/code refs), and
   OR in DW_EH_PE_indirect for symbols subject to dynamic relocation.
   In the non-PIC path, small/medium code models can use udata4;
   otherwise fall back to an absolute pointer.  */
37541 asm_preferred_eh_data_format (int code
, int global
)
37545 int type
= DW_EH_PE_sdata8
;
37547 || ix86_cmodel
== CM_SMALL_PIC
37548 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
37549 type
= DW_EH_PE_sdata4
;
37550 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
37552 if (ix86_cmodel
== CM_SMALL
37553 || (ix86_cmodel
== CM_MEDIUM
&& code
))
37554 return DW_EH_PE_udata4
;
37555 return DW_EH_PE_absptr
;
37558 /* Expand copysign from SIGN to the positive value ABS_VALUE
37559 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
37562 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
37564 enum machine_mode mode
= GET_MODE (sign
);
37565 rtx sgn
= gen_reg_rtx (mode
);
37566 if (mask
== NULL_RTX
)
37568 enum machine_mode vmode
;
37570 if (mode
== SFmode
)
37572 else if (mode
== DFmode
)
37577 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
37578 if (!VECTOR_MODE_P (mode
))
37580 /* We need to generate a scalar mode mask in this case. */
37581 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37582 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37583 mask
= gen_reg_rtx (mode
);
37584 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37588 mask
= gen_rtx_NOT (mode
, mask
);
37589 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
37590 gen_rtx_AND (mode
, mask
, sign
)));
37591 emit_insn (gen_rtx_SET (VOIDmode
, result
,
37592 gen_rtx_IOR (mode
, abs_value
, sgn
)));
37595 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37596 mask for masking out the sign-bit is stored in *SMASK, if that is
37599 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
37601 enum machine_mode vmode
, mode
= GET_MODE (op0
);
37604 xa
= gen_reg_rtx (mode
);
37605 if (mode
== SFmode
)
37607 else if (mode
== DFmode
)
37611 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
37612 if (!VECTOR_MODE_P (mode
))
37614 /* We need to generate a scalar mode mask in this case. */
37615 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
37616 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
37617 mask
= gen_reg_rtx (mode
);
37618 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
37620 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
37621 gen_rtx_AND (mode
, op0
, mask
)));
37629 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37630 swapping the operands if SWAP_OPERANDS is true. The expanded
37631 code is a forward jump to a newly created label in case the
37632 comparison is true. The generated label rtx is returned. */
37634 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
37635 bool swap_operands
)
37646 label
= gen_label_rtx ();
37647 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
37648 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37649 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
37650 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
37651 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
37652 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
37653 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
37654 JUMP_LABEL (tmp
) = label
;
37659 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37660 using comparison code CODE. Operands are swapped for the comparison if
37661 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37663 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
37664 bool swap_operands
)
37666 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
37667 enum machine_mode mode
= GET_MODE (op0
);
37668 rtx mask
= gen_reg_rtx (mode
);
37677 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
37679 emit_insn (insn (mask
, op0
, op1
,
37680 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
37684 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37685 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* NOTE(review): damaged extraction -- the function's return type line,
   opening brace, the TWO52 declaration line (gap 37689 -> 37692) and the
   trailing "return TWO52;" (after 37694) were dropped; visible code is
   preserved byte-for-byte.  Builds 2**52 for DFmode or 2**23 for SFmode
   via real_ldexp on dconst1, converts it to a CONST_DOUBLE, and forces
   it into a register for use by the SSE rounding expanders below.  */
37687 ix86_gen_TWO52 (enum machine_mode mode
)
37689 REAL_VALUE_TYPE TWO52r
;
37692 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
37693 TWO52
= const_double_from_real_value (TWO52r
, mode
);
37694 TWO52
= force_reg (mode
, TWO52
);
37699 /* Expand SSE sequence for computing lround from OP1 storing
37702 ix86_expand_lround (rtx op0
, rtx op1
)
37704 /* C code for the stuff we're doing below:
37705 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37708 enum machine_mode mode
= GET_MODE (op1
);
37709 const struct real_format
*fmt
;
37710 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37713 /* load nextafter (0.5, 0.0) */
37714 fmt
= REAL_MODE_FORMAT (mode
);
37715 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37716 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37718 /* adj = copysign (0.5, op1) */
37719 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37720 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
37722 /* adj = op1 + adj */
37723 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37725 /* op0 = (imode)adj */
37726 expand_fix (op0
, adj
, 0);
37729 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
37732 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
37734 /* C code for the stuff we're doing below (for do_floor):
37736 xi -= (double)xi > op1 ? 1 : 0;
37739 enum machine_mode fmode
= GET_MODE (op1
);
37740 enum machine_mode imode
= GET_MODE (op0
);
37741 rtx ireg
, freg
, label
, tmp
;
37743 /* reg = (long)op1 */
37744 ireg
= gen_reg_rtx (imode
);
37745 expand_fix (ireg
, op1
, 0);
37747 /* freg = (double)reg */
37748 freg
= gen_reg_rtx (fmode
);
37749 expand_float (freg
, ireg
, 0);
37751 /* ireg = (freg > op1) ? ireg - 1 : ireg */
37752 label
= ix86_expand_sse_compare_and_jump (UNLE
,
37753 freg
, op1
, !do_floor
);
37754 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
37755 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
37756 emit_move_insn (ireg
, tmp
);
37758 emit_label (label
);
37759 LABEL_NUSES (label
) = 1;
37761 emit_move_insn (op0
, ireg
);
37764 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
37765 result in OPERAND0. */
37767 ix86_expand_rint (rtx operand0
, rtx operand1
)
37769 /* C code for the stuff we're doing below:
37770 xa = fabs (operand1);
37771 if (!isless (xa, 2**52))
37773 xa = xa + 2**52 - 2**52;
37774 return copysign (xa, operand1);
37776 enum machine_mode mode
= GET_MODE (operand0
);
37777 rtx res
, xa
, label
, TWO52
, mask
;
37779 res
= gen_reg_rtx (mode
);
37780 emit_move_insn (res
, operand1
);
37782 /* xa = abs (operand1) */
37783 xa
= ix86_expand_sse_fabs (res
, &mask
);
37785 /* if (!isless (xa, TWO52)) goto label; */
37786 TWO52
= ix86_gen_TWO52 (mode
);
37787 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37789 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37790 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37792 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
37794 emit_label (label
);
37795 LABEL_NUSES (label
) = 1;
37797 emit_move_insn (operand0
, res
);
37800 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37803 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
37805 /* C code for the stuff we expand below.
37806 double xa = fabs (x), x2;
37807 if (!isless (xa, TWO52))
37809 xa = xa + TWO52 - TWO52;
37810 x2 = copysign (xa, x);
37819 enum machine_mode mode
= GET_MODE (operand0
);
37820 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
37822 TWO52
= ix86_gen_TWO52 (mode
);
37824 /* Temporary for holding the result, initialized to the input
37825 operand to ease control flow. */
37826 res
= gen_reg_rtx (mode
);
37827 emit_move_insn (res
, operand1
);
37829 /* xa = abs (operand1) */
37830 xa
= ix86_expand_sse_fabs (res
, &mask
);
37832 /* if (!isless (xa, TWO52)) goto label; */
37833 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37835 /* xa = xa + TWO52 - TWO52; */
37836 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37837 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
37839 /* xa = copysign (xa, operand1) */
37840 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
37842 /* generate 1.0 or -1.0 */
37843 one
= force_reg (mode
,
37844 const_double_from_real_value (do_floor
37845 ? dconst1
: dconstm1
, mode
));
37847 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37848 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37849 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37850 gen_rtx_AND (mode
, one
, tmp
)));
37851 /* We always need to subtract here to preserve signed zero. */
37852 tmp
= expand_simple_binop (mode
, MINUS
,
37853 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37854 emit_move_insn (res
, tmp
);
37856 emit_label (label
);
37857 LABEL_NUSES (label
) = 1;
37859 emit_move_insn (operand0
, res
);
37862 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
37865 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
37867 /* C code for the stuff we expand below.
37868 double xa = fabs (x), x2;
37869 if (!isless (xa, TWO52))
37871 x2 = (double)(long)x;
37878 if (HONOR_SIGNED_ZEROS (mode))
37879 return copysign (x2, x);
37882 enum machine_mode mode
= GET_MODE (operand0
);
37883 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
37885 TWO52
= ix86_gen_TWO52 (mode
);
37887 /* Temporary for holding the result, initialized to the input
37888 operand to ease control flow. */
37889 res
= gen_reg_rtx (mode
);
37890 emit_move_insn (res
, operand1
);
37892 /* xa = abs (operand1) */
37893 xa
= ix86_expand_sse_fabs (res
, &mask
);
37895 /* if (!isless (xa, TWO52)) goto label; */
37896 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37898 /* xa = (double)(long)x */
37899 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37900 expand_fix (xi
, res
, 0);
37901 expand_float (xa
, xi
, 0);
37904 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37906 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
37907 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
37908 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37909 gen_rtx_AND (mode
, one
, tmp
)));
37910 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
37911 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37912 emit_move_insn (res
, tmp
);
37914 if (HONOR_SIGNED_ZEROS (mode
))
37915 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
37917 emit_label (label
);
37918 LABEL_NUSES (label
) = 1;
37920 emit_move_insn (operand0
, res
);
37923 /* Expand SSE sequence for computing round from OPERAND1 storing
37924 into OPERAND0. Sequence that works without relying on DImode truncation
37925 via cvttsd2siq that is only available on 64bit targets. */
37927 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
37929 /* C code for the stuff we expand below.
37930 double xa = fabs (x), xa2, x2;
37931 if (!isless (xa, TWO52))
37933 Using the absolute value and copying back sign makes
37934 -0.0 -> -0.0 correct.
37935 xa2 = xa + TWO52 - TWO52;
37940 else if (dxa > 0.5)
37942 x2 = copysign (xa2, x);
37945 enum machine_mode mode
= GET_MODE (operand0
);
37946 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
37948 TWO52
= ix86_gen_TWO52 (mode
);
37950 /* Temporary for holding the result, initialized to the input
37951 operand to ease control flow. */
37952 res
= gen_reg_rtx (mode
);
37953 emit_move_insn (res
, operand1
);
37955 /* xa = abs (operand1) */
37956 xa
= ix86_expand_sse_fabs (res
, &mask
);
37958 /* if (!isless (xa, TWO52)) goto label; */
37959 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37961 /* xa2 = xa + TWO52 - TWO52; */
37962 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37963 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
37965 /* dxa = xa2 - xa; */
37966 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
37968 /* generate 0.5, 1.0 and -0.5 */
37969 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
37970 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37971 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
37975 tmp
= gen_reg_rtx (mode
);
37976 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
37977 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
37978 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37979 gen_rtx_AND (mode
, one
, tmp
)));
37980 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37981 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
37982 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
37983 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
37984 gen_rtx_AND (mode
, one
, tmp
)));
37985 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
37987 /* res = copysign (xa2, operand1) */
37988 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
37990 emit_label (label
);
37991 LABEL_NUSES (label
) = 1;
37993 emit_move_insn (operand0
, res
);
37996 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37999 ix86_expand_trunc (rtx operand0
, rtx operand1
)
38001 /* C code for SSE variant we expand below.
38002 double xa = fabs (x), x2;
38003 if (!isless (xa, TWO52))
38005 x2 = (double)(long)x;
38006 if (HONOR_SIGNED_ZEROS (mode))
38007 return copysign (x2, x);
38010 enum machine_mode mode
= GET_MODE (operand0
);
38011 rtx xa
, xi
, TWO52
, label
, res
, mask
;
38013 TWO52
= ix86_gen_TWO52 (mode
);
38015 /* Temporary for holding the result, initialized to the input
38016 operand to ease control flow. */
38017 res
= gen_reg_rtx (mode
);
38018 emit_move_insn (res
, operand1
);
38020 /* xa = abs (operand1) */
38021 xa
= ix86_expand_sse_fabs (res
, &mask
);
38023 /* if (!isless (xa, TWO52)) goto label; */
38024 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38026 /* x = (double)(long)x */
38027 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38028 expand_fix (xi
, res
, 0);
38029 expand_float (res
, xi
, 0);
38031 if (HONOR_SIGNED_ZEROS (mode
))
38032 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
38034 emit_label (label
);
38035 LABEL_NUSES (label
) = 1;
38037 emit_move_insn (operand0
, res
);
38040 /* Expand SSE sequence for computing trunc from OPERAND1 storing
38043 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
38045 enum machine_mode mode
= GET_MODE (operand0
);
38046 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
38048 /* C code for SSE variant we expand below.
38049 double xa = fabs (x), x2;
38050 if (!isless (xa, TWO52))
38052 xa2 = xa + TWO52 - TWO52;
38056 x2 = copysign (xa2, x);
38060 TWO52
= ix86_gen_TWO52 (mode
);
38062 /* Temporary for holding the result, initialized to the input
38063 operand to ease control flow. */
38064 res
= gen_reg_rtx (mode
);
38065 emit_move_insn (res
, operand1
);
38067 /* xa = abs (operand1) */
38068 xa
= ix86_expand_sse_fabs (res
, &smask
);
38070 /* if (!isless (xa, TWO52)) goto label; */
38071 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38073 /* res = xa + TWO52 - TWO52; */
38074 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
38075 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
38076 emit_move_insn (res
, tmp
);
38079 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
38081 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38082 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
38083 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
38084 gen_rtx_AND (mode
, mask
, one
)));
38085 tmp
= expand_simple_binop (mode
, MINUS
,
38086 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
38087 emit_move_insn (res
, tmp
);
38089 /* res = copysign (res, operand1) */
38090 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
38092 emit_label (label
);
38093 LABEL_NUSES (label
) = 1;
38095 emit_move_insn (operand0
, res
);
38098 /* Expand SSE sequence for computing round from OPERAND1 storing
38101 ix86_expand_round (rtx operand0
, rtx operand1
)
38103 /* C code for the stuff we're doing below:
38104 double xa = fabs (x);
38105 if (!isless (xa, TWO52))
38107 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38108 return copysign (xa, x);
38110 enum machine_mode mode
= GET_MODE (operand0
);
38111 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
38112 const struct real_format
*fmt
;
38113 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38115 /* Temporary for holding the result, initialized to the input
38116 operand to ease control flow. */
38117 res
= gen_reg_rtx (mode
);
38118 emit_move_insn (res
, operand1
);
38120 TWO52
= ix86_gen_TWO52 (mode
);
38121 xa
= ix86_expand_sse_fabs (res
, &mask
);
38122 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
38124 /* load nextafter (0.5, 0.0) */
38125 fmt
= REAL_MODE_FORMAT (mode
);
38126 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38127 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38129 /* xa = xa + 0.5 */
38130 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
38131 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
38133 /* xa = (double)(int64_t)xa */
38134 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
38135 expand_fix (xi
, xa
, 0);
38136 expand_float (xa
, xi
, 0);
38138 /* res = copysign (xa, operand1) */
38139 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
38141 emit_label (label
);
38142 LABEL_NUSES (label
) = 1;
38144 emit_move_insn (operand0
, res
);
38147 /* Expand SSE sequence for computing round
38148 from OP1 storing into OP0 using sse4 round insn. */
38150 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
38152 enum machine_mode mode
= GET_MODE (op0
);
38153 rtx e1
, e2
, res
, half
;
38154 const struct real_format
*fmt
;
38155 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
38156 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
38157 rtx (*gen_round
) (rtx
, rtx
, rtx
);
38162 gen_copysign
= gen_copysignsf3
;
38163 gen_round
= gen_sse4_1_roundsf2
;
38166 gen_copysign
= gen_copysigndf3
;
38167 gen_round
= gen_sse4_1_rounddf2
;
38170 gcc_unreachable ();
38173 /* round (a) = trunc (a + copysign (0.5, a)) */
38175 /* load nextafter (0.5, 0.0) */
38176 fmt
= REAL_MODE_FORMAT (mode
);
38177 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
38178 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
38179 half
= const_double_from_real_value (pred_half
, mode
);
38181 /* e1 = copysign (0.5, op1) */
38182 e1
= gen_reg_rtx (mode
);
38183 emit_insn (gen_copysign (e1
, half
, op1
));
38185 /* e2 = op1 + e1 */
38186 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
38188 /* res = trunc (e2) */
38189 res
= gen_reg_rtx (mode
);
38190 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
38192 emit_move_insn (op0
, res
);
38196 /* Table of valid machine attributes. */
38197 static const struct attribute_spec ix86_attribute_table
[] =
38199 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38200 affects_type_identity } */
38201 /* Stdcall attribute says callee is responsible for popping arguments
38202 if they are not variable. */
38203 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38205 /* Fastcall attribute says callee is responsible for popping arguments
38206 if they are not variable. */
38207 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38209 /* Thiscall attribute says callee is responsible for popping arguments
38210 if they are not variable. */
38211 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38213 /* Cdecl attribute says the callee is a normal C declaration */
38214 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38216 /* Regparm attribute specifies how many integer arguments are to be
38217 passed in registers. */
38218 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
38220 /* Sseregparm attribute says we are using x86_64 calling conventions
38221 for FP arguments. */
38222 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
38224 /* The transactional memory builtins are implicitly regparm or fastcall
38225 depending on the ABI. Override the generic do-nothing attribute that
38226 these builtins were declared with. */
38227 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
38229 /* force_align_arg_pointer says this function realigns the stack at entry. */
38230 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
38231 false, true, true, ix86_handle_cconv_attribute
, false },
38232 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38233 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
38234 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
38235 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
38238 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38240 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
38242 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38243 SUBTARGET_ATTRIBUTE_TABLE
,
38245 /* ms_abi and sysv_abi calling convention function attributes. */
38246 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38247 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
38248 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
38250 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38251 ix86_handle_callee_pop_aggregate_return
, true },
38253 { NULL
, 0, 0, false, false, false, NULL
, false }
38256 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38258 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
38260 int misalign ATTRIBUTE_UNUSED
)
38264 switch (type_of_cost
)
38267 return ix86_cost
->scalar_stmt_cost
;
38270 return ix86_cost
->scalar_load_cost
;
38273 return ix86_cost
->scalar_store_cost
;
38276 return ix86_cost
->vec_stmt_cost
;
38279 return ix86_cost
->vec_align_load_cost
;
38282 return ix86_cost
->vec_store_cost
;
38284 case vec_to_scalar
:
38285 return ix86_cost
->vec_to_scalar_cost
;
38287 case scalar_to_vec
:
38288 return ix86_cost
->scalar_to_vec_cost
;
38290 case unaligned_load
:
38291 case unaligned_store
:
38292 return ix86_cost
->vec_unalign_load_cost
;
38294 case cond_branch_taken
:
38295 return ix86_cost
->cond_taken_branch_cost
;
38297 case cond_branch_not_taken
:
38298 return ix86_cost
->cond_not_taken_branch_cost
;
38301 case vec_promote_demote
:
38302 return ix86_cost
->vec_stmt_cost
;
38304 case vec_construct
:
38305 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
38306 return elements
/ 2 + 1;
38309 gcc_unreachable ();
38313 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38314 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38315 insn every time. */
38317 static GTY(()) rtx vselect_insn
;
38319 /* Initialize vselect_insn. */
38322 init_vselect_insn (void)
38327 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
38328 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
38329 XVECEXP (x
, 0, i
) = const0_rtx
;
38330 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
38332 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
38334 vselect_insn
= emit_insn (x
);
38338 /* Construct (set target (vec_select op0 (parallel perm))) and
38339 return true if that's a valid instruction in the active ISA. */
38342 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
38343 unsigned nelt
, bool testing_p
)
38346 rtx x
, save_vconcat
;
38349 if (vselect_insn
== NULL_RTX
)
38350 init_vselect_insn ();
38352 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
38353 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
38354 for (i
= 0; i
< nelt
; ++i
)
38355 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
38356 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38357 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
38358 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
38359 SET_DEST (PATTERN (vselect_insn
)) = target
;
38360 icode
= recog_memoized (vselect_insn
);
38362 if (icode
>= 0 && !testing_p
)
38363 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
38365 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
38366 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
38367 INSN_CODE (vselect_insn
) = -1;
38372 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38375 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
38376 const unsigned char *perm
, unsigned nelt
,
38379 enum machine_mode v2mode
;
38383 if (vselect_insn
== NULL_RTX
)
38384 init_vselect_insn ();
38386 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
38387 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
38388 PUT_MODE (x
, v2mode
);
38391 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
38392 XEXP (x
, 0) = const0_rtx
;
38393 XEXP (x
, 1) = const0_rtx
;
38397 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38398 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
38401 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
38403 enum machine_mode vmode
= d
->vmode
;
38404 unsigned i
, mask
, nelt
= d
->nelt
;
38405 rtx target
, op0
, op1
, x
;
38406 rtx rperm
[32], vperm
;
38408 if (d
->one_operand_p
)
38410 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
38412 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
38414 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
38419 /* This is a blend, not a permute. Elements must stay in their
38420 respective lanes. */
38421 for (i
= 0; i
< nelt
; ++i
)
38423 unsigned e
= d
->perm
[i
];
38424 if (!(e
== i
|| e
== i
+ nelt
))
38431 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38432 decision should be extracted elsewhere, so that we only try that
38433 sequence once all budget==3 options have been tried. */
38434 target
= d
->target
;
38447 for (i
= 0; i
< nelt
; ++i
)
38448 mask
|= (d
->perm
[i
] >= nelt
) << i
;
38452 for (i
= 0; i
< 2; ++i
)
38453 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
38458 for (i
= 0; i
< 4; ++i
)
38459 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38464 /* See if bytes move in pairs so we can use pblendw with
38465 an immediate argument, rather than pblendvb with a vector
38467 for (i
= 0; i
< 16; i
+= 2)
38468 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38471 for (i
= 0; i
< nelt
; ++i
)
38472 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
38475 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
38476 vperm
= force_reg (vmode
, vperm
);
38478 if (GET_MODE_SIZE (vmode
) == 16)
38479 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
38481 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
38485 for (i
= 0; i
< 8; ++i
)
38486 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38491 target
= gen_lowpart (vmode
, target
);
38492 op0
= gen_lowpart (vmode
, op0
);
38493 op1
= gen_lowpart (vmode
, op1
);
38497 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38498 for (i
= 0; i
< 32; i
+= 2)
38499 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38501 /* See if bytes move in quadruplets. If yes, vpblendd
38502 with immediate can be used. */
38503 for (i
= 0; i
< 32; i
+= 4)
38504 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
38508 /* See if bytes move the same in both lanes. If yes,
38509 vpblendw with immediate can be used. */
38510 for (i
= 0; i
< 16; i
+= 2)
38511 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
38514 /* Use vpblendw. */
38515 for (i
= 0; i
< 16; ++i
)
38516 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
38521 /* Use vpblendd. */
38522 for (i
= 0; i
< 8; ++i
)
38523 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
38528 /* See if words move in pairs. If yes, vpblendd can be used. */
38529 for (i
= 0; i
< 16; i
+= 2)
38530 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
38534 /* See if words move the same in both lanes. If not,
38535 vpblendvb must be used. */
38536 for (i
= 0; i
< 8; i
++)
38537 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
38539 /* Use vpblendvb. */
38540 for (i
= 0; i
< 32; ++i
)
38541 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
38545 target
= gen_lowpart (vmode
, target
);
38546 op0
= gen_lowpart (vmode
, op0
);
38547 op1
= gen_lowpart (vmode
, op1
);
38548 goto finish_pblendvb
;
38551 /* Use vpblendw. */
38552 for (i
= 0; i
< 16; ++i
)
38553 mask
|= (d
->perm
[i
] >= 16) << i
;
38557 /* Use vpblendd. */
38558 for (i
= 0; i
< 8; ++i
)
38559 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
38564 /* Use vpblendd. */
38565 for (i
= 0; i
< 4; ++i
)
38566 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
38571 gcc_unreachable ();
38574 /* This matches five different patterns with the different modes. */
38575 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
38576 x
= gen_rtx_SET (VOIDmode
, target
, x
);
38582 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38583 in terms of the variable form of vpermilps.
38585 Note that we will have already failed the immediate input vpermilps,
38586 which requires that the high and low part shuffle be identical; the
38587 variable form doesn't require that. */
38590 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
38592 rtx rperm
[8], vperm
;
38595 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
38598 /* We can only permute within the 128-bit lane. */
38599 for (i
= 0; i
< 8; ++i
)
38601 unsigned e
= d
->perm
[i
];
38602 if (i
< 4 ? e
>= 4 : e
< 4)
38609 for (i
= 0; i
< 8; ++i
)
38611 unsigned e
= d
->perm
[i
];
38613 /* Within each 128-bit lane, the elements of op0 are numbered
38614 from 0 and the elements of op1 are numbered from 4. */
38620 rperm
[i
] = GEN_INT (e
);
38623 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
38624 vperm
= force_reg (V8SImode
, vperm
);
38625 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
38630 /* Return true if permutation D can be performed as VMODE permutation
38634 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
38636 unsigned int i
, j
, chunk
;
38638 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
38639 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
38640 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
38643 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
38646 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
38647 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
38648 if (d
->perm
[i
] & (chunk
- 1))
38651 for (j
= 1; j
< chunk
; ++j
)
38652 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
38658 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38659 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
38662 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
38664 unsigned i
, nelt
, eltsz
, mask
;
38665 unsigned char perm
[32];
38666 enum machine_mode vmode
= V16QImode
;
38667 rtx rperm
[32], vperm
, target
, op0
, op1
;
38671 if (!d
->one_operand_p
)
38673 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
38676 && valid_perm_using_mode_p (V2TImode
, d
))
38681 /* Use vperm2i128 insn. The pattern uses
38682 V4DImode instead of V2TImode. */
38683 target
= gen_lowpart (V4DImode
, d
->target
);
38684 op0
= gen_lowpart (V4DImode
, d
->op0
);
38685 op1
= gen_lowpart (V4DImode
, d
->op1
);
38687 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
38688 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
38689 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
38697 if (GET_MODE_SIZE (d
->vmode
) == 16)
38702 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38707 /* V4DImode should be already handled through
38708 expand_vselect by vpermq instruction. */
38709 gcc_assert (d
->vmode
!= V4DImode
);
38712 if (d
->vmode
== V8SImode
38713 || d
->vmode
== V16HImode
38714 || d
->vmode
== V32QImode
)
38716 /* First see if vpermq can be used for
38717 V8SImode/V16HImode/V32QImode. */
38718 if (valid_perm_using_mode_p (V4DImode
, d
))
38720 for (i
= 0; i
< 4; i
++)
38721 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
38724 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
38725 gen_lowpart (V4DImode
, d
->op0
),
38729 /* Next see if vpermd can be used. */
38730 if (valid_perm_using_mode_p (V8SImode
, d
))
38733 /* Or if vpermps can be used. */
38734 else if (d
->vmode
== V8SFmode
)
38737 if (vmode
== V32QImode
)
38739 /* vpshufb only works intra lanes, it is not
38740 possible to shuffle bytes in between the lanes. */
38741 for (i
= 0; i
< nelt
; ++i
)
38742 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
38753 if (vmode
== V8SImode
)
38754 for (i
= 0; i
< 8; ++i
)
38755 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
38758 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38759 if (!d
->one_operand_p
)
38760 mask
= 2 * nelt
- 1;
38761 else if (vmode
== V16QImode
)
38764 mask
= nelt
/ 2 - 1;
38766 for (i
= 0; i
< nelt
; ++i
)
38768 unsigned j
, e
= d
->perm
[i
] & mask
;
38769 for (j
= 0; j
< eltsz
; ++j
)
38770 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
38774 vperm
= gen_rtx_CONST_VECTOR (vmode
,
38775 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
38776 vperm
= force_reg (vmode
, vperm
);
38778 target
= gen_lowpart (vmode
, d
->target
);
38779 op0
= gen_lowpart (vmode
, d
->op0
);
38780 if (d
->one_operand_p
)
38782 if (vmode
== V16QImode
)
38783 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
38784 else if (vmode
== V32QImode
)
38785 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
38786 else if (vmode
== V8SFmode
)
38787 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
38789 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
38793 op1
= gen_lowpart (vmode
, d
->op1
);
38794 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
38800 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
38801 in a single instruction. */
38804 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
38806 unsigned i
, nelt
= d
->nelt
;
38807 unsigned char perm2
[MAX_VECT_LEN
];
38809 /* Check plain VEC_SELECT first, because AVX has instructions that could
38810 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
38811 input where SEL+CONCAT may not. */
38812 if (d
->one_operand_p
)
38814 int mask
= nelt
- 1;
38815 bool identity_perm
= true;
38816 bool broadcast_perm
= true;
38818 for (i
= 0; i
< nelt
; i
++)
38820 perm2
[i
] = d
->perm
[i
] & mask
;
38822 identity_perm
= false;
38824 broadcast_perm
= false;
38830 emit_move_insn (d
->target
, d
->op0
);
38833 else if (broadcast_perm
&& TARGET_AVX2
)
38835 /* Use vpbroadcast{b,w,d}. */
38836 rtx (*gen
) (rtx
, rtx
) = NULL
;
38840 gen
= gen_avx2_pbroadcastv32qi_1
;
38843 gen
= gen_avx2_pbroadcastv16hi_1
;
38846 gen
= gen_avx2_pbroadcastv8si_1
;
38849 gen
= gen_avx2_pbroadcastv16qi
;
38852 gen
= gen_avx2_pbroadcastv8hi
;
38855 gen
= gen_avx2_vec_dupv8sf_1
;
38857 /* For other modes prefer other shuffles this function creates. */
38863 emit_insn (gen (d
->target
, d
->op0
));
38868 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
38871 /* There are plenty of patterns in sse.md that are written for
38872 SEL+CONCAT and are not replicated for a single op. Perhaps
38873 that should be changed, to avoid the nastiness here. */
38875 /* Recognize interleave style patterns, which means incrementing
38876 every other permutation operand. */
38877 for (i
= 0; i
< nelt
; i
+= 2)
38879 perm2
[i
] = d
->perm
[i
] & mask
;
38880 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
38882 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38886 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
38889 for (i
= 0; i
< nelt
; i
+= 4)
38891 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
38892 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
38893 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
38894 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
38897 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
38903 /* Finally, try the fully general two operand permute. */
38904 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
38908 /* Recognize interleave style patterns with reversed operands. */
38909 if (!d
->one_operand_p
)
38911 for (i
= 0; i
< nelt
; ++i
)
38913 unsigned e
= d
->perm
[i
];
38921 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
38926 /* Try the SSE4.1 blend variable merge instructions. */
38927 if (expand_vec_perm_blend (d
))
38930 /* Try one of the AVX vpermil variable permutations. */
38931 if (expand_vec_perm_vpermil (d
))
38934 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
38935 vpshufb, vpermd, vpermps or vpermq variable permutation. */
38936 if (expand_vec_perm_pshufb (d
))
38942 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38943 in terms of a pair of pshuflw + pshufhw instructions. */
38946 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
38948 unsigned char perm2
[MAX_VECT_LEN
];
38952 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
38955 /* The two permutations only operate in 64-bit lanes. */
38956 for (i
= 0; i
< 4; ++i
)
38957 if (d
->perm
[i
] >= 4)
38959 for (i
= 4; i
< 8; ++i
)
38960 if (d
->perm
[i
] < 4)
38966 /* Emit the pshuflw. */
38967 memcpy (perm2
, d
->perm
, 4);
38968 for (i
= 4; i
< 8; ++i
)
38970 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
38973 /* Emit the pshufhw. */
38974 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
38975 for (i
= 0; i
< 4; ++i
)
38977 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
38983 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38984 the permutation using the SSSE3 palignr instruction. This succeeds
38985 when all of the elements in PERM fit within one vector and we merely
38986 need to shift them down so that a single vector permutation has a
38987 chance to succeed. */
38990 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
38992 unsigned i
, nelt
= d
->nelt
;
38997 /* Even with AVX, palignr only operates on 128-bit vectors. */
38998 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39001 min
= nelt
, max
= 0;
39002 for (i
= 0; i
< nelt
; ++i
)
39004 unsigned e
= d
->perm
[i
];
39010 if (min
== 0 || max
- min
>= nelt
)
39013 /* Given that we have SSSE3, we know we'll be able to implement the
39014 single operand permutation after the palignr with pshufb. */
39018 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
39019 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
39020 gen_lowpart (TImode
, d
->op1
),
39021 gen_lowpart (TImode
, d
->op0
), shift
));
39023 d
->op0
= d
->op1
= d
->target
;
39024 d
->one_operand_p
= true;
39027 for (i
= 0; i
< nelt
; ++i
)
39029 unsigned e
= d
->perm
[i
] - min
;
39035 /* Test for the degenerate case where the alignment by itself
39036 produces the desired permutation. */
39040 ok
= expand_vec_perm_1 (d
);
39046 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
39048 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39049 a two vector permutation into a single vector permutation by using
39050 an interleave operation to merge the vectors. */
39053 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
39055 struct expand_vec_perm_d dremap
, dfinal
;
39056 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39057 unsigned HOST_WIDE_INT contents
;
39058 unsigned char remap
[2 * MAX_VECT_LEN
];
39060 bool ok
, same_halves
= false;
39062 if (GET_MODE_SIZE (d
->vmode
) == 16)
39064 if (d
->one_operand_p
)
39067 else if (GET_MODE_SIZE (d
->vmode
) == 32)
39071 /* For 32-byte modes allow even d->one_operand_p.
39072 The lack of cross-lane shuffling in some instructions
39073 might prevent a single insn shuffle. */
39075 dfinal
.testing_p
= true;
39076 /* If expand_vec_perm_interleave3 can expand this into
39077 a 3 insn sequence, give up and let it be expanded as
39078 3 insn sequence. While that is one insn longer,
39079 it doesn't need a memory operand and in the common
39080 case that both interleave low and high permutations
39081 with the same operands are adjacent needs 4 insns
39082 for both after CSE. */
39083 if (expand_vec_perm_interleave3 (&dfinal
))
39089 /* Examine from whence the elements come. */
39091 for (i
= 0; i
< nelt
; ++i
)
39092 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
39094 memset (remap
, 0xff, sizeof (remap
));
39097 if (GET_MODE_SIZE (d
->vmode
) == 16)
39099 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
39101 /* Split the two input vectors into 4 halves. */
39102 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
39107 /* If the elements from the low halves use interleave low, and similarly
39108 for interleave high. If the elements are from mis-matched halves, we
39109 can use shufps for V4SF/V4SI or do a DImode shuffle. */
39110 if ((contents
& (h1
| h3
)) == contents
)
39113 for (i
= 0; i
< nelt2
; ++i
)
39116 remap
[i
+ nelt
] = i
* 2 + 1;
39117 dremap
.perm
[i
* 2] = i
;
39118 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39120 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39121 dremap
.vmode
= V4SFmode
;
39123 else if ((contents
& (h2
| h4
)) == contents
)
39126 for (i
= 0; i
< nelt2
; ++i
)
39128 remap
[i
+ nelt2
] = i
* 2;
39129 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
39130 dremap
.perm
[i
* 2] = i
+ nelt2
;
39131 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
39133 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
39134 dremap
.vmode
= V4SFmode
;
39136 else if ((contents
& (h1
| h4
)) == contents
)
39139 for (i
= 0; i
< nelt2
; ++i
)
39142 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
39143 dremap
.perm
[i
] = i
;
39144 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
39149 dremap
.vmode
= V2DImode
;
39151 dremap
.perm
[0] = 0;
39152 dremap
.perm
[1] = 3;
39155 else if ((contents
& (h2
| h3
)) == contents
)
39158 for (i
= 0; i
< nelt2
; ++i
)
39160 remap
[i
+ nelt2
] = i
;
39161 remap
[i
+ nelt
] = i
+ nelt2
;
39162 dremap
.perm
[i
] = i
+ nelt2
;
39163 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
39168 dremap
.vmode
= V2DImode
;
39170 dremap
.perm
[0] = 1;
39171 dremap
.perm
[1] = 2;
39179 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
39180 unsigned HOST_WIDE_INT q
[8];
39181 unsigned int nonzero_halves
[4];
39183 /* Split the two input vectors into 8 quarters. */
39184 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
39185 for (i
= 1; i
< 8; ++i
)
39186 q
[i
] = q
[0] << (nelt4
* i
);
39187 for (i
= 0; i
< 4; ++i
)
39188 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
39190 nonzero_halves
[nzcnt
] = i
;
39196 gcc_assert (d
->one_operand_p
);
39197 nonzero_halves
[1] = nonzero_halves
[0];
39198 same_halves
= true;
39200 else if (d
->one_operand_p
)
39202 gcc_assert (nonzero_halves
[0] == 0);
39203 gcc_assert (nonzero_halves
[1] == 1);
39208 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
39210 /* Attempt to increase the likelihood that dfinal
39211 shuffle will be intra-lane. */
39212 char tmph
= nonzero_halves
[0];
39213 nonzero_halves
[0] = nonzero_halves
[1];
39214 nonzero_halves
[1] = tmph
;
39217 /* vperm2f128 or vperm2i128. */
39218 for (i
= 0; i
< nelt2
; ++i
)
39220 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
39221 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
39222 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
39223 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
39226 if (d
->vmode
!= V8SFmode
39227 && d
->vmode
!= V4DFmode
39228 && d
->vmode
!= V8SImode
)
39230 dremap
.vmode
= V8SImode
;
39232 for (i
= 0; i
< 4; ++i
)
39234 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
39235 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
39239 else if (d
->one_operand_p
)
39241 else if (TARGET_AVX2
39242 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
39245 for (i
= 0; i
< nelt4
; ++i
)
39248 remap
[i
+ nelt
] = i
* 2 + 1;
39249 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
39250 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
39251 dremap
.perm
[i
* 2] = i
;
39252 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
39253 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
39254 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
39257 else if (TARGET_AVX2
39258 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
39261 for (i
= 0; i
< nelt4
; ++i
)
39263 remap
[i
+ nelt4
] = i
* 2;
39264 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
39265 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
39266 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
39267 dremap
.perm
[i
* 2] = i
+ nelt4
;
39268 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
39269 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
39270 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
39277 /* Use the remapping array set up above to move the elements from their
39278 swizzled locations into their final destinations. */
39280 for (i
= 0; i
< nelt
; ++i
)
39282 unsigned e
= remap
[d
->perm
[i
]];
39283 gcc_assert (e
< nelt
);
39284 /* If same_halves is true, both halves of the remapped vector are the
39285 same. Avoid cross-lane accesses if possible. */
39286 if (same_halves
&& i
>= nelt2
)
39288 gcc_assert (e
< nelt2
);
39289 dfinal
.perm
[i
] = e
+ nelt2
;
39292 dfinal
.perm
[i
] = e
;
39294 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
39295 dfinal
.op1
= dfinal
.op0
;
39296 dfinal
.one_operand_p
= true;
39297 dremap
.target
= dfinal
.op0
;
39299 /* Test if the final remap can be done with a single insn. For V4SFmode or
39300 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39302 ok
= expand_vec_perm_1 (&dfinal
);
39303 seq
= get_insns ();
39312 if (dremap
.vmode
!= dfinal
.vmode
)
39314 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
39315 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
39316 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
39319 ok
= expand_vec_perm_1 (&dremap
);
39326 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39327 a single vector cross-lane permutation into vpermq followed
39328 by any of the single insn permutations. */
39331 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
39333 struct expand_vec_perm_d dremap
, dfinal
;
39334 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
39335 unsigned contents
[2];
39339 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
39340 && d
->one_operand_p
))
39345 for (i
= 0; i
< nelt2
; ++i
)
39347 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
39348 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
39351 for (i
= 0; i
< 2; ++i
)
39353 unsigned int cnt
= 0;
39354 for (j
= 0; j
< 4; ++j
)
39355 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
39363 dremap
.vmode
= V4DImode
;
39365 dremap
.target
= gen_reg_rtx (V4DImode
);
39366 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
39367 dremap
.op1
= dremap
.op0
;
39368 dremap
.one_operand_p
= true;
39369 for (i
= 0; i
< 2; ++i
)
39371 unsigned int cnt
= 0;
39372 for (j
= 0; j
< 4; ++j
)
39373 if ((contents
[i
] & (1u << j
)) != 0)
39374 dremap
.perm
[2 * i
+ cnt
++] = j
;
39375 for (; cnt
< 2; ++cnt
)
39376 dremap
.perm
[2 * i
+ cnt
] = 0;
39380 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
39381 dfinal
.op1
= dfinal
.op0
;
39382 dfinal
.one_operand_p
= true;
39383 for (i
= 0, j
= 0; i
< nelt
; ++i
)
39387 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
39388 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
39390 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
39391 dfinal
.perm
[i
] |= nelt4
;
39393 gcc_unreachable ();
39396 ok
= expand_vec_perm_1 (&dremap
);
39399 ok
= expand_vec_perm_1 (&dfinal
);
39405 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39406 a vector permutation using two instructions, vperm2f128 resp.
39407 vperm2i128 followed by any single in-lane permutation. */
39410 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
39412 struct expand_vec_perm_d dfirst
, dsecond
;
39413 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
39417 || GET_MODE_SIZE (d
->vmode
) != 32
39418 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
39422 dsecond
.one_operand_p
= false;
39423 dsecond
.testing_p
= true;
39425 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39426 immediate. For perm < 16 the second permutation uses
39427 d->op0 as first operand, for perm >= 16 it uses d->op1
39428 as first operand. The second operand is the result of
39430 for (perm
= 0; perm
< 32; perm
++)
39432 /* Ignore permutations which do not move anything cross-lane. */
39435 /* The second shuffle for e.g. V4DFmode has
39436 0123 and ABCD operands.
39437 Ignore AB23, as 23 is already in the second lane
39438 of the first operand. */
39439 if ((perm
& 0xc) == (1 << 2)) continue;
39440 /* And 01CD, as 01 is in the first lane of the first
39442 if ((perm
& 3) == 0) continue;
39443 /* And 4567, as then the vperm2[fi]128 doesn't change
39444 anything on the original 4567 second operand. */
39445 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
39449 /* The second shuffle for e.g. V4DFmode has
39450 4567 and ABCD operands.
39451 Ignore AB67, as 67 is already in the second lane
39452 of the first operand. */
39453 if ((perm
& 0xc) == (3 << 2)) continue;
39454 /* And 45CD, as 45 is in the first lane of the first
39456 if ((perm
& 3) == 2) continue;
39457 /* And 0123, as then the vperm2[fi]128 doesn't change
39458 anything on the original 0123 first operand. */
39459 if ((perm
& 0xf) == (1 << 2)) continue;
39462 for (i
= 0; i
< nelt
; i
++)
39464 j
= d
->perm
[i
] / nelt2
;
39465 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
39466 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
39467 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
39468 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
39476 ok
= expand_vec_perm_1 (&dsecond
);
39487 /* Found a usable second shuffle. dfirst will be
39488 vperm2f128 on d->op0 and d->op1. */
39489 dsecond
.testing_p
= false;
39491 dfirst
.target
= gen_reg_rtx (d
->vmode
);
39492 for (i
= 0; i
< nelt
; i
++)
39493 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
39494 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
39496 ok
= expand_vec_perm_1 (&dfirst
);
39499 /* And dsecond is some single insn shuffle, taking
39500 d->op0 and result of vperm2f128 (if perm < 16) or
39501 d->op1 and result of vperm2f128 (otherwise). */
39502 dsecond
.op1
= dfirst
.target
;
39504 dsecond
.op0
= dfirst
.op1
;
39506 ok
= expand_vec_perm_1 (&dsecond
);
39512 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39513 if (d
->one_operand_p
)
39520 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39521 a two vector permutation using 2 intra-lane interleave insns
39522 and cross-lane shuffle for 32-byte vectors. */
39525 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
39528 rtx (*gen
) (rtx
, rtx
, rtx
);
39530 if (d
->one_operand_p
)
39532 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
39534 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
39540 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
39542 for (i
= 0; i
< nelt
; i
+= 2)
39543 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
39544 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
39554 gen
= gen_vec_interleave_highv32qi
;
39556 gen
= gen_vec_interleave_lowv32qi
;
39560 gen
= gen_vec_interleave_highv16hi
;
39562 gen
= gen_vec_interleave_lowv16hi
;
39566 gen
= gen_vec_interleave_highv8si
;
39568 gen
= gen_vec_interleave_lowv8si
;
39572 gen
= gen_vec_interleave_highv4di
;
39574 gen
= gen_vec_interleave_lowv4di
;
39578 gen
= gen_vec_interleave_highv8sf
;
39580 gen
= gen_vec_interleave_lowv8sf
;
39584 gen
= gen_vec_interleave_highv4df
;
39586 gen
= gen_vec_interleave_lowv4df
;
39589 gcc_unreachable ();
39592 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
39596 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39597 a single vector permutation using a single intra-lane vector
39598 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39599 the non-swapped and swapped vectors together. */
39602 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
39604 struct expand_vec_perm_d dfirst
, dsecond
;
39605 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
39608 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
39612 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
39613 || !d
->one_operand_p
)
39617 for (i
= 0; i
< nelt
; i
++)
39618 dfirst
.perm
[i
] = 0xff;
39619 for (i
= 0, msk
= 0; i
< nelt
; i
++)
39621 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
39622 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
39624 dfirst
.perm
[j
] = d
->perm
[i
];
39628 for (i
= 0; i
< nelt
; i
++)
39629 if (dfirst
.perm
[i
] == 0xff)
39630 dfirst
.perm
[i
] = i
;
39633 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39636 ok
= expand_vec_perm_1 (&dfirst
);
39637 seq
= get_insns ();
39649 dsecond
.op0
= dfirst
.target
;
39650 dsecond
.op1
= dfirst
.target
;
39651 dsecond
.one_operand_p
= true;
39652 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39653 for (i
= 0; i
< nelt
; i
++)
39654 dsecond
.perm
[i
] = i
^ nelt2
;
39656 ok
= expand_vec_perm_1 (&dsecond
);
39659 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
39660 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
39664 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39665 permutation using two vperm2f128, followed by a vshufpd insn blending
39666 the two vectors together. */
39669 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
39671 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
39674 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
39684 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
39685 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
39686 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
39687 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
39688 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
39689 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
39690 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
39691 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
39692 dthird
.perm
[0] = (d
->perm
[0] % 2);
39693 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
39694 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
39695 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
39697 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
39698 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
39699 dthird
.op0
= dfirst
.target
;
39700 dthird
.op1
= dsecond
.target
;
39701 dthird
.one_operand_p
= false;
39703 canonicalize_perm (&dfirst
);
39704 canonicalize_perm (&dsecond
);
39706 ok
= expand_vec_perm_1 (&dfirst
)
39707 && expand_vec_perm_1 (&dsecond
)
39708 && expand_vec_perm_1 (&dthird
);
39715 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39716 permutation with two pshufb insns and an ior. We should have already
39717 failed all two instruction sequences. */
39720 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
39722 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
39723 unsigned int i
, nelt
, eltsz
;
39725 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
39727 gcc_assert (!d
->one_operand_p
);
39730 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39732 /* Generate two permutation masks. If the required element is within
39733 the given vector it is shuffled into the proper lane. If the required
39734 element is in the other vector, force a zero into the lane by setting
39735 bit 7 in the permutation mask. */
39736 m128
= GEN_INT (-128);
39737 for (i
= 0; i
< nelt
; ++i
)
39739 unsigned j
, e
= d
->perm
[i
];
39740 unsigned which
= (e
>= nelt
);
39744 for (j
= 0; j
< eltsz
; ++j
)
39746 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
39747 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
39751 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
39752 vperm
= force_reg (V16QImode
, vperm
);
39754 l
= gen_reg_rtx (V16QImode
);
39755 op
= gen_lowpart (V16QImode
, d
->op0
);
39756 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
39758 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
39759 vperm
= force_reg (V16QImode
, vperm
);
39761 h
= gen_reg_rtx (V16QImode
);
39762 op
= gen_lowpart (V16QImode
, d
->op1
);
39763 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
39765 op
= gen_lowpart (V16QImode
, d
->target
);
39766 emit_insn (gen_iorv16qi3 (op
, l
, h
));
39771 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
39772 with two vpshufb insns, vpermq and vpor. We should have already failed
39773 all two or three instruction sequences. */
39776 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
39778 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
39779 unsigned int i
, nelt
, eltsz
;
39782 || !d
->one_operand_p
39783 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39790 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39792 /* Generate two permutation masks. If the required element is within
39793 the same lane, it is shuffled in. If the required element from the
39794 other lane, force a zero by setting bit 7 in the permutation mask.
39795 In the other mask the mask has non-negative elements if element
39796 is requested from the other lane, but also moved to the other lane,
39797 so that the result of vpshufb can have the two V2TImode halves
39799 m128
= GEN_INT (-128);
39800 for (i
= 0; i
< nelt
; ++i
)
39802 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39803 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39805 for (j
= 0; j
< eltsz
; ++j
)
39807 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
39808 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
39812 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39813 vperm
= force_reg (V32QImode
, vperm
);
39815 h
= gen_reg_rtx (V32QImode
);
39816 op
= gen_lowpart (V32QImode
, d
->op0
);
39817 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39819 /* Swap the 128-byte lanes of h into hp. */
39820 hp
= gen_reg_rtx (V4DImode
);
39821 op
= gen_lowpart (V4DImode
, h
);
39822 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
39825 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39826 vperm
= force_reg (V32QImode
, vperm
);
39828 l
= gen_reg_rtx (V32QImode
);
39829 op
= gen_lowpart (V32QImode
, d
->op0
);
39830 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39832 op
= gen_lowpart (V32QImode
, d
->target
);
39833 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
39838 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
39839 and extract-odd permutations of two V32QImode and V16QImode operand
39840 with two vpshufb insns, vpor and vpermq. We should have already
39841 failed all two or three instruction sequences. */
39844 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
39846 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
39847 unsigned int i
, nelt
, eltsz
;
39850 || d
->one_operand_p
39851 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39854 for (i
= 0; i
< d
->nelt
; ++i
)
39855 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
39862 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39864 /* Generate two permutation masks. In the first permutation mask
39865 the first quarter will contain indexes for the first half
39866 of the op0, the second quarter will contain bit 7 set, third quarter
39867 will contain indexes for the second half of the op0 and the
39868 last quarter bit 7 set. In the second permutation mask
39869 the first quarter will contain bit 7 set, the second quarter
39870 indexes for the first half of the op1, the third quarter bit 7 set
39871 and last quarter indexes for the second half of the op1.
39872 I.e. the first mask e.g. for V32QImode extract even will be:
39873 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
39874 (all values masked with 0xf except for -128) and second mask
39875 for extract even will be
39876 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
39877 m128
= GEN_INT (-128);
39878 for (i
= 0; i
< nelt
; ++i
)
39880 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39881 unsigned which
= d
->perm
[i
] >= nelt
;
39882 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
39884 for (j
= 0; j
< eltsz
; ++j
)
39886 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
39887 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
39891 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
39892 vperm
= force_reg (V32QImode
, vperm
);
39894 l
= gen_reg_rtx (V32QImode
);
39895 op
= gen_lowpart (V32QImode
, d
->op0
);
39896 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
39898 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
39899 vperm
= force_reg (V32QImode
, vperm
);
39901 h
= gen_reg_rtx (V32QImode
);
39902 op
= gen_lowpart (V32QImode
, d
->op1
);
39903 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
39905 ior
= gen_reg_rtx (V32QImode
);
39906 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
39908 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
39909 op
= gen_lowpart (V4DImode
, d
->target
);
39910 ior
= gen_lowpart (V4DImode
, ior
);
39911 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
39912 const1_rtx
, GEN_INT (3)));
39917 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
39918 and extract-odd permutations. */
39921 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
39928 t1
= gen_reg_rtx (V4DFmode
);
39929 t2
= gen_reg_rtx (V4DFmode
);
39931 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39932 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39933 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39935 /* Now an unpck[lh]pd will produce the result required. */
39937 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
39939 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
39945 int mask
= odd
? 0xdd : 0x88;
39947 t1
= gen_reg_rtx (V8SFmode
);
39948 t2
= gen_reg_rtx (V8SFmode
);
39949 t3
= gen_reg_rtx (V8SFmode
);
39951 /* Shuffle within the 128-bit lanes to produce:
39952 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
39953 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
39956 /* Shuffle the lanes around to produce:
39957 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
39958 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
39961 /* Shuffle within the 128-bit lanes to produce:
39962 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
39963 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
39965 /* Shuffle within the 128-bit lanes to produce:
39966 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
39967 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
39969 /* Shuffle the lanes around to produce:
39970 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
39971 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
39980 /* These are always directly implementable by expand_vec_perm_1. */
39981 gcc_unreachable ();
39985 return expand_vec_perm_pshufb2 (d
);
39988 /* We need 2*log2(N)-1 operations to achieve odd/even
39989 with interleave. */
39990 t1
= gen_reg_rtx (V8HImode
);
39991 t2
= gen_reg_rtx (V8HImode
);
39992 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
39993 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
39994 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
39995 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
39997 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
39999 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
40006 return expand_vec_perm_pshufb2 (d
);
40009 t1
= gen_reg_rtx (V16QImode
);
40010 t2
= gen_reg_rtx (V16QImode
);
40011 t3
= gen_reg_rtx (V16QImode
);
40012 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
40013 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
40014 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
40015 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
40016 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
40017 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
40019 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
40021 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
40028 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
40033 struct expand_vec_perm_d d_copy
= *d
;
40034 d_copy
.vmode
= V4DFmode
;
40035 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
40036 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
40037 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
40038 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40041 t1
= gen_reg_rtx (V4DImode
);
40042 t2
= gen_reg_rtx (V4DImode
);
40044 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40045 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
40046 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
40048 /* Now an vpunpck[lh]qdq will produce the result required. */
40050 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
40052 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
40059 struct expand_vec_perm_d d_copy
= *d
;
40060 d_copy
.vmode
= V8SFmode
;
40061 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
40062 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
40063 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
40064 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
40067 t1
= gen_reg_rtx (V8SImode
);
40068 t2
= gen_reg_rtx (V8SImode
);
40070 /* Shuffle the lanes around into
40071 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40072 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
40073 gen_lowpart (V4DImode
, d
->op0
),
40074 gen_lowpart (V4DImode
, d
->op1
),
40076 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
40077 gen_lowpart (V4DImode
, d
->op0
),
40078 gen_lowpart (V4DImode
, d
->op1
),
40081 /* Swap the 2nd and 3rd position in each lane into
40082 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40083 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
40084 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40085 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
40086 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40088 /* Now an vpunpck[lh]qdq will produce
40089 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40091 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
40092 gen_lowpart (V4DImode
, t1
),
40093 gen_lowpart (V4DImode
, t2
));
40095 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
40096 gen_lowpart (V4DImode
, t1
),
40097 gen_lowpart (V4DImode
, t2
));
40102 gcc_unreachable ();
40108 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40109 extract-even and extract-odd permutations. */
40112 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
40114 unsigned i
, odd
, nelt
= d
->nelt
;
40117 if (odd
!= 0 && odd
!= 1)
40120 for (i
= 1; i
< nelt
; ++i
)
40121 if (d
->perm
[i
] != 2 * i
+ odd
)
40124 return expand_vec_perm_even_odd_1 (d
, odd
);
40127 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40128 permutations. We assume that expand_vec_perm_1 has already failed. */
40131 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
40133 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
40134 enum machine_mode vmode
= d
->vmode
;
40135 unsigned char perm2
[4];
40143 /* These are special-cased in sse.md so that we can optionally
40144 use the vbroadcast instruction. They expand to two insns
40145 if the input happens to be in a register. */
40146 gcc_unreachable ();
40152 /* These are always implementable using standard shuffle patterns. */
40153 gcc_unreachable ();
40157 /* These can be implemented via interleave. We save one insn by
40158 stopping once we have promoted to V4SImode and then use pshufd. */
40162 rtx (*gen
) (rtx
, rtx
, rtx
)
40163 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
40164 : gen_vec_interleave_lowv8hi
;
40168 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
40169 : gen_vec_interleave_highv8hi
;
40174 dest
= gen_reg_rtx (vmode
);
40175 emit_insn (gen (dest
, op0
, op0
));
40176 vmode
= get_mode_wider_vector (vmode
);
40177 op0
= gen_lowpart (vmode
, dest
);
40179 while (vmode
!= V4SImode
);
40181 memset (perm2
, elt
, 4);
40182 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
40191 /* For AVX2 broadcasts of the first element vpbroadcast* or
40192 vpermq should be used by expand_vec_perm_1. */
40193 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
40197 gcc_unreachable ();
40201 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40202 broadcast permutations. */
40205 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
40207 unsigned i
, elt
, nelt
= d
->nelt
;
40209 if (!d
->one_operand_p
)
40213 for (i
= 1; i
< nelt
; ++i
)
40214 if (d
->perm
[i
] != elt
)
40217 return expand_vec_perm_broadcast_1 (d
);
40220 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40221 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40222 all the shorter instruction sequences. */
40225 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
40227 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
40228 unsigned int i
, nelt
, eltsz
;
40232 || d
->one_operand_p
40233 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
40240 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
40242 /* Generate 4 permutation masks. If the required element is within
40243 the same lane, it is shuffled in. If the required element from the
40244 other lane, force a zero by setting bit 7 in the permutation mask.
40245 In the other mask the mask has non-negative elements if element
40246 is requested from the other lane, but also moved to the other lane,
40247 so that the result of vpshufb can have the two V2TImode halves
40249 m128
= GEN_INT (-128);
40250 for (i
= 0; i
< 32; ++i
)
40252 rperm
[0][i
] = m128
;
40253 rperm
[1][i
] = m128
;
40254 rperm
[2][i
] = m128
;
40255 rperm
[3][i
] = m128
;
40261 for (i
= 0; i
< nelt
; ++i
)
40263 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
40264 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
40265 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
40267 for (j
= 0; j
< eltsz
; ++j
)
40268 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
40269 used
[which
] = true;
40272 for (i
= 0; i
< 2; ++i
)
40274 if (!used
[2 * i
+ 1])
40279 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
40280 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
40281 vperm
= force_reg (V32QImode
, vperm
);
40282 h
[i
] = gen_reg_rtx (V32QImode
);
40283 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40284 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
40287 /* Swap the 128-byte lanes of h[X]. */
40288 for (i
= 0; i
< 2; ++i
)
40290 if (h
[i
] == NULL_RTX
)
40292 op
= gen_reg_rtx (V4DImode
);
40293 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
40294 const2_rtx
, GEN_INT (3), const0_rtx
,
40296 h
[i
] = gen_lowpart (V32QImode
, op
);
40299 for (i
= 0; i
< 2; ++i
)
40306 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
40307 vperm
= force_reg (V32QImode
, vperm
);
40308 l
[i
] = gen_reg_rtx (V32QImode
);
40309 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
40310 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
40313 for (i
= 0; i
< 2; ++i
)
40317 op
= gen_reg_rtx (V32QImode
);
40318 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
40325 gcc_assert (l
[0] && l
[1]);
40326 op
= gen_lowpart (V32QImode
, d
->target
);
40327 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
40331 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40332 With all of the interface bits taken care of, perform the expansion
40333 in D and return true on success. */
40336 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
40338 /* Try a single instruction expansion. */
40339 if (expand_vec_perm_1 (d
))
40342 /* Try sequences of two instructions. */
40344 if (expand_vec_perm_pshuflw_pshufhw (d
))
40347 if (expand_vec_perm_palignr (d
))
40350 if (expand_vec_perm_interleave2 (d
))
40353 if (expand_vec_perm_broadcast (d
))
40356 if (expand_vec_perm_vpermq_perm_1 (d
))
40359 if (expand_vec_perm_vperm2f128 (d
))
40362 /* Try sequences of three instructions. */
40364 if (expand_vec_perm_2vperm2f128_vshuf (d
))
40367 if (expand_vec_perm_pshufb2 (d
))
40370 if (expand_vec_perm_interleave3 (d
))
40373 if (expand_vec_perm_vperm2f128_vblend (d
))
40376 /* Try sequences of four instructions. */
40378 if (expand_vec_perm_vpshufb2_vpermq (d
))
40381 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
40384 /* ??? Look for narrow permutations whose element orderings would
40385 allow the promotion to a wider mode. */
40387 /* ??? Look for sequences of interleave or a wider permute that place
40388 the data into the correct lanes for a half-vector shuffle like
40389 pshuf[lh]w or vpermilps. */
40391 /* ??? Look for sequences of interleave that produce the desired results.
40392 The combinatorics of punpck[lh] get pretty ugly... */
40394 if (expand_vec_perm_even_odd (d
))
40397 /* Even longer sequences. */
40398 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
40404 /* If a permutation only uses one operand, make it clear. Returns true
40405 if the permutation references both operands. */
40408 canonicalize_perm (struct expand_vec_perm_d
*d
)
40410 int i
, which
, nelt
= d
->nelt
;
40412 for (i
= which
= 0; i
< nelt
; ++i
)
40413 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
40415 d
->one_operand_p
= true;
40422 if (!rtx_equal_p (d
->op0
, d
->op1
))
40424 d
->one_operand_p
= false;
40427 /* The elements of PERM do not suggest that only the first operand
40428 is used, but both operands are identical. Allow easier matching
40429 of the permutation by folding the permutation into the single
40434 for (i
= 0; i
< nelt
; ++i
)
40435 d
->perm
[i
] &= nelt
- 1;
40444 return (which
== 3);
40448 ix86_expand_vec_perm_const (rtx operands
[4])
40450 struct expand_vec_perm_d d
;
40451 unsigned char perm
[MAX_VECT_LEN
];
40456 d
.target
= operands
[0];
40457 d
.op0
= operands
[1];
40458 d
.op1
= operands
[2];
40461 d
.vmode
= GET_MODE (d
.target
);
40462 gcc_assert (VECTOR_MODE_P (d
.vmode
));
40463 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40464 d
.testing_p
= false;
40466 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
40467 gcc_assert (XVECLEN (sel
, 0) == nelt
);
40468 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
40470 for (i
= 0; i
< nelt
; ++i
)
40472 rtx e
= XVECEXP (sel
, 0, i
);
40473 int ei
= INTVAL (e
) & (2 * nelt
- 1);
40478 two_args
= canonicalize_perm (&d
);
40480 if (ix86_expand_vec_perm_const_1 (&d
))
40483 /* If the selector says both arguments are needed, but the operands are the
40484 same, the above tried to expand with one_operand_p and flattened selector.
40485 If that didn't work, retry without one_operand_p; we succeeded with that
40487 if (two_args
&& d
.one_operand_p
)
40489 d
.one_operand_p
= false;
40490 memcpy (d
.perm
, perm
, sizeof (perm
));
40491 return ix86_expand_vec_perm_const_1 (&d
);
40497 /* Implement targetm.vectorize.vec_perm_const_ok. */
40500 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
40501 const unsigned char *sel
)
40503 struct expand_vec_perm_d d
;
40504 unsigned int i
, nelt
, which
;
40508 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40509 d
.testing_p
= true;
40511 /* Given sufficient ISA support we can just return true here
40512 for selected vector modes. */
40513 if (GET_MODE_SIZE (d
.vmode
) == 16)
40515 /* All implementable with a single vpperm insn. */
40518 /* All implementable with 2 pshufb + 1 ior. */
40521 /* All implementable with shufpd or unpck[lh]pd. */
40526 /* Extract the values from the vector CST into the permutation
40528 memcpy (d
.perm
, sel
, nelt
);
40529 for (i
= which
= 0; i
< nelt
; ++i
)
40531 unsigned char e
= d
.perm
[i
];
40532 gcc_assert (e
< 2 * nelt
);
40533 which
|= (e
< nelt
? 1 : 2);
40536 /* For all elements from second vector, fold the elements to first. */
40538 for (i
= 0; i
< nelt
; ++i
)
40541 /* Check whether the mask can be applied to the vector type. */
40542 d
.one_operand_p
= (which
!= 3);
40544 /* Implementable with shufps or pshufd. */
40545 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
40548 /* Otherwise we have to go through the motions and see if we can
40549 figure out how to generate the requested permutation. */
40550 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
40551 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
40552 if (!d
.one_operand_p
)
40553 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
40556 ret
= ix86_expand_vec_perm_const_1 (&d
);
40563 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
40565 struct expand_vec_perm_d d
;
40571 d
.vmode
= GET_MODE (targ
);
40572 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40573 d
.one_operand_p
= false;
40574 d
.testing_p
= false;
40576 for (i
= 0; i
< nelt
; ++i
)
40577 d
.perm
[i
] = i
* 2 + odd
;
40579 /* We'll either be able to implement the permutation directly... */
40580 if (expand_vec_perm_1 (&d
))
40583 /* ... or we use the special-case patterns. */
40584 expand_vec_perm_even_odd_1 (&d
, odd
);
40588 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
40590 struct expand_vec_perm_d d
;
40591 unsigned i
, nelt
, base
;
40597 d
.vmode
= GET_MODE (targ
);
40598 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
40599 d
.one_operand_p
= false;
40600 d
.testing_p
= false;
40602 base
= high_p
? nelt
/ 2 : 0;
40603 for (i
= 0; i
< nelt
/ 2; ++i
)
40605 d
.perm
[i
* 2] = i
+ base
;
40606 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
40609 /* Note that for AVX this isn't one instruction. */
40610 ok
= ix86_expand_vec_perm_const_1 (&d
);
40615 /* Expand a vector operation CODE for a V*QImode in terms of the
40616 same operation on V*HImode. */
40619 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
40621 enum machine_mode qimode
= GET_MODE (dest
);
40622 enum machine_mode himode
;
40623 rtx (*gen_il
) (rtx
, rtx
, rtx
);
40624 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
40625 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
40626 struct expand_vec_perm_d d
;
40627 bool ok
, full_interleave
;
40628 bool uns_p
= false;
40635 gen_il
= gen_vec_interleave_lowv16qi
;
40636 gen_ih
= gen_vec_interleave_highv16qi
;
40639 himode
= V16HImode
;
40640 gen_il
= gen_avx2_interleave_lowv32qi
;
40641 gen_ih
= gen_avx2_interleave_highv32qi
;
40644 gcc_unreachable ();
40647 op2_l
= op2_h
= op2
;
40651 /* Unpack data such that we've got a source byte in each low byte of
40652 each word. We don't care what goes into the high byte of each word.
40653 Rather than trying to get zero in there, most convenient is to let
40654 it be a copy of the low byte. */
40655 op2_l
= gen_reg_rtx (qimode
);
40656 op2_h
= gen_reg_rtx (qimode
);
40657 emit_insn (gen_il (op2_l
, op2
, op2
));
40658 emit_insn (gen_ih (op2_h
, op2
, op2
));
40661 op1_l
= gen_reg_rtx (qimode
);
40662 op1_h
= gen_reg_rtx (qimode
);
40663 emit_insn (gen_il (op1_l
, op1
, op1
));
40664 emit_insn (gen_ih (op1_h
, op1
, op1
));
40665 full_interleave
= qimode
== V16QImode
;
40673 op1_l
= gen_reg_rtx (himode
);
40674 op1_h
= gen_reg_rtx (himode
);
40675 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
40676 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
40677 full_interleave
= true;
40680 gcc_unreachable ();
40683 /* Perform the operation. */
40684 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
40686 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
40688 gcc_assert (res_l
&& res_h
);
40690 /* Merge the data back into the right place. */
40692 d
.op0
= gen_lowpart (qimode
, res_l
);
40693 d
.op1
= gen_lowpart (qimode
, res_h
);
40695 d
.nelt
= GET_MODE_NUNITS (qimode
);
40696 d
.one_operand_p
= false;
40697 d
.testing_p
= false;
40699 if (full_interleave
)
40701 /* For SSE2, we used an full interleave, so the desired
40702 results are in the even elements. */
40703 for (i
= 0; i
< 32; ++i
)
40708 /* For AVX, the interleave used above was not cross-lane. So the
40709 extraction is evens but with the second and third quarter swapped.
40710 Happily, that is even one insn shorter than even extraction. */
40711 for (i
= 0; i
< 32; ++i
)
40712 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
40715 ok
= ix86_expand_vec_perm_const_1 (&d
);
40718 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40719 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
40723 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
40724 bool uns_p
, bool odd_p
)
40726 enum machine_mode mode
= GET_MODE (op1
);
40727 enum machine_mode wmode
= GET_MODE (dest
);
40730 /* We only play even/odd games with vectors of SImode. */
40731 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
40733 /* If we're looking for the odd results, shift those members down to
40734 the even slots. For some cpus this is faster than a PSHUFD. */
40737 if (TARGET_XOP
&& mode
== V4SImode
)
40739 x
= force_reg (wmode
, CONST0_RTX (wmode
));
40740 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
40744 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
40745 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
40746 x
, NULL
, 1, OPTAB_DIRECT
);
40747 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
40748 x
, NULL
, 1, OPTAB_DIRECT
);
40749 op1
= gen_lowpart (mode
, op1
);
40750 op2
= gen_lowpart (mode
, op2
);
40753 if (mode
== V8SImode
)
40756 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
40758 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
40761 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
40762 else if (TARGET_SSE4_1
)
40763 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
40766 rtx s1
, s2
, t0
, t1
, t2
;
40768 /* The easiest way to implement this without PMULDQ is to go through
40769 the motions as if we are performing a full 64-bit multiply. With
40770 the exception that we need to do less shuffling of the elements. */
40772 /* Compute the sign-extension, aka highparts, of the two operands. */
40773 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40774 op1
, pc_rtx
, pc_rtx
);
40775 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
40776 op2
, pc_rtx
, pc_rtx
);
40778 /* Multiply LO(A) * HI(B), and vice-versa. */
40779 t1
= gen_reg_rtx (wmode
);
40780 t2
= gen_reg_rtx (wmode
);
40781 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
40782 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
40784 /* Multiply LO(A) * LO(B). */
40785 t0
= gen_reg_rtx (wmode
);
40786 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
40788 /* Combine and shift the highparts into place. */
40789 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
40790 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
40793 /* Combine high and low parts. */
40794 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
40801 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
40802 bool uns_p
, bool high_p
)
40804 enum machine_mode wmode
= GET_MODE (dest
);
40805 enum machine_mode mode
= GET_MODE (op1
);
40806 rtx t1
, t2
, t3
, t4
, mask
;
40811 t1
= gen_reg_rtx (mode
);
40812 t2
= gen_reg_rtx (mode
);
40813 if (TARGET_XOP
&& !uns_p
)
40815 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
40816 shuffle the elements once so that all elements are in the right
40817 place for immediate use: { A C B D }. */
40818 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
40819 const1_rtx
, GEN_INT (3)));
40820 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
40821 const1_rtx
, GEN_INT (3)));
40825 /* Put the elements into place for the multiply. */
40826 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
40827 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
40830 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
40834 /* Shuffle the elements between the lanes. After this we
40835 have { A B E F | C D G H } for each operand. */
40836 t1
= gen_reg_rtx (V4DImode
);
40837 t2
= gen_reg_rtx (V4DImode
);
40838 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
40839 const0_rtx
, const2_rtx
,
40840 const1_rtx
, GEN_INT (3)));
40841 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
40842 const0_rtx
, const2_rtx
,
40843 const1_rtx
, GEN_INT (3)));
40845 /* Shuffle the elements within the lanes. After this we
40846 have { A A B B | C C D D } or { E E F F | G G H H }. */
40847 t3
= gen_reg_rtx (V8SImode
);
40848 t4
= gen_reg_rtx (V8SImode
);
40849 mask
= GEN_INT (high_p
40850 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
40851 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
40852 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
40853 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
40855 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
40860 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
40861 uns_p
, OPTAB_DIRECT
);
40862 t2
= expand_binop (mode
,
40863 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
40864 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
40865 gcc_assert (t1
&& t2
);
40867 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
40872 t1
= gen_reg_rtx (wmode
);
40873 t2
= gen_reg_rtx (wmode
);
40874 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
40875 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
40877 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
40881 gcc_unreachable ();
40886 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
40890 res_1
= gen_reg_rtx (V4SImode
);
40891 res_2
= gen_reg_rtx (V4SImode
);
40892 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
40893 op1
, op2
, true, false);
40894 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
40895 op1
, op2
, true, true);
40897 /* Move the results in element 2 down to element 1; we don't care
40898 what goes in elements 2 and 3. Then we can merge the parts
40899 back together with an interleave.
40901 Note that two other sequences were tried:
40902 (1) Use interleaves at the start instead of psrldq, which allows
40903 us to use a single shufps to merge things back at the end.
40904 (2) Use shufps here to combine the two vectors, then pshufd to
40905 put the elements in the correct order.
40906 In both cases the cost of the reformatting stall was too high
40907 and the overall sequence slower. */
40909 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
40910 const0_rtx
, const0_rtx
));
40911 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
40912 const0_rtx
, const0_rtx
));
40913 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
40915 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
40919 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
40921 enum machine_mode mode
= GET_MODE (op0
);
40922 rtx t1
, t2
, t3
, t4
, t5
, t6
;
40924 if (TARGET_XOP
&& mode
== V2DImode
)
40926 /* op1: A,B,C,D, op2: E,F,G,H */
40927 op1
= gen_lowpart (V4SImode
, op1
);
40928 op2
= gen_lowpart (V4SImode
, op2
);
40930 t1
= gen_reg_rtx (V4SImode
);
40931 t2
= gen_reg_rtx (V4SImode
);
40932 t3
= gen_reg_rtx (V2DImode
);
40933 t4
= gen_reg_rtx (V2DImode
);
40936 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
40942 /* t2: (B*E),(A*F),(D*G),(C*H) */
40943 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
40945 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
40946 emit_insn (gen_xop_phadddq (t3
, t2
));
40948 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
40949 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
40951 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
40952 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
40956 enum machine_mode nmode
;
40957 rtx (*umul
) (rtx
, rtx
, rtx
);
40959 if (mode
== V2DImode
)
40961 umul
= gen_vec_widen_umult_even_v4si
;
40964 else if (mode
== V4DImode
)
40966 umul
= gen_vec_widen_umult_even_v8si
;
40970 gcc_unreachable ();
40973 /* Multiply low parts. */
40974 t1
= gen_reg_rtx (mode
);
40975 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
40977 /* Shift input vectors right 32 bits so we can multiply high parts. */
40979 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
40980 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
40982 /* Multiply high parts by low parts. */
40983 t4
= gen_reg_rtx (mode
);
40984 t5
= gen_reg_rtx (mode
);
40985 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
40986 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
40988 /* Combine and shift the highparts back. */
40989 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
40990 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
40992 /* Combine high and low parts. */
40993 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
40996 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
40997 gen_rtx_MULT (mode
, op1
, op2
));
41000 /* Expand an insert into a vector register through pinsr insn.
41001 Return true if successful. */
41004 ix86_expand_pinsr (rtx
*operands
)
41006 rtx dst
= operands
[0];
41007 rtx src
= operands
[3];
41009 unsigned int size
= INTVAL (operands
[1]);
41010 unsigned int pos
= INTVAL (operands
[2]);
41012 if (GET_CODE (dst
) == SUBREG
)
41014 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
41015 dst
= SUBREG_REG (dst
);
41018 if (GET_CODE (src
) == SUBREG
)
41019 src
= SUBREG_REG (src
);
41021 switch (GET_MODE (dst
))
41028 enum machine_mode srcmode
, dstmode
;
41029 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
41031 srcmode
= mode_for_size (size
, MODE_INT
, 0);
41036 if (!TARGET_SSE4_1
)
41038 dstmode
= V16QImode
;
41039 pinsr
= gen_sse4_1_pinsrb
;
41045 dstmode
= V8HImode
;
41046 pinsr
= gen_sse2_pinsrw
;
41050 if (!TARGET_SSE4_1
)
41052 dstmode
= V4SImode
;
41053 pinsr
= gen_sse4_1_pinsrd
;
41057 gcc_assert (TARGET_64BIT
);
41058 if (!TARGET_SSE4_1
)
41060 dstmode
= V2DImode
;
41061 pinsr
= gen_sse4_1_pinsrq
;
41068 dst
= gen_lowpart (dstmode
, dst
);
41069 src
= gen_lowpart (srcmode
, src
);
41073 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
41082 /* This function returns the calling abi specific va_list type node.
41083 It returns the FNDECL specific va_list type. */
41086 ix86_fn_abi_va_list (tree fndecl
)
41089 return va_list_type_node
;
41090 gcc_assert (fndecl
!= NULL_TREE
);
41092 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
41093 return ms_va_list_type_node
;
41095 return sysv_va_list_type_node
;
41098 /* Returns the canonical va_list type specified by TYPE. If there
41099 is no valid TYPE provided, it return NULL_TREE. */
41102 ix86_canonical_va_list_type (tree type
)
41106 /* Resolve references and pointers to va_list type. */
41107 if (TREE_CODE (type
) == MEM_REF
)
41108 type
= TREE_TYPE (type
);
41109 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
41110 type
= TREE_TYPE (type
);
41111 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
41112 type
= TREE_TYPE (type
);
41114 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
41116 wtype
= va_list_type_node
;
41117 gcc_assert (wtype
!= NULL_TREE
);
41119 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41121 /* If va_list is an array type, the argument may have decayed
41122 to a pointer type, e.g. by being passed to another function.
41123 In that case, unwrap both types so that we can compare the
41124 underlying records. */
41125 if (TREE_CODE (htype
) == ARRAY_TYPE
41126 || POINTER_TYPE_P (htype
))
41128 wtype
= TREE_TYPE (wtype
);
41129 htype
= TREE_TYPE (htype
);
41132 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41133 return va_list_type_node
;
41134 wtype
= sysv_va_list_type_node
;
41135 gcc_assert (wtype
!= NULL_TREE
);
41137 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41139 /* If va_list is an array type, the argument may have decayed
41140 to a pointer type, e.g. by being passed to another function.
41141 In that case, unwrap both types so that we can compare the
41142 underlying records. */
41143 if (TREE_CODE (htype
) == ARRAY_TYPE
41144 || POINTER_TYPE_P (htype
))
41146 wtype
= TREE_TYPE (wtype
);
41147 htype
= TREE_TYPE (htype
);
41150 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41151 return sysv_va_list_type_node
;
41152 wtype
= ms_va_list_type_node
;
41153 gcc_assert (wtype
!= NULL_TREE
);
41155 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
41157 /* If va_list is an array type, the argument may have decayed
41158 to a pointer type, e.g. by being passed to another function.
41159 In that case, unwrap both types so that we can compare the
41160 underlying records. */
41161 if (TREE_CODE (htype
) == ARRAY_TYPE
41162 || POINTER_TYPE_P (htype
))
41164 wtype
= TREE_TYPE (wtype
);
41165 htype
= TREE_TYPE (htype
);
41168 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
41169 return ms_va_list_type_node
;
41172 return std_canonical_va_list_type (type
);
41175 /* Iterate through the target-specific builtin types for va_list.
41176 IDX denotes the iterator, *PTREE is set to the result type of
41177 the va_list builtin, and *PNAME to its internal type.
41178 Returns zero if there is no element for this index, otherwise
41179 IDX should be increased upon the next call.
41180 Note, do not iterate a base builtin's name like __builtin_va_list.
41181 Used from c_common_nodes_and_builtins. */
41184 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
41194 *ptree
= ms_va_list_type_node
;
41195 *pname
= "__builtin_ms_va_list";
41199 *ptree
= sysv_va_list_type_node
;
41200 *pname
= "__builtin_sysv_va_list";
41208 #undef TARGET_SCHED_DISPATCH
41209 #define TARGET_SCHED_DISPATCH has_dispatch
41210 #undef TARGET_SCHED_DISPATCH_DO
41211 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41212 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41213 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41214 #undef TARGET_SCHED_REORDER
41215 #define TARGET_SCHED_REORDER ix86_sched_reorder
41216 #undef TARGET_SCHED_ADJUST_PRIORITY
41217 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41218 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41219 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
41221 /* The size of the dispatch window is the total number of bytes of
41222 object code allowed in a window. */
41223 #define DISPATCH_WINDOW_SIZE 16
41225 /* Number of dispatch windows considered for scheduling. */
41226 #define MAX_DISPATCH_WINDOWS 3
41228 /* Maximum number of instructions in a window. */
41231 /* Maximum number of immediate operands in a window. */
41234 /* Maximum number of immediate bits allowed in a window. */
41235 #define MAX_IMM_SIZE 128
41237 /* Maximum number of 32 bit immediates allowed in a window. */
41238 #define MAX_IMM_32 4
41240 /* Maximum number of 64 bit immediates allowed in a window. */
41241 #define MAX_IMM_64 2
41243 /* Maximum total of loads or prefetches allowed in a window. */
41246 /* Maximum total of stores allowed in a window. */
41247 #define MAX_STORE 1
41253 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
41254 enum dispatch_group
{
41269 /* Number of allowable groups in a dispatch window. It is an array
41270 indexed by dispatch_group enum. 100 is used as a big number,
41271 because the number of these kind of operations does not have any
41272 effect in dispatch window, but we need them for other reasons in
41274 static unsigned int num_allowable_groups
[disp_last
] = {
41275 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
41278 char group_name
[disp_last
+ 1][16] = {
41279 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41280 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41281 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41284 /* Instruction path. */
41287 path_single
, /* Single micro op. */
41288 path_double
, /* Double micro op. */
41289 path_multi
, /* Instructions with more than 2 micro op.. */
41293 /* sched_insn_info defines a window to the instructions scheduled in
41294 the basic block. It contains a pointer to the insn_info table and
41295 the instruction scheduled.
41297 Windows are allocated for each basic block and are linked
41299 typedef struct sched_insn_info_s
{
41301 enum dispatch_group group
;
41302 enum insn_path path
;
41307 /* Linked list of dispatch windows. This is a two way list of
41308 dispatch windows of a basic block. It contains information about
41309 the number of uops in the window and the total number of
41310 instructions and of bytes in the object code for this dispatch
41312 typedef struct dispatch_windows_s
{
41313 int num_insn
; /* Number of insn in the window. */
41314 int num_uops
; /* Number of uops in the window. */
41315 int window_size
; /* Number of bytes in the window. */
41316 int window_num
; /* Window number between 0 or 1. */
41317 int num_imm
; /* Number of immediates in an insn. */
41318 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
41319 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
41320 int imm_size
; /* Total immediates in the window. */
41321 int num_loads
; /* Total memory loads in the window. */
41322 int num_stores
; /* Total memory stores in the window. */
41323 int violation
; /* Violation exists in window. */
41324 sched_insn_info
*window
; /* Pointer to the window. */
41325 struct dispatch_windows_s
*next
;
41326 struct dispatch_windows_s
*prev
;
41327 } dispatch_windows
;
41329 /* Immediate valuse used in an insn. */
41330 typedef struct imm_info_s
41337 static dispatch_windows
*dispatch_window_list
;
41338 static dispatch_windows
*dispatch_window_list1
;
41340 /* Get dispatch group of insn. */
41342 static enum dispatch_group
41343 get_mem_group (rtx insn
)
41345 enum attr_memory memory
;
41347 if (INSN_CODE (insn
) < 0)
41348 return disp_no_group
;
41349 memory
= get_attr_memory (insn
);
41350 if (memory
== MEMORY_STORE
)
41353 if (memory
== MEMORY_LOAD
)
41356 if (memory
== MEMORY_BOTH
)
41357 return disp_load_store
;
41359 return disp_no_group
;
41362 /* Return true if insn is a compare instruction. */
41367 enum attr_type type
;
41369 type
= get_attr_type (insn
);
41370 return (type
== TYPE_TEST
41371 || type
== TYPE_ICMP
41372 || type
== TYPE_FCMP
41373 || GET_CODE (PATTERN (insn
)) == COMPARE
);
41376 /* Return true if a dispatch violation encountered. */
41379 dispatch_violation (void)
41381 if (dispatch_window_list
->next
)
41382 return dispatch_window_list
->next
->violation
;
41383 return dispatch_window_list
->violation
;
41386 /* Return true if insn is a branch instruction. */
41389 is_branch (rtx insn
)
41391 return (CALL_P (insn
) || JUMP_P (insn
));
41394 /* Return true if insn is a prefetch instruction. */
41397 is_prefetch (rtx insn
)
41399 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
41402 /* This function initializes a dispatch window and the list container holding a
41403 pointer to the window. */
41406 init_window (int window_num
)
41409 dispatch_windows
*new_list
;
41411 if (window_num
== 0)
41412 new_list
= dispatch_window_list
;
41414 new_list
= dispatch_window_list1
;
41416 new_list
->num_insn
= 0;
41417 new_list
->num_uops
= 0;
41418 new_list
->window_size
= 0;
41419 new_list
->next
= NULL
;
41420 new_list
->prev
= NULL
;
41421 new_list
->window_num
= window_num
;
41422 new_list
->num_imm
= 0;
41423 new_list
->num_imm_32
= 0;
41424 new_list
->num_imm_64
= 0;
41425 new_list
->imm_size
= 0;
41426 new_list
->num_loads
= 0;
41427 new_list
->num_stores
= 0;
41428 new_list
->violation
= false;
41430 for (i
= 0; i
< MAX_INSN
; i
++)
41432 new_list
->window
[i
].insn
= NULL
;
41433 new_list
->window
[i
].group
= disp_no_group
;
41434 new_list
->window
[i
].path
= no_path
;
41435 new_list
->window
[i
].byte_len
= 0;
41436 new_list
->window
[i
].imm_bytes
= 0;
41441 /* This function allocates and initializes a dispatch window and the
41442 list container holding a pointer to the window. */
41444 static dispatch_windows
*
41445 allocate_window (void)
41447 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
41448 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
41453 /* This routine initializes the dispatch scheduling information. It
41454 initiates building dispatch scheduler tables and constructs the
41455 first dispatch window. */
41458 init_dispatch_sched (void)
41460 /* Allocate a dispatch list and a window. */
41461 dispatch_window_list
= allocate_window ();
41462 dispatch_window_list1
= allocate_window ();
41467 /* This function returns true if a branch is detected. End of a basic block
41468 does not have to be a branch, but here we assume only branches end a
41472 is_end_basic_block (enum dispatch_group group
)
41474 return group
== disp_branch
;
41477 /* This function is called when the end of a window processing is reached. */
41480 process_end_window (void)
41482 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
41483 if (dispatch_window_list
->next
)
41485 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
41486 gcc_assert (dispatch_window_list
->window_size
41487 + dispatch_window_list1
->window_size
<= 48);
41493 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41494 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41495 for 48 bytes of instructions. Note that these windows are not dispatch
41496 windows that their sizes are DISPATCH_WINDOW_SIZE. */
41498 static dispatch_windows
*
41499 allocate_next_window (int window_num
)
41501 if (window_num
== 0)
41503 if (dispatch_window_list
->next
)
41506 return dispatch_window_list
;
41509 dispatch_window_list
->next
= dispatch_window_list1
;
41510 dispatch_window_list1
->prev
= dispatch_window_list
;
41512 return dispatch_window_list1
;
41515 /* Increment the number of immediate operands of an instruction. */
41518 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
41523 switch ( GET_CODE (*in_rtx
))
41528 (imm_values
->imm
)++;
41529 if (x86_64_immediate_operand (*in_rtx
, SImode
))
41530 (imm_values
->imm32
)++;
41532 (imm_values
->imm64
)++;
41536 (imm_values
->imm
)++;
41537 (imm_values
->imm64
)++;
41541 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
41543 (imm_values
->imm
)++;
41544 (imm_values
->imm32
)++;
41555 /* Compute number of immediate operands of an instruction. */
41558 find_constant (rtx in_rtx
, imm_info
*imm_values
)
41560 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
41561 (rtx_function
) find_constant_1
, (void *) imm_values
);
41564 /* Return total size of immediate operands of an instruction along with number
41565 of corresponding immediate-operands. It initializes its parameters to zero
41566 befor calling FIND_CONSTANT.
41567 INSN is the input instruction. IMM is the total of immediates.
41568 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41572 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
41574 imm_info imm_values
= {0, 0, 0};
41576 find_constant (insn
, &imm_values
);
41577 *imm
= imm_values
.imm
;
41578 *imm32
= imm_values
.imm32
;
41579 *imm64
= imm_values
.imm64
;
41580 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
41583 /* This function indicates if an operand of an instruction is an
41587 has_immediate (rtx insn
)
41589 int num_imm_operand
;
41590 int num_imm32_operand
;
41591 int num_imm64_operand
;
41594 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41595 &num_imm64_operand
);
41599 /* Return single or double path for instructions. */
41601 static enum insn_path
41602 get_insn_path (rtx insn
)
41604 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
41606 if ((int)path
== 0)
41607 return path_single
;
41609 if ((int)path
== 1)
41610 return path_double
;
41615 /* Return insn dispatch group. */
41617 static enum dispatch_group
41618 get_insn_group (rtx insn
)
41620 enum dispatch_group group
= get_mem_group (insn
);
41624 if (is_branch (insn
))
41625 return disp_branch
;
41630 if (has_immediate (insn
))
41633 if (is_prefetch (insn
))
41634 return disp_prefetch
;
41636 return disp_no_group
;
41639 /* Count number of GROUP restricted instructions in a dispatch
41640 window WINDOW_LIST. */
41643 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
41645 enum dispatch_group group
= get_insn_group (insn
);
41647 int num_imm_operand
;
41648 int num_imm32_operand
;
41649 int num_imm64_operand
;
41651 if (group
== disp_no_group
)
41654 if (group
== disp_imm
)
41656 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41657 &num_imm64_operand
);
41658 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
41659 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
41660 || (num_imm32_operand
> 0
41661 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
41662 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
41663 || (num_imm64_operand
> 0
41664 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
41665 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
41666 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
41667 && num_imm64_operand
> 0
41668 && ((window_list
->num_imm_64
> 0
41669 && window_list
->num_insn
>= 2)
41670 || window_list
->num_insn
>= 3)))
41676 if ((group
== disp_load_store
41677 && (window_list
->num_loads
>= MAX_LOAD
41678 || window_list
->num_stores
>= MAX_STORE
))
41679 || ((group
== disp_load
41680 || group
== disp_prefetch
)
41681 && window_list
->num_loads
>= MAX_LOAD
)
41682 || (group
== disp_store
41683 && window_list
->num_stores
>= MAX_STORE
))
41689 /* This function returns true if insn satisfies dispatch rules on the
41690 last window scheduled. */
41693 fits_dispatch_window (rtx insn
)
41695 dispatch_windows
*window_list
= dispatch_window_list
;
41696 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
41697 unsigned int num_restrict
;
41698 enum dispatch_group group
= get_insn_group (insn
);
41699 enum insn_path path
= get_insn_path (insn
);
41702 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
41703 instructions should be given the lowest priority in the
41704 scheduling process in Haifa scheduler to make sure they will be
41705 scheduled in the same dispatch window as the reference to them. */
41706 if (group
== disp_jcc
|| group
== disp_cmp
)
41709 /* Check nonrestricted. */
41710 if (group
== disp_no_group
|| group
== disp_branch
)
41713 /* Get last dispatch window. */
41714 if (window_list_next
)
41715 window_list
= window_list_next
;
41717 if (window_list
->window_num
== 1)
41719 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
41722 || (min_insn_size (insn
) + sum
) >= 48)
41723 /* Window 1 is full. Go for next window. */
41727 num_restrict
= count_num_restricted (insn
, window_list
);
41729 if (num_restrict
> num_allowable_groups
[group
])
41732 /* See if it fits in the first window. */
41733 if (window_list
->window_num
== 0)
41735 /* The first widow should have only single and double path
41737 if (path
== path_double
41738 && (window_list
->num_uops
+ 2) > MAX_INSN
)
41740 else if (path
!= path_single
)
41746 /* Add an instruction INSN with NUM_UOPS micro-operations to the
41747 dispatch window WINDOW_LIST. */
41750 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
41752 int byte_len
= min_insn_size (insn
);
41753 int num_insn
= window_list
->num_insn
;
41755 sched_insn_info
*window
= window_list
->window
;
41756 enum dispatch_group group
= get_insn_group (insn
);
41757 enum insn_path path
= get_insn_path (insn
);
41758 int num_imm_operand
;
41759 int num_imm32_operand
;
41760 int num_imm64_operand
;
41762 if (!window_list
->violation
&& group
!= disp_cmp
41763 && !fits_dispatch_window (insn
))
41764 window_list
->violation
= true;
41766 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41767 &num_imm64_operand
);
41769 /* Initialize window with new instruction. */
41770 window
[num_insn
].insn
= insn
;
41771 window
[num_insn
].byte_len
= byte_len
;
41772 window
[num_insn
].group
= group
;
41773 window
[num_insn
].path
= path
;
41774 window
[num_insn
].imm_bytes
= imm_size
;
41776 window_list
->window_size
+= byte_len
;
41777 window_list
->num_insn
= num_insn
+ 1;
41778 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
41779 window_list
->imm_size
+= imm_size
;
41780 window_list
->num_imm
+= num_imm_operand
;
41781 window_list
->num_imm_32
+= num_imm32_operand
;
41782 window_list
->num_imm_64
+= num_imm64_operand
;
41784 if (group
== disp_store
)
41785 window_list
->num_stores
+= 1;
41786 else if (group
== disp_load
41787 || group
== disp_prefetch
)
41788 window_list
->num_loads
+= 1;
41789 else if (group
== disp_load_store
)
41791 window_list
->num_stores
+= 1;
41792 window_list
->num_loads
+= 1;
41796 /* Adds a scheduled instruction, INSN, to the current dispatch window.
41797 If the total bytes of instructions or the number of instructions in
41798 the window exceed allowable, it allocates a new window. */
41801 add_to_dispatch_window (rtx insn
)
41804 dispatch_windows
*window_list
;
41805 dispatch_windows
*next_list
;
41806 dispatch_windows
*window0_list
;
41807 enum insn_path path
;
41808 enum dispatch_group insn_group
;
41816 if (INSN_CODE (insn
) < 0)
41819 byte_len
= min_insn_size (insn
);
41820 window_list
= dispatch_window_list
;
41821 next_list
= window_list
->next
;
41822 path
= get_insn_path (insn
);
41823 insn_group
= get_insn_group (insn
);
41825 /* Get the last dispatch window. */
41827 window_list
= dispatch_window_list
->next
;
41829 if (path
== path_single
)
41831 else if (path
== path_double
)
41834 insn_num_uops
= (int) path
;
41836 /* If current window is full, get a new window.
41837 Window number zero is full, if MAX_INSN uops are scheduled in it.
41838 Window number one is full, if window zero's bytes plus window
41839 one's bytes is 32, or if the bytes of the new instruction added
41840 to the total makes it greater than 48, or it has already MAX_INSN
41841 instructions in it. */
41842 num_insn
= window_list
->num_insn
;
41843 num_uops
= window_list
->num_uops
;
41844 window_num
= window_list
->window_num
;
41845 insn_fits
= fits_dispatch_window (insn
);
41847 if (num_insn
>= MAX_INSN
41848 || num_uops
+ insn_num_uops
> MAX_INSN
41851 window_num
= ~window_num
& 1;
41852 window_list
= allocate_next_window (window_num
);
41855 if (window_num
== 0)
41857 add_insn_window (insn
, window_list
, insn_num_uops
);
41858 if (window_list
->num_insn
>= MAX_INSN
41859 && insn_group
== disp_branch
)
41861 process_end_window ();
41865 else if (window_num
== 1)
41867 window0_list
= window_list
->prev
;
41868 sum
= window0_list
->window_size
+ window_list
->window_size
;
41870 || (byte_len
+ sum
) >= 48)
41872 process_end_window ();
41873 window_list
= dispatch_window_list
;
41876 add_insn_window (insn
, window_list
, insn_num_uops
);
41879 gcc_unreachable ();
41881 if (is_end_basic_block (insn_group
))
41883 /* End of basic block is reached do end-basic-block process. */
41884 process_end_window ();
41889 /* Print the dispatch window, WINDOW_NUM, to FILE. */
41891 DEBUG_FUNCTION
static void
41892 debug_dispatch_window_file (FILE *file
, int window_num
)
41894 dispatch_windows
*list
;
41897 if (window_num
== 0)
41898 list
= dispatch_window_list
;
41900 list
= dispatch_window_list1
;
41902 fprintf (file
, "Window #%d:\n", list
->window_num
);
41903 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
41904 list
->num_insn
, list
->num_uops
, list
->window_size
);
41905 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41906 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
41908 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
41910 fprintf (file
, " insn info:\n");
41912 for (i
= 0; i
< MAX_INSN
; i
++)
41914 if (!list
->window
[i
].insn
)
41916 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
41917 i
, group_name
[list
->window
[i
].group
],
41918 i
, (void *)list
->window
[i
].insn
,
41919 i
, list
->window
[i
].path
,
41920 i
, list
->window
[i
].byte_len
,
41921 i
, list
->window
[i
].imm_bytes
);
41925 /* Print to stdout a dispatch window. */
41927 DEBUG_FUNCTION
void
41928 debug_dispatch_window (int window_num
)
41930 debug_dispatch_window_file (stdout
, window_num
);
41933 /* Print INSN dispatch information to FILE. */
41935 DEBUG_FUNCTION
static void
41936 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
41939 enum insn_path path
;
41940 enum dispatch_group group
;
41942 int num_imm_operand
;
41943 int num_imm32_operand
;
41944 int num_imm64_operand
;
41946 if (INSN_CODE (insn
) < 0)
41949 byte_len
= min_insn_size (insn
);
41950 path
= get_insn_path (insn
);
41951 group
= get_insn_group (insn
);
41952 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
41953 &num_imm64_operand
);
41955 fprintf (file
, " insn info:\n");
41956 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
41957 group_name
[group
], path
, byte_len
);
41958 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
41959 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
41962 /* Print to STDERR the status of the ready list with respect to
41963 dispatch windows. */
41965 DEBUG_FUNCTION
void
41966 debug_ready_dispatch (void)
41969 int no_ready
= number_in_ready ();
41971 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
41973 for (i
= 0; i
< no_ready
; i
++)
41974 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
41977 /* This routine is the driver of the dispatch scheduler. */
41980 do_dispatch (rtx insn
, int mode
)
41982 if (mode
== DISPATCH_INIT
)
41983 init_dispatch_sched ();
41984 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
41985 add_to_dispatch_window (insn
);
41988 /* Return TRUE if Dispatch Scheduling is supported. */
41991 has_dispatch (rtx insn
, int action
)
41993 if ((TARGET_BDVER1
|| TARGET_BDVER2
|| TARGET_BDVER3
)
41994 && flag_dispatch_scheduler
)
42000 case IS_DISPATCH_ON
:
42005 return is_cmp (insn
);
42007 case DISPATCH_VIOLATION
:
42008 return dispatch_violation ();
42010 case FITS_DISPATCH_WINDOW
:
42011 return fits_dispatch_window (insn
);
42017 /* Implementation of reassociation_width target hook used by
42018 reassoc phase to identify parallelism level in reassociated
42019 tree. Statements tree_code is passed in OPC. Arguments type
42022 Currently parallel reassociation is enabled for Atom
42023 processors only and we set reassociation width to be 2
42024 because Atom may issue up to 2 instructions per cycle.
42026 Return value should be fixed if parallel reassociation is
42027 enabled for other processors. */
42030 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
42031 enum machine_mode mode
)
42035 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
42037 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
42043 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42044 place emms and femms instructions. */
42046 static enum machine_mode
42047 ix86_preferred_simd_mode (enum machine_mode mode
)
42055 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
42057 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
42059 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
42061 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
42064 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42070 if (!TARGET_VECTORIZE_DOUBLE
)
42072 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
42074 else if (TARGET_SSE2
)
42083 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
42086 static unsigned int
42087 ix86_autovectorize_vector_sizes (void)
42089 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
42094 /* Return class of registers which could be used for pseudo of MODE
42095 and of class RCLASS for spilling instead of memory. Return NO_REGS
42096 if it is not possible or non-profitable. */
42098 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
42100 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
42101 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
42102 && INTEGER_CLASS_P (rclass
))
42107 /* Implement targetm.vectorize.init_cost. */
42110 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
42112 unsigned *cost
= XNEWVEC (unsigned, 3);
42113 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
42117 /* Implement targetm.vectorize.add_stmt_cost. */
42120 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
42121 struct _stmt_vec_info
*stmt_info
, int misalign
,
42122 enum vect_cost_model_location where
)
42124 unsigned *cost
= (unsigned *) data
;
42125 unsigned retval
= 0;
42127 if (flag_vect_cost_model
)
42129 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
42130 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
42132 /* Statements in an inner loop relative to the loop being
42133 vectorized are weighted more heavily. The value here is
42134 arbitrary and could potentially be improved with analysis. */
42135 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
42136 count
*= 50; /* FIXME. */
42138 retval
= (unsigned) (count
* stmt_cost
);
42139 cost
[where
] += retval
;
42145 /* Implement targetm.vectorize.finish_cost. */
42148 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
42149 unsigned *body_cost
, unsigned *epilogue_cost
)
42151 unsigned *cost
= (unsigned *) data
;
42152 *prologue_cost
= cost
[vect_prologue
];
42153 *body_cost
= cost
[vect_body
];
42154 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Releases the cost
   array allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
42165 /* Validate target specific memory model bits in VAL. */
42167 static unsigned HOST_WIDE_INT
42168 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
42170 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
42173 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
42175 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
42177 warning (OPT_Winvalid_memory_model
,
42178 "Unknown architecture specific memory model");
42179 return MEMMODEL_SEQ_CST
;
42181 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
42182 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
42184 warning (OPT_Winvalid_memory_model
,
42185 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42186 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
42188 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
42190 warning (OPT_Winvalid_memory_model
,
42191 "HLE_RELEASE not used with RELEASE or stronger memory model");
42192 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
42197 /* Initialize the GCC target structure. */
42198 #undef TARGET_RETURN_IN_MEMORY
42199 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42201 #undef TARGET_LEGITIMIZE_ADDRESS
42202 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42204 #undef TARGET_ATTRIBUTE_TABLE
42205 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42206 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42207 # undef TARGET_MERGE_DECL_ATTRIBUTES
42208 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42211 #undef TARGET_COMP_TYPE_ATTRIBUTES
42212 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42214 #undef TARGET_INIT_BUILTINS
42215 #define TARGET_INIT_BUILTINS ix86_init_builtins
42216 #undef TARGET_BUILTIN_DECL
42217 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42218 #undef TARGET_EXPAND_BUILTIN
42219 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42221 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42222 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42223 ix86_builtin_vectorized_function
42225 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42226 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42228 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42229 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42231 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42232 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42234 #undef TARGET_BUILTIN_RECIPROCAL
42235 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42237 #undef TARGET_ASM_FUNCTION_EPILOGUE
42238 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42240 #undef TARGET_ENCODE_SECTION_INFO
42241 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42242 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42244 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42247 #undef TARGET_ASM_OPEN_PAREN
42248 #define TARGET_ASM_OPEN_PAREN ""
42249 #undef TARGET_ASM_CLOSE_PAREN
42250 #define TARGET_ASM_CLOSE_PAREN ""
42252 #undef TARGET_ASM_BYTE_OP
42253 #define TARGET_ASM_BYTE_OP ASM_BYTE
42255 #undef TARGET_ASM_ALIGNED_HI_OP
42256 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42257 #undef TARGET_ASM_ALIGNED_SI_OP
42258 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42260 #undef TARGET_ASM_ALIGNED_DI_OP
42261 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42264 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42265 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42267 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42268 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42270 #undef TARGET_ASM_UNALIGNED_HI_OP
42271 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42272 #undef TARGET_ASM_UNALIGNED_SI_OP
42273 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42274 #undef TARGET_ASM_UNALIGNED_DI_OP
42275 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42277 #undef TARGET_PRINT_OPERAND
42278 #define TARGET_PRINT_OPERAND ix86_print_operand
42279 #undef TARGET_PRINT_OPERAND_ADDRESS
42280 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42281 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42282 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42283 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42284 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42286 #undef TARGET_SCHED_INIT_GLOBAL
42287 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42288 #undef TARGET_SCHED_ADJUST_COST
42289 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42290 #undef TARGET_SCHED_ISSUE_RATE
42291 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42292 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42293 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42294 ia32_multipass_dfa_lookahead
42296 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42297 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42299 #undef TARGET_MEMMODEL_CHECK
42300 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42303 #undef TARGET_HAVE_TLS
42304 #define TARGET_HAVE_TLS true
42306 #undef TARGET_CANNOT_FORCE_CONST_MEM
42307 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42308 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42309 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42311 #undef TARGET_DELEGITIMIZE_ADDRESS
42312 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42314 #undef TARGET_MS_BITFIELD_LAYOUT_P
42315 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42318 #undef TARGET_BINDS_LOCAL_P
42319 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42321 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42322 #undef TARGET_BINDS_LOCAL_P
42323 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42326 #undef TARGET_ASM_OUTPUT_MI_THUNK
42327 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42328 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42329 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42331 #undef TARGET_ASM_FILE_START
42332 #define TARGET_ASM_FILE_START x86_file_start
42334 #undef TARGET_OPTION_OVERRIDE
42335 #define TARGET_OPTION_OVERRIDE ix86_option_override
42337 #undef TARGET_REGISTER_MOVE_COST
42338 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42339 #undef TARGET_MEMORY_MOVE_COST
42340 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42341 #undef TARGET_RTX_COSTS
42342 #define TARGET_RTX_COSTS ix86_rtx_costs
42343 #undef TARGET_ADDRESS_COST
42344 #define TARGET_ADDRESS_COST ix86_address_cost
42346 #undef TARGET_FIXED_CONDITION_CODE_REGS
42347 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42348 #undef TARGET_CC_MODES_COMPATIBLE
42349 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42351 #undef TARGET_MACHINE_DEPENDENT_REORG
42352 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42354 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42355 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42357 #undef TARGET_BUILD_BUILTIN_VA_LIST
42358 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42360 #undef TARGET_FOLD_BUILTIN
42361 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42363 #undef TARGET_COMPARE_VERSION_PRIORITY
42364 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42366 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42367 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42368 ix86_generate_version_dispatcher_body
42370 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42371 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42372 ix86_get_function_versions_dispatcher
42374 #undef TARGET_ENUM_VA_LIST_P
42375 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42377 #undef TARGET_FN_ABI_VA_LIST
42378 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42380 #undef TARGET_CANONICAL_VA_LIST_TYPE
42381 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42383 #undef TARGET_EXPAND_BUILTIN_VA_START
42384 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42386 #undef TARGET_MD_ASM_CLOBBERS
42387 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42389 #undef TARGET_PROMOTE_PROTOTYPES
42390 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42391 #undef TARGET_STRUCT_VALUE_RTX
42392 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42393 #undef TARGET_SETUP_INCOMING_VARARGS
42394 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42395 #undef TARGET_MUST_PASS_IN_STACK
42396 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42397 #undef TARGET_FUNCTION_ARG_ADVANCE
42398 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42399 #undef TARGET_FUNCTION_ARG
42400 #define TARGET_FUNCTION_ARG ix86_function_arg
42401 #undef TARGET_FUNCTION_ARG_BOUNDARY
42402 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42403 #undef TARGET_PASS_BY_REFERENCE
42404 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42405 #undef TARGET_INTERNAL_ARG_POINTER
42406 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42407 #undef TARGET_UPDATE_STACK_BOUNDARY
42408 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42409 #undef TARGET_GET_DRAP_RTX
42410 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42411 #undef TARGET_STRICT_ARGUMENT_NAMING
42412 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42413 #undef TARGET_STATIC_CHAIN
42414 #define TARGET_STATIC_CHAIN ix86_static_chain
42415 #undef TARGET_TRAMPOLINE_INIT
42416 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42417 #undef TARGET_RETURN_POPS_ARGS
42418 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42420 #undef TARGET_LEGITIMATE_COMBINED_INSN
42421 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42423 #undef TARGET_ASAN_SHADOW_OFFSET
42424 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42426 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42427 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42429 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42430 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42432 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42433 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42435 #undef TARGET_C_MODE_FOR_SUFFIX
42436 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42439 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42440 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42443 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42444 #undef TARGET_INSERT_ATTRIBUTES
42445 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42448 #undef TARGET_MANGLE_TYPE
42449 #define TARGET_MANGLE_TYPE ix86_mangle_type
42452 #undef TARGET_STACK_PROTECT_FAIL
42453 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42456 #undef TARGET_FUNCTION_VALUE
42457 #define TARGET_FUNCTION_VALUE ix86_function_value
42459 #undef TARGET_FUNCTION_VALUE_REGNO_P
42460 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42462 #undef TARGET_PROMOTE_FUNCTION_MODE
42463 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42465 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42466 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42468 #undef TARGET_INSTANTIATE_DECLS
42469 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42471 #undef TARGET_SECONDARY_RELOAD
42472 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42474 #undef TARGET_CLASS_MAX_NREGS
42475 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42477 #undef TARGET_PREFERRED_RELOAD_CLASS
42478 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42479 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42480 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42481 #undef TARGET_CLASS_LIKELY_SPILLED_P
42482 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42484 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42485 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42486 ix86_builtin_vectorization_cost
42487 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42488 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42489 ix86_vectorize_vec_perm_const_ok
42490 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42491 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42492 ix86_preferred_simd_mode
42493 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42494 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42495 ix86_autovectorize_vector_sizes
42496 #undef TARGET_VECTORIZE_INIT_COST
42497 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42498 #undef TARGET_VECTORIZE_ADD_STMT_COST
42499 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42500 #undef TARGET_VECTORIZE_FINISH_COST
42501 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42502 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42503 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42505 #undef TARGET_SET_CURRENT_FUNCTION
42506 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42508 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42509 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42511 #undef TARGET_OPTION_SAVE
42512 #define TARGET_OPTION_SAVE ix86_function_specific_save
42514 #undef TARGET_OPTION_RESTORE
42515 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42517 #undef TARGET_OPTION_PRINT
42518 #define TARGET_OPTION_PRINT ix86_function_specific_print
42520 #undef TARGET_OPTION_FUNCTION_VERSIONS
42521 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42523 #undef TARGET_CAN_INLINE_P
42524 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42526 #undef TARGET_EXPAND_TO_RTL_HOOK
42527 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42529 #undef TARGET_LEGITIMATE_ADDRESS_P
42530 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42532 #undef TARGET_LRA_P
42533 #define TARGET_LRA_P hook_bool_void_true
42535 #undef TARGET_REGISTER_PRIORITY
42536 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42538 #undef TARGET_LEGITIMATE_CONSTANT_P
42539 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42541 #undef TARGET_FRAME_POINTER_REQUIRED
42542 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42544 #undef TARGET_CAN_ELIMINATE
42545 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42547 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42548 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42550 #undef TARGET_ASM_CODE_END
42551 #define TARGET_ASM_CODE_END ix86_code_end
42553 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42554 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42557 #undef TARGET_INIT_LIBFUNCS
42558 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42561 #undef TARGET_SPILL_CLASS
42562 #define TARGET_SPILL_CLASS ix86_spill_class
42564 struct gcc_target targetm
= TARGET_INITIALIZER
;
42566 #include "gt-i386.h"