/* Scheduler hooks for IA-32 which implement CPU specific logic.
   Copyright (C) 1988-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "tm_p.h"
#include "target.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "recog.h"
35 /* Return the maximum number of instructions a cpu can issue. */
38 ix86_issue_rate (void)
42 case PROCESSOR_PENTIUM
:
43 case PROCESSOR_LAKEMONT
:
44 case PROCESSOR_BONNELL
:
45 case PROCESSOR_SILVERMONT
:
50 case PROCESSOR_BTVER2
:
51 case PROCESSOR_PENTIUM4
:
52 case PROCESSOR_NOCONA
:
55 case PROCESSOR_PENTIUMPRO
:
56 case PROCESSOR_ATHLON
:
58 case PROCESSOR_AMDFAM10
:
59 case PROCESSOR_BTVER1
:
62 case PROCESSOR_BDVER1
:
63 case PROCESSOR_BDVER2
:
64 case PROCESSOR_BDVER3
:
65 case PROCESSOR_BDVER4
:
66 case PROCESSOR_ZNVER1
:
68 case PROCESSOR_NEHALEM
:
69 case PROCESSOR_SANDYBRIDGE
:
70 case PROCESSOR_HASWELL
:
71 case PROCESSOR_GENERIC
:
79 /* Return true iff USE_INSN has a memory address with operands set by
83 ix86_agi_dependent (rtx_insn
*set_insn
, rtx_insn
*use_insn
)
86 extract_insn_cached (use_insn
);
87 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
88 if (MEM_P (recog_data
.operand
[i
]))
90 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
91 if (modified_in_p (addr
, set_insn
) != 0)
93 /* No AGI stall if SET_INSN is a push or pop and USE_INSN
94 has SP based memory (unless index reg is modified in a pop). */
95 rtx set
= single_set (set_insn
);
97 && (push_operand (SET_DEST (set
), GET_MODE (SET_DEST (set
)))
98 || pop_operand (SET_SRC (set
), GET_MODE (SET_SRC (set
)))))
100 struct ix86_address parts
;
101 if (ix86_decompose_address (addr
, &parts
)
102 && parts
.base
== stack_pointer_rtx
103 && (parts
.index
== NULL_RTX
104 || MEM_P (SET_DEST (set
))
105 || !modified_in_p (parts
.index
, set_insn
)))
115 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
116 by DEP_INSN and nothing set by DEP_INSN. */
119 ix86_flags_dependent (rtx_insn
*insn
, rtx_insn
*dep_insn
, enum attr_type insn_type
)
123 /* Simplify the test for uninteresting insns. */
124 if (insn_type
!= TYPE_SETCC
125 && insn_type
!= TYPE_ICMOV
126 && insn_type
!= TYPE_FCMOV
127 && insn_type
!= TYPE_IBR
)
130 if ((set
= single_set (dep_insn
)) != 0)
132 set
= SET_DEST (set
);
135 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
136 && XVECLEN (PATTERN (dep_insn
), 0) == 2
137 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
138 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
140 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
141 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
146 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
149 /* This test is true if the dependent insn reads the flags but
150 not any other potentially set register. */
151 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
154 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
160 /* Helper function for exact_store_load_dependency.
161 Return true if addr is found in insn. */
163 exact_dependency_1 (rtx addr
, rtx insn
)
166 const char *format_ptr
;
169 code
= GET_CODE (insn
);
173 if (rtx_equal_p (addr
, insn
))
188 format_ptr
= GET_RTX_FORMAT (code
);
189 for (i
= 0; i
< GET_RTX_LENGTH (code
); i
++)
191 switch (*format_ptr
++)
194 if (exact_dependency_1 (addr
, XEXP (insn
, i
)))
198 for (j
= 0; j
< XVECLEN (insn
, i
); j
++)
199 if (exact_dependency_1 (addr
, XVECEXP (insn
, i
, j
)))
207 /* Return true if there exists exact dependency for store & load, i.e.
208 the same memory address is used in them. */
210 exact_store_load_dependency (rtx_insn
*store
, rtx_insn
*load
)
214 set1
= single_set (store
);
217 if (!MEM_P (SET_DEST (set1
)))
219 set2
= single_set (load
);
222 if (exact_dependency_1 (SET_DEST (set1
), SET_SRC (set2
)))
228 /* This function corrects the value of COST (latency) based on the relationship
229 between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
230 DW. It should return the new value.
232 On x86 CPUs this is most commonly used to model the fact that valus of
233 registers used to compute address of memory operand needs to be ready
234 earlier than values of registers used in the actual operation. */
237 ix86_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
, int cost
,
240 enum attr_type insn_type
, dep_insn_type
;
241 enum attr_memory memory
;
243 int dep_insn_code_number
;
245 /* Anti and output dependencies have zero cost on all CPUs. */
249 dep_insn_code_number
= recog_memoized (dep_insn
);
251 /* If we can't recognize the insns, we can't really do anything. */
252 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
255 insn_type
= get_attr_type (insn
);
256 dep_insn_type
= get_attr_type (dep_insn
);
260 case PROCESSOR_PENTIUM
:
261 case PROCESSOR_LAKEMONT
:
262 /* Address Generation Interlock adds a cycle of latency. */
263 if (insn_type
== TYPE_LEA
)
265 rtx addr
= PATTERN (insn
);
267 if (GET_CODE (addr
) == PARALLEL
)
268 addr
= XVECEXP (addr
, 0, 0);
270 gcc_assert (GET_CODE (addr
) == SET
);
272 addr
= SET_SRC (addr
);
273 if (modified_in_p (addr
, dep_insn
))
276 else if (ix86_agi_dependent (dep_insn
, insn
))
279 /* ??? Compares pair with jump/setcc. */
280 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
283 /* Floating point stores require value to be ready one cycle earlier. */
284 if (insn_type
== TYPE_FMOV
285 && get_attr_memory (insn
) == MEMORY_STORE
286 && !ix86_agi_dependent (dep_insn
, insn
))
290 case PROCESSOR_PENTIUMPRO
:
291 /* INT->FP conversion is expensive. */
292 if (get_attr_fp_int_src (dep_insn
))
295 /* There is one cycle extra latency between an FP op and a store. */
296 if (insn_type
== TYPE_FMOV
297 && (set
= single_set (dep_insn
)) != NULL_RTX
298 && (set2
= single_set (insn
)) != NULL_RTX
299 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
300 && MEM_P (SET_DEST (set2
)))
303 memory
= get_attr_memory (insn
);
305 /* Show ability of reorder buffer to hide latency of load by executing
306 in parallel with previous instruction in case
307 previous instruction is not needed to compute the address. */
308 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
309 && !ix86_agi_dependent (dep_insn
, insn
))
311 /* Claim moves to take one cycle, as core can issue one load
312 at time and the next load can start cycle later. */
313 if (dep_insn_type
== TYPE_IMOV
314 || dep_insn_type
== TYPE_FMOV
)
322 /* The esp dependency is resolved before
323 the instruction is really finished. */
324 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
325 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
328 /* INT->FP conversion is expensive. */
329 if (get_attr_fp_int_src (dep_insn
))
332 memory
= get_attr_memory (insn
);
334 /* Show ability of reorder buffer to hide latency of load by executing
335 in parallel with previous instruction in case
336 previous instruction is not needed to compute the address. */
337 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
338 && !ix86_agi_dependent (dep_insn
, insn
))
340 /* Claim moves to take one cycle, as core can issue one load
341 at time and the next load can start cycle later. */
342 if (dep_insn_type
== TYPE_IMOV
343 || dep_insn_type
== TYPE_FMOV
)
352 case PROCESSOR_AMDFAM10
:
353 case PROCESSOR_BDVER1
:
354 case PROCESSOR_BDVER2
:
355 case PROCESSOR_BDVER3
:
356 case PROCESSOR_BDVER4
:
357 case PROCESSOR_BTVER1
:
358 case PROCESSOR_BTVER2
:
359 /* Stack engine allows to execute push&pop instructions in parall. */
360 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
361 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
365 case PROCESSOR_ATHLON
:
367 memory
= get_attr_memory (insn
);
369 /* Show ability of reorder buffer to hide latency of load by executing
370 in parallel with previous instruction in case
371 previous instruction is not needed to compute the address. */
372 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
373 && !ix86_agi_dependent (dep_insn
, insn
))
375 enum attr_unit unit
= get_attr_unit (insn
);
378 /* Because of the difference between the length of integer and
379 floating unit pipeline preparation stages, the memory operands
380 for floating point are cheaper.
382 ??? For Athlon it the difference is most probably 2. */
383 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
386 loadcost
= TARGET_ATHLON
? 2 : 0;
388 if (cost
>= loadcost
)
395 case PROCESSOR_ZNVER1
:
396 /* Stack engine allows to execute push&pop instructions in parall. */
397 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
398 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
401 memory
= get_attr_memory (insn
);
403 /* Show ability of reorder buffer to hide latency of load by executing
404 in parallel with previous instruction in case
405 previous instruction is not needed to compute the address. */
406 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
407 && !ix86_agi_dependent (dep_insn
, insn
))
409 enum attr_unit unit
= get_attr_unit (insn
);
412 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
417 if (cost
>= loadcost
)
424 case PROCESSOR_CORE2
:
425 case PROCESSOR_NEHALEM
:
426 case PROCESSOR_SANDYBRIDGE
:
427 case PROCESSOR_HASWELL
:
428 case PROCESSOR_GENERIC
:
429 /* Stack engine allows to execute push&pop instructions in parall. */
430 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
431 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
434 memory
= get_attr_memory (insn
);
436 /* Show ability of reorder buffer to hide latency of load by executing
437 in parallel with previous instruction in case
438 previous instruction is not needed to compute the address. */
439 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
440 && !ix86_agi_dependent (dep_insn
, insn
))
449 case PROCESSOR_SILVERMONT
:
452 case PROCESSOR_INTEL
:
453 if (!reload_completed
)
456 /* Increase cost of integer loads. */
457 memory
= get_attr_memory (dep_insn
);
458 if (memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
460 enum attr_unit unit
= get_attr_unit (dep_insn
);
461 if (unit
== UNIT_INTEGER
&& cost
== 1)
463 if (memory
== MEMORY_LOAD
)
467 /* Increase cost of ld/st for short int types only
468 because of store forwarding issue. */
469 rtx set
= single_set (dep_insn
);
470 if (set
&& (GET_MODE (SET_DEST (set
)) == QImode
471 || GET_MODE (SET_DEST (set
)) == HImode
))
473 /* Increase cost of store/load insn if exact
474 dependence exists and it is load insn. */
475 enum attr_memory insn_memory
= get_attr_memory (insn
);
476 if (insn_memory
== MEMORY_LOAD
477 && exact_store_load_dependency (dep_insn
, insn
))
491 /* How many alternative schedules to try. This should be as wide as the
492 scheduling freedom in the DFA, but no wider. Making this value too
493 large results extra work for the scheduler. */
496 ia32_multipass_dfa_lookahead (void)
498 /* Generally, we want haifa-sched:max_issue() to look ahead as far
499 as many instructions can be executed on a cycle, i.e.,
501 if (reload_completed
)
502 return ix86_issue_rate ();
503 /* Don't use lookahead for pre-reload schedule to save compile time. */
507 /* Return true if target platform supports macro-fusion. */
510 ix86_macro_fusion_p ()
512 return TARGET_FUSE_CMP_AND_BRANCH
;
515 /* Check whether current microarchitecture support macro fusion
516 for insn pair "CONDGEN + CONDJMP". Refer to
517 "Intel Architectures Optimization Reference Manual". */
520 ix86_macro_fusion_pair_p (rtx_insn
*condgen
, rtx_insn
*condjmp
)
524 rtx compare_set
= NULL_RTX
, test_if
, cond
;
525 rtx alu_set
= NULL_RTX
, addr
= NULL_RTX
;
527 if (!any_condjump_p (condjmp
))
530 unsigned int condreg1
, condreg2
;
532 targetm
.fixed_condition_code_regs (&condreg1
, &condreg2
);
533 cc_reg_1
= gen_rtx_REG (CCmode
, condreg1
);
534 if (!reg_referenced_p (cc_reg_1
, PATTERN (condjmp
))
536 || !modified_in_p (cc_reg_1
, condgen
))
539 if (get_attr_type (condgen
) != TYPE_TEST
540 && get_attr_type (condgen
) != TYPE_ICMP
541 && get_attr_type (condgen
) != TYPE_INCDEC
542 && get_attr_type (condgen
) != TYPE_ALU
)
545 compare_set
= single_set (condgen
);
546 if (compare_set
== NULL_RTX
547 && !TARGET_FUSE_ALU_AND_BRANCH
)
550 if (compare_set
== NULL_RTX
)
553 rtx pat
= PATTERN (condgen
);
554 for (i
= 0; i
< XVECLEN (pat
, 0); i
++)
555 if (GET_CODE (XVECEXP (pat
, 0, i
)) == SET
)
557 rtx set_src
= SET_SRC (XVECEXP (pat
, 0, i
));
558 if (GET_CODE (set_src
) == COMPARE
)
559 compare_set
= XVECEXP (pat
, 0, i
);
561 alu_set
= XVECEXP (pat
, 0, i
);
564 if (compare_set
== NULL_RTX
)
566 src
= SET_SRC (compare_set
);
567 if (GET_CODE (src
) != COMPARE
)
570 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
572 if ((MEM_P (XEXP (src
, 0))
573 && CONST_INT_P (XEXP (src
, 1)))
574 || (MEM_P (XEXP (src
, 1))
575 && CONST_INT_P (XEXP (src
, 0))))
578 /* No fusion for RIP-relative address. */
579 if (MEM_P (XEXP (src
, 0)))
580 addr
= XEXP (XEXP (src
, 0), 0);
581 else if (MEM_P (XEXP (src
, 1)))
582 addr
= XEXP (XEXP (src
, 1), 0);
586 int ok
= ix86_decompose_address (addr
, &parts
);
589 if (ix86_rip_relative_addr_p (&parts
))
593 test_if
= SET_SRC (pc_set (condjmp
));
594 cond
= XEXP (test_if
, 0);
595 ccode
= GET_CODE (cond
);
596 /* Check whether conditional jump use Sign or Overflow Flags. */
597 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
604 /* Return true for TYPE_TEST and TYPE_ICMP. */
605 if (get_attr_type (condgen
) == TYPE_TEST
606 || get_attr_type (condgen
) == TYPE_ICMP
)
609 /* The following is the case that macro-fusion for alu + jmp. */
610 if (!TARGET_FUSE_ALU_AND_BRANCH
|| !alu_set
)
613 /* No fusion for alu op with memory destination operand. */
614 dest
= SET_DEST (alu_set
);
618 /* Macro-fusion for inc/dec + unsigned conditional jump is not
620 if (get_attr_type (condgen
) == TYPE_INCDEC